<a href="https://colab.research.google.com/github/jmsarmiento11/csc612m-rabin-karp-CUDA/blob/main/Rabin_Karp_Algorithm_%5BFinal%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Rabin-Karp C Program (compiled with g++)

In [22]:
%%writefile c_rabinKarp.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <limits.h>
#include <time.h>

const int BASE = 256;

// Computes base^exponent by binary (square-and-multiply) exponentiation.
//
// The running square lives in an unsigned 64-bit local instead of the `int`
// parameter. Squaring the int overflowed (undefined behaviour) as soon as the
// square exceeded INT_MAX -- 256 -> 65536 -> overflow -- so customPow(256, 4)
// did not return 2^32. Unsigned arithmetic wraps, so the result is
// base^exponent modulo 2^64.
//
// An exponent <= 0 yields 1.
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    unsigned long long int square = (unsigned long long int)base;
    while (exponent > 0) {
        // Multiply in the current square whenever the matching bit is set.
        if (exponent & 1) {
            result *= square;
        }
        square *= square;
        exponent >>= 1;
    }
    return result;
}

// Polynomial hash of str[start..end] (both bounds inclusive), evaluated with
// Horner's rule in radix BASE and reduced modulo INT_MAX after every step.
// An empty range (end < start) hashes to 0.
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int acc = 0;
    int pos = start;
    while (pos <= end) {
        acc = acc * BASE + str[pos];
        acc %= INT_MAX;
        ++pos;
    }
    return acc;
}

// Reports every occurrence of each pattern in `text`.
//
// For each of the `numPatterns` entries of `patterns`, the hash of every
// pattern-length window of the text is compared with the pattern's hash. On
// equal hashes the window is confirmed character by character, and each
// confirmed match is printed with its 0-based offset. A pattern with no match
// gets a "not found" line. Finally the CPU time consumed by the whole search
// (measured with clock()) is printed in microseconds.
void rabinKarpSearch(const char* text, const char* patterns[], int numPatterns) {
    const int textLen = strlen(text);

    const clock_t startTicks = clock();

    for (int p = 0; p < numPatterns; ++p) {
        const char* needle = patterns[p];
        const int needleLen = strlen(needle);
        const unsigned long long int needleHash = calculateHash(needle, 0, needleLen - 1);
        const int lastStart = textLen - needleLen;

        bool seen = false;

        for (int pos = 0; pos <= lastStart; ++pos) {
            // Cheap filter first: a window with a different hash cannot match.
            if (calculateHash(text, pos, pos + needleLen - 1) != needleHash) {
                continue;
            }

            // Equal hashes may still be a collision, so verify the characters.
            int k = 0;
            while (k < needleLen && text[pos + k] == needle[k]) {
                ++k;
            }
            if (k != needleLen) {
                continue;
            }

            printf("Pattern \"%s\" found at position: %d\n", needle, pos);
            seen = true;
        }

        if (!seen) {
            printf("Pattern \"%s\" not found in the text.\n", needle);
        }
    }

    // Convert the elapsed clock ticks to microseconds.
    const clock_t stopTicks = clock();
    const double elapsedMicros = (double)(stopTicks - startTicks) * 1e6 / CLOCKS_PER_SEC;
    printf("Searching process is done. Elapsed time: %.6f microseconds\n", elapsedMicros);
}

// Entry point: loads the first line of "text2.txt" and searches it for a
// fixed list of patterns with rabinKarpSearch().
//
// Returns 0 on success, 1 if the file cannot be opened or no line can be read.
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline() reads a single line (up to and including the first '\n'),
    // so only the first line of the file is searched.
    ssize_t lineLength = getline(&text, &len, inputFile);
    fclose(inputFile);

    // On error or immediate EOF getline() returns -1 and `text` does not hold
    // a valid string; passing it to strlen() would be undefined behaviour.
    // The buffer may still have been allocated, so release it before exiting.
    if (lineLength == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
        "zzxzxrmkrz", "okwtlzpkbz", "bmjnqqnhab", "qigjguyjhq",
        "aeyvmbfdvd", "soixvqasok", "fukvklqejx", "frtkbkxvkt",
        "fuejlqgkdh", "smywmdwqzm", "jodpfpvlsm", "rrqwmautza"
    };
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // Call the Rabin-Karp search function to find all patterns in the text.
    rabinKarpSearch(text, patterns, numPatterns);

    free(text);
    return 0;
}


Overwriting c_rabinKarp.c


In [23]:
%%shell
g++ c_rabinKarp.c -o c_rabinKarp



In [25]:
%%shell
./c_rabinKarp

Pattern "flncufrnyd" found at position: 1499
Pattern "flncufrnyd" found at position: 19352
Pattern "flncufrnyd" found at position: 37205
Pattern "flncufrnyd" found at position: 55058
Pattern "flncufrnyd" found at position: 72911
Pattern "flncufrnyd" found at position: 90764
Pattern "flncufrnyd" found at position: 108617
Pattern "flncufrnyd" found at position: 126470
Pattern "flncufrnyd" found at position: 144323
Pattern "flncufrnyd" found at position: 162176
Pattern "flncufrnyd" found at position: 180029
Pattern "flncufrnyd" found at position: 197882
Pattern "flncufrnyd" found at position: 215735
Pattern "flncufrnyd" found at position: 233588
Pattern "flncufrnyd" found at position: 251441
Pattern "flncufrnyd" found at position: 269294
Pattern "flncufrnyd" found at position: 287147
Pattern "flncufrnyd" found at position: 305000
Pattern "flncufrnyd" found at position: 322853
Pattern "flncufrnyd" found at position: 340706
Pattern "flncufrnyd" found at position: 358559
Pattern "flncufrnyd"



Rabin-Karp CUDA V1.0

In [26]:
%%writefile rabinKarp.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const int BASE = 256;

// Computes base^exponent by binary (square-and-multiply) exponentiation.
//
// The running square is held in an unsigned 64-bit local instead of the `int`
// parameter: squaring the int overflowed (undefined behaviour) once the
// square exceeded INT_MAX, e.g. 256 -> 65536 -> overflow, so
// customPow(256, 4) did not return 2^32. The result is base^exponent modulo
// 2^64; an exponent <= 0 yields 1.
//
// Marked __host__ __device__ so it is callable from both host and device
// code; existing device callers are unaffected.
__host__ __device__
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    unsigned long long int square = (unsigned long long int)base;
    while (exponent > 0) {
        // Multiply in the current square whenever the matching bit is set.
        if (exponent & 1) {
            result *= square;
        }
        square *= square;
        exponent >>= 1;
    }
    return result;
}

// Device helper: polynomial hash of str[start..end] (inclusive bounds).
// Each step multiplies the running digest by BASE, adds the next character
// and reduces modulo INT_MAX. An empty range (end < start) hashes to 0.
__device__
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int digest = 0;
    for (int idx = start; !(idx > end); idx++) {
        const unsigned long long int shifted = digest * BASE;
        digest = (shifted + str[idx]) % INT_MAX;
    }
    return digest;
}

// Kernel: searches `text` for a single pattern.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread tests the window start
// offsets tid, tid + numThreads, tid + 2*numThreads, ... (grid-stride loop),
// so the whole text is covered for any grid size. No shared memory is used.
//
//   text, textLength       - text buffer and its length in bytes
//   pattern, patternLength - pattern buffer and its length in bytes
//   patternIndex           - index printed with each match and used to select
//                            the patternFound slot
//   patternFound           - patternFound[patternIndex] is set to true when
//                            at least one match is found
//
// Every match is printed from device code, so the order of the output lines
// is not deterministic.
//
// A thread keeps scanning after it finds a match. Previously it returned on
// its first match, which silently dropped any later offsets owned by the same
// thread and made the reported matches depend on the launch configuration.
__global__
void rabinKarpSearch(const char* text, const char* pattern, int patternIndex, int textLength, int patternLength, bool* patternFound) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    int numThreads = gridDim.x * blockDim.x;
    int n = textLength;
    int m = patternLength;

    // The pattern hash is loop-invariant, so compute it once per thread.
    unsigned long long int patternHash = calculateHash(pattern, 0, m - 1);

    // Loop over the text with grid-stride.
    for (int i = tid; i <= n - m; i += numThreads) {
        unsigned long long int currHash = calculateHash(text, i, i + m - 1);

        if (patternHash == currHash) {
            // Equal hashes may be a collision; confirm character by character.
            bool found = true;
            for (int j = 0; j < m; ++j) {
                if (text[i + j] != pattern[j]) {
                    found = false;
                    break;
                }
            }

            if (found) {
                printf("Pattern %d found at position: %d\n", patternIndex, i);
                // Several threads may store here; all of them write `true`.
                patternFound[patternIndex] = true;
            }
        }
    }
}

// Aborts the program with a diagnostic if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

// Entry point: loads the first line of "text2.txt", launches one
// rabinKarpSearch kernel per pattern and reports the patterns that were not
// found.
//
// Returns 0 on success, 1 if the input cannot be opened or read. Any CUDA
// failure terminates the process through checkCuda().
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline() reads a single line, so only the first line is searched.
    ssize_t lineLength = getline(&text, &len, inputFile);
    fclose(inputFile);

    // On error or immediate EOF `text` is not a valid string; bail out before
    // strlen() touches it. The buffer may still need to be released.
    if (lineLength == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
        "zzxzxrmkrz", "okwtlzpkbz", "bmjnqqnhab", "qigjguyjhq",
        "aeyvmbfdvd", "soixvqasok", "fukvklqejx", "frtkbkxvkt",
        "fuejlqgkdh", "smywmdwqzm", "jodpfpvlsm", "rrqwmautza"
    };
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    int textLength = strlen(text);

    // Copy the text plus its terminator into managed memory. The extra byte
    // keeps the allocation size non-zero even for an empty text.
    char* cudaText;
    checkCuda(cudaMallocManaged((void**)&cudaText, textLength + 1), "text allocation");
    memcpy(cudaText, text, textLength + 1);

    bool* patternFound;
    checkCuda(cudaMallocManaged((void**)&patternFound, numPatterns * sizeof(bool)), "result allocation");
    for (int i = 0; i < numPatterns; i++) {
        patternFound[i] = false;
    }

    // Pack all patterns back to back into ONE managed buffer, filled before
    // the first launch. This avoids an allocation and a free per pattern
    // inside the launch loop.
    size_t patternBytes = 0;
    for (int i = 0; i < numPatterns; i++) {
        patternBytes += strlen(patterns[i]);
    }
    char* cudaPatterns;
    checkCuda(cudaMallocManaged((void**)&cudaPatterns, patternBytes + 1), "pattern allocation");
    size_t offset = 0;
    for (int i = 0; i < numPatterns; i++) {
        size_t patternLength = strlen(patterns[i]);
        memcpy(cudaPatterns + offset, patterns[i], patternLength);
        offset += patternLength;
    }

    // One thread per text position, rounded up to whole blocks; at least one
    // block so that the launch configuration is always valid.
    int blockSize = 256;
    int numBlocks = (textLength + blockSize - 1) / blockSize;
    if (numBlocks < 1) {
        numBlocks = 1;
    }

    offset = 0;
    for (int i = 0; i < numPatterns; i++) {
        int patternLength = strlen(patterns[i]);
        rabinKarpSearch<<<numBlocks, blockSize>>>(cudaText, cudaPatterns + offset, i, textLength, patternLength, patternFound);
        // Launch-configuration errors are only visible through cudaGetLastError().
        checkCuda(cudaGetLastError(), "kernel launch");
        offset += patternLength;
    }

    // Wait for all kernels; in-kernel faults are reported here.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    // Print message for patterns that are not found.
    for (int i = 0; i < numPatterns; i++) {
        if (!patternFound[i]) {
            printf("Pattern %d not found.\n", i);
        }
    }

    // Free managed memory.
    checkCuda(cudaFree(cudaPatterns), "pattern free");
    checkCuda(cudaFree(cudaText), "text free");
    checkCuda(cudaFree(patternFound), "result free");

    free(text);

    return 0;
}


Writing rabinKarp.cu


In [27]:
%%shell
nvcc rabinKarp.cu -o rabinKarp






In [35]:
%%shell
nvprof ./rabinKarp

==16366== NVPROF is profiling process 16366, command: ./rabinKarp
Pattern 0 found at position: 19352
Pattern 0 found at position: 37205
Pattern 0 found at position: 1499
Pattern 0 found at position: 55058
Pattern 0 found at position: 72911
Pattern 0 found at position: 126470
Pattern 0 found at position: 108617
Pattern 0 found at position: 90764
Pattern 0 found at position: 144323
Pattern 0 found at position: 162176
Pattern 0 found at position: 197882
Pattern 0 found at position: 180029
Pattern 0 found at position: 215735
Pattern 0 found at position: 233588
Pattern 0 found at position: 251441
Pattern 0 found at position: 269294
Pattern 0 found at position: 287147
Pattern 0 found at position: 305000
Pattern 0 found at position: 322853
Pattern 0 found at position: 340706
Pattern 0 found at position: 358559
Pattern 0 found at position: 376412
Pattern 0 found at position: 412118
Pattern 0 found at position: 394265
Pattern 0 found at position: 429971
Pattern 0 found at position: 447824
Patte



Rabin-Karp CUDA V2.0 (includes the Thrust header; counts matches per pattern with atomicAdd)

In [32]:
%%writefile rabinKarp2.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <thrust/device_vector.h>

const int BASE = 256;

// Integer power: returns base^exponent (modulo 2^64) using binary
// exponentiation.
//
// The repeated squaring is done in an unsigned 64-bit local. Squaring the
// `int` parameter in place overflowed a signed int (undefined behaviour)
// after two squarings of 256, so results such as customPow(256, 4) were
// wrong. An exponent <= 0 yields 1.
//
// Marked __host__ __device__ so it is callable from both host and device
// code; existing device callers are unaffected.
__host__ __device__
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    unsigned long long int factor = (unsigned long long int)base;
    while (exponent > 0) {
        // The low bit decides whether this power of the base contributes.
        if (exponent & 1) {
            result *= factor;
        }
        factor *= factor;
        exponent >>= 1;
    }
    return result;
}

// Device helper: hashes the characters str[start] .. str[end] (inclusive).
// The running value is multiplied by BASE, the next character is added, and
// the sum is reduced modulo INT_MAX. Returns 0 when end < start.
__device__
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int running = 0;
    int cursor = start;
    while (cursor <= end) {
        running = (running * BASE + str[cursor]) % INT_MAX;
        cursor += 1;
    }
    return running;
}

// Kernel: counts and prints the occurrences of one pattern in `text`.
//
// Launch layout: 1-D grid of 1-D blocks. A thread starts at its global index
// and advances by the total number of launched threads (grid-stride loop).
// For every window whose hash equals the pattern hash and whose characters
// match, the position is printed and patternFound[patternIndex] is
// incremented atomically. No shared memory is used.
__global__
void rabinKarpSearch(const char* text, const char* pattern, int patternIndex, int textLength, int patternLength, int* patternFound) {
    const int stride = gridDim.x * blockDim.x;
    const int lastStart = textLength - patternLength;

    // Loop-invariant: hash of the pattern itself.
    const unsigned long long int wanted = calculateHash(pattern, 0, patternLength - 1);

    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    while (pos <= lastStart) {
        if (calculateHash(text, pos, pos + patternLength - 1) == wanted) {
            // Count how many leading characters agree; a full-length run is a match.
            int k = 0;
            while (k < patternLength && text[pos + k] == pattern[k]) {
                ++k;
            }

            if (k == patternLength) {
                printf("Pattern %d found at position: %d\n", patternIndex, pos);
                atomicAdd(&patternFound[patternIndex], 1);
            }
        }
        pos += stride;
    }
}

// Aborts the program with a diagnostic if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

// Entry point: loads the first line of "text2.txt", launches one
// rabinKarpSearch kernel per pattern and prints how often each pattern
// occurred.
//
// Returns 0 on success, 1 if the input cannot be opened or read. Any CUDA
// failure terminates the process through checkCuda().
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline() reads a single line, so only the first line is searched.
    ssize_t lineLength = getline(&text, &len, inputFile);
    fclose(inputFile);

    // On error or immediate EOF `text` is not a valid string; bail out before
    // it is used. The buffer may still need to be released.
    if (lineLength == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
        "zzxzxrmkrz", "okwtlzpkbz", "bmjnqqnhab", "qigjguyjhq",
        "aeyvmbfdvd", "soixvqasok", "fukvklqejx", "frtkbkxvkt",
        "fuejlqgkdh", "smywmdwqzm", "jodpfpvlsm", "rrqwmautza"
    };
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // `len` is the CAPACITY of the buffer getline() allocated, not the length
    // of the line. Using it as the text length copied and searched
    // uninitialised bytes past the terminator. Use the real string length.
    int textLength = strlen(text);

    // Copy the text plus its terminator into managed memory. The extra byte
    // keeps the allocation size non-zero even for an empty text.
    char* cudaText;
    checkCuda(cudaMallocManaged((void**)&cudaText, textLength + 1), "text allocation");
    memcpy(cudaText, text, textLength + 1);

    int* patternFound;
    checkCuda(cudaMallocManaged((void**)&patternFound, numPatterns * sizeof(int)), "result allocation");
    for (int i = 0; i < numPatterns; i++) {
        patternFound[i] = 0;
    }

    // Pack all patterns back to back into ONE managed buffer that is filled
    // before the first launch and freed after the final synchronisation.
    // The per-pattern buffers used previously were never freed, and they
    // were written by the host while earlier kernels could still be running,
    // which devices without concurrent managed access do not permit.
    size_t patternBytes = 0;
    for (int i = 0; i < numPatterns; i++) {
        patternBytes += strlen(patterns[i]);
    }
    char* cudaPatterns;
    checkCuda(cudaMallocManaged((void**)&cudaPatterns, patternBytes + 1), "pattern allocation");
    size_t offset = 0;
    for (int i = 0; i < numPatterns; i++) {
        size_t patternLength = strlen(patterns[i]);
        memcpy(cudaPatterns + offset, patterns[i], patternLength);
        offset += patternLength;
    }

    // One thread per text position, rounded up to whole blocks; at least one
    // block so that the launch configuration is always valid.
    int blockSize = 256;
    int numBlocks = (textLength + blockSize - 1) / blockSize;
    if (numBlocks < 1) {
        numBlocks = 1;
    }

    offset = 0;
    for (int i = 0; i < numPatterns; i++) {
        int patternLength = strlen(patterns[i]);
        rabinKarpSearch<<<numBlocks, blockSize>>>(cudaText, cudaPatterns + offset, i, textLength, patternLength, patternFound);
        // Launch-configuration errors are only visible through cudaGetLastError().
        checkCuda(cudaGetLastError(), "kernel launch");
        offset += patternLength;
    }

    // Wait for all kernels before the host reads the counters; in-kernel
    // faults are reported here.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    for (int i = 0; i < numPatterns; i++) {
        printf("Pattern %d found %d times.\n", i, patternFound[i]);
    }

    checkCuda(cudaFree(cudaPatterns), "pattern free");
    checkCuda(cudaFree(cudaText), "text free");
    checkCuda(cudaFree(patternFound), "result free");

    free(text);

    return 0;
}


Overwriting rabinKarp2.cu


In [33]:
%%shell
nvcc rabinKarp2.cu -o rabinKarp2






In [36]:
%%shell
nvprof ./rabinKarp2

==17445== NVPROF is profiling process 17445, command: ./rabinKarp2
Pattern 0 found at position: 1499
Pattern 0 found at position: 19352
Pattern 0 found at position: 55058
Pattern 0 found at position: 37205
Pattern 0 found at position: 72911
Pattern 0 found at position: 90764
Pattern 0 found at position: 108617
Pattern 0 found at position: 126470
Pattern 0 found at position: 144323
Pattern 0 found at position: 162176
Pattern 0 found at position: 180029
Pattern 0 found at position: 197882
Pattern 0 found at position: 215735
Pattern 0 found at position: 233588
Pattern 0 found at position: 251441
Pattern 0 found at position: 269294
Pattern 0 found at position: 287147
Pattern 0 found at position: 305000
Pattern 0 found at position: 322853
Pattern 0 found at position: 340706
Pattern 0 found at position: 358559
Pattern 0 found at position: 376412
Pattern 0 found at position: 394265
Pattern 0 found at position: 412118
Pattern 0 found at position: 429971
Pattern 0 found at position: 447824
Patt

