<a href="https://colab.research.google.com/github/jmsarmiento11/csc612m-rabin-karp-CUDA/blob/main/Rabin_Karp_Algorithm_%5BFinal%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Rabin-Karp C Program (sequential CPU baseline, built with g++)

In [None]:
%%writefile c_rabinKarp.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <limits.h>
#include <time.h>

// Radix of the polynomial hash: calculateHash multiplies the running hash by
// BASE before adding each character.
const int BASE = 256;

// Integer power by repeated squaring: returns base^exponent, reduced modulo
// 2^64 (the width of unsigned long long) when the true value does not fit.
//
// base      value to raise; it is converted to unsigned long long first, so a
//           negative base yields the two's-complement result modulo 2^64.
// exponent  power to raise to; any value <= 0 returns 1.
//
// The squaring is done in unsigned long long. Squaring the int parameter
// itself overflows (undefined behaviour) as soon as base^2 exceeds INT_MAX,
// which for base 256 already happens at exponent 4.
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    unsigned long long int factor = (unsigned long long int)base;
    while (exponent > 0) {
        if (exponent & 1) {
            result *= factor;
        }
        exponent >>= 1;
        // Skip the final, unused squaring once every exponent bit is consumed.
        if (exponent > 0) {
            factor *= factor;
        }
    }
    return result;
}

// Polynomial hash of str[start..end] (both ends inclusive), evaluated left to
// right: for every character the accumulator is multiplied by BASE, the
// character is added, and the sum is reduced modulo INT_MAX.
// Returns 0 when end < start (empty range).
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int acc = 0;
    int pos = start;
    while (pos <= end) {
        acc *= BASE;
        acc += str[pos];
        acc %= INT_MAX;
        ++pos;
    }
    return acc;
}

// Function to find the occurrences of a pattern in the text using Rabin-Karp algorithm.
void rabinKarpSearch(const char* text, const char* patterns[], int numPatterns) {
    int n = strlen(text);

    // Start the timer.
    clock_t start = clock();

    // Loop through each pattern.
    for (int k = 0; k < numPatterns; ++k) {
        const char* pattern = patterns[k];
        int m = strlen(pattern);
        unsigned long long int patternHash = calculateHash(pattern, 0, m - 1);

        bool patternFound = false;

        // Loop through the text with a sliding window of length m.
        for (int i = 0; i <= n - m; ++i) {
            unsigned long long int currHash = calculateHash(text, i, i + m - 1);

            // Check if the hash value matches for the current window.
            if (patternHash == currHash) {
                bool found = true;

                // If the hash values match, compare the pattern with the text for an exact match.
                for (int j = 0; j < m; ++j) {
                    if (text[i + j] != pattern[j]) {
                        found = false;
                        break;
                    }
                }

                // If the pattern is found, print its position and set the flag to true.
                if (found) {
                    printf("Pattern \"%s\" found at position: %d\n", pattern, i);
                    patternFound = true;
                }
            }
        }

        // If the pattern is not found in the text, display a message.
        if (!patternFound) {
            printf("Pattern \"%s\" not found in the text.\n", pattern);
        }
    }

    // End the timer and calculate elapsed time.
    clock_t end = clock();
    double elapsed_time = (double)(end - start)*1e6 / CLOCKS_PER_SEC;
    printf("Searching process is done. Elapsed time: %.6f microseconds\n", elapsed_time);
}

// Reads the first line of text2.txt and searches it for a fixed set of
// patterns with rabinKarpSearch.
// Returns 0 on success, 1 if the file cannot be opened or read.
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline reads up to and including the first newline, so only the first
    // line of the file is searched. It allocates `text` itself.
    ssize_t read = getline(&text, &len, inputFile);
    fclose(inputFile);

    // On failure or an empty file getline returns -1 and `text` holds no usable
    // string; the buffer must still be released.
    if (read == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
    };
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // Call the Rabin-Karp search function to find all patterns in the text.
    rabinKarpSearch(text, patterns, numPatterns);

    free(text);
    return 0;
}


Overwriting c_rabinKarp.c


In [None]:
%%shell
g++ c_rabinKarp.c -o c_rabinKarp



In [None]:
%%shell
./c_rabinKarp

Pattern "flncufrnyd" found at position: 1519
Pattern "flncufrnyd" found at position: 19372
Pattern "flncufrnyd" found at position: 37225
Pattern "flncufrnyd" found at position: 55078
Pattern "flncufrnyd" found at position: 72931
Pattern "flncufrnyd" found at position: 90784
Pattern "flncufrnyd" found at position: 108637
Pattern "flncufrnyd" found at position: 126490
Pattern "flncufrnyd" found at position: 144343
Pattern "flncufrnyd" found at position: 162196
Pattern "flncufrnyd" found at position: 180049
Pattern "flncufrnyd" found at position: 197902
Pattern "flncufrnyd" found at position: 215755
Pattern "flncufrnyd" found at position: 233608
Pattern "flncufrnyd" found at position: 251461
Pattern "flncufrnyd" found at position: 269314
Pattern "flncufrnyd" found at position: 287167
Pattern "flncufrnyd" found at position: 305020
Pattern "flncufrnyd" found at position: 322873
Pattern "flncufrnyd" found at position: 340726
Pattern "flncufrnyd" found at position: 358579
Pattern "flncufrnyd"



Rabin-Karp CUDA V1.0 (With Unified Memory)

In [None]:
%%writefile rabinKarp.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Radix of the polynomial hash; read by the device function calculateHash,
// which multiplies the running hash by BASE before adding each character.
const int BASE = 256;

// Integer power by repeated squaring: returns base^exponent, reduced modulo
// 2^64 when the true value does not fit in unsigned long long.
//
// base      value to raise; converted to unsigned long long before use.
// exponent  power to raise to; any value <= 0 returns 1.
//
// The squaring is done in unsigned long long because squaring the int
// parameter overflows (undefined behaviour) once base^2 exceeds INT_MAX, which
// for base 256 happens at exponent 4. The function is pure arithmetic, so it is
// also marked __host__ to allow checking it against a CPU reference; existing
// device callers are unaffected.
__host__ __device__
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    unsigned long long int factor = (unsigned long long int)base;
    while (exponent > 0) {
        if (exponent & 1) {
            result *= factor;
        }
        exponent >>= 1;
        // Skip the final, unused squaring once every exponent bit is consumed.
        if (exponent > 0) {
            factor *= factor;
        }
    }
    return result;
}

// Device-side polynomial hash of str[start..end] (both ends inclusive).
// Each step multiplies the accumulator by BASE, adds the next character and
// reduces the sum modulo INT_MAX. Returns 0 when end < start.
__device__
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int acc = 0;
    int pos = start;
    while (pos <= end) {
        acc *= BASE;
        acc += str[pos];
        acc %= INT_MAX;
        ++pos;
    }
    return acc;
}

// Kernel: reports every occurrence of `pattern` in `text`.
//
// text           text bytes, readable from the device (textLength bytes).
// pattern        pattern bytes, readable from the device (patternLength bytes).
// patternIndex   index printed with each match and used to select the flag.
// textLength     number of bytes in `text`.
// patternLength  number of bytes in `pattern`.
// patternFound   flag array; patternFound[patternIndex] is set to true when at
//                least one match is found.
//
// Indexing uses only the x dimension of the grid and block. Each thread starts
// at its global x index and advances by the total number of launched threads,
// so the result does not depend on how many blocks are launched. Uses one
// unsigned long long of static shared memory per block.
__global__
void rabinKarpSearch(const char* text, const char* pattern, int patternIndex, int textLength, int patternLength, bool* patternFound) {
    const int n = textLength;
    const int m = patternLength;

    // The pattern hash is identical for every thread: compute it once per block
    // and share it. No thread returns before this barrier, so all threads of
    // the block reach it.
    __shared__ unsigned long long int patternHash;
    if (threadIdx.x == 0) {
        patternHash = calculateHash(pattern, 0, m - 1);
    }
    __syncthreads();

    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i <= n - m; i += stride) {
        if (calculateHash(text, i, i + m - 1) != patternHash) {
            continue;
        }

        // Equal hashes only suggest a match; confirm character by character.
        bool found = true;
        for (int j = 0; j < m; ++j) {
            if (text[i + j] != pattern[j]) {
                found = false;
                break;
            }
        }

        if (found) {
            printf("Pattern %d found at position: %d\n", patternIndex, i);
            // Several threads may store the same value; no ordering is needed.
            patternFound[patternIndex] = true;
            // Keep scanning: returning here would skip the remaining positions
            // assigned to this thread whenever the grid is smaller than the text.
        }
    }
}

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Reads the first line of text2.txt, copies it and a fixed set of patterns into
// managed memory, launches one rabinKarpSearch kernel per pattern and reports
// the patterns that were never found.
// Returns 0 on success, 1 if the file cannot be opened or read; exits with
// EXIT_FAILURE on any CUDA error.
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline reads up to and including the first newline, so only the first
    // line of the file is searched.
    ssize_t read = getline(&text, &len, inputFile);
    fclose(inputFile);
    if (read == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
    };
    const int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    const int textLength = strlen(text);
    if (textLength == 0) {
        // A launch with zero blocks is invalid, and nothing can match anyway.
        for (int i = 0; i < numPatterns; i++) {
            printf("Pattern %d not found.\n", i);
        }
        free(text);
        return 0;
    }

    // Allocate managed memory for the text and the per-pattern flags.
    char* cudaText;
    checkCuda(cudaMallocManaged((void**)&cudaText, textLength), "text allocation");
    memcpy(cudaText, text, textLength);

    bool* patternFound;
    checkCuda(cudaMallocManaged((void**)&patternFound, numPatterns * sizeof(bool)), "flag allocation");
    for (int i = 0; i < numPatterns; i++) {
        patternFound[i] = false;
    }

    // Stage every pattern before the first launch so the host never writes to
    // managed memory while a kernel is running. The terminator is copied too,
    // which keeps the allocation size non-zero for an empty pattern.
    char* cudaPatterns[sizeof(patterns) / sizeof(patterns[0])];
    int patternLengths[sizeof(patterns) / sizeof(patterns[0])];
    for (int i = 0; i < numPatterns; i++) {
        patternLengths[i] = strlen(patterns[i]);
        checkCuda(cudaMallocManaged((void**)&cudaPatterns[i], patternLengths[i] + 1), "pattern allocation");
        memcpy(cudaPatterns[i], patterns[i], patternLengths[i] + 1);
    }

    // One thread per text position, rounded up to whole blocks.
    const int blockSize = 256;
    const int numBlocks = (textLength + blockSize - 1) / blockSize;
    for (int i = 0; i < numPatterns; i++) {
        rabinKarpSearch<<<numBlocks, blockSize>>>(cudaText, cudaPatterns[i], i, textLength, patternLengths[i], patternFound);
        // Launch-configuration errors are only visible through cudaGetLastError.
        checkCuda(cudaGetLastError(), "kernel launch");
    }

    // Synchronize to ensure kernel execution is finished; errors raised while
    // the kernels ran are reported here.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    // Print message for patterns that are not found.
    for (int i = 0; i < numPatterns; i++) {
        if (!patternFound[i]) {
            printf("Pattern %d not found.\n", i);
        }
    }

    // Free managed memory only after all kernels have completed.
    for (int i = 0; i < numPatterns; i++) {
        checkCuda(cudaFree(cudaPatterns[i]), "pattern release");
    }
    checkCuda(cudaFree(cudaText), "text release");
    checkCuda(cudaFree(patternFound), "flag release");

    free(text);

    return 0;
}


Overwriting rabinKarp.cu


In [None]:
%%shell
nvcc rabinKarp.cu -o rabinKarp






In [None]:
%%shell
nvprof ./rabinKarp

==43498== NVPROF is profiling process 43498, command: ./rabinKarp
Pattern 0 found at position: 1519
Pattern 0 found at position: 37225
Pattern 0 found at position: 19372
Pattern 0 found at position: 55078
Pattern 0 found at position: 90784
Pattern 0 found at position: 72931
Pattern 0 found at position: 126490
Pattern 0 found at position: 108637
Pattern 0 found at position: 144343
Pattern 0 found at position: 162196
Pattern 0 found at position: 180049
Pattern 0 found at position: 197902
Pattern 0 found at position: 215755
Pattern 0 found at position: 233608
Pattern 0 found at position: 251461
Pattern 0 found at position: 269314
Pattern 0 found at position: 287167
Pattern 0 found at position: 305020
Pattern 0 found at position: 322873
Pattern 0 found at position: 340726
Pattern 0 found at position: 358579
Pattern 0 found at position: 376432
Pattern 0 found at position: 394285
Pattern 0 found at position: 412138
Pattern 0 found at position: 429991
Pattern 0 found at position: 447844
Patte



Rabin-Karp CUDA V2.0 (With Thrust Library)

In [None]:
%%writefile rabinKarp2.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

// Radix of the polynomial hash; read by the device function calculateHash,
// which multiplies the running hash by BASE before adding each character.
const int BASE = 256;

// Computes base^exponent by binary exponentiation. The result wraps modulo
// 2^64 if it does not fit in unsigned long long.
//
// base      value to raise; widened to unsigned long long before any multiply.
// exponent  power to raise to; values <= 0 give 1.
//
// All multiplications are unsigned 64-bit: squaring the int parameter in place
// is signed overflow (undefined behaviour) once base^2 exceeds INT_MAX, e.g.
// base 256 with exponent >= 4. Also callable from host code (__host__ added) so
// the arithmetic can be verified on the CPU; device callers are unaffected.
__host__ __device__
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int product = 1;
    unsigned long long int square = (unsigned long long int)base;
    for (; exponent > 0; exponent >>= 1) {
        if (exponent & 1) {
            product *= square;
        }
        // The square is only needed while higher exponent bits remain.
        if (exponent > 1) {
            square *= square;
        }
    }
    return product;
}

// Device-side polynomial hash over the inclusive index range [start, end] of
// str: the running digest is scaled by BASE, the next character is added, and
// the sum is taken modulo INT_MAX. An empty range (end < start) hashes to 0.
__device__
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int digest = 0;
    for (int idx = start; !(idx > end); idx++) {
        const unsigned long long int shifted = digest * BASE;
        digest = (shifted + str[idx]) % INT_MAX;
    }
    return digest;
}

// Kernel: reports every occurrence of `pattern` in `text`.
//
// text           text bytes in device-accessible memory (textLength bytes).
// pattern        pattern bytes in device-accessible memory (patternLength bytes).
// patternIndex   index printed with each match and used to select the flag.
// textLength     number of bytes in `text`.
// patternLength  number of bytes in `pattern`.
// patternFound   int flag array; patternFound[patternIndex] is switched from 0
//                to 1 when at least one match is found.
//
// Indexing uses only the x dimension of the grid and block. Each thread starts
// at its global x index and advances by the total number of launched threads,
// so any grid size gives the same set of reported positions. Uses one unsigned
// long long of static shared memory per block.
__global__
void rabinKarpSearch(const char* text, const char* pattern, int patternIndex, int textLength, int patternLength, int* patternFound) {
    const int n = textLength;
    const int m = patternLength;

    // Every thread needs the same pattern hash: thread 0 computes it and the
    // barrier publishes it to the block. No thread exits before the barrier.
    __shared__ unsigned long long int patternHash;
    if (threadIdx.x == 0) {
        patternHash = calculateHash(pattern, 0, m - 1);
    }
    __syncthreads();

    const int stride = gridDim.x * blockDim.x;
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i <= n - m; i += stride) {
        if (calculateHash(text, i, i + m - 1) != patternHash) {
            continue;
        }

        // Equal hashes only suggest a match; confirm character by character.
        bool found = true;
        for (int j = 0; j < m; ++j) {
            if (text[i + j] != pattern[j]) {
                found = false;
                break;
            }
        }

        if (found) {
            printf("Pattern %d found at position: %d\n", patternIndex, i);
            // Set the flag once. The thread keeps scanning either way: exiting
            // here would drop its remaining positions when the grid is smaller
            // than the text.
            atomicCAS(&patternFound[patternIndex], 0, 1);
        }
    }
}

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Reads the first line of text2.txt, copies it and a fixed set of patterns to
// the device with Thrust vectors, launches one rabinKarpSearch kernel per
// pattern and reports the patterns that were never found.
// Returns 0 on success, 1 if the file cannot be opened or read; exits with
// EXIT_FAILURE on a CUDA launch or execution error.
int main() {
    FILE* inputFile = fopen("text2.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;

    // getline reads up to and including the first newline, so only the first
    // line of the file is searched.
    ssize_t read = getline(&text, &len, inputFile);
    fclose(inputFile);
    if (read == -1) {
        printf("Error reading the file.\n");
        free(text);
        return 1;
    }

    // The text length is getline's return value. `len` is only the capacity of
    // the buffer getline allocated, so using it would also search whatever
    // uninitialised bytes follow the line.
    const int textLength = (int)read;

    const char* patterns[] = {
        "flncufrnyd", "yosqvoxrqr", "mldgpkfifa", "wrxxozxwur",
    };
    const int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // Pack all patterns into one buffer so a single device allocation outlives
    // every kernel launch.
    int patternOffsets[sizeof(patterns) / sizeof(patterns[0])];
    int patternLengths[sizeof(patterns) / sizeof(patterns[0])];
    int packedLength = 0;
    for (int i = 0; i < numPatterns; i++) {
        patternOffsets[i] = packedLength;
        patternLengths[i] = strlen(patterns[i]);
        packedLength += patternLengths[i];
    }
    thrust::host_vector<char> packedPatterns(packedLength);
    for (int i = 0; i < numPatterns; i++) {
        if (patternLengths[i] > 0) {
            memcpy(thrust::raw_pointer_cast(packedPatterns.data()) + patternOffsets[i], patterns[i], patternLengths[i]);
        }
    }

    thrust::device_vector<char> cudaText(text, text + textLength);
    thrust::device_vector<char> cudaPatterns = packedPatterns;
    thrust::device_vector<int> patternFound(numPatterns, 0);

    // A launch with zero blocks is invalid; an empty text cannot match anyway.
    if (textLength > 0) {
        // One thread per text position, rounded up to whole blocks.
        const int blockSize = 256;
        const int numBlocks = (textLength + blockSize - 1) / blockSize;
        for (int i = 0; i < numPatterns; i++) {
            rabinKarpSearch<<<numBlocks, blockSize>>>(thrust::raw_pointer_cast(cudaText.data()), thrust::raw_pointer_cast(cudaPatterns.data()) + patternOffsets[i], i, textLength, patternLengths[i], thrust::raw_pointer_cast(patternFound.data()));
            // Launch-configuration errors are only visible through cudaGetLastError.
            checkCuda(cudaGetLastError(), "kernel launch");
        }
    }

    // Synchronize to ensure kernel execution is finished; errors raised while
    // the kernels ran are reported here.
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    // Copy the flags back in one transfer, then print a message for patterns
    // that are not found.
    thrust::host_vector<int> foundFlags = patternFound;
    for (int i = 0; i < numPatterns; i++) {
        if (foundFlags[i] == 0) {
            printf("Pattern %d not found.\n", i);
        }
    }

    // No need to manually free memory with Thrust device vectors

    free(text);

    return 0;
}


Overwriting rabinKarp2.cu


In [None]:
%%shell
nvcc rabinKarp2.cu -o rabinKarp2






In [None]:
%%shell
nvprof ./rabinKarp2

==44543== NVPROF is profiling process 44543, command: ./rabinKarp2
Pattern 0 found at position: 19372
Pattern 0 found at position: 1519
Pattern 0 found at position: 37225
Pattern 0 found at position: 55078
Pattern 0 found at position: 72931
Pattern 0 found at position: 90784
Pattern 0 found at position: 108637
Pattern 0 found at position: 126490
Pattern 0 found at position: 144343
Pattern 0 found at position: 162196
Pattern 0 found at position: 180049
Pattern 0 found at position: 197902
Pattern 0 found at position: 215755
Pattern 0 found at position: 233608
Pattern 0 found at position: 269314
Pattern 0 found at position: 251461
Pattern 0 found at position: 287167
Pattern 0 found at position: 305020
Pattern 0 found at position: 322873
Pattern 0 found at position: 340726
Pattern 0 found at position: 358579
Pattern 0 found at position: 376432
Pattern 0 found at position: 394285
Pattern 0 found at position: 412138
Pattern 0 found at position: 429991
Pattern 0 found at position: 447844
Patt

