<a href="https://colab.research.google.com/github/jmsarmiento11/csc612m-rabin-karp-CUDA/blob/main/Rabin_Karp_Algorithm_%5BFinal%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Rabin-Karp C++ Program

In [None]:
%%writefile c_rabinKarp.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <limits.h>
#include <time.h>

const int BASE = 256;

// Custom power function to calculate BASE^(m-1) for integers.
unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    while (exponent > 0) {
        if (exponent & 1) {
            result *= base;
        }
        base *= base;
        exponent >>= 1;
    }
    return result;
}

// Function to calculate the hash value for a given substring using prefix sum.
unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int hashValue = 0;
    for (int i = start; i <= end; ++i) {
        hashValue = (hashValue * BASE + str[i]) % INT_MAX;
    }
    return hashValue;
}

// Function to find the occurrences of a pattern in the text using Rabin-Karp algorithm.
void rabinKarpSearch(const char* text, const char* patterns[], int numPatterns) {
    int n = strlen(text);

    // Start the timer.
    clock_t start = clock();

    // Loop through each pattern.
    for (int k = 0; k < numPatterns; ++k) {
        const char* pattern = patterns[k];
        int m = strlen(pattern);
        unsigned long long int patternHash = calculateHash(pattern, 0, m - 1);

        bool patternFound = false;

        // Loop through the text with a sliding window of length m.
        for (int i = 0; i <= n - m; ++i) {
            unsigned long long int currHash = calculateHash(text, i, i + m - 1);

            // Check if the hash value matches for the current window.
            if (patternHash == currHash) {
                bool found = true;

                // If the hash values match, compare the pattern with the text for an exact match.
                for (int j = 0; j < m; ++j) {
                    if (text[i + j] != pattern[j]) {
                        found = false;
                        break;
                    }
                }

                // If the pattern is found, print its position and set the flag to true.
                if (found) {
                    printf("Pattern \"%s\" found at position: %d\n", pattern, i);
                    patternFound = true;
                }
            }
        }

        // If the pattern is not found in the text, display a message.
        if (!patternFound) {
            printf("Pattern \"%s\" not found in the text.\n", pattern);
        }
    }

    // End the timer and calculate elapsed time.
    clock_t end = clock();
    double elapsed_time = (double)(end - start)*1e6 / CLOCKS_PER_SEC;
    printf("Searching process is done. Elapsed time: %.6f microseconds\n", elapsed_time);
}

int main() {
    FILE* inputFile = fopen("text.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;
    ssize_t read;

    // Read the entire text from the file.
    read = getline(&text, &len, inputFile);
    fclose(inputFile);

    const char* patterns[] = {"mark", "sarmiento"};
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // Call the Rabin-Karp search function to find all patterns in the text.
    rabinKarpSearch(text, patterns, numPatterns);

    free(text);
    return 0;
}


Overwriting c_rabinKarp.c


In [None]:
%%shell
g++ c_rabinKarp.c -o c_rabinKarp



In [None]:
%%shell
./c_rabinKarp

Pattern "mark" not found in the text.
Pattern "sarmiento" not found in the text.
Searching process is done. Elapsed time: 153.000000 microseconds




Rabin-Karp CUDA V1.0

In [1]:
%%writefile rabinKarp.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const int BASE = 256;

__device__ unsigned long long int customPow(int base, int exponent) {
    unsigned long long int result = 1;
    while (exponent > 0) {
        if (exponent & 1) {
            result *= base;
        }
        base *= base;
        exponent >>= 1;
    }
    return result;
}

__device__ unsigned long long int calculateHash(const char* str, int start, int end) {
    unsigned long long int hashValue = 0;
    for (int i = start; i <= end; ++i) {
        hashValue = (hashValue * BASE + str[i]) % INT_MAX;
    }
    return hashValue;
}

__global__ void rabinKarpSearch(const char* text, const char* pattern, int patternIndex, int textLength, int patternLength, bool* patternFound) {
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    int numThreads = gridDim.x * blockDim.x;
    int n = textLength;
    int m = patternLength;

    // Calculate the pattern hash value outside the loop since it is constant.
    unsigned long long int patternHash = calculateHash(pattern, 0, m - 1);

    // Loop over the text with grid-stride.
    for (int i = tid; i <= n - m; i += numThreads) {
        unsigned long long int currHash = calculateHash(text, i, i + m - 1);

        if (patternHash == currHash) {
            bool found = true;
            for (int j = 0; j < m; ++j) {
                if (text[i + j] != pattern[j]) {
                    found = false;
                    break;
                }
            }

            if (found) {
                printf("Pattern %d found at position: %d\n", patternIndex, i);
                patternFound[patternIndex] = true;
                return; // Return immediately when pattern is found.
            }
        }
    }
}

int main() {
    FILE* inputFile = fopen("text.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;
    ssize_t read;

    // Read the entire text from the file.
    read = getline(&text, &len, inputFile);
    fclose(inputFile);

    const char* patterns[] = {"dog", "fox", "lazy", "jump", "over", "quick", "brown", "jumps", "the", "sleepy"};
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    char* cudaText;
    int textLength = strlen(text);

    // Allocate memory and copy text to the GPU.
    cudaMalloc((void**)&cudaText, textLength);
    cudaMemcpy(cudaText, text, textLength, cudaMemcpyHostToDevice);

    bool patternFound[numPatterns];
    for (int i = 0; i < numPatterns; i++) {
        patternFound[i] = false;
    }

    bool* cudaPatternFound;
    cudaMalloc((void**)&cudaPatternFound, numPatterns * sizeof(bool));
    cudaMemcpy(cudaPatternFound, patternFound, numPatterns * sizeof(bool), cudaMemcpyHostToDevice);

    for (int i = 0; i < numPatterns; i++) {
        int patternLength = strlen(patterns[i]);
        char* cudaPattern;
        cudaMalloc((void**)&cudaPattern, patternLength);
        cudaMemcpy(cudaPattern, patterns[i], patternLength, cudaMemcpyHostToDevice);

        // Launch the kernel with appropriate thread configuration.
        int blockSize = 256;
        int numBlocks = (textLength + blockSize - 1) / blockSize;
        rabinKarpSearch<<<numBlocks, blockSize>>>(cudaText, cudaPattern, i, textLength, patternLength, cudaPatternFound);

        cudaFree(cudaPattern);
    }

    cudaMemcpy(patternFound, cudaPatternFound, numPatterns * sizeof(bool), cudaMemcpyDeviceToHost);
    cudaFree(cudaPatternFound);

    // Print message for patterns that are not found.
    for (int i = 0; i < numPatterns; i++) {
        if (!patternFound[i]) {
            printf("Pattern %d not found.\n", i);
        }
    }

    // Free GPU memory.
    cudaFree(cudaText);

    free(text);

    return 0;
}


Writing rabinKarp.cu


In [2]:
%%shell
nvcc rabinKarp.cu -o rabinKarp






In [3]:
%%shell
nvprof ./rabinKarp

==729== NVPROF is profiling process 729, command: ./rabinKarp
Pattern 0 found at position: 520
Pattern 0 found at position: 325
Pattern 0 found at position: 784
Pattern 0 found at position: 818
Pattern 0 found at position: 178
Pattern 0 found at position: 40
Pattern 0 found at position: 49
Pattern 0 found at position: 123
Pattern 1 found at position: 708
Pattern 1 found at position: 806
Pattern 1 found at position: 16
Pattern 1 found at position: 246
Pattern 1 found at position: 88
Pattern 1 found at position: 471
Pattern 1 found at position: 219
Pattern 1 found at position: 62
Pattern 1 found at position: 144
Pattern 1 found at position: 157
Pattern 2 found at position: 35
Pattern 3 found at position: 20
Pattern 4 found at position: 26
Pattern 5 found at position: 4
Pattern 6 found at position: 10
Pattern 7 found at position: 20
Pattern 8 found at position: 616
Pattern 8 found at position: 868
Pattern 8 found at position: 649
Pattern 8 found at position: 780
Pattern 8 found at positio



Rabin-Karp CUDA V2.0 (With Thrust Library)

In [1]:
%%writefile rabinKarp.cu
#include <thrust/device_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/functional.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const int BASE = 256;

struct RabinKarpHash {
    unsigned long long int operator()(const char* str, int start, int end) {
        unsigned long long int hashValue = 0;
        for (int i = start; i <= end; ++i) {
            hashValue = (hashValue * BASE + str[i]) % INT_MAX;
        }
        return hashValue;
    }
};

int main() {
    FILE* inputFile = fopen("text.txt", "r");
    if (!inputFile) {
        printf("Error opening the file.\n");
        return 1;
    }

    char* text = NULL;
    size_t len = 0;
    ssize_t read;

    // Read the entire text from the file.
    read = getline(&text, &len, inputFile);
    fclose(inputFile);

    const char* patterns[] = {"dog", "fox", "lazy", "jump", "over", "quick", "brown", "jumps", "the", "sleepy"};
    int numPatterns = sizeof(patterns) / sizeof(patterns[0]);

    // Create thrust vectors for the text and patterns.
    thrust::device_vector<char> d_text(text, text + strlen(text));
    thrust::device_vector<char> d_patterns(patterns, patterns + numPatterns);

    // Create a transform iterator that calculates the Rabin-Karp hash of each pattern.
    thrust::transform_iterator<RabinKarpHash, const char*, unsigned long long int> d_patternHashes(d_patterns.begin(), RabinKarpHash());

    // Use Thrust to find all of the patterns in the text.
    thrust::equal_to<unsigned long long int> equal_to;
    auto found = thrust::search(d_patternHashes, d_patternHashes + numPatterns, d_text.begin(), d_text.end(), equal_to);

    // Print the positions of the found patterns.
    for (auto it = found.begin(); it != found.end(); ++it) {
        printf("Pattern found at position: %d\n", *it);
    }

    return 0;
}


Writing rabinKarp.cu


In [2]:
%%shell
nvcc rabinKarp.cu -o rabinKarp

[01m[0m[01mrabinKarp.cu(44)[0m: [01;31merror[0m: no instance of constructor [01m"thrust::transform_iterator<AdaptableUnaryFunction, Iterator, Reference, Value>::transform_iterator [with AdaptableUnaryFunction=RabinKarpHash, Iterator=const char *, Reference=unsigned long long, Value=thrust::use_default]"[0m matches the argument list
            argument types are: (thrust::detail::normal_iterator<thrust::device_ptr<char>>, RabinKarpHash)

[01m[0m[01mrabinKarp.cu(48)[0m: [01;31merror[0m: namespace [01m"thrust"[0m has no member "[01msearch[0m"


[01m[0m[01m/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/transform.h(119)[0m: [01;31merror[0m: no operator "[01m=[0m" matches these operands
            operand types are: thrust::device_reference<char> = const char *const
          detected during:
            instantiation of [01m"void thrust::cuda_cub::__transform::unary_transform_f<InputIt, OutputIt, thrust::cuda_cub::__transform::no_s

CalledProcessError: ignored