In [None]:
Name: UNEEB ULLAH      SP22-Bcs-120
      KHIZAR LODHI     Sp22-Bcs-081
LAB FINAL TEXT DETECTION.

In [None]:
%%writefile pattern_detection_cuda.cu
#include <algorithm>
#include <cstring>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

#include <cuda_runtime.h>

using namespace std;

/**
 * Brute-force substring search: every thread tests one candidate start offset.
 *
 * Launch layout: 1-D grid of 1-D blocks. The thread with global index `start`
 * compares text[start .. start + wordLen) against word[0 .. wordLen). Threads
 * whose window would extend past textLen exit without touching memory.
 *
 * @param text       text to search (dereferenced on the device)
 * @param word       pattern to look for (dereferenced on the device)
 * @param textLen    number of chars in `text`
 * @param wordLen    number of chars in `word`
 * @param positions  receives the start offset of every match; slots are handed
 *                   out via atomicAdd on `*count`, so the order of the entries
 *                   depends on thread scheduling
 * @param count      running number of matches; this kernel only increments it,
 *                   so its starting value is whatever the caller stored there
 */
__global__ void findWordKernel(const char* text, const char* word, int textLen, int wordLen,
                               int* positions, int* count) {
    const int start = blockIdx.x * blockDim.x + threadIdx.x;
    if (start + wordLen > textLen) return;

    // Advance while characters agree; reaching wordLen means a full match.
    int matched = 0;
    while (matched < wordLen && text[start + matched] == word[matched]) {
        ++matched;
    }

    if (matched == wordLen) {
        // Claim a unique output slot, then record where this match begins.
        const int slot = atomicAdd(count, 1);
        positions[slot] = start;
    }
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) return true;
    cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
    return false;
}

/**
 * Interactive menu action: reads one line of text and one search word from
 * stdin, runs findWordKernel on the GPU to locate every (possibly overlapping)
 * occurrence of the word, and prints the match count and start indices.
 *
 * Every CUDA call is checked; on failure the error is reported on stderr, all
 * device buffers are still released, and a failure notice is printed instead
 * of results. Match indices are sorted before printing because the kernel
 * stores them in scheduling-dependent order.
 */
void Past_Text() {
    // Discard the remainder of the menu line left behind by `cin >> cmd`.
    cin.ignore(numeric_limits<streamsize>::max(), '\n');

    string sent, word;

    cout << "Paste Your Text Here:\n";
    getline(cin, sent);

    cout << "\nEnter Word to Find:\n";
    getline(cin, word);

    if (sent.empty() || word.empty()) {
        cout << "Text or word cannot be empty!\n";
        return;
    }

    // The kernel indexes with int, so refuse inputs that would not fit.
    const size_t maxLen = static_cast<size_t>(numeric_limits<int>::max());
    if (sent.length() > maxLen || word.length() > maxLen) {
        cout << "Text or word is too long!\n";
        return;
    }

    const int textLen = static_cast<int>(sent.length());
    const int wordLen = static_cast<int>(word.length());

    char* d_text = nullptr;
    char* d_word = nullptr;
    int* d_positions = nullptr;
    int* d_count = nullptr;

    int h_count = 0;
    vector<int> h_positions;

    // Short-circuit chain: the first failing step stops all later ones.
    // The strings are copied straight from std::string storage (no staging buffers).
    bool ok =
        cudaCallOk(cudaMalloc(&d_text, textLen * sizeof(char)), "allocate text") &&
        cudaCallOk(cudaMalloc(&d_word, wordLen * sizeof(char)), "allocate word") &&
        cudaCallOk(cudaMalloc(&d_positions, textLen * sizeof(int)), "allocate positions") &&
        cudaCallOk(cudaMalloc(&d_count, sizeof(int)), "allocate match counter") &&
        cudaCallOk(cudaMemset(d_count, 0, sizeof(int)), "reset match counter") &&
        cudaCallOk(cudaMemcpy(d_text, sent.data(), textLen * sizeof(char),
                              cudaMemcpyHostToDevice), "copy text to device") &&
        cudaCallOk(cudaMemcpy(d_word, word.data(), wordLen * sizeof(char),
                              cudaMemcpyHostToDevice), "copy word to device");

    if (ok) {
        // One thread per candidate start offset in the text.
        const int threads = 256;
        const int blocks = (textLen + threads - 1) / threads;

        findWordKernel<<<blocks, threads>>>(d_text, d_word, textLen, wordLen, d_positions, d_count);
        // Launch-configuration errors surface via cudaGetLastError();
        // faults during execution surface at the synchronize.
        ok = cudaCallOk(cudaGetLastError(), "launch findWordKernel") &&
             cudaCallOk(cudaDeviceSynchronize(), "run findWordKernel");
    }

    if (ok) {
        ok = cudaCallOk(cudaMemcpy(&h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost),
                        "copy match count to host");
    }

    if (ok && h_count > 0) {
        h_positions.resize(h_count);
        ok = cudaCallOk(cudaMemcpy(h_positions.data(), d_positions, h_count * sizeof(int),
                                   cudaMemcpyDeviceToHost),
                        "copy match positions to host");
        if (ok) {
            // Slots are claimed atomically in arbitrary order; present ascending.
            sort(h_positions.begin(), h_positions.end());
        }
    }

    // cudaFree(nullptr) is a no-op, so this is safe after a partial setup.
    cudaFree(d_text);
    cudaFree(d_word);
    cudaFree(d_positions);
    cudaFree(d_count);

    cout << "\n========== RESULT ==========\n";
    if (!ok) {
        cout << "SEARCH FAILED: a CUDA error occurred (see message above)." << endl;
    } else if (h_count > 0) {
        cout << "WORD FOUND! Total occurrences: " << h_count << endl;
        for (int i = 0; i < h_count; i++) {
            cout << "Word: '" << word << "' at index: " << h_positions[i] << endl;
        }
    } else {
        cout << "WORD NOT FOUND!" << endl;
    }
    cout << "============================\n";

    cout << "Press Enter to continue...";
    cin.ignore(numeric_limits<streamsize>::max(), '\n');
}


/**
 * Menu loop: shows the options, reads a numeric choice and dispatches it.
 * Choice 1 runs Past_Text(), choice 7 exits, anything else is rejected.
 *
 * A failed read is handled explicitly: end-of-input (or a broken stream)
 * terminates the program, and non-numeric input is discarded so the menu can
 * be shown again. Without this the stream stays in a fail state and the loop
 * spins forever.
 */
int main() {
    int cmd;
    while (true) {
        cout << "\n1- Paste Your Text\n";
        cout << "6- About\n";
        cout << "7- Exit\n";
        cout << "Enter choice: ";

        if (!(cin >> cmd)) {
            if (cin.eof() || cin.bad()) {
                // No more input will ever arrive; leave instead of looping.
                cout << endl;
                return 0;
            }
            // Non-numeric input: reset the stream and drop the offending line.
            cin.clear();
            cin.ignore(numeric_limits<streamsize>::max(), '\n');
            cout << "Option not implemented or invalid, try again.\n";
            continue;
        }

        switch (cmd) {
            case 1: Past_Text(); break;
            case 7: return 0;
            default: cout << "Option not implemented or invalid, try again.\n"; break;
        }
    }
    return 0;
}


Overwriting pattern_detection_cuda.cu


In [None]:
# Build the file written by the previous cell for compute capability 7.5
# (-arch=sm_75), then start the interactive menu program.
!nvcc pattern_detection_cuda.cu -o pattern_detection_cuda -arch=sm_75
!./pattern_detection_cuda


1- Paste Your Text
6- About
7- Exit
Enter choice: 1
Paste Your Text Here:
bomb bomb 

Enter Word to Find:
killer

WORD NOT FOUND!
Press Enter to continue...

1- Paste Your Text
6- About
7- Exit
Enter choice: 1
Paste Your Text Here:
bomb bomb bomb bomb

Enter Word to Find:
bomb

WORD FOUND! Total occurrences: 4
Word: 'bomb' at index: 0
Word: 'bomb' at index: 5
Word: 'bomb' at index: 10
Word: 'bomb' at index: 15
Press Enter to continue...

1- Paste Your Text
6- About
7- Exit
Enter choice: 7


In [18]:
# NOTE(review): pytesseract is not imported by any cell visible in this
# notebook — confirm it is still needed before keeping this install step.
!pip install  pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [20]:
import cupy as cp
from io import BytesIO
import ipywidgets as widgets
from IPython.display import display, clear_output
import re


# File picker limited to a single .txt file, shown under a bold caption.
uploader = widgets.FileUpload(multiple=False, accept='.txt')
display(widgets.HTML("<b>Upload a text file:</b>"), uploader)

# Shared message area that the callbacks clear and print into.
output = widgets.Output()
display(output)

# Bytes of the uploaded file as a uint8 CuPy array; empty until a file is loaded.
text_data = cp.array([], dtype=cp.uint8)

def on_upload_change(change):
    """Load the uploaded file into the global ``text_data`` array.

    Reads the first file held by ``uploader``, checks that it is valid UTF-8,
    and stores its raw bytes as a ``uint8`` CuPy array. Status and error
    messages are printed into ``output``.

    Both shapes of ``FileUpload.value`` are accepted: the ipywidgets 7 dict
    (filename -> info) and the ipywidgets 8 tuple of info dicts, whose
    ``content`` is a ``memoryview`` rather than ``bytes``.

    Args:
        change: traitlets change notification (unused; the widget is read
            directly).
    """
    global text_data
    uploaded = uploader.value
    if not uploaded:
        return

    entries = uploaded.values() if isinstance(uploaded, dict) else uploaded
    file_info = next(iter(entries))
    raw = bytes(file_info['content'])  # normalises memoryview -> bytes

    # Validate the encoding up front so later decoding cannot fail; on error
    # the previously loaded text (if any) is left untouched.
    try:
        raw.decode('utf-8')
    except UnicodeDecodeError as exc:
        with output:
            clear_output()
            print(f"Could not load file: not valid UTF-8 ({exc.reason} at byte {exc.start}).")
        return

    text_data = cp.array(list(raw), dtype=cp.uint8)
    with output:
        clear_output()
        print("Text file loaded! Length:", len(text_data))

# Call on_upload_change whenever the upload widget's `value` trait changes.
uploader.observe(on_upload_change, names='value')

# Search controls: pattern box, two option toggles and the trigger button.
pattern_input = widgets.Text(description='Pattern:', placeholder='Enter word, phrase, or regex')
case_checkbox = widgets.Checkbox(value=False, description='Case-Insensitive')
regex_checkbox = widgets.Checkbox(value=False, description='Use Regex')
detect_button = widgets.Button(description="Detect Pattern")

# Render the controls in the order they were created.
display(pattern_input, case_checkbox, regex_checkbox, detect_button)

def detect_pattern(b):
    """Search the uploaded text for the entered pattern and print the matches.

    Reads the pattern and options from ``pattern_input``, ``case_checkbox``
    and ``regex_checkbox``, searches the text held in the global
    ``text_data`` and prints the total plus the first ten ``(start, end)``
    spans into ``output``. Offsets are character indices into the decoded
    text.

    In literal mode every occurrence is reported, including overlapping ones.
    In regex mode the non-overlapping matches of ``re.finditer`` are reported.
    An invalid regular expression is reported to the user instead of raising.

    Args:
        b: the clicked button (unused).
    """
    if text_data.size == 0:
        with output:
            clear_output()
            print("Please upload a text file first!")
        return

    pattern = pattern_input.value
    if not pattern:
        with output:
            clear_output()
            print("Please enter a pattern!")
        return

    # text_data holds UTF-8 bytes, so decode the buffer as a whole. Mapping
    # each byte through chr() would turn non-ASCII characters into mojibake
    # and report byte offsets instead of character offsets.
    text_str = cp.asnumpy(text_data).tobytes().decode('utf-8', errors='replace')

    flags = re.IGNORECASE if case_checkbox.value else 0

    if regex_checkbox.value:
        expression, group = pattern, 0
    else:
        # A zero-width lookahead around the escaped literal lets finditer
        # visit every start position, so overlapping occurrences are found.
        # The captured group gives the span in the original text.
        expression, group = f"(?=({re.escape(pattern)}))", 1

    try:
        matches = [(m.start(group), m.end(group))
                   for m in re.finditer(expression, text_str, flags)]
    except re.error as exc:
        with output:
            clear_output()
            print(f"Invalid regular expression: {exc}")
        return

    with output:
        clear_output()
        if matches:
            print(f"Pattern found! Total occurrences: {len(matches)}")
            for i, (start, end) in enumerate(matches[:10]):
                print(f"{i+1}. Start: {start}, End: {end}")
            if len(matches) > 10:
                print(f"...and {len(matches)-10} more occurrences")
        else:
            print("Pattern not found.")

# Run detect_pattern each time the "Detect Pattern" button is clicked.
detect_button.on_click(detect_pattern)


HTML(value='<b>Upload a text file:</b>')

FileUpload(value={}, accept='.txt', description='Upload')

Output()

Text(value='', description='Pattern:', placeholder='Enter word, phrase, or regex')

Checkbox(value=False, description='Case-Insensitive')

Checkbox(value=False, description='Use Regex')

Button(description='Detect Pattern', style=ButtonStyle())

In [7]:
!sudo apt update
!sudo apt install openmpi-bin openmpi-common libopenmpi-dev


[33m0% [Working][0m            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
[33m0% [Connecting to archive.ubuntu.com (91.189.92.24)] [1 InRelease 14.2 kB/129 k[0m[33m0% [Waiting for headers] [Waiting for headers] [Connecting to r2u.stat.illinois[0m                                                                               Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
                                                                               Get:3 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
[33m0% [Waiting for headers] [Connecting to r2u.stat.illinois.edu] [Connected to de[0m                                                                               Get:4 https://cli.github.com/packages stable InRelease [3,917 B]
[33m0% [Waiting for headers] [Connecting to r2u.stat.illinois.edu] [Connected to de[0m[33m0% [Waiting for headers] [Connecting to r2u.stat.illinois.edu] [Waiting for hea

In [15]:
!nvcc pattern_detection_cuda_mpi.cu -o cudampi -lmpi
!./cudampi



[01m[Kpattern_detection_cuda_mpi.cu:6:10:[m[K [01;31m[Kfatal error: [m[Kmpi.h: No such file or directory
    6 | #include [01;31m[K<mpi.h>[m[K
      |          [01;31m[K^~~~~~~[m[K
compilation terminated.
/bin/bash: line 1: ./cudampi: No such file or directory
