<a href="https://colab.research.google.com/github/fragmede/wc-gpu/blob/main/wc_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import timeit
import numpy as np
from numba import cuda

In [33]:
# Step 1: Define the CUDA kernel
@cuda.jit
def count_lwc_kernel(data, line_count, word_count, char_count):
    """CUDA kernel that tallies newlines, word endings and elements of ``data``.

    Each thread inspects one element of ``data`` (1-D indexing via
    ``cuda.grid(1)``) and accumulates into element 0 of the counter arrays
    with atomic adds.

    Args:
        data: 1-D array of byte values (the loaders in this notebook pass
            uint8 arrays).
        line_count: one-element array; incremented for every newline (10).
        word_count: one-element array; incremented for every word ending,
            i.e. a non-whitespace byte that is either the last element or is
            followed by a whitespace byte.
        char_count: one-element array; incremented once per element.

    Whitespace is space (32) plus the control range 9..13 (tab, newline,
    vertical tab, form feed, carriage return). Treating CR/VT/FF as
    whitespace keeps CRLF text from gaining extra "words" such as a lone
    carriage return.
    """
    idx = cuda.grid(1)
    if idx >= data.size:
        # Surplus thread in the final block: nothing to inspect.
        return

    byte = data[idx]

    # Count characters
    cuda.atomic.add(char_count, 0, 1)

    # Count lines
    if byte == 10:  # ASCII for newline
        cuda.atomic.add(line_count, 0, 1)

    # Count words: only a non-whitespace byte can end a word.
    if byte == 32 or (byte >= 9 and byte <= 13):
        return

    if idx == data.size - 1:
        # Last element is part of a word, so that word ends here.
        cuda.atomic.add(word_count, 0, 1)
    else:
        following = data[idx + 1]
        if following == 32 or (following >= 9 and following <= 13):
            cuda.atomic.add(word_count, 0, 1)

# Step 2: Convert string to NumPy array
def load_data_from_string(input_string):
    """Return the UTF-8 encoding of ``input_string`` as a uint8 NumPy array.

    The array is a view over the encoded bytes object (one element per
    byte), so multi-byte characters occupy several elements.
    """
    utf8_bytes = input_string.encode('utf-8')
    return np.frombuffer(utf8_bytes, dtype=np.uint8)

# Step 3: Allocate memory on GPU and copy data
def count_lwc(data, add_trailing_newline=True):
    """Count lines, words and chars of ``data`` on the GPU.

    Args:
        data: 1-D NumPy array of byte values (the loaders in this notebook
            produce uint8 arrays).
        add_trailing_newline: When True (the default, and the historical
            behaviour of this function) 1 is added to both the line and the
            char total, i.e. the result describes ``data`` followed by one
            extra newline. The expected tuples passed to ``test_string`` in
            this notebook assume this. Pass False to get the raw totals of
            ``data`` itself, e.g. for bytes read straight from a file.

    Returns:
        A ``(lines, words, chars)`` tuple of Python ints. ``chars`` counts
        array elements, so for UTF-8 input it is a byte count.

    Note:
        The device counters are int32, so totals are limited to the int32
        range.
    """
    extra = 1 if add_trailing_newline else 0

    # Nothing to launch: a grid of zero blocks is not a valid CUDA launch
    # configuration, so answer for empty input directly on the host.
    if data.size == 0:
        return extra, 0, extra

    line_count = np.zeros(1, dtype=np.int32)
    word_count = np.zeros(1, dtype=np.int32)
    char_count = np.zeros(1, dtype=np.int32)

    # Copy the input and the zeroed counters to the device.
    d_data = cuda.to_device(data)
    d_line_count = cuda.to_device(line_count)
    d_word_count = cuda.to_device(word_count)
    d_char_count = cuda.to_device(char_count)

    # Step 4: Define the grid and block dimensions
    # (ceiling division so every element gets a thread).
    threads_per_block = 256
    blocks_per_grid = (data.size + (threads_per_block - 1)) // threads_per_block

    # Launch the kernel
    count_lwc_kernel[blocks_per_grid, threads_per_block](
        d_data, d_line_count, d_word_count, d_char_count
    )

    # Step 5: Copy the results back to host
    line_count = d_line_count.copy_to_host()
    word_count = d_word_count.copy_to_host()
    char_count = d_char_count.copy_to_host()

    # Convert to Python ints before adjusting so the result is a plain tuple
    # of ints rather than NumPy scalars.
    return (
        int(line_count[0]) + extra,
        int(word_count[0]),
        int(char_count[0]) + extra,
    )


In [29]:
def test_string(string, spec):
    """Check the GPU counts for ``string`` against the expected ``spec``.

    ``spec`` is compared with the tuple returned by ``count_lwc``. Nothing
    is printed when they match; on a mismatch a single diagnostic line with
    the input, the computed counts and the expected counts is printed.
    """
    observed = count_lwc(load_data_from_string(string))
    if observed == spec:
        return
    print("counted", string, "wrong", observed, spec)

In [26]:
# Demo input with leading, repeated and trailing spaces around three words.
input_string = "     word1      word2     word3       "

data = load_data_from_string(input_string)
counts = count_lwc(data)
# count_lwc returns the full (lines, words, chars) tuple, not only the word
# count, so label the output accordingly.
print(f"Counts (lines, words, chars): {counts}")


Number of words: (0, 3, 38)


In [34]:
# Each case pairs an input string with the (lines, words, chars) tuple that
# count_lwc is expected to return for it. The chars entry counts UTF-8 bytes,
# which is why the emoji and dash cases are larger than their visible length.
STRING_CASES = [
    ('now with 23 a number',                   (1, 5, 21)),
    ('now with 23.17',                         (1, 3, 15)),
    ("emoji 😍😍 do not count",                (1, 5, 28)),
    ("possessive's are one word",              (1, 4, 26)),
    ('some "quoted text" does not impact',     (1, 6, 35)),
    ("also 'single quotes' are ok",            (1, 5, 28)),
    ("don't do contractions",                  (1, 3, 22)),
    ('hyphenated words-are considered whole',  (1, 4, 38)),
    ('underbars are_too just one',             (1, 4, 27)),
    ('n-dash ranges 1–3 are NOT',              (1, 5, 28)),
    ('m-dash connected—bits also are not',     (1, 5, 37)),
]

# test_string is silent on success, so no output below means every case passed.
for case_text, case_spec in STRING_CASES:
    test_string(case_text, case_spec)





In [6]:
# Step 2: Load data from file
def load_data(file_path):
    """Read ``file_path`` in binary mode and return its bytes as a uint8 array.

    The whole file is read into memory first; the returned NumPy array is a
    view over that bytes object, one element per byte.
    """
    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    return np.frombuffer(raw_bytes, dtype=np.uint8)

In [36]:
def test_file(filename):
    """Load ``filename`` and run the GPU line/word/char count over its bytes.

    Returns:
        The tuple produced by ``count_lwc`` so callers can inspect or print
        it. Callers that only time the call can ignore the return value.

    NOTE(review): with its default behaviour ``count_lwc`` adds 1 to the line
    and char totals, so the returned numbers are not the raw file totals.
    """
    return count_lwc(load_data(filename))

In [38]:
# Sample inputs, resolved relative to the notebook's working directory.
files = [
    'ascii.txt',
    'pocorgtfo18.pdf',
    'space.txt',
    'utf8.txt',
    'word.txt',
]

# Time a single load + count pass per file. The lambda runs immediately
# inside the loop iteration, so capturing the loop variable is safe.
for file in files:
    duration = timeit.Timer(lambda: test_file(file)).timeit(number=1)
    print(f"{file}: {duration} seconds")

ascii.txt: 0.1808300140000938 seconds
pocorgtfo18.pdf: 0.17899091699973724 seconds
space.txt: 0.18513659300015206 seconds
utf8.txt: 0.17958779099990352 seconds
word.txt: 0.17560361200003172 seconds
