In [31]:
%%writefile naive.cu

Overwriting naive.cu


In [32]:
%%writefile naive.cu
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "NaiveBayesKernel.cuh"
#include "NaiveBayesTrain.cuh"

#define SHARED_SIZE 20

// CUDA kernel that accumulates the raw counts needed for Naive Bayes training.
//
// It produces COUNTS, not probabilities: d_priors[c] receives the number of
// samples with class c, and d_likelihoods[c][f][v] (flattened as
// c * numFeatures * numFeatureValues + f * numFeatureValues + v) receives the
// number of class-c samples whose feature f has value v. The caller is
// expected to normalise the counts.
//
// Launch layout: 1D grid of 1D blocks, one thread per sample; threads whose
// global index is >= numSamples do no counting. Static shared memory use is
// 2 * SHARED_SIZE ints per block.
//
// Parameters:
//   d_Dataset        row-major samples, numFeatures + 1 ints per row, class
//                    label in the last column.
//   d_priors         numClasses counters, added to with atomicAdd.
//   d_likelihoods    numClasses * numFeatures * numFeatureValues counters,
//                    added to with atomicAdd.
//   numSamples, numFeatures, numClasses, numFeatureValues
//                    dimensions of the dataset and of the count tables.
//
// Rows whose class label, and features whose value, fall outside the declared
// ranges are ignored instead of indexing out of bounds.
__global__ void computePriorsAndLikelihood(
    int* d_Dataset, int* d_priors, int* d_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
) {
    int threadId = blockIdx.x * blockDim.x + threadIdx.x;

    __shared__ int local_d_priors[SHARED_SIZE];
    __shared__ int local_d_likelihoods[SHARED_SIZE];

    // The per-block staging tables only hold SHARED_SIZE entries each. When the
    // model is larger, count straight into global memory instead. The decision
    // depends only on kernel arguments, so it is uniform across the block and
    // the barriers below are reached by every thread.
    const long long totalLikelihoods =
        (long long)numClasses * numFeatures * numFeatureValues;
    const bool useShared =
        (numClasses <= SHARED_SIZE) && (totalLikelihoods <= SHARED_SIZE);

    if (useShared) {
        // Shared memory is uninitialised: zero it before any atomicAdd.
        for (int i = threadIdx.x; i < SHARED_SIZE; i += blockDim.x) {
            local_d_priors[i] = 0;
            local_d_likelihoods[i] = 0;
        }
    }
    __syncthreads();

    int* priorCounts = useShared ? local_d_priors : d_priors;
    int* likelihoodCounts = useShared ? local_d_likelihoods : d_likelihoods;

    // If the thread is within bounds
    if (threadId < numSamples) {
        // Each thread processes one data sample. The row offset is computed in
        // size_t so large datasets do not overflow int arithmetic.
        const size_t rowStart = (size_t)threadId * (size_t)(numFeatures + 1);
        const int classLabel = d_Dataset[rowStart + numFeatures]; // Class label is in the last column

        if (classLabel >= 0 && classLabel < numClasses) {
            // Atomic update to calculate the prior
            atomicAdd(&priorCounts[classLabel], 1);

            // Compute likelihood for each feature
            for (int fIdx = 0; fIdx < numFeatures; ++fIdx) {
                const int featureValue = d_Dataset[rowStart + fIdx];
                if (featureValue < 0 || featureValue >= numFeatureValues) {
                    continue; // value outside the table: skip rather than corrupt memory
                }
                const int likelihoodIndex =
                    classLabel * numFeatures * numFeatureValues + (fIdx * numFeatureValues) + featureValue;

                // Atomic update to the likelihood matrix
                atomicAdd(&likelihoodCounts[likelihoodIndex], 1);
            }
        }
    }

    // Synchronize threads before writing shared results back to global memory
    __syncthreads();

    // Flush the block-local counts; the threads of the block share the work.
    if (useShared) {
        for (int c = threadIdx.x; c < numClasses; c += blockDim.x) {
            if (local_d_priors[c] != 0) {
                atomicAdd(&d_priors[c], local_d_priors[c]);
            }
        }

        const int numLikelihoods = (int)totalLikelihoods;
        for (int l = threadIdx.x; l < numLikelihoods; l += blockDim.x) {
            if (local_d_likelihoods[l] != 0) {
                atomicAdd(&d_likelihoods[l], local_d_likelihoods[l]);
            }
        }
    }
}

Overwriting naive.cu


In [33]:
%%writefile NaiveBayesKernel.cuh
#ifndef NAIVE_BAYES_KERNEL_CUH
#define NAIVE_BAYES_KERNEL_CUH

// Counting kernel for Naive Bayes training (definition lives in naive.cu).
//
// Each thread whose global index (blockIdx.x * blockDim.x + threadIdx.x) is
// below numSamples handles one row of d_Dataset. A row holds numFeatures + 1
// ints with the class label in the last column.
//
// Results are added with atomicAdd, so both output arrays must hold the
// desired starting values (normally zero) before the launch:
//   d_priors[c]      - number of samples with class c (numClasses entries)
//   d_likelihoods[i] - count for i = c * numFeatures * numFeatureValues
//                                    + f * numFeatureValues + v
__global__ void computePriorsAndLikelihood(
    int* d_Dataset, int* d_priors, int* d_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
);

#endif

Overwriting NaiveBayesKernel.cuh


In [34]:
%%writefile NaiveBayesTrain.cuh
#include <cuda_runtime.h>
#include "NaiveBayesTrain.cuh"
#include "NaiveBayesKernel.cuh"

// Host-side driver: uploads the dataset and the current count tables, runs
// computePriorsAndLikelihood on the GPU, and downloads the updated counts.
//
// Parameters:
//   h_Dataset      numSamples rows of numFeatures + 1 ints, class label last.
//   h_priors       numClasses counters; the kernel adds to the uploaded values,
//                  so pass zeros for a fresh training run.
//   h_likelihoods  numClasses * numFeatures * numFeatureValues counters, same
//                  accumulate-on-top behaviour.
//   numSamples, numFeatures, numClasses, numFeatureValues
//                  dimensions of the dataset and tables.
//
// The function returns void, so failures cannot be reported to the caller:
// when any CUDA call fails the remaining steps are skipped and the device
// buffers are released. If the failure happens before the copy-back the host
// tables are left exactly as they were passed in.
void trainNaiveBayes(
    int* h_Dataset, int* h_priors, int* h_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
) {
    // Nothing to count (a zero-block launch is invalid) or nonsensical sizes
    // that would wrap when converted to a byte count.
    if (numSamples <= 0 || numFeatures < 0 || numClasses < 0 || numFeatureValues < 0) {
        return;
    }

    // Byte sizes are computed in size_t so large datasets do not overflow int.
    const size_t datasetSize =
        (size_t)numSamples * (size_t)(numFeatures + 1) * sizeof(int); // +1 for the class label
    const size_t priorsSize = (size_t)numClasses * sizeof(int);
    const size_t likelihoodsSize =
        (size_t)numClasses * (size_t)numFeatures * (size_t)numFeatureValues * sizeof(int);

    // Device pointers start as nullptr so the cleanup below is safe on every
    // path (cudaFree(nullptr) is a no-op).
    int* d_Dataset = nullptr;
    int* d_priors = nullptr;
    int* d_likelihoods = nullptr;

    // Allocate and upload; && short-circuits on the first failure.
    const bool uploaded =
        cudaMalloc((void**)&d_Dataset, datasetSize) == cudaSuccess &&
        cudaMalloc((void**)&d_priors, priorsSize) == cudaSuccess &&
        cudaMalloc((void**)&d_likelihoods, likelihoodsSize) == cudaSuccess &&
        cudaMemcpy(d_Dataset, h_Dataset, datasetSize, cudaMemcpyHostToDevice) == cudaSuccess &&
        cudaMemcpy(d_priors, h_priors, priorsSize, cudaMemcpyHostToDevice) == cudaSuccess &&
        cudaMemcpy(d_likelihoods, h_likelihoods, likelihoodsSize, cudaMemcpyHostToDevice) == cudaSuccess;

    if (uploaded) {
        // One thread per sample, rounded up to whole blocks (numSamples > 0 here).
        const int threadsPerBlock = 256;
        const int numBlocks = (numSamples - 1) / threadsPerBlock + 1;

        computePriorsAndLikelihood<<<numBlocks, threadsPerBlock>>>(
            d_Dataset, d_priors, d_likelihoods,
            numSamples, numFeatures, numClasses, numFeatureValues
        );

        // cudaGetLastError catches launch-configuration errors; the blocking
        // cudaMemcpy calls wait for the kernel and surface execution errors.
        if (cudaGetLastError() == cudaSuccess &&
            cudaMemcpy(h_priors, d_priors, priorsSize, cudaMemcpyDeviceToHost) == cudaSuccess) {
            (void)cudaMemcpy(h_likelihoods, d_likelihoods, likelihoodsSize, cudaMemcpyDeviceToHost);
        }
    }

    // Free GPU memory on every path
    cudaFree(d_Dataset);
    cudaFree(d_priors);
    cudaFree(d_likelihoods);
}

Overwriting NaiveBayesTrain.cuh


In [35]:
%%writefile NaiveBayesTrain.cuh
#ifndef NAIVE_BAYES_TRAIN_CUH
#define NAIVE_BAYES_TRAIN_CUH

// Runs the GPU counting pass for Naive Bayes training.
//
// h_Dataset holds numSamples rows of numFeatures + 1 ints (class label in the
// last column). h_priors (numClasses ints) and h_likelihoods
// (numClasses * numFeatures * numFeatureValues ints) are copied to the device
// before the kernel runs and copied back afterwards; the kernel adds counts on
// top of the uploaded values, so pass zero-filled arrays for a fresh run.
void trainNaiveBayes(
    int* h_Dataset, int* h_priors, int* h_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
);

#endif

Overwriting NaiveBayesTrain.cuh


In [36]:
%%writefile naive.cu
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "NaiveBayesKernel.cuh"
#include "NaiveBayesTrain.cuh"

#define SHARED_SIZE 20

// CUDA kernel that accumulates the raw counts needed for Naive Bayes training
// (debug build: every processed sample and feature is traced with printf).
//
// It produces COUNTS, not probabilities: d_priors[c] receives the number of
// samples with class c, and d_likelihoods[c][f][v] (flattened as
// c * numFeatures * numFeatureValues + f * numFeatureValues + v) receives the
// number of class-c samples whose feature f has value v. main() below turns
// the counts into probabilities.
//
// Launch layout: 1D grid of 1D blocks, one thread per sample; threads whose
// global index is >= numSamples do no counting. Static shared memory use is
// 2 * SHARED_SIZE ints per block.
//
// Parameters:
//   d_Dataset        row-major samples, numFeatures + 1 ints per row, class
//                    label in the last column.
//   d_priors         numClasses counters, added to with atomicAdd.
//   d_likelihoods    numClasses * numFeatures * numFeatureValues counters,
//                    added to with atomicAdd.
//   numSamples, numFeatures, numClasses, numFeatureValues
//                    dimensions of the dataset and of the count tables.
//
// Class labels and feature values outside the declared ranges are still
// traced but are not counted, instead of indexing out of bounds.
__global__ void computePriorsAndLikelihood(
    int* d_Dataset, int* d_priors, int* d_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
) {
    int threadId = blockIdx.x * blockDim.x + threadIdx.x;

    __shared__ int local_d_priors[SHARED_SIZE];
    __shared__ int local_d_likelihoods[SHARED_SIZE];

    // The per-block staging tables only hold SHARED_SIZE entries each. When the
    // model is larger, count straight into global memory instead. The decision
    // depends only on kernel arguments, so it is uniform across the block and
    // the barriers below are reached by every thread.
    const long long totalLikelihoods =
        (long long)numClasses * numFeatures * numFeatureValues;
    const bool useShared =
        (numClasses <= SHARED_SIZE) && (totalLikelihoods <= SHARED_SIZE);

    if (useShared) {
        // Shared memory is uninitialised: zero it before any atomicAdd.
        for (int i = threadIdx.x; i < SHARED_SIZE; i += blockDim.x) {
            local_d_priors[i] = 0;
            local_d_likelihoods[i] = 0;
        }
    }
    __syncthreads();

    int* priorCounts = useShared ? local_d_priors : d_priors;
    int* likelihoodCounts = useShared ? local_d_likelihoods : d_likelihoods;

    // If the thread is within bounds
    if (threadId < numSamples) {
        // Each thread processes one data sample. The row offset is computed in
        // size_t so large datasets do not overflow int arithmetic.
        const size_t rowStart = (size_t)threadId * (size_t)(numFeatures + 1);
        const int classLabel = d_Dataset[rowStart + numFeatures]; // Class label is in the last column
        const bool labelValid = (classLabel >= 0) && (classLabel < numClasses);

        // Debug trace: thread ID and class label (device printf is slow; debugging only)
        printf("Thread %d processing sample with class label %d\n", threadId, classLabel);

        // Atomic update to calculate the prior
        if (labelValid) {
            atomicAdd(&priorCounts[classLabel], 1);
        }

        // Compute likelihood for each feature
        for (int fIdx = 0; fIdx < numFeatures; ++fIdx) {
            const int featureValue = d_Dataset[rowStart + fIdx];

            // Debug trace: feature index and value
            printf("  Thread %d processing feature %d with value %d\n", threadId, fIdx, featureValue);

            // Only in-range (label, value) pairs map to a valid table cell.
            if (labelValid && featureValue >= 0 && featureValue < numFeatureValues) {
                const int likelihoodIndex =
                    classLabel * numFeatures * numFeatureValues + (fIdx * numFeatureValues) + featureValue;

                // Atomic update to the likelihood matrix
                atomicAdd(&likelihoodCounts[likelihoodIndex], 1);
            }
        }
    }

    // Synchronize threads before writing shared results back to global memory
    __syncthreads();

    // Flush the block-local counts; the threads of the block share the work.
    if (useShared) {
        for (int c = threadIdx.x; c < numClasses; c += blockDim.x) {
            if (local_d_priors[c] != 0) {
                atomicAdd(&d_priors[c], local_d_priors[c]);
            }
        }

        const int numLikelihoods = (int)totalLikelihoods;
        for (int l = threadIdx.x; l < numLikelihoods; l += blockDim.x) {
            if (local_d_likelihoods[l] != 0) {
                atomicAdd(&d_likelihoods[l], local_d_likelihoods[l]);
            }
        }
    }
}

// Demo driver: trains on a tiny hard-coded dataset and prints the resulting
// prior and conditional probabilities derived from the GPU counts.
int main() {
    // Example Dataset: Each row is a sample with features, last column is the class label
    const int numSamples = 6;
    const int numFeatures = 2;
    const int numClasses = 2;
    const int numFeatureValues = 3; // Assuming features can take values 0, 1, 2

    int h_Dataset[numSamples][numFeatures + 1] = {
        {0, 1, 1}, // Feature 0=0, Feature 1=1, Class Label=1
        {1, 1, 1}, // Feature 0=1, Feature 1=1, Class Label=1
        {2, 2, 0}, // etc.
        {1, 0, 1},
        {0, 2, 0},
        {2, 1, 1}
    };

    // Count tables must start at zero: training adds on top of these values.
    int h_priors[numClasses] = {0};
    int h_likelihoods[numClasses * numFeatures * numFeatureValues] = {0};

    // Train the Naive Bayes model
    trainNaiveBayes(
        (int*)h_Dataset, h_priors, h_likelihoods,
        numSamples, numFeatures, numClasses, numFeatureValues
    );

    // Print priors: class count divided by the total number of samples
    printf("Priors:\n");
    for (int c = 0; c < numClasses; ++c) {
        printf("Class %d: %f\n", c, (float)h_priors[c] / numSamples);
    }

    // Print likelihoods: per-class feature/value count divided by the class count
    printf("\nLikelihoods:\n");
    for (int c = 0; c < numClasses; ++c) {
        printf("Class %d:\n", c);
        const int classCount = h_priors[c];
        for (int f = 0; f < numFeatures; ++f) {
            for (int v = 0; v < numFeatureValues; ++v) {
                int index = c * numFeatures * numFeatureValues + f * numFeatureValues + v;
                // A class with no samples (or a failed training run) has a zero
                // count; report 0 instead of printing NaN/inf from a division by zero.
                const float likelihood =
                    (classCount > 0) ? (float)h_likelihoods[index] / classCount : 0.0f;
                printf("Feature %d Value %d: %f\n", f, v, likelihood);
            }
        }
        printf("\n");
    }

    return 0;
}

Overwriting naive.cu


In [37]:

%%writefile NaiveBayesTrain.cu
#include "NaiveBayesTrain.cuh"
#include "NaiveBayesKernel.cuh"
#include <cstdio>
#include <cuda_runtime.h>

// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaCallSucceeded(cudaError_t status, const char* step) {
    if (status == cudaSuccess) {
        return true;
    }
    std::fprintf(stderr, "trainNaiveBayes: %s failed: %s\n", step, cudaGetErrorString(status));
    return false;
}

// Host-side driver: uploads the dataset and the current count tables, runs
// computePriorsAndLikelihood on the GPU, and downloads the updated counts.
//
// Parameters:
//   h_Dataset      numSamples rows of numFeatures + 1 ints, class label last.
//   h_priors       numClasses counters; the kernel adds to the uploaded values,
//                  so pass zeros for a fresh training run.
//   h_likelihoods  numClasses * numFeatures * numFeatureValues counters, same
//                  accumulate-on-top behaviour.
//   numSamples, numFeatures, numClasses, numFeatureValues
//                  dimensions of the dataset and tables.
//
// On any CUDA failure a message is written to stderr, the remaining steps are
// skipped and the device buffers are released. If the failure happens before
// the copy-back the host tables are left exactly as they were passed in.
void trainNaiveBayes(
    int* h_Dataset, int* h_priors, int* h_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
) {
    // Nothing to count (a zero-block launch is invalid) or nonsensical sizes
    // that would wrap when converted to a byte count.
    if (numSamples <= 0 || numFeatures < 0 || numClasses < 0 || numFeatureValues < 0) {
        return;
    }

    // Byte sizes are computed in size_t so large datasets do not overflow int.
    const size_t datasetSize =
        (size_t)numSamples * (size_t)(numFeatures + 1) * sizeof(int); // +1 for the class label
    const size_t priorsSize = (size_t)numClasses * sizeof(int);
    const size_t likelihoodsSize =
        (size_t)numClasses * (size_t)numFeatures * (size_t)numFeatureValues * sizeof(int);

    // Device pointers start as nullptr so the cleanup below is safe on every
    // path (cudaFree(nullptr) is a no-op).
    int* d_Dataset = nullptr;
    int* d_priors = nullptr;
    int* d_likelihoods = nullptr;

    // Allocate memory on the GPU and copy data from host to device;
    // && short-circuits on the first failure.
    const bool uploaded =
        cudaCallSucceeded(cudaMalloc((void**)&d_Dataset, datasetSize), "cudaMalloc(d_Dataset)") &&
        cudaCallSucceeded(cudaMalloc((void**)&d_priors, priorsSize), "cudaMalloc(d_priors)") &&
        cudaCallSucceeded(cudaMalloc((void**)&d_likelihoods, likelihoodsSize), "cudaMalloc(d_likelihoods)") &&
        cudaCallSucceeded(cudaMemcpy(d_Dataset, h_Dataset, datasetSize, cudaMemcpyHostToDevice),
                          "copying dataset to device") &&
        cudaCallSucceeded(cudaMemcpy(d_priors, h_priors, priorsSize, cudaMemcpyHostToDevice),
                          "copying priors to device") &&
        cudaCallSucceeded(cudaMemcpy(d_likelihoods, h_likelihoods, likelihoodsSize, cudaMemcpyHostToDevice),
                          "copying likelihoods to device");

    if (uploaded) {
        // One thread per sample, rounded up to whole blocks (numSamples > 0 here).
        const int threadsPerBlock = 256;
        const int numBlocks = (numSamples - 1) / threadsPerBlock + 1;

        // Launch the CUDA kernel
        computePriorsAndLikelihood<<<numBlocks, threadsPerBlock>>>(
            d_Dataset, d_priors, d_likelihoods,
            numSamples, numFeatures, numClasses, numFeatureValues
        );

        // cudaGetLastError catches launch-configuration errors; the blocking
        // cudaMemcpy calls wait for the kernel and surface execution errors.
        if (cudaCallSucceeded(cudaGetLastError(), "kernel launch") &&
            cudaCallSucceeded(cudaMemcpy(h_priors, d_priors, priorsSize, cudaMemcpyDeviceToHost),
                              "copying priors to host")) {
            cudaCallSucceeded(cudaMemcpy(h_likelihoods, d_likelihoods, likelihoodsSize, cudaMemcpyDeviceToHost),
                              "copying likelihoods to host");
        }
    }

    // Free GPU memory on every path
    cudaFree(d_Dataset);
    cudaFree(d_priors);
    cudaFree(d_likelihoods);
}

Overwriting NaiveBayesTrain.cu


In [41]:
%%writefile NaiveBayesTrain.cuh
#ifndef NAIVE_BAYES_TRAIN_CUH
#define NAIVE_BAYES_TRAIN_CUH

#include <iostream>

// Runs the GPU counting pass for Naive Bayes training.
//
// h_Dataset holds numSamples rows of numFeatures + 1 ints (class label in the
// last column). h_priors (numClasses ints) and h_likelihoods
// (numClasses * numFeatures * numFeatureValues ints) are copied to the device
// before the kernel runs and copied back afterwards; the kernel adds counts on
// top of the uploaded values, so pass zero-filled arrays for a fresh run.
void trainNaiveBayes(
    int* h_Dataset, int* h_priors, int* h_likelihoods,
    int numSamples, int numFeatures, int numClasses, int numFeatureValues
);

#endif

Overwriting NaiveBayesTrain.cuh


In [44]:
%%writefile main.cpp
#include <iostream>
#include "NaiveBayesTrain.cuh"

// Smoke test for trainNaiveBayes: trains on a tiny binary dataset and reports
// completion. Every buffer is sized from the same constants that are passed to
// trainNaiveBayes, which copies numSamples * (numFeatures + 1) ints in and
// numClasses * numFeatures * numFeatureValues ints back out.
int main() {
    const int numSamples = 6;
    const int numFeatures = 2;
    const int numClasses = 2;
    const int numFeatureValues = 2; // features take the values 0 or 1

    // Row-major samples: feature 0, feature 1, class label (last column).
    int dataset[numSamples * (numFeatures + 1)] = {
        0, 1, 1,
        1, 1, 1,
        1, 0, 0,
        0, 0, 0,
        1, 1, 1,
        0, 1, 0
    };
    // Count tables start at zero because training adds on top of them.
    int priors[numClasses] = {0};
    int likelihoods[numClasses * numFeatures * numFeatureValues] = {0};

    trainNaiveBayes(dataset, priors, likelihoods,
                    numSamples, numFeatures, numClasses, numFeatureValues);

    std::cout << "Program finished!" << std::endl;
    return 0;
}

Writing main.cpp
