In [None]:
# Upload the spam.csv dataset into the Colab runtime.
# The later cells load it with pd.read_csv('spam.csv') and read two columns by
# name: `email` (the message body) and `label` (the classification).
from google.colab import files
uploaded = files.upload()

In [None]:
#CUPY PREDICTION
import cupy as cp
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import time

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), applied element-wise to z."""
    denominator = 1 + cp.exp(-z)
    return 1 / denominator

# Logistic regression using CUDA
def logistic_regression(X, y, learning_rate=0.01, num_iterations=1000):
    """Fit logistic-regression weights with full-batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (num_samples, num_features)
        Feature matrix (for example token counts). Any real dtype is accepted;
        it is converted to float64 once before training.
    y : array-like, shape (num_samples,) or (num_samples, 1)
        Targets, expected to be encoded as 0/1. Column vectors are flattened.
    learning_rate : float, default 0.01
        Step size applied to the summed (not sample-averaged) gradient.
    num_iterations : int, default 1000
        Number of full-batch weight updates.

    Returns
    -------
    cupy.ndarray, shape (num_features,)
        The learned float64 weights. No intercept term is fitted.

    Raises
    ------
    ValueError
        If X is not two-dimensional, or y does not hold exactly one target per
        row of X.
    """
    # Convert once, outside the loop: the two matrix products per iteration
    # then operate on float64 operands directly instead of mixing an integer
    # count matrix with float weights every time.
    X = cp.asarray(X, dtype=cp.float64)
    # Flatten so an (n, 1) target cannot silently broadcast `errors` to (n, n).
    y = cp.asarray(y, dtype=cp.float64).ravel()

    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D (num_samples, num_features), got %d-D" % X.ndim
        )
    num_samples, num_features = X.shape
    if y.shape[0] != num_samples:
        raise ValueError(
            "y has %d targets but X has %d rows" % (y.shape[0], num_samples)
        )

    X_t = X.T  # transposed view, hoisted out of the loop
    weights = cp.zeros(num_features)

    for _ in range(num_iterations):
        predictions = sigmoid(X.dot(weights))
        errors = predictions - y
        # Gradient of the summed log-loss with respect to the weights.
        weights -= learning_rate * X_t.dot(errors)

    return weights

# Load the email data; the CSV must provide an `email` and a `label` column
data = pd.read_csv('spam.csv')

# Extract features and target
X = data['email'].values
y = data['label'].values

# Replace missing values with empty strings so the vectorizer only sees text
X = np.where(pd.isnull(X), '', X)

# Transfer target to numerical values
y = y.astype(np.int32)

# Convert features to numerical representation (dense token-count matrix)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X).toarray()

# Transfer data to the GPU
X_gpu = cp.asarray(X_vectorized)
y_gpu = cp.asarray(y)

# Train the logistic regression model using CUDA.
# CuPy queues GPU work asynchronously, so the default stream is synchronized
# before starting and before stopping the clock; otherwise the measurement can
# miss kernels that are still running. perf_counter_ns is used because it is a
# monotonic clock intended for measuring intervals.
cp.cuda.Stream.null.synchronize()
start = time.perf_counter_ns()
weights_gpu = logistic_regression(X_gpu, y_gpu)
cp.cuda.Stream.null.synchronize()
end = time.perf_counter_ns()
execution_time = (end - start) / 1000000000
print("Execution time:", execution_time, "s")

# Keep a host copy of the weights; the follow-up prediction cell reads `weights`
weights = cp.asnumpy(weights_gpu)

# Example prediction on new email
new_email = ["Get exclusive offers now! Limited time offer!"]
new_email_vectorized = vectorizer.transform(new_email).toarray()
new_email_gpu = cp.asarray(new_email_vectorized)
# Reuse the weights already resident on the GPU instead of re-uploading them
prediction_scores = new_email_gpu.dot(weights_gpu)
prediction_prob = sigmoid(prediction_scores)
# prediction_prob holds a single element, so the comparison is usable as a bool
prediction = "SPAM" if prediction_prob > 0.5 else "NOT SPAM"
print("Prediction:", prediction)



Execution time: 42.610064586 s
Prediction: SPAM


In [None]:
# Classify one more message with the CuPy model.
# Run the training cell first: this cell relies on its `vectorizer`, `weights`,
# `sigmoid` and `cp` names.
new_email = [
    "I would like to have a meeting with you to discuss our final year project and deep learning course project."
]
new_email_vectorized = vectorizer.transform(new_email).toarray()
new_email_gpu = cp.asarray(new_email_vectorized)
prediction_scores = cp.dot(new_email_gpu, cp.asarray(weights))
prediction_prob = sigmoid(prediction_scores)
# Report the class as 1 (probability above 0.5) or 0 (otherwise)
if prediction_prob > 0.5:
    prediction = 1
else:
    prediction = 0
print("Prediction:", prediction)

In [None]:
#CUPY ACCURACY TEST
import cupy as cp
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import time

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z), applied element-wise to z."""
    denominator = 1 + cp.exp(-z)
    return 1 / denominator

# Logistic regression using CUDA
def logistic_regression(X, y, learning_rate=0.01, num_iterations=1000):
    """Fit logistic-regression weights with full-batch gradient descent.

    Parameters
    ----------
    X : array-like, shape (num_samples, num_features)
        Feature matrix (for example token counts). Any real dtype is accepted;
        it is converted to float64 once before training.
    y : array-like, shape (num_samples,) or (num_samples, 1)
        Targets, expected to be encoded as 0/1. Column vectors are flattened.
    learning_rate : float, default 0.01
        Step size applied to the summed (not sample-averaged) gradient.
    num_iterations : int, default 1000
        Number of full-batch weight updates.

    Returns
    -------
    cupy.ndarray, shape (num_features,)
        The learned float64 weights. No intercept term is fitted.

    Raises
    ------
    ValueError
        If X is not two-dimensional, or y does not hold exactly one target per
        row of X.
    """
    # Convert once, outside the loop: the two matrix products per iteration
    # then operate on float64 operands directly instead of mixing an integer
    # count matrix with float weights every time.
    X = cp.asarray(X, dtype=cp.float64)
    # Flatten so an (n, 1) target cannot silently broadcast `errors` to (n, n).
    y = cp.asarray(y, dtype=cp.float64).ravel()

    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D (num_samples, num_features), got %d-D" % X.ndim
        )
    num_samples, num_features = X.shape
    if y.shape[0] != num_samples:
        raise ValueError(
            "y has %d targets but X has %d rows" % (y.shape[0], num_samples)
        )

    X_t = X.T  # transposed view, hoisted out of the loop
    weights = cp.zeros(num_features)

    for _ in range(num_iterations):
        predictions = sigmoid(X.dot(weights))
        errors = predictions - y
        # Gradient of the summed log-loss with respect to the weights.
        weights -= learning_rate * X_t.dot(errors)

    return weights

# Load the email data; the CSV must provide an `email` and a `label` column
data = pd.read_csv('spam.csv')

# Extract features and target
X = data['email'].values
y = data['label'].values

# Replace missing values with empty strings so the vectorizer only sees text
X = np.where(pd.isnull(X), '', X)

# Transfer target to numerical values
y = y.astype(np.int32)

# Convert features to numerical representation (dense token-count matrix)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X).toarray()

# Define the train-test split ratio
test_ratio = 0.2

# Generate random indices for splitting the data (fixed seed for a repeatable split)
np.random.seed(12)
num_samples = len(X_vectorized)
num_test_samples = int(test_ratio * num_samples)
test_indices = np.random.choice(num_samples, size=num_test_samples, replace=False)
train_indices = np.setdiff1d(np.arange(num_samples), test_indices)
# Every sample must land in exactly one of the two partitions
assert len(train_indices) + len(test_indices) == num_samples

# Split the data into training and test sets
X_train = X_vectorized[train_indices]
y_train = y[train_indices]
X_test = X_vectorized[test_indices]
y_test = y[test_indices]

# Transfer data to the GPU
X_train_gpu = cp.asarray(X_train)
y_train_gpu = cp.asarray(y_train)
X_test_gpu = cp.asarray(X_test)
y_test_gpu = cp.asarray(y_test)

# Train the logistic regression model using CUDA.
# CuPy queues GPU work asynchronously, so the default stream is synchronized
# before starting and before stopping the clock; otherwise the measurement can
# miss kernels that are still running. perf_counter_ns is used because it is a
# monotonic clock intended for measuring intervals.
cp.cuda.Stream.null.synchronize()
start = time.perf_counter_ns()
weights_gpu = logistic_regression(X_train_gpu, y_train_gpu)
cp.cuda.Stream.null.synchronize()
end = time.perf_counter_ns()
execution_time = (end - start) / 1000000000
print("Training execution time:", execution_time, "s")

# Keep a host copy of the weights
weights = cp.asnumpy(weights_gpu)

# Evaluate on the test set, reusing the weights already resident on the GPU
test_scores = X_test_gpu.dot(weights_gpu)
test_predictions = sigmoid(test_scores)
test_predictions = cp.asnumpy(test_predictions)
# Rounding maps probabilities above 0.5 to class 1 and the rest to class 0
test_predictions = np.round(test_predictions)

accuracy = np.mean(test_predictions == y_test)
print("Test Accuracy:", accuracy)


Training execution time: 29.562222803 s
Test Accuracy: 0.975
