In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
#from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score
from sklearn.metrics import mean_squared_error, r2_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from imblearn.over_sampling import SMOTE




# Load the dataset
data = pd.read_csv("ProcessedData.csv")

X = data.iloc[:, 2:].values  
y = data.iloc[:, 1].values

# SMOTE
smote = SMOTE(sampling_strategy=0.75)  # adjust sampling_strategy as needed
X_smote, y_smote = smote.fit_resample(X, y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# Count the number of 0s and 1s in the label after SMOTE
label_counts_smote = pd.Series(y_smote).value_counts()

print("Number of 0s after SMOTE:", label_counts_smote[0])
print("Number of 1s after SMOTE:", label_counts_smote[1])

(395760, 148) (98940, 148) (395760,) (98940,)
Number of 0s after SMOTE: 282686
Number of 1s after SMOTE: 212014


# KNN

In [3]:
# Initialize the KNN classifier
k = 8  # You can choose any value for k
knn_classifier = KNeighborsClassifier(n_neighbors=k)

# Train the classifier
knn_classifier.fit(X_train, y_train)

# Predictions on the test set
y_pred = knn_classifier.predict(X_test)

# Evaluate the model

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the metrics
print("Accuracy:", accuracy)
print("Recall:", recall)
print("Precision:", precision)
print("F1 Score:", f1)
print("AUC:", auc)

# Additional evaluation metrics
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.7695472003234284
Recall: 0.989975762995035
Precision: 0.6528101045836825
F1 Score: 0.7867929644764035
AUC: 0.7967790690672782

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.60      0.75     56443
           1       0.65      0.99      0.79     42497

    accuracy                           0.77     98940
   macro avg       0.82      0.80      0.77     98940
weighted avg       0.84      0.77      0.77     98940



# Linear Regression

In [13]:
# Create a Linear Regression model
linear_reg = LinearRegression()

# Train the Linear Regression model
linear_reg.fit(X_train, y_train)

# Make predictions using Linear Regression
linear_reg_y_pred = linear_reg.predict(X_test)

# Convert predictions to binary using a threshold of 0.5
threshold = 0.5
binary_predictions = (linear_reg_y_pred > threshold).astype(int)

# Evaluate the Linear Regression model
linear_reg_mse = mean_squared_error(y_test, binary_predictions)
linear_reg_r2 = r2_score(y_test, binary_predictions)

print("\nLinear Regression Mean Squared Error:", linear_reg_mse)
print("Linear Regression R-squared Score:", linear_reg_r2)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Linear Regression Mean Squared Error: 0.34385486153224176
Linear Regression R-squared Score: -0.40330030376804427

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.23      0.35     56443
           1       0.47      0.89      0.61     42497

    accuracy                           0.51     98940
   macro avg       0.60      0.56      0.48     98940
weighted avg       0.62      0.51      0.46     98940



# Logistic Regression

In [17]:
logistic_reg = LogisticRegression(max_iter=5000)

# Train the Logistic Regression model
logistic_reg.fit(X_train, y_train)

# Make predictions using Logistic Regression
logistic_reg_y_pred = logistic_reg.predict(X_test)

conf_matrix = confusion_matrix(y_test, logistic_reg_y_pred)
tn, fp, fn, tp = conf_matrix.ravel()

# Output TP, FP, TN, FN
print("\nTrue Positives (TP):", tp)
print("False Positives (FP):", fp)
print("True Negatives (TN):", tn)
print("False Negatives (FN):", fn)

positive_predictions = (logistic_reg_y_pred == 1).sum()
negative_predictions = (logistic_reg_y_pred == 0).sum()
print("\nPositive Predictions:", positive_predictions)
print("Negative Predictions:", negative_predictions)

# Evaluate Logistic Regression model
logistic_reg_accuracy = accuracy_score(y_test, logistic_reg_y_pred)
logistic_reg_accuracy = accuracy_score(y_test, logistic_reg_y_pred)
logistic_reg_recall = recall_score(y_test, logistic_reg_y_pred)
logistic_reg_precision = precision_score(y_test, logistic_reg_y_pred)
logistic_reg_f1 = f1_score(y_test, logistic_reg_y_pred)
logistic_reg_auc = roc_auc_score(y_test, logistic_reg_y_pred)

print("\nLogistic Regression Accuracy:", logistic_reg_accuracy)
print("Logistic Regression Recall:", logistic_reg_recall)
print("Logistic Regression Precision:", logistic_reg_precision)
print("Logistic Regression F1 Score:", logistic_reg_f1)
print("Logistic Regression AUC:", logistic_reg_auc)

print("\nLogistic Regression Classification Report:")
print(classification_report(y_test, logistic_reg_y_pred))

mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared Score:", r2_sc)


True Positives (TP): 21939
False Positives (FP): 13491
True Negatives (TN): 42952
False Negatives (FN): 20558

Positive Predictions: 35430
Negative Predictions: 63510

Logistic Regression Accuracy: 0.655862138669901
Logistic Regression Recall: 0.5162482057557004
Logistic Regression Precision: 0.6192209991532599
Logistic Regression F1 Score: 0.56306543303348
Logistic Regression AUC: 0.6386141547886274

Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.76      0.72     56443
           1       0.62      0.52      0.56     42497

    accuracy                           0.66     98940
   macro avg       0.65      0.64      0.64     98940
weighted avg       0.65      0.66      0.65     98940


Mean Squared Error: 0.485961188599151
R-squared Score: -0.9832480498948102


# Decision Trees

In [6]:
# Create a Random Forest classifier
dt_classifier = DecisionTreeClassifier()

# Train the classifier on the training data
dt_classifier.fit(X_train, y_train)

# Predict on the testing data
y_pred = dt_classifier.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("Decision Tree Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared Score:", r2_sc)

Decision Tree Classifier Metrics:
Accuracy: 0.8885991510006064
Precision: 0.8614077391204501
Recall: 0.8826505400381204
F1 Score: 0.8718997698798262
AUC: 0.88786425625296

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.89      0.90     56443
           1       0.86      0.88      0.87     42497

    accuracy                           0.89     98940
   macro avg       0.89      0.89      0.89     98940
weighted avg       0.89      0.89      0.89     98940


Mean Squared Error: 0.11140084899939358
R-squared Score: 0.5453638650206818


# Random Forest

In [7]:
# Create a Random Forest classifier
rf_classifier = RandomForestClassifier()

# Train the classifier on the training data
rf_classifier.fit(X_train, y_train)

# Predict on the testing data
y_pred = rf_classifier.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("Random Forest Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared Score:", r2_sc)

Random Forest Classifier Metrics:
Accuracy: 0.95252678390944
Precision: 0.9986281131279021
Recall: 0.8906981669294303
F1 Score: 0.9415803286028781
AUC: 0.9448884417553801

Classification Report:
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     56443
           1       1.00      0.89      0.94     42497

    accuracy                           0.95     98940
   macro avg       0.96      0.94      0.95     98940
weighted avg       0.96      0.95      0.95     98940


Mean Squared Error: 0.04747321609055993
R-squared Score: 0.806257854654522


# Naive Bayes

In [12]:
# Create a Gaussian Naive Bayes classifier
nb_classifier = GaussianNB()

# Train the classifier on the training data
nb_classifier.fit(X_train, y_train)

# Predict on the testing data
y_pred = nb_classifier.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("Naive Bayes Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared Score:", r2_sc)


Naive Bayes Classifier Metrics:
Accuracy: 0.514038811400849
Precision: 0.46546349669726134
Recall: 0.8854507376991317
F1 Score: 0.6101719650718751
AUC: 0.5599232498977029

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.23      0.35     56443
           1       0.47      0.89      0.61     42497

    accuracy                           0.51     98940
   macro avg       0.60      0.56      0.48     98940
weighted avg       0.62      0.51      0.46     98940


Mean Squared Error: 0.485961188599151
R-squared Score: -0.9832480498948102


# Bayesian Networks

In [17]:
import numpy as np
from pomegranate.bayesian_network import BayesianNetwork
from pgmpy.estimators import HillClimbSearch, BicScore
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator

# Convert X_train and y_train to pandas DataFrames
X_train_df = pd.DataFrame(X_train)
y_train_df = pd.DataFrame(y_train, columns=['label'])

# Combine X_train_df and y_train_df into a single DataFrame for model training
train_data = pd.concat([X_train_df, y_train_df], axis=1)

# Use Hill Climb Search to find the best model structure
hc = HillClimbSearch(train_data)

# Create a BicScore instance
bic = BicScore(train_data)

# Estimate the best model structure
best_model_structure = hc.estimate(scoring_method=bic)

# Create a Bayesian Model with the best structure
model = BayesianModel(best_model_structure.edges())

# Fit the model to the data
model.fit(train_data, estimator=MaximumLikelihoodEstimator)

# Now you can use the model for inference, prediction, etc.

# Perform inference (e.g., predicting on test data)
y_pred = model.predict(X_test)

# Now you can proceed with calculating evaluation metrics, printing results, etc.

# Calculate the evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("Naive Bayes Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

  0%|          | 0/1000000 [00:00<?, ?it/s]

: 

# Neural Networks

In [18]:
from sklearn.neural_network import MLPClassifier

# Create a Multilayer Perceptron classifier
mlp_classifier = MLPClassifier()

# Train the classifier on the training data
mlp_classifier.fit(X_train, y_train)

# Predict on the testing data
y_pred = mlp_classifier.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("Neural Network Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)

print("\nMean Squared Error:", mse)
print("R-squared Score:", r2_sc)




Neural Network Classifier Metrics:
Accuracy: 0.8877501516070345
Precision: 0.9064191201222196
Recall: 0.823705202720192
F1 Score: 0.8630849647418511
AUC: 0.8798380025613078

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.94      0.90     56443
           1       0.91      0.82      0.86     42497

    accuracy                           0.89     98940
   macro avg       0.89      0.88      0.88     98940
weighted avg       0.89      0.89      0.89     98940


Mean Squared Error: 0.11224984839296544
R-squared Score: 0.541899027846098


# Convolutional Neural Networks

# Support Vector Machines

In [8]:
svm_model = SVC(kernel='rbf', C=1.0, gamma='auto', random_state=42, n_jobs=3)
svm_model.fit(X_train, y_train)

# Predictions
y_pred_train = svm_model.predict(X_train)
y_pred_test = svm_model.predict(X_test)

# Evaluate the model
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

# Classification report
print("\nClassification Report on Test Data:")
print(classification_report(y_test, y_pred_test))

Train Accuracy: 0.7825854052961391
Test Accuracy: 0.7801596927430766

Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.77      0.87      0.82     56443
           1       0.80      0.66      0.72     42497

    accuracy                           0.78     98940
   macro avg       0.78      0.76      0.77     98940
weighted avg       0.78      0.78      0.78     98940



In [18]:
from cuml.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.preprocessing import LabelEncoder
import numpy as np
import cudf


le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
# Initialize and train the SVM classifier on GPU
print("Unique values in y_train:", np.unique(y_train))
print("Unique values in y_test:", np.unique(y_test))
svm = SVC()
svm.fit(X_train, y_train)

# Predict on the test set
y_pred = svm.predict(X_test)

# Calculate the evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_pred)

# Print the evaluation metrics
print("SVM Classifier Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("AUC:", auc)

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Unique values in y_train: [0 1]
Unique values in y_test: [0 1]


RuntimeError: exception occurred! file=/opt/conda/conda-bld/work/cpp/src/svm/svc_impl.cuh line=74: Only binary classification is implemented at the moment
Obtained 64 stack frames
#0 in /home/rustom/anaconda3/envs/rapids-24.02/lib/python3.10/site-packages/cuml/internals/../../../../libcuml++.so(_ZN4raft9exception18collect_call_stackEv+0x84) [0x7fc2b7f9a734]
#1 in /home/rustom/anaconda3/envs/rapids-24.02/lib/python3.10/site-packages/cuml/internals/../../../../libcuml++.so(_ZN2ML3SVM7svcFitXIdNSt12experimental6mdspanIdNS2_7extentsIiJLm18446744073709551615ELm18446744073709551615EEEENS2_13layout_strideEN4raft20host_device_accessorINS2_16default_accessorIdEELNS7_11memory_typeE2EEEEEEEvRKNS7_8handle_tET0_iiPT_RKNS0_12SvmParameterERNS7_8distance7kernels12KernelParamsERNS0_8SvmModelISI_EEPKSI_+0x457) [0x7fc2b8b57397]
#2 in /home/rustom/anaconda3/envs/rapids-24.02/lib/python3.10/site-packages/cuml/internals/../../../../libcuml++.so(_ZN2ML3SVM6svcFitIdEEvRKN4raft8handle_tEPT_iiS7_RKNS0_12SvmParameterERNS2_8distance7kernels12KernelParamsERNS0_8SvmModelIS6_EEPKS6_+0x4b) [0x7fc2b8b5793b]
#3 in /home/rustom/anaconda3/envs/rapids-24.02/lib/python3.10/site-packages/cuml/svm/svc.cpython-310-x86_64-linux-gnu.so(+0x49e42) [0x7fc285dede42]
#4 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x2d83) [0x557a813cd2b3]
#5 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x150582) [0x557a813e6582]
#6 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x4c12) [0x557a813cf142]
#7 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1d7c60) [0x557a8146dc60]
#8 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(PyEval_EvalCode+0x87) [0x557a8146dba7]
#9 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1dedaa) [0x557a81474daa]
#10 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x144bf3) [0x557a813dabf3]
#11 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x320) [0x557a813ca850]
#12 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#13 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#14 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#15 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#16 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#17 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1f5a37) [0x557a8148ba37]
#18 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x14f53d) [0x557a813e553d]
#19 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#20 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#21 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x320) [0x557a813ca850]
#22 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#23 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#24 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x150582) [0x557a813e6582]
#25 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(PyObject_Call+0xbc) [0x557a813e6f1c]
#26 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x2d83) [0x557a813cd2b3]
#27 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x150582) [0x557a813e6582]
#28 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x13ca) [0x557a813cb8fa]
#29 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#30 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#31 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#32 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#33 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#34 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#35 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#36 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#37 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#38 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x1bbd) [0x557a813cc0ed]
#39 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1e1384) [0x557a81477384]
#40 in /home/rustom/anaconda3/envs/rapids-24.02/lib/python3.10/lib-dynload/_asyncio.cpython-310-x86_64-linux-gnu.so(+0x7bf6) [0x7fc3c9147bf6]
#41 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x143e8a) [0x557a813d9e8a]
#42 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x25f60c) [0x557a814f560c]
#43 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0xfdd90) [0x557a81393d90]
#44 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x13c2a3) [0x557a813d22a3]
#45 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x5cd5) [0x557a813d0205]
#46 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#47 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#48 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#49 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#50 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#51 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#52 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#53 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#54 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]
#55 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x72c) [0x557a813cac5c]
#56 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x150582) [0x557a813e6582]
#57 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x4c12) [0x557a813cf142]
#58 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1d7c60) [0x557a8146dc60]
#59 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(PyEval_EvalCode+0x87) [0x557a8146dba7]
#60 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x1dedaa) [0x557a81474daa]
#61 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(+0x144bf3) [0x557a813dabf3]
#62 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyEval_EvalFrameDefault+0x320) [0x557a813ca850]
#63 in /home/rustom/anaconda3/envs/rapids-24.02/bin/python(_PyFunction_Vectorcall+0x6c) [0x557a813daa2c]


In [5]:
import cudf
import numpy as np
from cuml.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
data_pd = pd.read_csv("ProcessedData.csv")

# Convert the pandas DataFrame to a cuDF DataFrame
data = cudf.DataFrame.from_pandas(data_pd)

print("Shape of data:", data.shape)

# Extract features and target variable
X = data.iloc[:, 2:].values  
y = data.iloc[:, 1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to cuDF DataFrame
X_train_cudf = cudf.DataFrame(X_train)
X_test_cudf = cudf.DataFrame(X_test)
y_train_cudf = cudf.Series(y_train)
y_test_cudf = cudf.Series(y_test)

# Initialize and train the KNN classifier on GPU
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_cudf, y_train_cudf)

# Predict on the test set
y_pred = knn.predict(X_test_cudf)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

RuntimeError: This program was not compiled for SM 61  
: cudaErrorInvalidDevice: invalid device ordinal