# Machine Learning algorithms

In [27]:
import pandas as pd
import sklearn
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [15]:
data=pd.read_csv('df.csv')
print(data)

            AA        AR        AN        AD        AC        AQ        AE  \
0     0.014414  0.007207  0.000000  0.007207  0.007207  0.007207  0.000000   
1     0.000000  0.002920  0.011679  0.002920  0.002920  0.000000  0.002920   
2     0.000000  0.000000  0.000000  0.000000  0.000000  0.003591  0.000000   
3     0.000000  0.000000  0.000000  0.000000  0.000000  0.004301  0.000000   
4     0.006502  0.002890  0.001445  0.003251  0.001084  0.005418  0.003973   
...        ...       ...       ...       ...       ...       ...       ...   
1440  0.026667  0.000000  0.000000  0.026667  0.000000  0.013333  0.026667   
1441  0.033333  0.011111  0.000000  0.000000  0.000000  0.022222  0.011111   
1442  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1443  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1444  0.000000  0.000000  0.000000  0.000000  0.000000  0.012987  0.012987   

            AG        AH        AI  ...        VK        VM    

In [19]:
X = data.iloc[:,:-1]
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(y)
print(X)

0       0
1       0
2       0
3       0
4       0
       ..
1440    1
1441    1
1442    1
1443    1
1444    1
Name: target, Length: 1445, dtype: int64
            AA        AR        AN        AD        AC        AQ        AE  \
0     0.014414  0.007207  0.000000  0.007207  0.007207  0.007207  0.000000   
1     0.000000  0.002920  0.011679  0.002920  0.002920  0.000000  0.002920   
2     0.000000  0.000000  0.000000  0.000000  0.000000  0.003591  0.000000   
3     0.000000  0.000000  0.000000  0.000000  0.000000  0.004301  0.000000   
4     0.006502  0.002890  0.001445  0.003251  0.001084  0.005418  0.003973   
...        ...       ...       ...       ...       ...       ...       ...   
1440  0.026667  0.000000  0.000000  0.026667  0.000000  0.013333  0.026667   
1441  0.033333  0.011111  0.000000  0.000000  0.000000  0.022222  0.011111   
1442  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000   
1443  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.0

### SVM

In [35]:
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
svm_scores = cross_val_score(svm_model, X, y, cv=skf, scoring='accuracy')
print("SVM Cross-Validation Accuracy: ", round(np.mean(svm_scores),3))

SVM Cross-Validation Accuracy:  0.803


In [34]:
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_precision = precision_score(y_test, svm_predictions, average='weighted')
svm_recall = recall_score(y_test, svm_predictions, average='weighted')
svm_f1 = f1_score(y_test, svm_predictions, average='weighted')
print("SVM Accuracy: ",round(svm_accuracy,3))
print("SVM Precision: ", round(svm_precision,3))
print("SVM Recall: ", round(svm_recall,3))
print("SVM F1-Score: ", round(svm_f1,3))

SVM Accuracy:  0.818
SVM Precision:  0.669
SVM Recall:  0.818
SVM F1-Score:  0.736


  _warn_prf(average, modifier, msg_start, len(result))


### Random Forests 

In [24]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
rf_scores = cross_val_score(rf_model, X, y, cv=skf, scoring='accuracy')
print("Random Forest Cross-Validation Accuracy: ", np.mean(rf_scores))

Random Forest Cross-Validation Accuracy:  0.9902969348659003


In [33]:
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
rf_precision = precision_score(y_test, rf_predictions, average='weighted')
rf_recall = recall_score(y_test, rf_predictions, average='weighted')
rf_f1 = f1_score(y_test, rf_predictions, average='weighted')
print("Random Forest Accuracy: ", round(rf_accuracy,3))
print("Random Forest Precision: ", round(rf_precision,3))
print("Random Forest Recall: ", round(rf_recall,3))
print("Random Forest F1-Score: ", round(rf_f1,3))

Random Forest Accuracy:  0.995
Random Forest Precision:  0.995
Random Forest Recall:  0.995
Random Forest F1-Score:  0.995


### NaiveBayes

In [25]:
nb_model = GaussianNB()
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
nb_scores = cross_val_score(nb_model, X, y, cv=skf, scoring='accuracy')
print("Naive Bayes Cross-Validation Accuracy: ", np.mean(nb_scores))

Naive Bayes Cross-Validation Accuracy:  0.9688553639846743


In [32]:
nb_model.fit(X_train, y_train)
nb_predictions = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision = precision_score(y_test, nb_predictions, average='weighted')
nb_recall = recall_score(y_test, nb_predictions, average='weighted')
nb_f1 = f1_score(y_test, nb_predictions, average='weighted')
print("Naive Bayes Accuracy: ", round(nb_accuracy,3))
print("Naive Bayes Precision: ", round(nb_precision,3))
print("Naive Bayes Recall: ", round(nb_recall,3))
print("Naive Bayes F1-Score: ", round(nb_f1,3))

Naive Bayes Accuracy:  0.965
Naive Bayes Precision:  0.971
Naive Bayes Recall:  0.965
Naive Bayes F1-Score:  0.967
