In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_validate, KFold 
import numpy as np
import pickle
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, confusion_matrix, roc_auc_score, recall_score, f1_score, precision_score, accuracy_score


In [2]:
def _unpickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Training and test tables, read from the working directory.
data_train = pd.read_csv('trainsample_ROS2class.csv')
data_test = pd.read_csv('data_test.csv')

# One saved feature selection per SVM kernel; each is used below to index
# the columns of the feature frames.
sf_rbf = _unpickle('selected_featuresROS2.pkl')
sf_poly = _unpickle('selected_featurespolyROS2.pkl')
sf_linear = _unpickle('selected_featureslinearROS2.pkl')

In [3]:
# Separate the target column from the predictors in the training table.
y_train = data_train['label']
X_train = data_train.drop(columns='label')

In [4]:
# Restrict the training predictors to the selection saved for each kernel.
X_train_poly, X_train_rbf, X_train_linear = (
    X_train[selection] for selection in (sf_poly, sf_rbf, sf_linear)
)

In [5]:
# Separate the target column from the predictors in the test table.
y_test = data_test['label']
X_test = data_test.drop(columns='label')

In [6]:
# Apply the same per-kernel selections to the test predictors.
X_test_poly, X_test_rbf, X_test_linear = (
    X_test[selection] for selection in (sf_poly, sf_rbf, sf_linear)
)

In [7]:
# Hyper-parameter search for the RBF-kernel SVM on its selected features.
# NOTE: per the SVC documentation `degree` is only used by the 'poly' kernel,
# so the four degree values repeat equivalent fits here; the entry is kept so
# the printed best_params_ stays unchanged.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100,1000],
              'gamma':  ['scale', 'auto'],
              'degree' : [2, 3, 4, 5], 
              'kernel': ['rbf']}

# 10-fold CV on accuracy; refit=True retrains the winner on all training rows.
grid_rbf = GridSearchCV(SVC(), param_grid, refit=True,cv=10, scoring='accuracy')
grid_rbf.fit(X_train_rbf, y_train)

# Persist the refit model. The context manager closes the file handle, which
# the bare open(...) passed straight to pickle.dump never did.
with open("grid_rbfROS.best_estimator2.pkl", 'wb') as model_file:
    pickle.dump(grid_rbf.best_estimator_, model_file)

print('Best Params rbf', grid_rbf.best_params_)

# Re-score the selected configuration with the same cv=10 setting. This uses
# the same rows the search was tuned on, so it is not an independent estimate.
score_rbf = cross_val_score(grid_rbf.best_estimator_, X_train_rbf, y_train, cv=10, scoring='accuracy')
score_rbf = np.average(score_rbf)

print('Score: ',score_rbf)

Best Params rbf {'C': 100, 'degree': 2, 'gamma': 'auto', 'kernel': 'rbf'}
Score:  0.9423076923076923


In [9]:
# Baseline for the RBF kernel: only `kernel` is set, every other
# hyper-parameter is left at the library default. Mean accuracy over cv=10.
model = SVC(kernel='rbf')
score_rbf_before = cross_val_score(
    model, X_train_rbf, y_train, cv=10, scoring='accuracy'
).mean()
score_rbf_before

0.673076923076923

In [10]:
# RBF SVM with the hyper-parameters printed by the grid search above.
model = SVC(kernel='rbf', C = 100, degree =  2, gamma = 'auto')

# Scored with cv=10, the same setting as the baseline and the grid search, so
# the numbers are comparable. The shuffled KFold previously built in this cell
# was never passed to cross_val_score and has been dropped to avoid implying
# a different split was used.
score_rbf_after = cross_val_score(model, X_train_rbf, y_train, cv=10, scoring='accuracy')
score_rbf_after = np.average(score_rbf_after)
score_rbf_after

0.9423076923076923

In [13]:
# Hyper-parameter search for the linear-kernel SVM on its selected features.
# NOTE: per the SVC documentation neither `gamma` nor `degree` is used by the
# 'linear' kernel, so only `C` actually varies the model; the extra entries
# are kept so the printed best_params_ stays unchanged.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100,1000],
              'gamma':  ['scale', 'auto'],
              'degree' : [2, 3, 4, 5], 
              'kernel': ['linear']}

# 10-fold CV on accuracy; refit=True retrains the winner on all training rows.
grid_linear = GridSearchCV(SVC(), param_grid, refit=True, cv=10, scoring='accuracy')
grid_linear.fit(X_train_linear, y_train)

# Persist the refit model. The context manager closes the file handle, which
# the bare open(...) passed straight to pickle.dump never did.
with open("grid_linearROS.best_estimator2.pkl", 'wb') as model_file:
    pickle.dump(grid_linear.best_estimator_, model_file)

print('Best Params linear', grid_linear.best_params_)

# Re-score the selected configuration with the same cv=10 setting. This uses
# the same rows the search was tuned on, so it is not an independent estimate.
score_linear = cross_val_score(grid_linear.best_estimator_, X_train_linear, y_train, cv=10, scoring='accuracy')
score_linear = np.average(score_linear)

print('Score: ',score_linear)

Best Params linear {'C': 1, 'degree': 2, 'gamma': 'scale', 'kernel': 'linear'}
Score:  0.976923076923077


In [15]:
# Baseline for the linear kernel: only `kernel` is set, every other
# hyper-parameter is left at the library default. Mean accuracy over cv=10.
model = SVC(kernel='linear')
score_linear_before = cross_val_score(
    model, X_train_linear, y_train, cv=10, scoring='accuracy'
).mean()
score_linear_before

0.976923076923077

In [16]:
# Linear SVM with the hyper-parameters printed by the grid search above.
model = SVC(kernel='linear', C = 1, degree =  2, gamma = 'scale')

# Scored with cv=10, the same setting as the baseline and the grid search, so
# the numbers are comparable. The shuffled KFold previously built in this cell
# was never passed to cross_val_score and has been dropped to avoid implying
# a different split was used.
score_linear_after = cross_val_score(model, X_train_linear, y_train, cv=10, scoring='accuracy')
score_linear_after = np.average(score_linear_after)
score_linear_after

0.976923076923077

In [19]:
# Hyper-parameter search for the polynomial-kernel SVM on its selected
# features, varying C, gamma and degree.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100,1000],
              'gamma':  ['scale', 'auto'],
              'degree' : [2, 3, 4, 5], 
              'kernel': ['poly']}

# 10-fold CV on accuracy; refit=True retrains the winner on all training rows.
grid_poly = GridSearchCV(SVC(), param_grid, refit=True, cv=10, scoring='accuracy')
grid_poly.fit(X_train_poly, y_train)

# Persist the refit model. The context manager closes the file handle, which
# the bare open(...) passed straight to pickle.dump never did.
with open("grid_polyROS.best_estimator2.pkl", 'wb') as model_file:
    pickle.dump(grid_poly.best_estimator_, model_file)

print('Best Params polynomial', grid_poly.best_params_)

# Re-score the selected configuration with the same cv=10 setting. This uses
# the same rows the search was tuned on, so it is not an independent estimate.
score_poly = cross_val_score(grid_poly.best_estimator_, X_train_poly, y_train, cv=10, scoring='accuracy')
score_poly = np.average(score_poly)

print('Score: ',score_poly)

Best Params polynomial {'C': 0.001, 'degree': 3, 'gamma': 'auto', 'kernel': 'poly'}
Score:  0.9692307692307693


In [21]:
# Baseline for the polynomial kernel: only `kernel` is set, every other
# hyper-parameter is left at the library default. Mean accuracy over cv=10.
model = SVC(kernel='poly')
score_poly_before = cross_val_score(
    model, X_train_poly, y_train, cv=10, scoring='accuracy'
).mean()
score_poly_before

0.9692307692307693

In [22]:
# Polynomial SVM with the hyper-parameters printed by the grid search above,
# scored with the same cv=10 setting as the baseline.
model = SVC(kernel='poly', C = 0.001, degree =  3, gamma = 'auto')
score_poly_after = cross_val_score(
    model, X_train_poly, y_train, cv=10, scoring='accuracy'
).mean()
score_poly_after

0.9692307692307693

In [31]:
# Training-set metrics for the tuned RBF model, reloaded from the file written
# by the RBF grid-search cell. These are computed on the same rows the model
# was refit on, so they show fit quality rather than generalization.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_rbf = joblib.load("grid_rbfROS.best_estimator2.pkl")
y_train_pred_rbf = best_estimator_rbf.predict(X_train_rbf)

print('Accuracy:', accuracy_score(y_train, y_train_pred_rbf))
print('F1 score:', f1_score(y_train, y_train_pred_rbf))
print('Recall:', recall_score(y_train, y_train_pred_rbf))
print('Precision:', precision_score(y_train, y_train_pred_rbf))
print('\n clasification report:\n', classification_report(y_train,y_train_pred_rbf))
print('\n confussion matrix:\n',confusion_matrix(y_train, y_train_pred_rbf))

Accuracy: 1.0
F1 score: 1.0
Recall: 1.0
Precision: 1.0

 clasification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       130
           1       1.00      1.00      1.00       130

    accuracy                           1.00       260
   macro avg       1.00      1.00      1.00       260
weighted avg       1.00      1.00      1.00       260


 confussion matrix:
 [[130   0]
 [  0 130]]


In [32]:
# Test-set metrics for the tuned RBF model, reloaded from the file written by
# the RBF grid-search cell.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_rbf = joblib.load("grid_rbfROS.best_estimator2.pkl")
y_test_pred_rbf = best_estimator_rbf.predict(X_test_rbf)

print('Accuracy:', accuracy_score(y_test, y_test_pred_rbf))
print('F1 score:', f1_score(y_test, y_test_pred_rbf))
print('Recall:', recall_score(y_test, y_test_pred_rbf))
print('Precision:', precision_score(y_test, y_test_pred_rbf))
print('\n clasification report:\n', classification_report(y_test,y_test_pred_rbf))
print('\n confussion matrix:\n',confusion_matrix(y_test, y_test_pred_rbf))

Accuracy: 0.8095238095238095
F1 score: 0.8787878787878787
Recall: 0.90625
Precision: 0.8529411764705882

 clasification report:
               precision    recall  f1-score   support

           0       0.62      0.50      0.56        10
           1       0.85      0.91      0.88        32

    accuracy                           0.81        42
   macro avg       0.74      0.70      0.72        42
weighted avg       0.80      0.81      0.80        42


 confussion matrix:
 [[ 5  5]
 [ 3 29]]


In [33]:
# Training-set metrics for the tuned polynomial model, reloaded from the file
# written by the polynomial grid-search cell. These are computed on the same
# rows the model was refit on, so they show fit quality rather than
# generalization.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_poly = joblib.load("grid_polyROS.best_estimator2.pkl")
y_train_pred_poly = best_estimator_poly.predict(X_train_poly)

print('Accuracy:', accuracy_score(y_train, y_train_pred_poly))
print('F1 score:', f1_score(y_train, y_train_pred_poly))
print('Recall:', recall_score(y_train, y_train_pred_poly))
print('Precision:', precision_score(y_train, y_train_pred_poly))
print('\n clasification report:\n', classification_report(y_train,y_train_pred_poly))
print('\n confussion matrix:\n',confusion_matrix(y_train, y_train_pred_poly))

Accuracy: 0.9884615384615385
F1 score: 0.9884169884169884
Recall: 0.9846153846153847
Precision: 0.9922480620155039

 clasification report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       130
           1       0.99      0.98      0.99       130

    accuracy                           0.99       260
   macro avg       0.99      0.99      0.99       260
weighted avg       0.99      0.99      0.99       260


 confussion matrix:
 [[129   1]
 [  2 128]]


In [34]:
# Test-set metrics for the tuned polynomial model, reloaded from the file
# written by the polynomial grid-search cell.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_poly = joblib.load("grid_polyROS.best_estimator2.pkl")
y_test_pred_poly = best_estimator_poly.predict(X_test_poly)

print('Accuracy:', accuracy_score(y_test, y_test_pred_poly))
print('F1 score:', f1_score(y_test, y_test_pred_poly))
print('Recall:', recall_score(y_test, y_test_pred_poly))
print('Precision:', precision_score(y_test, y_test_pred_poly))
print('\n clasification report:\n', classification_report(y_test,y_test_pred_poly))
print('\n confussion matrix:\n',confusion_matrix(y_test, y_test_pred_poly))

Accuracy: 0.7619047619047619
F1 score: 0.8333333333333334
Recall: 0.78125
Precision: 0.8928571428571429

 clasification report:
               precision    recall  f1-score   support

           0       0.50      0.70      0.58        10
           1       0.89      0.78      0.83        32

    accuracy                           0.76        42
   macro avg       0.70      0.74      0.71        42
weighted avg       0.80      0.76      0.77        42


 confussion matrix:
 [[ 7  3]
 [ 7 25]]


In [35]:
# Training-set metrics for the tuned linear model, reloaded from the file
# written by the linear grid-search cell. These are computed on the same rows
# the model was refit on, so they show fit quality rather than generalization.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_linear = joblib.load("grid_linearROS.best_estimator2.pkl")
y_train_pred_linear = best_estimator_linear.predict(X_train_linear)

print('Accuracy:', accuracy_score(y_train, y_train_pred_linear))
print('F1 score:', f1_score(y_train, y_train_pred_linear))
print('Recall:', recall_score(y_train, y_train_pred_linear))
print('Precision:', precision_score(y_train, y_train_pred_linear))
print('\n clasification report:\n', classification_report(y_train,y_train_pred_linear))
print('\n confussion matrix:\n',confusion_matrix(y_train, y_train_pred_linear))

Accuracy: 0.9923076923076923
F1 score: 0.9922480620155039
Recall: 0.9846153846153847
Precision: 1.0

 clasification report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       130
           1       1.00      0.98      0.99       130

    accuracy                           0.99       260
   macro avg       0.99      0.99      0.99       260
weighted avg       0.99      0.99      0.99       260


 confussion matrix:
 [[130   0]
 [  2 128]]


In [36]:
# Test-set metrics for the tuned linear model, reloaded from the file written
# by the linear grid-search cell.
# The metric functions come from the notebook's top import cell and are not
# re-imported here.
best_estimator_linear = joblib.load("grid_linearROS.best_estimator2.pkl")
y_test_pred_linear = best_estimator_linear.predict(X_test_linear)

print('Accuracy:', accuracy_score(y_test, y_test_pred_linear))
print('F1 score:', f1_score(y_test, y_test_pred_linear))
print('Recall:', recall_score(y_test, y_test_pred_linear))
print('Precision:', precision_score(y_test, y_test_pred_linear))
print('\n clasification report:\n', classification_report(y_test,y_test_pred_linear))
print('\n confussion matrix:\n',confusion_matrix(y_test, y_test_pred_linear))

Accuracy: 0.7857142857142857
F1 score: 0.8571428571428571
Recall: 0.84375
Precision: 0.8709677419354839

 clasification report:
               precision    recall  f1-score   support

           0       0.55      0.60      0.57        10
           1       0.87      0.84      0.86        32

    accuracy                           0.79        42
   macro avg       0.71      0.72      0.71        42
weighted avg       0.79      0.79      0.79        42


 confussion matrix:
 [[ 6  4]
 [ 5 27]]
