In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
#loading training and testing sets
projectDirPath = os.path.abspath("")

X_train = pd.read_csv(projectDirPath + "\\ready data\\X_train.csv").values
X_test = pd.read_csv(projectDirPath + "\\ready data\\X_test.csv").values
y_train = pd.read_csv(projectDirPath + "\\ready data\\y_train.csv").values.reshape(-1,)
y_test = pd.read_csv(projectDirPath + "\\ready data\\y_test.csv").values.reshape(-1,)

In [3]:
#training the model
from sklearn.svm import SVC

svc = SVC(kernel = "linear")
svc.fit(X_train, y_train)

SVC(kernel='linear')

In [4]:
#validation of the model using k-cross validation
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer, recall_score

accuracies = cross_val_score(estimator = svc, X = X_train, y = y_train, cv = 10)

recall_scorer = make_scorer(recall_score, pos_label = 4)
recalls = cross_val_score(estimator = svc, X = X_train, y = y_train, scoring = recall_scorer, cv = 10)

accMean = accuracies.mean()
accStdDev = accuracies.std()
recMean = recalls.mean()
recStdDev = recalls.std()

print("mean of 10 accuracies: ", accMean)
print("standard deviation of accuracies: ", recStdDev)
print("mean of 10 recalls: ", accMean)
print("standard deviation of recalls: ", recStdDev)

mean of 10 accuracies:  0.9655660377358488
standard deviation of accuracies:  0.047589010591440065
mean of 10 recalls:  0.9655660377358488
standard deviation of recalls:  0.047589010591440065


In [5]:
#testing of the model on the test set and computing : precision, recall and confusion matrix
from sklearn.metrics import accuracy_score, confusion_matrix

y_pred = svc.predict(X_test)
acc = accuracy_score(y_test, y_pred)
rec = recall_score(y_test, y_pred, pos_label = 4)
cm = confusion_matrix(y_test, y_pred)

print("accuracy on the test set: ", acc)
print("recall on the test set: ", rec)
print("confusion matrix:\n ", cm)

accuracy on the test set:  0.9657142857142857
recall on the test set:  0.9655172413793104
confusion matrix:
  [[113   4]
 [  2  56]]


In [6]:
#saving the model
from joblib import dump

dump(svc, projectDirPath + "\\models\svc.joblib")

['C:\\Users\\misla\\Desktop\\breast cancer tumor classification\\models\\svc.joblib']

In [None]:
#saving evaluation data for all tried out models
import json

#evaluation data for all the used models (will be used in streamlit app)
svcData = {"mean10Acc" : accMean, "accStd" : accStdDev, "mean10Rec" : recMean, "recStd" : recStdDev, "acc": acc, "rec": rec}

with open(projectDirPath + "\\modelsData\\svc.json") as file:
    json.dump(svcData, file)