In [53]:
from sklearn import tree
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [54]:
# Load the data
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2)

In [55]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(x_train, y_train)

In [56]:
tree_text = tree.export_text(clf, feature_names=list(cancer.feature_names))
print(tree_text)

|--- mean concave points <= 0.05
|   |--- worst area <= 893.65
|   |   |--- symmetry error <= 0.01
|   |   |   |--- class: 0
|   |   |--- symmetry error >  0.01
|   |   |   |--- smoothness error <= 0.00
|   |   |   |   |--- fractal dimension error <= 0.00
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- fractal dimension error >  0.00
|   |   |   |   |   |--- class: 1
|   |   |   |--- smoothness error >  0.00
|   |   |   |   |--- worst texture <= 33.35
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst texture >  33.35
|   |   |   |   |   |--- mean texture <= 23.20
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- mean texture >  23.20
|   |   |   |   |   |   |--- class: 1
|   |--- worst area >  893.65
|   |   |--- mean texture <= 19.54
|   |   |   |--- concave points error <= 0.01
|   |   |   |   |--- class: 0
|   |   |   |--- concave points error >  0.01
|   |   |   |   |--- class: 1
|   |   |--- mean texture >  19.54
|   |   |   |--- class: 0
|--- mean c

In [57]:
pickle.dump(clf, open('models/tree_model.sav', 'wb'))

In [58]:
tree_model = pickle.load(open('models/tree_model.sav', 'rb'))
y_pred_tree = tree_model.predict(x_test)

print("Decision Tree Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_tree))
print("Precision: ", precision_score(y_test, y_pred_tree))
print("Recall: ", recall_score(y_test, y_pred_tree))
print("F1 Score: ", f1_score(y_test, y_pred_tree))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_tree))

Decision Tree Classifier
Accuracy:  0.8859649122807017
Precision:  0.890625
Recall:  0.9047619047619048
F1 Score:  0.8976377952755906
Confusion matrix:  [[44  7]
 [ 6 57]]


In [59]:
kmeans = KMeans(n_clusters=2, random_state=0).fit(x_train)
pickle.dump(kmeans, open('models/kmeans_model.sav', 'wb'))

In [60]:
kmeans_model = pickle.load(open('models/kmeans_model.sav', 'rb'))
y_pred_kmeans = kmeans_model.predict(x_test)

print("KMeans Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_kmeans))
print("Precision: ", precision_score(y_test, y_pred_kmeans))
print("Recall: ", recall_score(y_test, y_pred_kmeans))
print("F1 Score: ", f1_score(y_test, y_pred_kmeans))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_kmeans))

KMeans Classifier
Accuracy:  0.8157894736842105
Precision:  0.7560975609756098
Recall:  0.9841269841269841
F1 Score:  0.8551724137931035
Confusion matrix:  [[31 20]
 [ 1 62]]


In [61]:
logisticRegr = LogisticRegression(max_iter=10000).fit(x_train, y_train)
pickle.dump(logisticRegr, open('models/logistic_model.sav', 'wb'))

In [62]:
logistic_model = pickle.load(open('models/logistic_model.sav', 'rb'))
y_pred_logistic = logistic_model.predict(x_test)

print("Logistic Regression Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_logistic))
print("Precision: ", precision_score(y_test, y_pred_logistic))
print("Recall: ", recall_score(y_test, y_pred_logistic))
print("F1 Score: ", f1_score(y_test, y_pred_logistic))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_logistic))

Logistic Regression Classifier
Accuracy:  0.956140350877193
Precision:  0.9833333333333333
Recall:  0.9365079365079365
F1 Score:  0.9593495934959351
Confusion matrix:  [[50  1]
 [ 4 59]]


In [63]:
neural_network = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter=1000).fit(x_train, y_train)
pickle.dump(neural_network, open('models/neural_model.sav', 'wb'))

In [64]:
neural_model = pickle.load(open('models/neural_model.sav', 'rb'))
y_pred_neural = neural_model.predict(x_test)

print("Neural Network Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_neural))
print("Precision: ", precision_score(y_test, y_pred_neural))
print("Recall: ", recall_score(y_test, y_pred_neural))
print("F1 Score: ", f1_score(y_test, y_pred_neural))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_neural))

Neural Network Classifier
Accuracy:  0.9298245614035088
Precision:  0.9365079365079365
Recall:  0.9365079365079365
F1 Score:  0.9365079365079365
Confusion matrix:  [[47  4]
 [ 4 59]]


In [65]:
svm = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(x_train, y_train)
pickle.dump(svm, open('models/svm_model.sav', 'wb'))

In [66]:
svm_model = pickle.load(open('models/svm_model.sav', 'rb'))
y_pred_svm = svm_model.predict(x_test)

print("SVM Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_svm))
print("Precision: ", precision_score(y_test, y_pred_svm))
print("Recall: ", recall_score(y_test, y_pred_svm))
print("F1 Score: ", f1_score(y_test, y_pred_svm))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_svm))

SVM Classifier
Accuracy:  0.9649122807017544
Precision:  0.9682539682539683
Recall:  0.9682539682539683
F1 Score:  0.9682539682539683
Confusion matrix:  [[49  2]
 [ 2 61]]


In [67]:
kfold = cross_validate(clf, x_train, y_train, cv=10, scoring=('accuracy', 'precision', 'recall', 'f1'))
print("Decision Tree Classifier Base (from before)")

print("Accuracy: ", accuracy_score(y_test, y_pred_tree))
print("Precision: ", precision_score(y_test, y_pred_tree))
print("Recall: ", recall_score(y_test, y_pred_tree))
print("F1 Score: ", f1_score(y_test, y_pred_tree))

print("Decision Tree Classifier KFold (from now)")
print("Accuracy: ", kfold['test_accuracy'].mean())
print("Precision: ", kfold['test_precision'].mean())
print("Recall: ", kfold['test_recall'].mean())
print("F1 Score: ", kfold['test_f1'].mean())


Decision Tree Classifier Base (from before)
Accuracy:  0.8859649122807017
Precision:  0.890625
Recall:  0.9047619047619048
F1 Score:  0.8976377952755906
Decision Tree Classifier KFold (from now)
Accuracy:  0.9319806763285025
Precision:  0.942150977035571
Recall:  0.9559770114942528
F1 Score:  0.9482413735247498
