In [40]:
from sklearn import tree
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

import pickle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# Load the data
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2)

In [3]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(x_train, y_train)

In [4]:
tree_text = tree.export_text(clf, feature_names=list(cancer.feature_names))
print(tree_text)

|--- worst area <= 874.85
|   |--- worst concave points <= 0.14
|   |   |--- area error <= 48.98
|   |   |   |--- smoothness error <= 0.00
|   |   |   |   |--- worst concave points <= 0.09
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst concave points >  0.09
|   |   |   |   |   |--- class: 0
|   |   |   |--- smoothness error >  0.00
|   |   |   |   |--- worst texture <= 33.35
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst texture >  33.35
|   |   |   |   |   |--- worst concave points <= 0.09
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- worst concave points >  0.09
|   |   |   |   |   |   |--- class: 0
|   |   |--- area error >  48.98
|   |   |   |--- mean perimeter <= 78.51
|   |   |   |   |--- class: 0
|   |   |   |--- mean perimeter >  78.51
|   |   |   |   |--- class: 1
|   |--- worst concave points >  0.14
|   |   |--- worst texture <= 25.67
|   |   |   |--- worst concave points <= 0.14
|   |   |   |   |--- class: 0
|   |   |   |--- wor

In [5]:
pickle.dump(clf, open('models/tree_model.sav', 'wb'))

In [6]:
tree_model = pickle.load(open('models/tree_model.sav', 'rb'))
y_pred_tree = tree_model.predict(x_test)

print("Decision Tree Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_tree))
print("Precision: ", precision_score(y_test, y_pred_tree))
print("Recall: ", recall_score(y_test, y_pred_tree))
print("F1 Score: ", f1_score(y_test, y_pred_tree))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_tree))

Decision Tree Classifier
Accuracy:  0.9385964912280702
Precision:  0.9552238805970149
Recall:  0.9411764705882353
F1 Score:  0.9481481481481482
Confusion matrix:  [[43  3]
 [ 4 64]]


In [28]:
kmeans = KMeans(n_clusters=2, random_state=0).fit(x_train)
pickle.dump(kmeans, open('models/kmeans_model.sav', 'wb'))

In [29]:
kmeans_model = pickle.load(open('models/kmeans_model.sav', 'rb'))
y_pred_kmeans = kmeans_model.predict(x_test)

print("KMeans Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_kmeans))
print("Precision: ", precision_score(y_test, y_pred_kmeans))
print("Recall: ", recall_score(y_test, y_pred_kmeans))
print("F1 Score: ", f1_score(y_test, y_pred_kmeans))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_kmeans))

KMeans Classifier
Accuracy:  0.17543859649122806
Precision:  0.03571428571428571
Recall:  0.014705882352941176
F1 Score:  0.020833333333333332
Confusion matrix:  [[19 27]
 [67  1]]


In [30]:
logisticRegr = LogisticRegression(max_iter=10000).fit(x_train, y_train)
pickle.dump(logisticRegr, open('models/logistic_model.sav', 'wb'))

In [31]:
logistic_model = pickle.load(open('models/logistic_model.sav', 'rb'))
y_pred_logistic = logistic_model.predict(x_test)

print("Logistic Regression Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_logistic))
print("Precision: ", precision_score(y_test, y_pred_logistic))
print("Recall: ", recall_score(y_test, y_pred_logistic))
print("F1 Score: ", f1_score(y_test, y_pred_logistic))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_logistic))

Logistic Regression Classifier
Accuracy:  0.9649122807017544
Precision:  0.9705882352941176
Recall:  0.9705882352941176
F1 Score:  0.9705882352941176
Confusion matrix:  [[44  2]
 [ 2 66]]


In [36]:
neural_network = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1, max_iter=1000).fit(x_train, y_train)
pickle.dump(neural_network, open('models/neural_model.sav', 'wb'))

In [37]:
neural_model = pickle.load(open('models/neural_model.sav', 'rb'))
y_pred_neural = neural_model.predict(x_test)

print("Neural Network Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_neural))
print("Precision: ", precision_score(y_test, y_pred_neural))
print("Recall: ", recall_score(y_test, y_pred_neural))
print("F1 Score: ", f1_score(y_test, y_pred_neural))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_neural))

Neural Network Classifier
Accuracy:  0.9210526315789473
Precision:  0.8933333333333333
Recall:  0.9852941176470589
F1 Score:  0.9370629370629371
Confusion matrix:  [[38  8]
 [ 1 67]]


In [43]:
svm = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(x_train, y_train)
pickle.dump(svm, open('models/svm_model.sav', 'wb'))

In [44]:
svm_model = pickle.load(open('models/svm_model.sav', 'rb'))
y_pred_svm = svm_model.predict(x_test)

print("SVM Classifier")

print("Accuracy: ", accuracy_score(y_test, y_pred_svm))
print("Precision: ", precision_score(y_test, y_pred_svm))
print("Recall: ", recall_score(y_test, y_pred_svm))
print("F1 Score: ", f1_score(y_test, y_pred_svm))
print("Confusion matrix: ", confusion_matrix(y_test, y_pred_svm))

SVM Classifier
Accuracy:  0.9824561403508771
Precision:  0.9852941176470589
Recall:  0.9852941176470589
F1 Score:  0.9852941176470589
Confusion matrix:  [[45  1]
 [ 1 67]]
