In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pickle

# Load the breast cancer dataset
cancer = load_breast_cancer()

# Allocate training data 80% and test data 20%
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=0)

## Decision Tree Classifier

In [42]:
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# initialize DT classifier
clf = DecisionTreeClassifier()

# train the model
clf.fit(X_train, y_train)

# export the decision tree in text format
r = export_text(clf, feature_names=cancer.feature_names.tolist())

# show decision tree
print("decision tree:\n", r)

decision tree:
 |--- worst concave points <= 0.14
|   |--- worst area <= 957.45
|   |   |--- worst perimeter <= 107.75
|   |   |   |--- area error <= 91.56
|   |   |   |   |--- area error <= 48.98
|   |   |   |   |   |--- mean concavity <= 0.14
|   |   |   |   |   |   |--- smoothness error <= 0.00
|   |   |   |   |   |   |   |--- worst compactness <= 0.20
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- worst compactness >  0.20
|   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- smoothness error >  0.00
|   |   |   |   |   |   |   |--- worst texture <= 32.83
|   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |   |--- worst texture >  32.83
|   |   |   |   |   |   |   |   |--- worst texture <= 33.81
|   |   |   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |   |   |--- worst texture >  33.81
|   |   |   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- mean concavity >  0.14
|   |   |  

In [34]:
# save model using pickle
with open('models/decisionTreeClassifier.pkl', 'wb') as file:
    pickle.dump(clf, file)

#  load model using pickle
with open('models/decisionTreeClassifier.pkl', 'rb') as file:
    clf = pickle.load(file)

# predict datasets with model
predictions = clf.predict(X_test)

# evaluate metrics
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
cm = confusion_matrix(y_test, predictions)

# measure model's performance using 10-fold cross validation
cv = cross_validate(clf, cancer.data, cancer.target, cv=10, scoring=['accuracy', 'f1'])

# show metrics
print('Accuracy: ', accuracy)
print('Precision: ', precision)
print('Recall: ', recall)
print('F1: ', f1)
print('Confusion Matrix:\n', cm)

# show cross validation metrics
print('Cross Validation Accuracy:\n', cv['test_accuracy'])
print('Cross Validation F1:\n', cv['test_f1'])

Accuracy:  0.9122807017543859
Precision:  0.9672131147540983
Recall:  0.8805970149253731
F1:  0.9218749999999999
Confusion Matrix:
 [[45  2]
 [ 8 59]]
Cross Validation Accuracy:
 [0.89473684 0.87719298 0.9122807  0.89473684 0.92982456 0.89473684
 0.89473684 0.94736842 0.9122807  0.89285714]
Cross Validation F1:
 [0.91428571 0.90140845 0.93150685 0.91666667 0.94285714 0.91666667
 0.92105263 0.95774648 0.92537313 0.90909091]


Analisis metrik pada model pembelajaran Decision Tree :<br>
lorem ipsum

## ID3 Estimator

In [None]:
from id3 import Id3Estimator, export_graphviz
import numpy as np


## K Means

In [29]:
from sklearn.cluster import KMeans

# initialize KMeans with 5 clusters with n_init=10
kmeans = KMeans(n_clusters=5, n_init=10)

# train the model
kmeans.fit(X_train)

# save model using pickle
with open('models/kMeans.pkl', 'wb') as file:
    pickle.dump(kmeans, file)

#  load model using pickle
with open('models/kMeans.pkl', 'rb') as file:
    kmeans = pickle.load(file)

# predict datasets with model
predictions = kmeans.predict(X_test)

print("prediction:\n" , predictions)

prediction:
 [0 0 0 0 4 4 4 4 4 4 0 0 4 2 0 2 4 1 2 1 0 2 0 4 1 4 4 0 0 1 4 1 0 2 4 0 0
 1 0 2 0 4 2 4 0 1 4 0 4 0 2 0 1 4 4 4 4 4 4 1 0 2 4 4 2 4 1 2 1 4 0 2 4 0
 1 0 4 4 4 4 2 1 2 4 2 4 0 4 2 1 4 0 0 0 4 4 1 0 4 4 4 4 0 0 1 4 2 0 0 2 0
 1 2 0]


Analisis metrik pada model pembelajaran K-Means :<br>
Karena K-Means model merupakan model pembelajaran Unsupervised, maka hasil prediksi dari K-Means yang merupakan prediksi letak data terhadap cluster-cluster yang terbentuk tidak dapat diukur dengan metrik seperti accuracy, precision, recall, F1, dan confusion matrix yang digunakan hanya untuk mengukur model pembelajaran <strong>Supervised</strong>

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

## Neural Network - Multi-layer Perceptron (MLP) Classifier

In [30]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# initialize MLP classifier
clf = MLPClassifier(random_state=1, max_iter=300)

# train the model
clf.fit(X_train, y_train)

# save model using pickle
with open('models/mlpClassifier.pkl', 'wb') as file:
    pickle.dump(clf, file)

#  load model using pickle
with open('models/mlpClassifier.pkl', 'rb') as file:
    clf = pickle.load(file)

# predict datasets with model
predictions = clf.predict(X_test)

# evaluate metrics
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
cm = confusion_matrix(y_test, predictions)

# show metrics
print('Accuracy: ', accuracy)
print('Precision: ', precision)
print('Recall: ', recall)
print('F1: ', f1)
print('Confusion Matrix:\n', cm)

Accuracy:  0.9122807017543859
Precision:  0.8701298701298701
Recall:  1.0
F1:  0.9305555555555556
Confusion Matrix:
 [[37 10]
 [ 0 67]]


## Support Vector Machine

In [None]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
import numpy as np

# Load the dataset
data = np.array([[60, 1.6, 0], [55, 1.5, 0], [65, 1.7, 1], [70, 1.8, 1], [80, 1.9, 1], [50, 1.4, 0], [75, 1.85, 1]])

# Split the data into input (X) and output (y) variables
X = data[:, :2]
y = data[:, 2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

print("X_train:\n", X_train)
print("X_test:\n", X_test)
print("y_train:\n", y_train)
print("y_test:\n", y_test)


X_train:
 [[60.    1.6 ]
 [65.    1.7 ]
 [80.    1.9 ]
 [75.    1.85]]
X_test:
 [[70.   1.8]
 [50.   1.4]
 [55.   1.5]]
y_train:
 [0. 1. 1. 1.]
y_test:
 [1. 0. 0.]
