In [1]:
from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import f1_score

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

import matplotlib.pyplot as plt

import pandas as pd



In [2]:
# Set random state for project
RD_STATE = 1

# Load breast cancer dataset
bc_data = load_breast_cancer()

# Pisahkan menjadi training set (80%) dan testing set (20%)
bc_X_train, bc_X_test, bc_y_train, bc_y_test = train_test_split(bc_data.data, bc_data.target, test_size=0.2, random_state=RD_STATE)

In [3]:
# Load play tennis dataset
pt_df_raw = pd.read_csv("play_tennis.csv")
pt_df = pt_df_raw.copy()

# Label encoding
le = LabelEncoder()
pt_classes = {}
for column in pt_df.columns:
    le.fit(pt_df[column])
    pt_df[column] = le.transform(pt_df[column])
    pt_classes[column] = le.classes_
pt_X_train, pt_X_test, pt_y_train, pt_y_test = train_test_split(pt_df[['outlook', 'temp', 'humidity', 'wind']], pt_df[['play']], test_size=0.2, random_state=RD_STATE)

In [4]:
# Logistic Regression

# Data preprocessing: normalisasi data agar setiap feature memiliki orde magnituda yang sama
bc_scaler = StandardScaler().fit(bc_data.data)
bc_X_train_scaled = bc_scaler.transform(bc_X_train)
bc_X_test_scaled = bc_scaler.transform(bc_X_test)

pt_scaler = StandardScaler().fit(pt_df[['outlook', 'temp', 'humidity', 'wind']])
pt_X_train_scaled = pt_scaler.transform(pt_X_train)
pt_X_test_scaled = pt_scaler.transform(pt_X_test)

# Fit classifier
bc_lr_clf = LogisticRegression(random_state=RD_STATE).fit(bc_X_train_scaled, bc_y_train)

pt_lr_clf = LogisticRegression(random_state=RD_STATE).fit(pt_X_train_scaled, pt_y_train['play'])

# Print accuracy dan nilai f1
print("Logistic Regression Performance:")
print("• Breast cancer dataset")
print("\tAccuracy: ", bc_lr_clf.score(bc_X_test_scaled, bc_y_test))
print("\tf1_score: ", f1_score(bc_y_test, bc_lr_clf.predict(bc_X_test_scaled)))
print("• Play tennis dataset")
print("\tAccuracy: ", pt_lr_clf.score(pt_X_test_scaled, pt_y_test['play']))
print("\tf1_score: ", f1_score(pt_y_test['play'], pt_lr_clf.predict(pt_X_test_scaled)))

Logistic Regression Performance:
• Breast cancer dataset
	Accuracy:  0.9736842105263158
	f1_score:  0.9793103448275863
• Play tennis dataset
	Accuracy:  0.6666666666666666
	f1_score:  0.8


In [5]:
# Neural Network: Multi-Layer Perceptron Model

# Fit classifier
bc_mlp_clf = MLPClassifier(random_state=RD_STATE, max_iter=1000).fit(bc_X_train, bc_y_train)

pt_mlp_clf = MLPClassifier(random_state=RD_STATE, max_iter=1000).fit(pt_X_train, pt_y_train['play'])

# Print accuracy dan nilai f1
print("Neural Network Performance:")
print("• Breast cancer dataset")
print("\tAccuracy: ", bc_mlp_clf.score(bc_X_test, bc_y_test))
print("\tf1_score: ", f1_score(bc_y_test, bc_mlp_clf.predict(bc_X_test)))
print("• Play tennis dataset")
print("\tAccuracy: ", pt_mlp_clf.score(pt_X_test, pt_y_test['play']))
print("\tf1_score: ", f1_score(pt_y_test['play'], pt_mlp_clf.predict(pt_X_test)))

Neural Network Performance:
• Breast cancer dataset
	Accuracy:  0.9473684210526315
	f1_score:  0.9594594594594595
• Play tennis dataset
	Accuracy:  1.0
	f1_score:  1.0


In [6]:
# Support Vector Machine: C-Support Vector Classification

# Data preprocessing: normalisasi data agar setiap feature memiliki orde magnituda yang sama
bc_scaler = StandardScaler().fit(bc_data.data)
bc_X_train_scaled = bc_scaler.transform(bc_X_train)
bc_X_test_scaled = bc_scaler.transform(bc_X_test)

pt_scaler = StandardScaler().fit(pt_df[['outlook', 'temp', 'humidity', 'wind']])
pt_X_train_scaled = pt_scaler.transform(pt_X_train)
pt_X_test_scaled = pt_scaler.transform(pt_X_test)

# Fit classifier
bc_svc_clf = SVC(random_state=RD_STATE, gamma='auto').fit(bc_X_train_scaled, bc_y_train)

pt_svc_clf = SVC(random_state=RD_STATE, gamma='auto').fit(pt_X_train_scaled, pt_y_train['play'])

# Print accuracy dan nilai f1
print("SVM Performance:")
print("• Breast cancer dataset")
print("\tAccuracy: ", bc_lr_clf.score(bc_X_test_scaled, bc_y_test))
print("\tf1_score: ", f1_score(bc_y_test, bc_lr_clf.predict(bc_X_test_scaled)))
print("• Play tennis dataset")
print("\tAccuracy: ", pt_lr_clf.score(pt_X_test_scaled, pt_y_test['play']))
print("\tf1_score: ", f1_score(pt_y_test['play'], pt_lr_clf.predict(pt_X_test_scaled)))

SVM Performance:
• Breast cancer dataset
	Accuracy:  0.9736842105263158
	f1_score:  0.9793103448275863
• Play tennis dataset
	Accuracy:  0.6666666666666666
	f1_score:  0.8
