In [265]:
from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

import matplotlib.pyplot as plt

import pandas as pd



In [266]:
# Set random state for project
RD_STATE = 1

# Load breast cancer dataset
bc_data = load_breast_cancer()

# Pisahkan menjadi training set (80%) dan testing set (20%)
bc_X_train, bc_X_test, bc_y_train, bc_y_test = train_test_split(bc_data.data, bc_data.target, test_size=0.2, random_state=RD_STATE)

In [267]:
# Load play tennis dataset
pt_df_raw = pd.read_csv("play_tennis.csv")
pt_df = pt_df_raw.copy()

# Label encoding
le = LabelEncoder()
pt_classes = {}
for column in pt_df.columns:
    le.fit(pt_df[column])
    pt_df[column] = le.transform(pt_df[column])
    pt_classes[column] = le.classes_
pt_X_train, pt_X_test, pt_y_train, pt_y_test = train_test_split(pt_df.iloc[:, 1:-1], pt_df.iloc[:, -1:], test_size=0.2, random_state=RD_STATE)

In [268]:
# Logistic Regression

# Data preprocessing: normalisasi data agar setiap feature memiliki orde magnituda yang sama
bc_scaler = StandardScaler().fit(bc_data.data)
bc_X_train_scaled = bc_scaler.transform(bc_X_train)
bc_X_test_scaled = bc_scaler.transform(bc_X_test)

pt_scaler = StandardScaler().fit(pt_df.iloc[:, 1:-1])
pt_X_train_scaled = pt_scaler.transform(pt_X_train)
pt_X_test_scaled = pt_scaler.transform(pt_X_test)

# Fit classifier
bc_lr_clf = LogisticRegression(random_state=RD_STATE).fit(bc_X_train_scaled, bc_y_train)

pt_lr_clf = LogisticRegression(random_state=RD_STATE).fit(pt_X_train_scaled, pt_y_train['play'])

# Print accuracy
print("Logistic Regression accuracy on breast cancer dataset:", bc_lr_clf.score(bc_X_test_scaled, bc_y_test))
print("Logistic Regression accuracy on play tennis dataset:", pt_lr_clf.score(pt_X_test_scaled, pt_y_test['play']))

Logistic Regression accuracy on breast cancer dataset: 0.9736842105263158
Logistic Regression accuracy on play tennis dataset: 0.6666666666666666


In [269]:
# Neural Network: Multi-Layer Perceptron Model

# Fit classifier
bc_mlp_clf = MLPClassifier(random_state=RD_STATE, max_iter=1000).fit(bc_X_train, bc_y_train)

pt_mlp_clf = MLPClassifier(random_state=RD_STATE, max_iter=1000).fit(pt_X_train, pt_y_train['play'])

# Print accuracy
print("Neural Network accuracy on breast cancer dataset:", bc_mlp_clf.score(bc_X_test, bc_y_test))
print("Neural Network accuracy on play tennis dataset:", pt_mlp_clf.score(pt_X_test, pt_y_test['play']))

Neural Network accuracy on breast cancer dataset: 0.9473684210526315
Neural Network accuracy on play tennis dataset: 1.0


In [270]:
# Support Vector Machine: C-Support Vector Classification

# Data preprocessing: normalisasi data agar setiap feature memiliki orde magnituda yang sama
bc_scaler = StandardScaler().fit(bc_data.data)
bc_X_train_scaled = bc_scaler.transform(bc_X_train)
bc_X_test_scaled = bc_scaler.transform(bc_X_test)

pt_scaler = StandardScaler().fit(pt_df.iloc[:, 1:-1])
pt_X_train_scaled = pt_scaler.transform(pt_X_train)
pt_X_test_scaled = pt_scaler.transform(pt_X_test)

# Fit classifier
bc_svc_clf = SVC(random_state=RD_STATE, gamma='auto').fit(bc_X_train_scaled, bc_y_train)

pt_svc_clf = SVC(random_state=RD_STATE, gamma='auto').fit(pt_X_train_scaled, pt_y_train['play'])

# Print accuracy
print("SVM accuracy on breast cancer dataset:", bc_svc_clf.score(bc_X_test_scaled, bc_y_test))
print("SVM accuracy on play tennis dataset:", pt_svc_clf.score(pt_X_test_scaled, pt_y_test['play']))

SVM accuracy on breast cancer dataset: 0.9736842105263158
SVM accuracy on play tennis dataset: 1.0
