In [1]:
# 0. Import Library
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)  # Hilangkan warning SciPy
from sklearn.model_selection import train_test_split, cross_val_score, LeaveOneOut
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

# Patch untuk scipy.stats.mode agar tidak munculkan FutureWarning
import scipy.stats
from functools import wraps
original_mode = scipy.stats.mode
@wraps(original_mode)
def mode_patch(*args, **kwargs):
    kwargs.setdefault("keepdims", True)
    return original_mode(*args, **kwargs)
scipy.stats.mode = mode_patch

# 1. Load Dataset
df = pd.read_csv('titanic.csv')
print("[1] Dataset dimuat:")
display(df.head())

# 3. Ambil Fitur & Imputasi Missing Value
features = ['Sex', 'Age', 'Pclass', 'Fare']
df_features = df[features].copy()
df_features['Sex'] = df_features['Sex'].map({'male': 0, 'female': 1})
df_features['Age'] = df_features.groupby('Pclass')['Age'].transform(lambda x: x.fillna(x.mean()))
print("\n[3] Fitur setelah imputasi missing value:")
display(df_features.head())

# 4. Ambil Label
label = df[['Survived']]
print("\n[4] Label (kolom kelas):")
display(label.head())

# 5. Normalisasi Train Data
scaler = MinMaxScaler()
train_data_norm = pd.DataFrame(scaler.fit_transform(df_features), columns=df_features.columns)
min_vals = scaler.data_min_
max_vals = scaler.data_max_
print("\n[5] Data setelah normalisasi:")
display(train_data_norm.head())
print("Nilai Min dan Max tiap atribut:")
minmax_df = pd.DataFrame([min_vals, max_vals], index=["Min", "Max"], columns=df_features.columns)
display(minmax_df)

# 6. Normalisasi Test Data (akan dilakukan otomatis pada Hold-Out)

# 7. Klasifikasi dan Validasi
# 7.a Hold-Out Method
X_train, X_test, y_train, y_test = train_test_split(train_data_norm, label, test_size=0.3, random_state=42)
knn_holdout = KNeighborsClassifier(n_neighbors=3)
knn_holdout.fit(X_train, y_train.values.ravel())
y_pred_holdout = knn_holdout.predict(X_test)
acc_holdout = round(accuracy_score(y_test, y_pred_holdout), 4)
err_holdout = round(1 - acc_holdout, 4)
result_holdout = pd.DataFrame({
    "Metode": ["Hold-Out (70-30)"],
    "Akurasi": [acc_holdout],
    "Error Ratio": [err_holdout]
})
print("\n[7.a] Validasi Hold-Out:")
display(result_holdout)

# 7.b K-Fold (k=10)
knn_kfold = KNeighborsClassifier(n_neighbors=3)
scores_kfold = cross_val_score(knn_kfold, train_data_norm, label.values.ravel(), cv=10)
acc_kfold = round(np.mean(scores_kfold), 4)
err_kfold = round(1 - acc_kfold, 4)
result_kfold = pd.DataFrame({
    "Metode": ["K-Fold (k=10)"],
    "Akurasi": [acc_kfold],
    "Error Ratio": [err_kfold]
})
print("\n[7.b] Validasi K-Fold:")
display(result_kfold)

# 7.c Leave-One-Out (LOO)
knn_loo = KNeighborsClassifier(n_neighbors=3)
loo = LeaveOneOut()
scores_loo = cross_val_score(knn_loo, train_data_norm, label.values.ravel(), cv=loo)
acc_loo = round(np.mean(scores_loo), 4)
err_loo = round(1 - acc_loo, 4)
result_loo = pd.DataFrame({
    "Metode": ["Leave-One-Out (LOO)"],
    "Akurasi": [acc_loo],
    "Error Ratio": [err_loo]
})
print("\n[7.c] Validasi Leave-One-Out:")
display(result_loo)

# Ringkasan
summary_all = pd.concat([result_holdout, result_kfold, result_loo], ignore_index=True)
print("\n[7] Ringkasan Hasil Validasi:")
display(summary_all)


[1] Dataset dimuat:


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S



[3] Fitur setelah imputasi missing value:


Unnamed: 0,Sex,Age,Pclass,Fare
0,0,22.0,3,7.25
1,1,38.0,1,71.2833
2,1,26.0,3,7.925
3,1,35.0,1,53.1
4,0,35.0,3,8.05



[4] Label (kolom kelas):


Unnamed: 0,Survived
0,0
1,1
2,1
3,1
4,0



[5] Data setelah normalisasi:


Unnamed: 0,Sex,Age,Pclass,Fare
0,0.0,0.271174,1.0,0.014151
1,1.0,0.472229,0.0,0.139136
2,1.0,0.321438,1.0,0.015469
3,1.0,0.434531,0.0,0.103644
4,0.0,0.434531,1.0,0.015713


Nilai Min dan Max tiap atribut:


Unnamed: 0,Sex,Age,Pclass,Fare
Min,0.0,0.42,1.0,0.0
Max,1.0,80.0,3.0,512.3292



[7.a] Validasi Hold-Out:


Unnamed: 0,Metode,Akurasi,Error Ratio
0,Hold-Out (70-30),0.8134,0.1866



[7.b] Validasi K-Fold:


Unnamed: 0,Metode,Akurasi,Error Ratio
0,K-Fold (k=10),0.8159,0.1841



[7.c] Validasi Leave-One-Out:


Unnamed: 0,Metode,Akurasi,Error Ratio
0,Leave-One-Out (LOO),0.8283,0.1717



[7] Ringkasan Hasil Validasi:


Unnamed: 0,Metode,Akurasi,Error Ratio
0,Hold-Out (70-30),0.8134,0.1866
1,K-Fold (k=10),0.8159,0.1841
2,Leave-One-Out (LOO),0.8283,0.1717
