In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import KNeighborsRegressor
import warnings

# Suppress FutureWarning messages so they do not clutter the cell output.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Step 1: load the training dataset.
print("1. Memuat dataset titanic.csv")
titanic = pd.read_csv('titanic.csv')

# One-row overview table: row count plus all column names joined into a
# single comma-separated string.
dataset_overview = {
    'Deskripsi': ['Dataset Titanic dimuat'],
    'Jumlah Baris': [titanic.shape[0]],
    'Kolom': [', '.join(titanic.columns)],
}
display(pd.DataFrame(dataset_overview))

# Step 2: keep only the columns used by the rest of the cell. The copy keeps
# later writes to `data` from touching `titanic`.
print("\n2. Memilih kolom fitur (Sex, Age, Pclass, Fare, Survived)")
data = titanic.loc[:, ['Sex', 'Age', 'Pclass', 'Fare', 'Survived']].copy()
display(data.head().style.set_caption("5 baris pertama data yang dipilih"))

# Steps 3-5 split `data` by whether Age is present: rows with a known Age are
# the training set for the Age model, rows with a missing Age are the rows to
# predict. Both sets use the same four feature columns.
age_known = data['Age'].notna()
imputation_features = ['Sex', 'Pclass', 'Fare', 'Survived']

# Step 3: training features (rows where Age is not null).
print("\n3. Menyiapkan train_data (fitur dimana Age tidak null)")
train_data = data.loc[age_known, imputation_features].copy()
display(train_data.head().style.set_caption("5 baris pertama train_data"))

# Step 4: training target (the known Age values, same rows as step 3).
print("\n4. Menyiapkan train_label (Age dimana tidak null)")
train_label = data.loc[age_known, 'Age'].copy()
display(train_label.head().to_frame().style.set_caption("5 nilai pertama train_label"))

# Step 5: features of the rows whose Age is null.
print("\n5. Menyiapkan test_data (fitur dimana Age null)")
test_data = data.loc[~age_known, imputation_features].copy()
display(test_data.head().style.set_caption("5 baris pertama test_data"))

# Step 6: min-max normalise train_data.
print("\n6. Normalisasi train_data dengan Min-Max 0-1")

# Sex is a string column; encode it numerically before scaling.
sex_codes = {'female': 0, 'male': 1}
train_data['Sex'] = train_data['Sex'].map(sex_codes)

scaler = MinMaxScaler()
# The scaler returns a plain array, so the column names are re-attached here;
# the resulting frame gets a fresh 0..n-1 index.
train_data_normalized = pd.DataFrame(
    scaler.fit_transform(train_data),
    columns=train_data.columns,
)

# Keep the per-column minimum and maximum learned from train_data.
min_values, max_values = scaler.data_min_, scaler.data_max_

display(train_data_normalized.head().style.set_caption("5 baris pertama train_data yang dinormalisasi"))

# Show the learned minimum and maximum of every attribute.
min_max_table = pd.DataFrame(
    {
        'Atribut': train_data.columns,
        'Min': min_values,
        'Max': max_values,
    }
)
display(min_max_table.style.set_caption("Nilai Min dan Max setiap atribut"))

# Step 7: normalise test_data with the min/max fitted in step 6 (transform
# only, no re-fit).
print("\n7. Normalisasi test_data dengan nilai min/max yang sama")
test_data['Sex'] = test_data['Sex'].map(sex_codes)
test_data_normalized = pd.DataFrame(
    scaler.transform(test_data),
    columns=test_data.columns,
)
display(test_data_normalized.head().style.set_caption("5 baris pertama test_data yang dinormalisasi"))

# Step 8: predict the missing Age values with 3 nearest neighbours. The step
# is titled "classification", but Age is predicted with a regressor here
# rather than a classifier.
print("\n8. Prediksi Age menggunakan 3-NN Regressor (bukan Classifier)")
knn_reg = KNeighborsRegressor(n_neighbors=3)
knn_reg.fit(train_data_normalized, train_label)
age_predictions = knn_reg.predict(test_data_normalized)

# Preview of the first five predictions, rounded to one decimal for display
# only; age_predictions itself is left unrounded.
first_predictions = pd.DataFrame({'Prediksi Age': age_predictions[:5]}).round(1)
display(first_predictions.style.set_caption("5 nilai prediksi pertama untuk Age"))

# Step 9: write the predictions into the rows of `data` whose Age is null.
print("\n9. Mengisi missing values Age dengan hasil prediksi")
missing_age_rows = data['Age'].isnull()
data.loc[missing_age_rows, 'Age'] = age_predictions

# `titanic` still has the original nulls, so it identifies which rows of
# `data` were just filled.
previously_null = titanic['Age'].isnull()
display(data[previously_null].head().style.set_caption("5 baris pertama dengan Age yang diisi"))

# Step 10: load the evaluation features and their labels (two separate files).
print("\n10. Memuat dataset uji titanic_test.csv")
titanic_test = pd.read_csv('titanic_test.csv')
titanic_testlabel = pd.read_csv('titanic_testlabel.csv')

test_set_info = pd.DataFrame(
    {
        'Dataset': ['titanic_test', 'titanic_testlabel'],
        'Jumlah Baris': [titanic_test.shape[0], titanic_testlabel.shape[0]],
    }
)
display(test_set_info.style.set_caption("Info Dataset Uji"))

# Feature columns shared by the new training set and the evaluation set.
survival_features = ['Sex', 'Age', 'Pclass', 'Fare']

# Step 11: new training features taken from `data`.
print("\n11. Menyiapkan train_data baru untuk prediksi Survived")
train_data = data[survival_features].copy()
display(train_data.head().style.set_caption("5 baris pertama train_data baru"))

# Step 12: training target (Survived).
print("\n12. Menyiapkan train_label (Survived)")
train_label = data['Survived'].copy()
display(train_label.head().to_frame().style.set_caption("5 nilai pertama train_label"))

# Step 13: evaluation features; rows with any missing value are dropped.
# The surviving rows keep their original index labels.
print("\n13. Menyiapkan test_data dari dataset uji (tanpa missing values)")
test_data = titanic_test[survival_features].copy().dropna()
display(test_data.head().style.set_caption("5 baris pertama test_data yang sudah dibersihkan"))

# Step 14: pick the labels for exactly the rows kept in step 13, using the
# index labels of test_data.
# NOTE(review): assumes titanic_testlabel rows are in the same order as
# titanic_test rows - confirm against the data files.
print("\n14. Menyiapkan test_label (sesuai urutan test_data)")
test_label = titanic_testlabel.loc[test_data.index, 'Survived'].copy()
display(test_label.head().to_frame().style.set_caption("5 nilai pertama test_label"))

# Step 15: min-max normalise the new train_data with a separate scaler.
print("\n15. Normalisasi train_data baru untuk prediksi Survived")

# Sex is a string column; encode it numerically before scaling.
sex_codes = {'female': 0, 'male': 1}
train_data['Sex'] = train_data['Sex'].map(sex_codes)

scaler_survival = MinMaxScaler()
# The scaler returns a plain array, so the column names are re-attached here.
train_data_normalized = pd.DataFrame(
    scaler_survival.fit_transform(train_data),
    columns=train_data.columns,
)

# Keep the per-column minimum and maximum learned from train_data.
min_values_survival, max_values_survival = (
    scaler_survival.data_min_,
    scaler_survival.data_max_,
)

display(train_data_normalized.head().style.set_caption("5 baris pertama train_data yang dinormalisasi"))

# Show the learned minimum and maximum of every attribute.
min_max_table_survival = pd.DataFrame(
    {
        'Atribut': train_data.columns,
        'Min': min_values_survival,
        'Max': max_values_survival,
    }
)
display(min_max_table_survival.style.set_caption("Nilai Min dan Max setiap atribut"))

# Step 16: normalise test_data with the min/max fitted in step 15 (transform
# only, no re-fit). The resulting frame gets a fresh 0..n-1 index.
print("\n16. Normalisasi test_data dengan nilai min/max yang sama")
test_data['Sex'] = test_data['Sex'].map(sex_codes)
test_data_normalized = pd.DataFrame(
    scaler_survival.transform(test_data),
    columns=test_data.columns,
)
display(test_data_normalized.head().style.set_caption("5 baris pertama test_data yang dinormalisasi"))

# Step 17: classify the evaluation rows against the training rows with 3-NN.
print("\n17. Klasifikasi dengan 3-NN untuk memprediksi Survived")
knn_survival = KNeighborsClassifier(n_neighbors=3)
knn_survival.fit(train_data_normalized, train_label)
class_result_survival = knn_survival.predict(test_data_normalized)

display(pd.DataFrame(class_result_survival[:5], columns=['Prediksi Survived']).style.set_caption("5 nilai prediksi pertama untuk Survived"))

# Step 18: count misclassified rows.
# The predictions are a positional array, while test_label still carries the
# (non-contiguous) index labels left after dropna. Converting the labels to a
# NumPy array makes the row-by-row positional comparison explicit, and the
# vectorised .sum() replaces a Python-level loop over a pandas Series.
print("\n18. Menghitung jumlah error")
mismatches = class_result_survival != test_label.to_numpy()
error = int(mismatches.sum())
error_table = pd.DataFrame({
    'Metrik': ['Jumlah Error'],
    'Nilai': [error]
})
display(error_table.style.set_caption("Hasil Perhitungan Error"))

# Step 19: error ratio as a percentage of the evaluated rows.
print("\n19. Menghitung rasio error (%)")
error_ratio = (error / len(test_data)) * 100
error_ratio_table = pd.DataFrame({
    'Metrik': ['Rasio Error'],
    'Nilai (%)': [error_ratio]
})
display(error_ratio_table.style.set_caption("Hasil Perhitungan Rasio Error"))

1. Memuat dataset titanic.csv


Unnamed: 0,Deskripsi,Jumlah Baris,Kolom
0,Dataset Titanic dimuat,891,"PassengerId, Survived, Pclass, Name, Sex, Age,..."



2. Memilih kolom fitur (Sex, Age, Pclass, Fare, Survived)


Unnamed: 0,Sex,Age,Pclass,Fare,Survived
0,male,22.0,3,7.25,0
1,female,38.0,1,71.2833,1
2,female,26.0,3,7.925,1
3,female,35.0,1,53.1,1
4,male,35.0,3,8.05,0



3. Menyiapkan train_data (fitur dimana Age tidak null)


Unnamed: 0,Sex,Pclass,Fare,Survived
0,male,3,7.25,0
1,female,1,71.2833,1
2,female,3,7.925,1
3,female,1,53.1,1
4,male,3,8.05,0



4. Menyiapkan train_label (Age dimana tidak null)


Unnamed: 0,Age
0,22.0
1,38.0
2,26.0
3,35.0
4,35.0



5. Menyiapkan test_data (fitur dimana Age null)


Unnamed: 0,Sex,Pclass,Fare,Survived
5,male,3,8.4583,0
17,male,2,13.0,1
19,female,3,7.225,1
26,male,3,7.225,0
28,female,3,7.8792,1



6. Normalisasi train_data dengan Min-Max 0-1


Unnamed: 0,Sex,Pclass,Fare,Survived
0,1.0,1.0,0.014151,0.0
1,0.0,0.0,0.139136,1.0
2,0.0,1.0,0.015469,1.0
3,0.0,0.0,0.103644,1.0
4,1.0,1.0,0.015713,0.0


Unnamed: 0,Atribut,Min,Max
0,Sex,0.0,1.0
1,Pclass,1.0,3.0
2,Fare,0.0,512.3292
3,Survived,0.0,1.0



7. Normalisasi test_data dengan nilai min/max yang sama


Unnamed: 0,Sex,Pclass,Fare,Survived
0,1.0,1.0,0.01651,0.0
1,1.0,0.5,0.025374,1.0
2,0.0,1.0,0.014102,1.0
3,1.0,1.0,0.014102,0.0
4,0.0,1.0,0.015379,1.0



8. Prediksi Age menggunakan 3-NN Regressor (bukan Classifier)


Unnamed: 0,Prediksi Age
0,31.7
1,35.7
2,16.7
3,38.5
4,21.3



9. Mengisi missing values Age dengan hasil prediksi


Unnamed: 0,Sex,Age,Pclass,Fare,Survived
5,male,31.666667,3,8.4583,0
17,male,35.666667,2,13.0,1
19,female,16.666667,3,7.225,1
26,male,38.5,3,7.225,0
28,female,21.333333,3,7.8792,1



10. Memuat dataset uji titanic_test.csv


Unnamed: 0,Dataset,Jumlah Baris
0,titanic_test,418
1,titanic_testlabel,418



11. Menyiapkan train_data baru untuk prediksi Survived


Unnamed: 0,Sex,Age,Pclass,Fare
0,male,22.0,3,7.25
1,female,38.0,1,71.2833
2,female,26.0,3,7.925
3,female,35.0,1,53.1
4,male,35.0,3,8.05



12. Menyiapkan train_label (Survived)


Unnamed: 0,Survived
0,0
1,1
2,1
3,1
4,0



13. Menyiapkan test_data dari dataset uji (tanpa missing values)


Unnamed: 0,Sex,Age,Pclass,Fare
0,male,34.5,3,7.8292
1,female,47.0,3,7.0
2,male,62.0,2,9.6875
3,male,27.0,3,8.6625
4,female,22.0,3,12.2875



14. Menyiapkan test_label (sesuai urutan test_data)


Unnamed: 0,Survived
0,0
1,1
2,0
3,0
4,1



15. Normalisasi train_data baru untuk prediksi Survived


Unnamed: 0,Sex,Age,Pclass,Fare
0,1.0,0.271174,1.0,0.014151
1,0.0,0.472229,0.0,0.139136
2,0.0,0.321438,1.0,0.015469
3,0.0,0.434531,0.0,0.103644
4,1.0,0.434531,1.0,0.015713


Unnamed: 0,Atribut,Min,Max
0,Sex,0.0,1.0
1,Age,0.42,80.0
2,Pclass,1.0,3.0
3,Fare,0.0,512.3292



16. Normalisasi test_data dengan nilai min/max yang sama


Unnamed: 0,Sex,Age,Pclass,Fare
0,1.0,0.428248,1.0,0.015282
1,0.0,0.585323,1.0,0.013663
2,1.0,0.773813,0.5,0.018909
3,1.0,0.334004,1.0,0.016908
4,0.0,0.271174,1.0,0.023984



17. Klasifikasi dengan 3-NN untuk memprediksi Survived


Unnamed: 0,Prediksi Survived
0,0
1,0
2,0
3,1
4,0



18. Menghitung jumlah error


Unnamed: 0,Metrik,Nilai
0,Jumlah Error,55



19. Menghitung rasio error (%)


Unnamed: 0,Metrik,Nilai (%)
0,Rasio Error,16.616314
