In [7]:
# Penyelesaian Studi Kasus: Prediksi Keselamatan Penumpang Titanic

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# 1. Load data
# Read the local CSV if it is present. Otherwise pull a public sample of the
# dataset and save it under the same name so the next run finds it locally.
csv_path = 'titanic.csv'
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    print("File titanic.csv tidak ditemukan. Download otomatis sample dataset dari Github ...")
    url = 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv'
    df = pd.read_csv(url)
    df.to_csv(csv_path, index=False)
    print("File berhasil diunduh dan disimpan sebagai titanic.csv")

# 2. Preprocessing
# Missing 'Age' values get the column mean; missing 'Embarked' values get the
# most frequent value of that column.
# NOTE(review): both statistics are computed over the full frame, i.e. before
# the train/test split below -- consider deriving them from training rows only.
age_mean = df['Age'].mean()
top_port = df['Embarked'].mode()[0]
df['Age'] = df['Age'].fillna(age_mean)
df['Embarked'] = df['Embarked'].fillna(top_port)

# Replace the categorical labels with numeric codes.
sex_codes = {'male': 0, 'female': 1}
port_codes = {'S': 0, 'C': 1, 'Q': 2}
df['Sex'] = df['Sex'].map(sex_codes)
df['Embarked'] = df['Embarked'].map(port_codes)

# 3. Feature selection
# 'Survived' is the target; the listed columns are the model inputs.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
y = df['Survived']
X = df[features]

# 4. Train/test split
# 20% of the rows are held out; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 5. Model training
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# 6. Prediction and evaluation on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Akurasi:", accuracy)
print("\nClassification Report:\n", report)

# 7. Feature importance
# Pair each input column with the importance reported by the fitted forest
# and list them from most to least important.
importances = model.feature_importances_
feature_importance = pd.DataFrame(
    {
        'feature': features,
        'importance': importances,
    }
)
ranked_importance = feature_importance.sort_values(by="importance", ascending=False)
print("\nFeature Importance:")
print(ranked_importance)

Akurasi: 0.8212290502793296

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.87      0.85       105
           1       0.80      0.76      0.78        74

    accuracy                           0.82       179
   macro avg       0.82      0.81      0.81       179
weighted avg       0.82      0.82      0.82       179


Feature Importance:
    feature  importance
5      Fare    0.272032
1       Sex    0.266227
2       Age    0.252044
0    Pclass    0.085496
3     SibSp    0.052776
4     Parch    0.037304
6  Embarked    0.034121
