In [1]:
import pandas as pd
import joblib
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Train a random-forest Titanic survival classifier end to end:
# load the CSV, encode/impute five features, fit, print a held-out
# classification report, and persist the fitted model with joblib.

# 1. Load Dataset (Stable Mirror)
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df = pd.read_csv(url)

# 2. Feature Selection (5 features + Target)
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']
target = 'Survived'

X = df[features].copy()
y = df[target]

# 3. Preprocessing
# a. Encoding Sex (Male: 0, Female: 1)
sex_codes = {'male': 0, 'female': 1}
X['Sex'] = X['Sex'].map(sex_codes)
# Series.map turns every label that is not a key of the mapping (and every
# missing value) into NaN; fail loudly instead of training on silent NaNs.
if X['Sex'].isna().any():
    raise ValueError(
        "Column 'Sex' contains missing or unexpected values; "
        f"expected only {sorted(sex_codes)}"
    )

# 4. Split BEFORE imputing, so statistics of the held-out rows never
#    influence preprocessing (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# b. Handling Missing Values (Age is often missing)
# The medians come from the training rows only and are then applied to every
# frame. Code that later feeds rows to the saved model must impute too; the
# values used here stay available in `fill_values`.
fill_values = X_train[['Age', 'Fare']].median()
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)
X = X.fillna(fill_values)  # keep the full feature frame NaN-free, as before

# 5. Train Model
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# 6. Evaluate on the held-out 20%
print("📊 Classification Report:")
print(classification_report(y_test, model.predict(X_test)))

# 7. Save Model (Compressed for GitHub safety)
# exist_ok=True creates the directory if needed without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('model', exist_ok=True)
joblib.dump(model, 'model/titanic_survival_model.pkl', compress=3)
print("✅ Model saved as model/titanic_survival_model.pkl")

📊 Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.90      0.85       105
           1       0.82      0.69      0.75        74

    accuracy                           0.81       179
   macro avg       0.81      0.79      0.80       179
weighted avg       0.81      0.81      0.81       179

✅ Model saved as model/titanic_survival_model.pkl
