<a href="https://colab.research.google.com/github/dvtran63/ai-learning-notebooks/blob/main_b1/titanic_voting_stacking_with_smote.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Voting & Stacking

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
import warnings
warnings.filterwarnings("ignore")

from google.colab import files
uploaded = files.upload()

df = pd.read_csv("titanic_feature_engineered.csv")
X = df.drop("Survived", axis=1)
y = df["Survived"]

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

smote = SMOTE(random_state=42)
X_train_sm, y_train_sm = smote.fit_resample(X_train, y_train)

print("✅ After SMOTE:", y_train.value_counts().to_dict(), "→", dict(zip(*np.unique(y_train_sm, return_counts=True))))

logreg = LogisticRegression()
rf = RandomForestClassifier(n_estimators=100, random_state=42)
svm = SVC(probability=True, random_state=42)

rf.fit(X_train_sm, y_train_sm)
y_pred_rf = rf.predict(X_test)
y_prob_rf = rf.predict_proba(X_test)[:, 1]
print("\n📊 Random Forest Report:")
print(classification_report(y_test, y_pred_rf))
print("RF ROC AUC:", roc_auc_score(y_test, y_prob_rf))

svm.fit(X_train_sm, y_train_sm)
y_pred_svm = svm.predict(X_test)
y_prob_svm = svm.predict_proba(X_test)[:, 1]
print("\n📊 SVM Report:")
print(classification_report(y_test, y_pred_svm))
print("SVM ROC AUC:", roc_auc_score(y_test, y_prob_svm))

voting_clf = VotingClassifier(estimators=[
    ('lr', logreg), ('rf', rf), ('svm', svm)
], voting='soft')

stacking_clf = StackingClassifier(estimators=[
    ('lr', logreg), ('rf', rf), ('svm', svm)
], final_estimator=LogisticRegression())

#Train & Evaluate
voting_clf.fit(X_train_sm, y_train_sm)
y_pred_vote = voting_clf.predict(X_test)
y_prob_vote = voting_clf.predict_proba(X_test)[:, 1]
print("\n📊 Voting Classifier Report:")
print(classification_report(y_test, y_pred_vote))
print("ROC AUC:", roc_auc_score(y_test, y_prob_vote))

stacking_clf.fit(X_train_sm, y_train_sm)
y_pred_stack = stacking_clf.predict(X_test)
y_prob_stack = stacking_clf.predict_proba(X_test)[:, 1]
print("\n📊 Stacking Classifier Report:")
print(classification_report(y_test, y_pred_stack))
print("ROC AUC:", roc_auc_score(y_test, y_prob_stack))

Saving titanic_feature_engineered.csv to titanic_feature_engineered (1).csv
✅ After SMOTE: {0: 100, 1: 60} → {np.int64(0): np.int64(100), np.int64(1): np.int64(100)}

📊 Random Forest Report:
              precision    recall  f1-score   support

           0       0.58      0.60      0.59        25
           1       0.29      0.27      0.28        15

    accuracy                           0.47        40
   macro avg       0.43      0.43      0.43        40
weighted avg       0.47      0.47      0.47        40

ROC AUC: 0.43999999999999995

📊 SVM Report:
              precision    recall  f1-score   support

           0       0.59      0.40      0.48        25
           1       0.35      0.53      0.42        15

    accuracy                           0.45        40
   macro avg       0.47      0.47      0.45        40
weighted avg       0.50      0.45      0.46        40


📊 Voting Classifier Report:
              precision    recall  f1-score   support

           0       0.63    