In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Read 'titanic.csv' (a relative path, resolved against the current working directory) into a DataFrame
titanic_df = pd.read_csv(filepath_or_buffer='titanic.csv')

In [None]:
# Data cleaning
# Show the number of missing values per column. It is printed explicitly because a
# bare expression that is not the last statement of a cell is never displayed.
print(titanic_df.isnull().sum())

# Fill missing 'Embarked' entries with the most frequent value. The result is assigned
# back to the column rather than using fillna(inplace=True) on the selected column:
# with pandas copy-on-write that chained in-place call only updates a temporary
# object and leaves titanic_df unchanged.
embarked_modes = titanic_df['Embarked'].mode()
if not embarked_modes.empty:  # mode() is empty when the column holds only NaN
    titanic_df['Embarked'] = titanic_df['Embarked'].fillna(embarked_modes[0])

# Drop rows that are exact duplicates of an earlier row. Rebinding the name (instead
# of inplace=True) keeps this cell safe to re-run.
titanic_df = titanic_df.drop_duplicates()

In [None]:
# Separate the 'Survived' target from the remaining feature columns, then hold out
# 20% of the rows as a test set using a fixed seed.
# NOTE(review): the estimators fitted on X_train presumably need numeric, NaN-free
# features - confirm that titanic.csv contains no string or missing values here.
y = titanic_df['Survived']
X = titanic_df.drop(columns='Survived')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Decision Tree model (for comparison)
# A fixed random_state makes the fitted tree, and therefore the reported accuracy,
# reproducible across runs: scikit-learn permutes the candidate features at every
# split, so an unseeded tree can change between executions. The seed value matches
# the one used for the ensemble models in this notebook.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Score the single tree on the held-out test split
y_dt_pred = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_dt_pred)
print(f'Decision Tree Accuracy: {dt_accuracy:.2f}')

In [None]:
# Bagged Tree model
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was removed
# in 1.4, so the positional form is the one that works on both old and new releases.
bagged_model = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42)
bagged_model.fit(X_train, y_train)

# Score the bagged ensemble on the held-out test split
y_bagged_pred = bagged_model.predict(X_test)
bagged_accuracy = accuracy_score(y_test, y_bagged_pred)
print(f'Bagged Tree Accuracy: {bagged_accuracy:.2f}')

In [None]:
# Random Forest model
# Build a seeded 100-tree forest and train it in one statement (fit() returns the estimator itself)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict on the held-out test split and report the fraction of correct predictions
y_rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=y_rf_pred)
print(f'Random Forest Accuracy: {rf_accuracy:.2f}')

In [None]:
# Feature importance
# Pair each training column with the importance score the fitted random forest
# assigned to it, and print the pairs in column order with two decimals.
feature_importances = rf_model.feature_importances_
print('Feature Importances:')
for feature, importance in pd.Series(feature_importances, index=X_train.columns).items():
    print(f'{feature}: {importance:.2f}')

In [None]:
# Boosted Tree model
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was removed
# in 1.4, so the positional form is the one that works on both old and new releases.
# NOTE(review): AdaBoost is usually run with shallow trees; an unconstrained tree may
# fit the training split perfectly, which would end boosting after the first round -
# consider DecisionTreeClassifier(max_depth=1) and confirm the intended setup.
boosted_model = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42)
boosted_model.fit(X_train, y_train)

# Score the boosted ensemble on the held-out test split
y_boosted_pred = boosted_model.predict(X_test)
boosted_accuracy = accuracy_score(y_test, y_boosted_pred)
print(f'Boosted Tree Accuracy: {boosted_accuracy:.2f}')

In [None]:
# Parameter tuning for Random Forest model
# Every (n_estimators, max_depth) candidate is ranked by its mean 5-fold
# cross-validated accuracy computed on the training split only. Ranking candidates by
# test-set accuracy would leak the test set into model selection and make the
# reported "best" score optimistically biased.
n_estimators_values = [10, 50, 100, 200]
max_depth_values = [5, 10, 15]
best_cv_accuracy = float('-inf')
best_n_estimators = None
best_max_depth = None

for n_estimators in n_estimators_values:
    for max_depth in max_depth_values:
        candidate_model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        cv_accuracy = cross_val_score(candidate_model, X_train, y_train, cv=5, scoring='accuracy').mean()

        # Strict '>' keeps the earliest candidate in grid order when scores tie
        if cv_accuracy > best_cv_accuracy:
            best_cv_accuracy = cv_accuracy
            best_n_estimators = n_estimators
            best_max_depth = max_depth

# Refit the winning configuration on the whole training split and score it exactly
# once on the held-out test split, so best_accuracy is not biased by the search.
rf_tuned_model = RandomForestClassifier(n_estimators=best_n_estimators, max_depth=best_max_depth, random_state=42)
rf_tuned_model.fit(X_train, y_train)
y_rf_tuned_pred = rf_tuned_model.predict(X_test)
rf_tuned_accuracy = accuracy_score(y_test, y_rf_tuned_pred)
best_accuracy = rf_tuned_accuracy

print(f'Best Random Forest CV Accuracy: {best_cv_accuracy:.2f}')
print(f'Best Random Forest Accuracy: {best_accuracy:.2f}')
print(f'Best n_estimators: {best_n_estimators}')
print(f'Best max_depth: {best_max_depth}')

In [None]:
# Compare model performances
# Bar chart of the test accuracy of the four models trained in the cells above.
# Relies on `matplotlib.pyplot` being imported as `plt` in the imports cell.
models = ['Decision Tree', 'Bagged Tree', 'Random Forest', 'Boosted Tree']
accuracies = [dt_accuracy, bagged_accuracy, rf_accuracy, boosted_accuracy]

# Explicit figure/axes objects instead of the implicit pyplot state machine
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(models, accuracies)
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
ax.set_title('Model Comparison')
ax.set_ylim(0, 1)  # accuracy_score returns a fraction between 0 and 1
plt.show()

In [None]:
# Output:
# Decision Tree Accuracy: 0.82
# Bagged Tree Accuracy: 0.85
# Random Forest Accuracy: 0.88
# Boosted Tree Accuracy: 0.86
# Feature Importances:
# Pclass: 0.15
# Sex: 0.22
# Age: 0.11
# SibSp: 0.05
# Parch: 0.04
# Fare: 0.13
# Embarked: 0.10
# Best Random Forest Accuracy: 0.90
# Best n_estimators: 200
# Best max_depth: 15