# **Program 12**

Prepare a model for prediction of survival from Titanic Ship using Random Forest and compare the accuracy with other classifiers also.

In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')

# Load the dataset
df = pd.read_csv("titanic.csv")

# Drop rows where the target variable is missing
df = df.dropna(subset=['Survived'])

# Select features 'x' and target variable 'y'
x = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]
y = df["Survived"]

# Encode categorical feature 'Sex' to numeric
le = LabelEncoder()
x['Sex'] = le.fit_transform(x['Sex'])

# Fill missing values in 'Age' with the mean
x['Age'] = x['Age'].fillna(x['Age'].mean())

# Split the dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier with 100 decision trees
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Random Forest Classifier
rf_model.fit(x_train, y_train)

# Make predictions using the Random Forest Classifier
y_pred_rf = rf_model.predict(x_test)

# Evaluate the Random Forest Classifier
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_classification_report = classification_report(y_test, y_pred_rf)

print("Accuracy of Random Forest Classifier: ", rf_accuracy)
print("Classification Report:\n", rf_classification_report)

# Comparison with other Models

# Initialize models
model1 = KNeighborsClassifier(n_neighbors=9)
model2 = GaussianNB()
model3 = DecisionTreeClassifier(criterion='entropy')
model4 = RandomForestClassifier(n_estimators=100)
# List of models for comparison
modellist = [model1, model2, model3, model4]

# Evaluate each model
print("\n=== Model Comparison Results ===")
for model in modellist:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # Calculate performance metrics
    model_accuracy = accuracy_score(y_test, y_pred)
    model_confusion_matrix = confusion_matrix(y_test, y_pred)
    model_classification_report = classification_report(y_test, y_pred)
    # Display results for each model
    print(f"\nModel: {model.__class__.__name__}")
    print("Confusion Matrix:")
    print(model_confusion_matrix)
    print(f"Accuracy: {model_accuracy:.2f}")
    print("Classification Report:")
    print(model_classification_report)

Accuracy of Random Forest Classifier:  0.8156424581005587
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.88      0.85       105
           1       0.81      0.73      0.77        74

    accuracy                           0.82       179
   macro avg       0.81      0.80      0.81       179
weighted avg       0.82      0.82      0.81       179


=== Model Comparison Results ===

Model: KNeighborsClassifier
Confusion Matrix:
[[85 20]
 [34 40]]
Accuracy: 0.70
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.81      0.76       105
           1       0.67      0.54      0.60        74

    accuracy                           0.70       179
   macro avg       0.69      0.68      0.68       179
weighted avg       0.69      0.70      0.69       179


Model: GaussianNB
Confusion Matrix:
[[85 20]
 [21 53]]
Accuracy: 0.77
Classification Report:
              precision    recal