Import Libraries and Load Data

In [11]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib

# Read the passenger records from train.csv (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='train.csv')


Data Processing

In [12]:
# Prepare the raw frame for the model: impute gaps, encode categoricals and
# select the feature matrix X and the target y.
#
# NOTE: this cell rewrites `data`. Re-run the loading cell before executing it
# a second time, otherwise 'Cabin' is already gone (KeyError) and 'Sex' is
# already numeric (the map below would turn every value into NaN).

# Fill missing values.
# The filled Series is assigned back instead of calling
# data[col].fillna(..., inplace=True): that chained in-place form emits a
# FutureWarning on pandas 2.x and, with Copy-on-Write (the default from
# pandas 3.0), never updates `data`, silently leaving the NaNs in place.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
data['Fare'] = data['Fare'].fillna(data['Fare'].median())

# Drop the 'Cabin' column due to too many missing values
data = data.drop(columns=['Cabin'])

# Convert categorical variables to numerical
# (any 'Sex' value other than 'male'/'female' becomes NaN here)
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
data = pd.get_dummies(data, columns=['Embarked'], drop_first=True)

# Keep 'Name' and 'Sex' columns for later use
names = data['Name']
genders = data['Sex']  # 0 for male, 1 for female

# Drop identifier columns that are not used as features
data = data.drop(columns=['Ticket', 'PassengerId'])

# Ensure the features are in the correct order
required_columns = ['Pclass', 'Sex', 'Age', 'Fare', 'SibSp', 'Parch', 'Embarked_Q', 'Embarked_S']
X = data[required_columns]
y = data['Survived']

# Safety net: the imputation and encoding above must leave no gaps in the features.
assert not X.isna().any().any(), "feature matrix still contains missing values"


Load Model and Make Predictions

In [13]:
# Load the trained model
# NOTE: joblib.load unpickles the file and can execute arbitrary code, so only
# load model files that come from a trusted source.
rf_clf = joblib.load('random_forest_model.pkl')

# Standardise the features before predicting.
# `X_scaled` was never defined anywhere in this notebook, so on a fresh kernel
# the predict() call below raised NameError; it is now derived from X here.
# NOTE(review): this re-fits a StandardScaler on the data being scored, which
# assumes the model was trained on features standardised the same way. Ideally
# the scaler fitted at training time is saved and loaded next to the model.
# TODO confirm against the training notebook.
X_scaled = StandardScaler().fit_transform(X)

# Make predictions on the entire dataset
y_pred = rf_clf.predict(X_scaled)

# Collect names, genders, actual and predicted survival status in a new dataframe,
# mapping gender back to string values on the way
results = pd.DataFrame({
    'Name': names,
    'Gender': genders.map({0: 'male', 1: 'female'}),
    'Survived': y,
    'Survived_Prediction': y_pred
})

# Display the first few rows with predictions
print(results.head(10))


                                                Name  Gender  Survived  \
0                            Braund, Mr. Owen Harris    male         0   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female         1   
2                             Heikkinen, Miss. Laina  female         1   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female         1   
4                           Allen, Mr. William Henry    male         0   
5                                   Moran, Mr. James    male         0   
6                            McCarthy, Mr. Timothy J    male         0   
7                     Palsson, Master. Gosta Leonard    male         0   
8  Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)  female         1   
9                Nasser, Mrs. Nicholas (Adele Achem)  female         1   

   Survived_Prediction  
0                    0  
1                    1  
2                    1  
3                    1  
4                    0  
5                    0  
6         