In [226]:
# Step 1 : Importing Necessary Libraries

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [227]:
# Step 2 : Importing Data

train_dataset=pd.read_csv('train.csv')
test_dataset=pd.read_csv('test.csv')

In [228]:
# Step 3 : Data Preprocessing

# 3.1 - Handling Missing Values  

train_dataset['Age']=train_dataset['Age'].fillna(train_dataset['Age'].median())
train_dataset['Embarked']=train_dataset['Embarked'].fillna(train_dataset['Embarked'].mode()[0])
test_dataset['Age']=test_dataset['Age'].fillna(test_dataset['Age'].median())
test_dataset['Embarked']=test_dataset['Embarked'].fillna(test_dataset['Embarked'].mode()[0])
test_dataset['Fare']=test_dataset['Fare'].fillna(test_dataset['Fare'].median())


# 3.2 - Dropping irrelevant features

train_dataset=train_dataset.drop(columns=['PassengerId','Name','Ticket','Cabin'])
test_dataset=test_dataset.drop(columns=['PassengerId','Name','Ticket','Cabin'])

# 3.3 - Handling Categorical Data

train_dataset=pd.get_dummies(train_dataset,columns=['Sex','Embarked'],drop_first=True)
test_dataset=pd.get_dummies(test_dataset,columns=['Sex','Embarked'],drop_first=True)

In [229]:
#  Step 4 : Splitting Data into Features and Target

x_train=train_dataset.drop('Survived',axis=1)
y_train=train_dataset['Survived']

x_test=test_dataset

In [230]:
# 1. Decision Tree Model

# Step 5 : Selecting Model

data_model1=DecisionTreeClassifier(random_state=42)

# Step 6 : Training the Model

data_model1.fit(x_train,y_train)

# Step 7 : Predicting on Test Dataset

y_predict1=data_model1.predict(x_test)

# Step 8 : Evaluating the Model on Training Data

y_train_predict1=data_model1.predict(x_train)

# Step 9 : Calculating Metrics

accuracy1 = accuracy_score(y_train, y_train_predict1)
precision1 = precision_score(y_train, y_train_predict1)
recall1 = recall_score(y_train, y_train_predict1)
f11 = f1_score(y_train, y_train_predict1)
conf_matrix1 = confusion_matrix(y_train, y_train_predict1)

# Displaying Results for Train Data

print(f'Accuracy (Train): {accuracy1:.4f}')
print(f'Precision (Train): {precision1:.4f}')
print(f'Recall (Train): {recall1:.4f}')
print(f'F1-Score (Train): {f11:.4f}')
print('Confusion Matrix (Train):')
print(conf_matrix1)

Accuracy (Train): 0.9798
Precision (Train): 0.9939
Recall (Train): 0.9532
F1-Score (Train): 0.9731
Confusion Matrix (Train):
[[547   2]
 [ 16 326]]


In [231]:
# 2. Random Forest Model

# Step 5 : Selecting Model

data_model2=RandomForestClassifier(random_state=42)

# Step 6 : Training the Model

data_model2.fit(x_train,y_train)

# Step 7 : Predicting on Test Dataset

y_predict2=data_model2.predict(x_test)

# Step 8 : Evaluating the Model on Training Data

y_train_predict2=data_model2.predict(x_train)

# Step 9 : Calculating Metrics

accuracy2 = accuracy_score(y_train, y_train_predict2)
precision2 = precision_score(y_train, y_train_predict2)
recall2 = recall_score(y_train, y_train_predict2)
f12 = f1_score(y_train, y_train_predict2)
conf_matrix2 = confusion_matrix(y_train, y_train_predict2)

# Displaying Results for Train Data

print(f'Accuracy (Train): {accuracy2:.4f}')
print(f'Precision (Train): {precision2:.4f}')
print(f'Recall (Train): {recall2:.4f}')
print(f'F1-Score (Train): {f12:.4f}')
print('Confusion Matrix (Train):')
print(conf_matrix2)

Accuracy (Train): 0.9798
Precision (Train): 0.9909
Recall (Train): 0.9561
F1-Score (Train): 0.9732
Confusion Matrix (Train):
[[546   3]
 [ 15 327]]


In [232]:
# 3. Logistic Regression

# Step 5 : Selecting Model

data_model3=LogisticRegression(max_iter=500, solver='lbfgs')

# Step 6 : Training the Model

data_model3.fit(x_train,y_train)

# Step 7 : Predicting on Test Dataset

y_predict3=data_model3.predict(x_test)

# Step 8 : Evaluating the Model on Training Data

y_train_predict3=data_model3.predict(x_train)

# Step 9 : Calculating Metrics

accuracy3 = accuracy_score(y_train, y_train_predict3)
precision3 = precision_score(y_train, y_train_predict3)
recall3 = recall_score(y_train, y_train_predict3)
f13 = f1_score(y_train, y_train_predict3)
conf_matrix3 = confusion_matrix(y_train, y_train_predict3)

# Displaying Results for Train Data

print(f'Accuracy (Train): {accuracy3:.4f}')
print(f'Precision (Train): {precision3:.4f}')
print(f'Recall (Train): {recall3:.4f}')
print(f'F1-Score (Train): {f13:.4f}')
print('Confusion Matrix (Train):')
print(conf_matrix3)

Accuracy (Train): 0.8002
Precision (Train): 0.7595
Recall (Train): 0.7018
F1-Score (Train): 0.7295
Confusion Matrix (Train):
[[473  76]
 [102 240]]


In [233]:
# 4. Support Vector Machine (SVM)

# Step 5 : Selecting Model

data_model4=SVC(random_state=42)

# Step 6 : Training the Model

data_model4.fit(x_train, y_train)

# Step 7 : Predicting on Test Dataset

y_predict4=data_model4.predict(x_test)

# Step 8 : Evaluating the Model on Training Data

y_train_predict4=data_model4.predict(x_train)

# Step 9 : Calculating Metrics

accuracy4 = accuracy_score(y_train,y_train_predict4)
precision4 = precision_score(y_train,y_train_predict4)
recall4 = recall_score(y_train,y_train_predict4)
f14 = f1_score(y_train,y_train_predict4)
conf_matrix4 = confusion_matrix(y_train,y_train_predict4)

# Displaying Results for Train Data

print(f'Accuracy (Train): {accuracy4:.4f}')
print(f'Precision (Train): {precision4:.4f}')
print(f'Recall (Train): {recall4:.4f}')
print(f'F1-Score (Train): {f14:.4f}')
print('Confusion Matrix (Train):')
print(conf_matrix4)

Accuracy (Train): 0.6835
Precision (Train): 0.6974
Recall (Train): 0.3099
F1-Score (Train): 0.4291
Confusion Matrix (Train):
[[503  46]
 [236 106]]
