In [163]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [164]:
# Load the Titanic passenger data from a CSV file in the working directory.
df=pd.read_csv("titanic.csv")
# Preview the first five rows to check that the columns parsed as expected.
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [165]:
# List the column names available in the freshly loaded frame.
print(df.columns)

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


In [166]:
# Remove the columns that are not used as model inputs, in a single call
# rather than one drop per column.
df = df.drop(columns=['PassengerId', 'Name', 'Fare', 'Ticket', 'Cabin'])

In [167]:
# Confirm which columns remain after the drops in the previous cell.
print(df.columns)

Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked'], dtype='object')


In [168]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Embarked      2
dtype: int64

In [169]:
# Encode sex as a binary indicator: male -> 1, female -> 0.
df['Sex'] = df['Sex'].map({'male': 1, 'female': 0})

# Impute missing embarkation ports with the most frequent port. A numeric
# placeholder such as 0 would mix types in a string-valued column and make
# get_dummies emit a meaningless extra `Embarked_0` indicator.
most_common_port = df['Embarked'].mode()[0]
df['Embarked'] = df['Embarked'].fillna(most_common_port)

# One-hot encode the port. Every level is kept (drop_first=False) because the
# feature selection in the next cell names Embarked_S, Embarked_C and
# Embarked_Q explicitly.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=False)

# Fill missing ages with the median age computed over all rows.
df['Age'] = df['Age'].fillna(df['Age'].median())

In [170]:
# Target: the Survived column.
y = df['Survived']

# Standardise the model inputs with StandardScaler's default settings.
# NOTE(review): the scaler is fitted on every row before the train/test split
# performed in a later cell, so held-out rows influence the scaling statistics.
scaler = StandardScaler()
x = scaler.fit_transform(
    df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch',
        'Embarked_S', 'Embarked_C', 'Embarked_Q']]
)

In [171]:
# Feature names, in the same column order as the matrix passed to the scaler;
# used further down to label coefficients and importances.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']
features += [f'Embarked_{port}' for port in ('S', 'C', 'Q')]

In [172]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

Logistic Regression

In [173]:
# Fit a logistic-regression classifier on the training split. fit() returns
# the estimator itself, so construction and fitting are chained.
lgr = LogisticRegression(max_iter=1000).fit(x_train, y_train)

# Predict on the held-out rows and print per-class precision/recall/F1.
lgr_pred = lgr.predict(x_test)
lgr_report = classification_report(y_test, lgr_pred)
print(lgr_report)

              precision    recall  f1-score   support

           0       0.82      0.85      0.84       105
           1       0.77      0.74      0.76        74

    accuracy                           0.80       179
   macro avg       0.80      0.80      0.80       179
weighted avg       0.80      0.80      0.80       179



In [188]:
# Print each feature's logistic-regression coefficient, rounded to 2 decimals.
# The coefficient array is looked up once instead of on every iteration.
lgr_coef = lgr.coef_
lgr_weights = lgr_coef[0]
for position, feature in enumerate(features):
    print(f'{feature} :{lgr_weights[position]:.2f}')

Pclass :-0.86
Sex :-1.28
Age :-0.40
SibSp :-0.31
Parch :-0.07
Embarked_S :-0.27
Embarked_C :-0.04
Embarked_Q :-0.09


In [201]:
# Hand-written summary of factors read as lowering survival
# (presumably derived from the negative coefficients printed above).
negative_impacts = [
    "being male",
    "older",
    "traveling in lower-class cabins",
    "having more siblings/spouses or parents/children aboard",
    "embarking from Southampton",
]

# Header first, then one bullet per factor, each on its own line.
print(
    "The negative impacts are",
    *(f"- {impact}" for impact in negative_impacts),
    sep="\n",
)

The negative impacts are
- being male
- older
- traveling in lower-class cabins
- having more siblings/spouses or parents/children aboard
- embarking from Southampton


Decision Tree

In [178]:
# Fit a decision tree on the training split.
# The seed is fixed so the fitted tree, the report below and the feature
# importances are reproducible: scikit-learn permutes the features at each
# split, so an unseeded tree can differ from run to run. 42 matches the seed
# already used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(x_train, y_train)

# Predict on the held-out rows and print per-class precision/recall/F1.
dt_pred = dt.predict(x_test)

dt_report = classification_report(y_test, dt_pred)
print(dt_report)

              precision    recall  f1-score   support

           0       0.79      0.85      0.82       105
           1       0.76      0.69      0.72        74

    accuracy                           0.78       179
   macro avg       0.78      0.77      0.77       179
weighted avg       0.78      0.78      0.78       179



In [187]:
# Print each feature's importance in the fitted decision tree, rounded to
# 2 decimals. The importance array is fetched once, outside the loop.
dt_importances = dt.feature_importances_
for position, feature in enumerate(features):
    dt_importance = dt_importances[position]
    print(f'{feature} : {dt_importance:.2f}')

Pclass : 0.13
Sex : 0.36
Age : 0.32
SibSp : 0.09
Parch : 0.04
Embarked_S : 0.04
Embarked_C : 0.02
Embarked_Q : 0.01


In [202]:
# Hand-written summary of factors read as improving survival.
positive_impacts = [
    "being female",
    "younger",
    "traveling in higher-class cabins",
    "having fewer siblings/spouses or parents/children aboard",
    "embarking from Cherbourg or Queenstown",
]

# Header first, then one bullet per factor, each on its own line.
print(
    "The positive impacts on survival are",
    *(f"- {impact}" for impact in positive_impacts),
    sep="\n",
)

The positive impacts on survival are
- being female
- younger
- traveling in higher-class cabins
- having fewer siblings/spouses or parents/children aboard
- embarking from Cherbourg or Queenstown


In [180]:
# Fit a linear-kernel support vector classifier on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
svm = SVC(kernel='linear').fit(x_train, y_train)

# Predict on the held-out rows and print per-class precision/recall/F1.
svm_pred = svm.predict(x_test)
svm_report = classification_report(y_test, svm_pred)
print(svm_report)

              precision    recall  f1-score   support

           0       0.80      0.84      0.82       105
           1       0.75      0.70      0.73        74

    accuracy                           0.78       179
   macro avg       0.78      0.77      0.77       179
weighted avg       0.78      0.78      0.78       179



In [192]:
# Print each feature's weight in the linear SVM, rounded to 2 decimals.
# The coefficient array is looked up once instead of on every iteration.
svm_coeff = svm.coef_
svm_weights = svm_coeff[0]
for position, feature in enumerate(features):
    print(f"{feature}: {svm_weights[position]:.2f}")

Pclass: -0.00
Sex: -0.96
Age: -0.00
SibSp: -0.00
Parch: -0.00
Embarked_S: -0.00
Embarked_C: 0.00
Embarked_Q: -0.00


In [204]:
# Hand-written summary of the SVM reading; this rebinds the
# `negative_impacts` name used for the earlier logistic-regression summary.
negative_impacts = [
    "being male",
    "other features have minimal effect",
]

# Header first, then one bullet per factor, each on its own line.
print(
    "The negative impacts are:",
    *(f"- {impact}" for impact in negative_impacts),
    sep="\n",
)

The negative impacts are:
- being male
- other features have minimal effect
