In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,OrdinalEncoder, OneHotEncoder, OrdinalEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# 1. Load Data

In [2]:
df = pd.read_csv('../../titanic_Dataset_modi.csv')
df.drop(columns=['PassengerId','Name','Ticket','Cabin'], inplace=True)
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,education
0,0,3,male,22.0,1,0,7.2500,S,School
1,1,1,female,38.0,1,0,71.2833,C,UG
2,1,3,female,26.0,0,0,7.9250,S,PG
3,1,1,female,35.0,1,0,53.1000,S,PG
4,0,3,male,35.0,0,0,8.0500,S,UG
...,...,...,...,...,...,...,...,...,...
395,0,3,male,22.0,0,0,7.7958,S,PG
396,0,3,female,31.0,0,0,7.8542,S,PG
397,0,2,male,46.0,0,0,26.0000,S,PG
398,0,2,male,23.0,0,0,10.5000,S,UG


# 2. Clean the data

##### 1. Age -> mean impute ; Embarked -> median impute
##### 2. Fare -> Standard Scaler ;  Age -> MinMax Scaler
##### 3. Sex, Embaked -> OHE
##### 4. education -> Ordinal 
##### 5. Label Encoding
##### 5. Feature Selection
##### 6. Model train 

In [3]:
df.isnull().mean() * 100

Survived      0.00
Pclass        0.00
Sex           0.00
Age          19.50
SibSp         0.00
Parch         0.00
Fare          0.00
Embarked      0.25
education     0.00
dtype: float64

In [4]:
age_pipe = Pipeline([
    ('imputer', SimpleImputer()),
    ('scaling', MinMaxScaler())
])
# Fare (scale)
fare_pipe = Pipeline([
    ('scaling', StandardScaler())
])
# Embarked (impute,OHE)
embk_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32))
])
# Sex (OHE)
sex_pipe = Pipeline([
    ('ohe', OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32))
])
# Education (ordinal)
edu_pipe = Pipeline([
    ("ordinal", OrdinalEncoder(categories=[['School', 'UG', 'PG']]))
])

In [5]:
# Preprocessor
preprocessor = ColumnTransformer(transformers=[
    ('age_pipe', age_pipe, ['Age']),
    ('fare_pipe', fare_pipe, ['Fare']),
    ('embk_pipe', embk_pipe, ['Embarked']),
    ('sex_pipe', sex_pipe, ['Sex']),
    ('edu_pipe', edu_pipe, ['education']),
], remainder='passthrough')


In [6]:
X = preprocessor.fit_transform(df.iloc[:,1:])
X

array([[ 0.30169588, -0.51903614,  0.        , ...,  3.        ,
         1.        ,  0.        ],
       [ 0.52971355,  0.75965824,  0.        , ...,  1.        ,
         1.        ,  0.        ],
       [ 0.3587003 , -0.50555692,  0.        , ...,  3.        ,
         0.        ,  0.        ],
       ...,
       [ 0.64372239, -0.14461346,  0.        , ...,  2.        ,
         0.        ,  0.        ],
       [ 0.31594699, -0.45413621,  0.        , ...,  2.        ,
         0.        ,  0.        ],
       [ 0.38720251, -0.41120241,  0.        , ...,  2.        ,
         0.        ,  0.        ]], shape=(400, 9))

In [7]:
y = df.iloc[:,0]
y

0      0
1      1
2      1
3      1
4      0
      ..
395    0
396    0
397    0
398    0
399    1
Name: Survived, Length: 400, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2,random_state=42)

In [9]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((320, 9), (80, 9), (320,), (80,))

In [10]:
import torch

X_train = torch.from_numpy(np.asarray(X_train))
X_test = torch.from_numpy(np.asarray(X_test))
y_train = torch.from_numpy(np.asarray(y_train))
y_test = torch.from_numpy(np.asarray(y_test))

In [11]:
X_train

tensor([[ 0.4870,  0.3966,  0.0000,  ...,  1.0000,  1.0000,  0.0000],
        [ 0.4300, -0.3044,  0.0000,  ...,  3.0000,  1.0000,  0.0000],
        [ 0.4727, -0.5341,  0.0000,  ...,  3.0000,  0.0000,  0.0000],
        ...,
        [ 0.3969, -0.0448,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
        [ 0.0309, -0.3463,  0.0000,  ...,  3.0000,  1.0000,  1.0000],
        [ 0.2874,  0.8796,  0.0000,  ...,  1.0000,  0.0000,  1.0000]],
       dtype=torch.float64)

# 3. Build Model

In [12]:
class MySimplePerceptron():
    def __init__(self, X_train):
        self.weights = torch.rand(X_train.shape[1], 1, dtype=torch.float64, requires_grad=True)
        self.bias = torch.rand(1, dtype=torch.float64, requires_grad=True)
    
    def forward(self, X_train):
        z = torch.matmul(X_train, self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred
    
    def loss_function(self, y_pred, y_train):
        epsilon = 1e-8
        y_pred = torch.clamp(y_pred, epsilon, 1-epsilon)
        loss = (-(y_train * torch.log(y_pred)) - ((1-y_train)*torch.log(1-y_pred))).mean()
        return loss

# 4. Train Model

In [13]:
learning_rate = 0.1
epochs = 100

In [14]:
model = MySimplePerceptron(X_train)

for epoch in range(epochs):
    #forward propagation
    y_pred = model.forward(X_train)
    
    # loss calculate
    loss = model.loss_function(y_pred, y_train)
    
    # backpropagation
    loss.backward()
    
    # update weight and bias
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad
    
    # reinitialize gradient
    model.weights.grad.zero_()
    model.bias.grad.zero_()
    
    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

Epoch: 1, Loss: 1.4890785292593172
Epoch: 2, Loss: 1.3070655108229499
Epoch: 3, Loss: 1.1639052405874872
Epoch: 4, Loss: 1.0573151072336637
Epoch: 5, Loss: 0.9810458725831306
Epoch: 6, Loss: 0.9275405258894439
Epoch: 7, Loss: 0.8900308657274354
Epoch: 8, Loss: 0.8633477573151846
Epoch: 9, Loss: 0.8438702222696313
Epoch: 10, Loss: 0.8291723776367289
Epoch: 11, Loss: 0.817662750558809
Epoch: 12, Loss: 0.8083057219647213
Epoch: 13, Loss: 0.8004282361478721
Epoch: 14, Loss: 0.7935917993543369
Epoch: 15, Loss: 0.7875093920089808
Epoch: 16, Loss: 0.7819919206067292
Epoch: 17, Loss: 0.7769137417076518
Epoch: 18, Loss: 0.7721904262042214
Epoch: 19, Loss: 0.7677643797259458
Epoch: 20, Loss: 0.7635955193435326
Epoch: 21, Loss: 0.7596552165266306
Epoch: 22, Loss: 0.7559223574939068
Epoch: 23, Loss: 0.7523807800715292
Epoch: 24, Loss: 0.7490176070371226
Epoch: 25, Loss: 0.745822163709946
Epoch: 26, Loss: 0.7427852760996394
Epoch: 27, Loss: 0.7398988165194805
Epoch: 28, Loss: 0.7371554096670222
Epo

# 5. Evaluate Model 

In [15]:
with torch.no_grad():
    y_pred = model.forward(X_test)
    y_pred = (y_pred > 0.5).float()
    accuracy = (y_test == y_pred).float().mean()
    print(f"Accuracy: {accuracy}")

Accuracy: 0.6421874761581421
