In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler,OrdinalEncoder, OneHotEncoder, OrdinalEncoder, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# 1. Load Data

In [2]:
df = pd.read_csv('../../titanic_Dataset_modi.csv')
df.drop(columns=['PassengerId','Name','Ticket','Cabin'], inplace=True)
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,education
0,0,3,male,22.0,1,0,7.2500,S,School
1,1,1,female,38.0,1,0,71.2833,C,UG
2,1,3,female,26.0,0,0,7.9250,S,PG
3,1,1,female,35.0,1,0,53.1000,S,PG
4,0,3,male,35.0,0,0,8.0500,S,UG
...,...,...,...,...,...,...,...,...,...
395,0,3,male,22.0,0,0,7.7958,S,PG
396,0,3,female,31.0,0,0,7.8542,S,PG
397,0,2,male,46.0,0,0,26.0000,S,PG
398,0,2,male,23.0,0,0,10.5000,S,UG


# 2. Clean the data

##### 1. Age -> mean impute ; Embarked -> median impute
##### 2. Fare -> Standard Scaler ;  Age -> MinMax Scaler
##### 3. Sex, Embaked -> OHE
##### 4. education -> Ordinal 
##### 5. Label Encoding
##### 5. Feature Selection
##### 6. Model train 

In [3]:
df.isnull().mean() * 100

Survived      0.00
Pclass        0.00
Sex           0.00
Age          19.50
SibSp         0.00
Parch         0.00
Fare          0.00
Embarked      0.25
education     0.00
dtype: float64

In [4]:
age_pipe = Pipeline([
    ('imputer', SimpleImputer()),
    ('scaling', MinMaxScaler())
])
# Fare (scale)
fare_pipe = Pipeline([
    ('scaling', StandardScaler())
])
# Embarked (impute,OHE)
embk_pipe = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('ohe', OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32))
])
# Sex (OHE)
sex_pipe = Pipeline([
    ('ohe', OneHotEncoder(drop='first', sparse_output=False, dtype=np.int32))
])
# Education (ordinal)
edu_pipe = Pipeline([
    ("ordinal", OrdinalEncoder(categories=[['School', 'UG', 'PG']]))
])

In [5]:
# Preprocessor
preprocessor = ColumnTransformer(transformers=[
    ('age_pipe', age_pipe, ['Age']),
    ('fare_pipe', fare_pipe, ['Fare']),
    ('embk_pipe', embk_pipe, ['Embarked']),
    ('sex_pipe', sex_pipe, ['Sex']),
    ('edu_pipe', edu_pipe, ['education']),
], remainder='passthrough')


In [6]:
X = preprocessor.fit_transform(df.iloc[:,1:])
X

array([[ 0.30169588, -0.51903614,  0.        , ...,  3.        ,
         1.        ,  0.        ],
       [ 0.52971355,  0.75965824,  0.        , ...,  1.        ,
         1.        ,  0.        ],
       [ 0.3587003 , -0.50555692,  0.        , ...,  3.        ,
         0.        ,  0.        ],
       ...,
       [ 0.64372239, -0.14461346,  0.        , ...,  2.        ,
         0.        ,  0.        ],
       [ 0.31594699, -0.45413621,  0.        , ...,  2.        ,
         0.        ,  0.        ],
       [ 0.38720251, -0.41120241,  0.        , ...,  2.        ,
         0.        ,  0.        ]], shape=(400, 9))

In [7]:
y = df.iloc[:,0]
y

0      0
1      1
2      1
3      1
4      0
      ..
395    0
396    0
397    0
398    0
399    1
Name: Survived, Length: 400, dtype: int64

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2,random_state=42)

In [9]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((320, 9), (80, 9), (320,), (80,))

In [10]:
import torch

X_train = torch.from_numpy(np.asarray(X_train).astype(np.float32))
X_test = torch.from_numpy(np.asarray(X_test).astype(np.float32))
y_train = torch.from_numpy(np.asarray(y_train).astype(np.float32))
y_test = torch.from_numpy(np.asarray(y_test).astype(np.float32))

In [11]:
X_train

tensor([[ 0.4870,  0.3966,  0.0000,  ...,  1.0000,  1.0000,  0.0000],
        [ 0.4300, -0.3044,  0.0000,  ...,  3.0000,  1.0000,  0.0000],
        [ 0.4727, -0.5341,  0.0000,  ...,  3.0000,  0.0000,  0.0000],
        ...,
        [ 0.3969, -0.0448,  0.0000,  ...,  1.0000,  0.0000,  0.0000],
        [ 0.0309, -0.3463,  0.0000,  ...,  3.0000,  1.0000,  1.0000],
        [ 0.2874,  0.8796,  0.0000,  ...,  1.0000,  0.0000,  1.0000]])

# 3. Dataload

In [12]:
from torch.utils.data import Dataset, DataLoader

In [13]:
class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, index):
        return self.features[index], self.labels[index]

In [14]:
train_dataset = CustomDataset(X_train,y_train)
test_dataset = CustomDataset(X_test,y_test)

In [15]:
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

# 3. Build Model

In [16]:
import torch.nn as nn

class MySimplePerceptron(nn.Module):
    def __init__(self, X_train):
        super().__init__()
        
        self.network = nn.Sequential(
            # Layer-1
            nn.Linear(X_train.shape[1], 3),
            nn.ReLU(),
            
            # Layer-2
            nn.Linear(3,1),
            nn.Sigmoid()
        )
    
    def forward(self, X_train):
        out = self.network(X_train)
        return out


# 4. Train Model

In [17]:
learning_rate = 0.1
epochs = 100

In [18]:
model = MySimplePerceptron(X_train)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

loss_function = nn.BCELoss()

In [19]:

for epoch in range(epochs):
    
    for batch_features, batch_labels in train_dataloader:
    
        #forward propagation
        y_pred = model.forward(batch_features)
        
        # loss calculate
        loss = loss_function(y_pred, batch_labels.view(-1,1))
        
        # reinitialize gradient
        optimizer.zero_grad()
        
        # backpropagation
        loss.backward()
        
        # update weight and bias
        optimizer.step()

    print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

Epoch: 1, Loss: 0.7371950149536133
Epoch: 2, Loss: 0.6871441602706909
Epoch: 3, Loss: 0.6676521897315979
Epoch: 4, Loss: 0.639999508857727
Epoch: 5, Loss: 0.6008161306381226
Epoch: 6, Loss: 0.6349932551383972
Epoch: 7, Loss: 0.6550030708312988
Epoch: 8, Loss: 0.5849601030349731
Epoch: 9, Loss: 0.678916871547699
Epoch: 10, Loss: 0.5187326669692993
Epoch: 11, Loss: 0.5572418570518494
Epoch: 12, Loss: 0.36330628395080566
Epoch: 13, Loss: 0.5379130840301514
Epoch: 14, Loss: 0.44811999797821045
Epoch: 15, Loss: 0.5411677360534668
Epoch: 16, Loss: 0.44266319274902344
Epoch: 17, Loss: 0.36884161829948425
Epoch: 18, Loss: 0.6695500612258911
Epoch: 19, Loss: 0.3509647250175476
Epoch: 20, Loss: 0.3777262270450592
Epoch: 21, Loss: 0.480254590511322
Epoch: 22, Loss: 0.49191078543663025
Epoch: 23, Loss: 0.5454958081245422
Epoch: 24, Loss: 0.49159595370292664
Epoch: 25, Loss: 0.4911718964576721
Epoch: 26, Loss: 0.43365371227264404
Epoch: 27, Loss: 0.3486095666885376
Epoch: 28, Loss: 0.49883314967155

# 5. Evaluate Model 

In [20]:
model.eval()

MySimplePerceptron(
  (network): Sequential(
    (0): Linear(in_features=9, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=1, bias=True)
    (3): Sigmoid()
  )
)

In [21]:
accuracy_list = []
for batch_features, batch_labels in test_dataloader:
    with torch.no_grad():
        y_pred = model.forward(batch_features)
        y_pred = (y_pred > 0.8).float()
        batch_accuracy = (batch_labels == y_pred).float().mean().item()
        
        accuracy_list.append(batch_accuracy)
print(f"Accuracy: {sum(accuracy_list)/len(accuracy_list)}")

Accuracy: 0.6354166666666666


In [22]:
accuracy_list = []
for batch_features, batch_labels in train_dataloader:
    with torch.no_grad():
        y_pred = model.forward(batch_features)
        y_pred = (y_pred > 0.8).float()
        batch_accuracy = (batch_labels == y_pred).float().mean().item()
        
        accuracy_list.append(batch_accuracy)
print(f"Accuracy: {sum(accuracy_list)/len(accuracy_list)}")

Accuracy: 0.5517578125
