In [2]:
import torch
import numpy as np
import pandas as pd
import torch.nn.functional as F

In [27]:
def preprocess_data(data_path):
    data = pd.read_csv(data_path)
    features = data.columns.to_list()
    features.remove('PassengerId')
    features.remove('Name')
    features.remove('Ticket')
    features.remove('Embarked')
    features.remove('Parch')
    features.remove('Cabin')
    features.remove('Pclass')
    features.remove('Survived')
    x_data = data[features]
    y_data = data['Survived']
    x_data['Sex'] = x_data['Sex'].apply(lambda x:0 if x=='male' else 1)
    x_data['Age'].fillna(x_data['Age'].mean(),inplace=True)
    x_data['Fare'].fillna(x_data['Fare'].median(),inplace=True)
    x_data = torch.tensor(x_data.to_numpy(),dtype=torch.float32) ## Computation on top of this array will be executed on GPU
    y_data = torch.tensor(y_data.to_numpy(),dtype=torch.float32)
    return x_data,y_data

In [28]:
x_data,y_data = preprocess_data('tested.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Sex'] = x_data['Sex'].apply(lambda x:0 if x=='male' else 1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Age'].fillna(x_data['Age'].mean(),inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Fare'].fillna(x_data['Fare'].median(),inplace=True)


In [29]:
x_data.shape,y_data.shape

(torch.Size([418, 4]), torch.Size([418]))

In [138]:
def forward(X,Y,W,b):
    Y_pred = X @ W + b
    Y_pred = F.sigmoid(Y_pred)
    # Y_pred.detach().apply_(lambda x:0 if x<0.5 else 1)
    # Y_pred.requires_grad = True
    loss = F.binary_cross_entropy(Y_pred,Y)
    return loss

In [161]:
def backward(W,b,loss):
    W.grad = None
    b.grad = None
    loss.backward()
    W.data -= (0.001)*W.grad
    b.data -= (0.001)*b.grad
    return W,b

In [162]:
def train(X,Y):
    W = torch.randn((4,),dtype=torch.float32,requires_grad=True)
    b = torch.randn((1,),dtype=torch.float32,requires_grad=True)
    for iteration in range(100):
        loss = forward(X,Y,W,b)
        W,b = backward(W,b,loss)
        print(f"Iteration {iteration}: loss = {loss}")
    return W,b

In [163]:
# NUmber of epochs = 100
# 1 Epoch --> Traversing whole dataset once
# How much we are moving W,b towards minimum loss is called learning rate.


In [164]:
W,b = train(x_data,y_data)

Iteration 0: loss = 3.1151912212371826
Iteration 1: loss = 2.758324384689331
Iteration 2: loss = 2.49771785736084
Iteration 3: loss = 2.3199093341827393
Iteration 4: loss = 2.1997387409210205
Iteration 5: loss = 2.3069539070129395
Iteration 6: loss = 2.224912166595459
Iteration 7: loss = 2.153271198272705
Iteration 8: loss = 2.0891621112823486
Iteration 9: loss = 2.030573606491089
Iteration 10: loss = 1.976215124130249
Iteration 11: loss = 1.9252406358718872
Iteration 12: loss = 1.87703537940979
Iteration 13: loss = 1.8310773372650146
Iteration 14: loss = 1.7870163917541504
Iteration 15: loss = 1.7445131540298462
Iteration 16: loss = 1.7034159898757935
Iteration 17: loss = 1.6635257005691528
Iteration 18: loss = 1.624699354171753
Iteration 19: loss = 1.5868898630142212
Iteration 20: loss = 1.550022840499878
Iteration 21: loss = 1.5140366554260254
Iteration 22: loss = 1.4789166450500488
Iteration 23: loss = 1.4446383714675903
Iteration 24: loss = 1.4111815690994263
Iteration 25: loss = 

In [165]:
W

tensor([ 0.2948, -0.0368,  0.5827,  0.0066], requires_grad=True)

In [166]:
b

tensor([0.0833], requires_grad=True)

In [167]:
## Forward Progagation

In [168]:
## Backward Propagation

In [None]:
## Gradient Descent

In [3]:
df = pd.read_csv("tested.csv")

In [4]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,0,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,1,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,0,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,0,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,1,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
413,1305,0,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,0,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,0,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [5]:
features = df.columns.to_list()

In [6]:
features.remove('Name')
features.remove('PassengerId')
features.remove('Ticket')
features.remove('Cabin')
features.remove('Embarked')

In [7]:
features.remove('Survived')

In [9]:
features.remove('Pclass')
features.remove('Parch')

In [10]:
df[features]

Unnamed: 0,Sex,Age,SibSp,Fare
0,male,34.5,0,7.8292
1,female,47.0,1,7.0000
2,male,62.0,0,9.6875
3,male,27.0,0,8.6625
4,female,22.0,1,12.2875
...,...,...,...,...
413,male,,0,8.0500
414,female,39.0,0,108.9000
415,male,38.5,0,7.2500
416,male,,0,8.0500


In [11]:
def 

In [21]:
df1['Sex'] = df1['Sex'].apply(lambda x: 0 if x == "male" else 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Sex'] = df1['Sex'].apply(lambda x: 0 if x == "male" else 1)


In [22]:
df1

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare
0,3,0,34.5,0,0,7.8292
1,3,1,47.0,1,0,7.0000
2,2,0,62.0,0,0,9.6875
3,3,0,27.0,0,0,8.6625
4,3,1,22.0,1,1,12.2875
...,...,...,...,...,...,...
413,3,0,,0,0,8.0500
414,1,1,39.0,0,0,108.9000
415,3,0,38.5,0,0,7.2500
416,3,0,,0,0,8.0500


In [27]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Pclass  418 non-null    int64  
 1   Sex     418 non-null    int64  
 2   Age     332 non-null    float64
 3   SibSp   418 non-null    int64  
 4   Parch   418 non-null    int64  
 5   Fare    417 non-null    float64
dtypes: float64(2), int64(4)
memory usage: 19.7 KB


In [28]:
df1['Age'].fillna(df1['Age'].mean(),inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Age'].fillna(df1['Age'].mean(),inplace=True)


In [29]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Pclass  418 non-null    int64  
 1   Sex     418 non-null    int64  
 2   Age     418 non-null    float64
 3   SibSp   418 non-null    int64  
 4   Parch   418 non-null    int64  
 5   Fare    417 non-null    float64
dtypes: float64(2), int64(4)
memory usage: 19.7 KB


In [31]:
df1['Fare'].fillna(df1['Fare'].median(),inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Fare'].fillna(df1['Fare'].median(),inplace=True)


In [32]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Pclass  418 non-null    int64  
 1   Sex     418 non-null    int64  
 2   Age     418 non-null    float64
 3   SibSp   418 non-null    int64  
 4   Parch   418 non-null    int64  
 5   Fare    418 non-null    float64
dtypes: float64(2), int64(4)
memory usage: 19.7 KB


In [55]:
X_df = df1

In [56]:
Y_df = df['Survived']

In [57]:
Y_df

0      0
1      1
2      0
3      0
4      1
      ..
413    0
414    1
415    0
416    0
417    0
Name: Survived, Length: 418, dtype: int64

In [58]:
X_df = torch.tensor(X_df.to_numpy(),dtype=torch.float32)

In [59]:
Y_df = torch.tensor(Y_df.to_numpy(),dtype=torch.float32)

In [60]:
X_df.shape

torch.Size([418, 6])

In [61]:
X_df.dtype

torch.float32

In [62]:
Y_df.shape

torch.Size([418])

In [145]:
W = torch.randn((6,1),dtype=torch.float32)

In [146]:
W.dtype

torch.float32

In [147]:
dot = X_df @ W

In [148]:
b = torch.rand(1)

In [149]:
y_pred = dot + b

In [150]:
loss = F.mse_loss(y_pred,Y_df)

  loss = F.mse_loss(y_pred,Y_df)


In [151]:
W.requires_grad = True
W.grad = None
loss.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [136]:
W.grad

  W.grad


In [129]:
W = W - 0.01*W.grad

In [86]:
W

tensor([[ -0.9421],
        [  0.2403],
        [-39.4923],
        [  0.2070],
        [ -0.2807],
        [-79.6047]], grad_fn=<SubBackward0>)

In [79]:
help(F.mse_loss)

Help on function mse_loss in module torch.nn.functional:

mse_loss(input: torch.Tensor, target: torch.Tensor, size_average: Optional[bool] = None, reduce: Optional[bool] = None, reduction: str = 'mean') -> torch.Tensor
    mse_loss(input, target, size_average=None, reduce=None, reduction='mean') -> Tensor
    
    Measures the element-wise mean squared error.
    
    See :class:`~torch.nn.MSELoss` for details.



tensor([[46.9952, 45.9952, 46.9952,  ..., 46.9952, 46.9952, 46.9952],
        [62.1034, 61.1034, 62.1034,  ..., 62.1034, 62.1034, 62.1034],
        [77.6706, 76.6706, 77.6706,  ..., 77.6706, 77.6706, 77.6706],
        ...,
        [51.1661, 50.1661, 51.1661,  ..., 51.1661, 51.1661, 51.1661],
        [42.3687, 41.3687, 42.3687,  ..., 42.3687, 42.3687, 42.3687],
        [51.4710, 50.4710, 51.4710,  ..., 51.4710, 51.4710, 51.4710]],
       grad_fn=<SubBackward0>)