# "Regression Model"

> "Using fast.ai and Pytorch's modules, building a regression model and Neural Network using Kaggle's titanic survival data set"

- toc: true
- branch: master
- comments : False
- author : Eric Vincent
- categories : [fastpages, jupyter]

In [1]:
import pandas as pd
import numpy as np


In [2]:
df = pd.read_csv('/kaggle/input/titanic-survival-dataset/train.csv')

In [3]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [4]:
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [5]:
df = df.drop(columns=['Name', 'Cabin', 'Ticket', 'PassengerId'])

In [6]:
df['Male'] = df['Sex']

In [7]:
df['Male'] = df['Male'].replace({'male': 1, 'female' : 0})

In [8]:
df['Embarked_C'] = df['Embarked']
df['Embarked_C'] = df['Embarked_C'].replace({'S':0, 'C':1})
df['Embarked_S'] = df['Embarked']
df['Embarked_S'] = df['Embarked_S'].replace({'S':1, 'C':0})
df['Pclass1'] = df['Pclass']
df['Pclass2'] = df['Pclass']
df['Pclass3'] = df['Pclass']
df['Pclass1'] = df['Pclass1'].replace({2:0, 3:0})
df['Pclass2'] = df['Pclass2'].replace({1:0, 3:0, 2:1})
df['Pclass3'] = df['Pclass3'].replace({1:0, 2:0, 3:1})

In [9]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3
0,0,3,male,22.0,1,0,7.2500,S,1,0,1,0,0,1
1,1,1,female,38.0,1,0,71.2833,C,0,1,0,1,0,0
2,1,3,female,26.0,0,0,7.9250,S,0,0,1,0,0,1
3,1,1,female,35.0,1,0,53.1000,S,0,0,1,1,0,0
4,0,3,male,35.0,0,0,8.0500,S,1,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,1,0,1,0,1,0
887,1,1,female,19.0,0,0,30.0000,S,0,0,1,1,0,0
888,0,3,female,,1,2,23.4500,S,0,0,1,0,0,1
889,1,1,male,26.0,0,0,30.0000,C,1,1,0,1,0,0


In [10]:
random_parameters = np.random.rand(1,10)

In [11]:
Sibsp, Parch, Age, log_fare, Pclass1, Pclass2, EmbarkS, EmbarkC, Male, Const = [num for num in random_parameters[0]]

In [12]:
Sibsp

0.6427942275076264

In [13]:
Parch

0.15150387891218575

In [14]:
maxAge, maxFare = max(df['Age']), max(df['Fare'])

In [15]:
maxAge

80.0

In [16]:
df['Age_N'] = df['Age'] / maxAge

In [17]:
df['log_Fare'] = df['Fare'] / maxFare

In [18]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3,Age_N,log_Fare
0,0,3,male,22.0,1,0,7.2500,S,1,0,1,0,0,1,0.2750,0.014151
1,1,1,female,38.0,1,0,71.2833,C,0,1,0,1,0,0,0.4750,0.139136
2,1,3,female,26.0,0,0,7.9250,S,0,0,1,0,0,1,0.3250,0.015469
3,1,1,female,35.0,1,0,53.1000,S,0,0,1,1,0,0,0.4375,0.103644
4,0,3,male,35.0,0,0,8.0500,S,1,0,1,0,0,1,0.4375,0.015713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,1,0,1,0,1,0,0.3375,0.025374
887,1,1,female,19.0,0,0,30.0000,S,0,0,1,1,0,0,0.2375,0.058556
888,0,3,female,,1,2,23.4500,S,0,0,1,0,0,1,,0.045771
889,1,1,male,26.0,0,0,30.0000,C,1,1,0,1,0,0,0.3250,0.058556


In [19]:
df['log_Fare'] = np.log10(df['log_Fare'] + 1)

In [103]:
df['log_Fare'] = df['log_Fare'] +1

In [110]:
df = df.drop(columns=['log_fare'])

In [116]:
df['log_Fare'] = df['log_Fare'] + 1 

In [20]:
df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3,Age_N,log_Fare
0,0,3,male,22.0,1,0,7.2500,S,1,0,1,0,0,1,0.2750,0.006103
1,1,1,female,38.0,1,0,71.2833,C,0,1,0,1,0,0,0.4750,0.056575
2,1,3,female,26.0,0,0,7.9250,S,0,0,1,0,0,1,0.3250,0.006666
3,1,1,female,35.0,1,0,53.1000,S,0,0,1,1,0,0,0.4375,0.042829
4,0,3,male,35.0,0,0,8.0500,S,1,0,1,0,0,1,0.4375,0.006771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,1,0,1,0,1,0,0.3375,0.010882
887,1,1,female,19.0,0,0,30.0000,S,0,0,1,1,0,0,0.2375,0.024714
888,0,3,female,,1,2,23.4500,S,0,0,1,0,0,1,,0.019437
889,1,1,male,26.0,0,0,30.0000,C,1,1,0,1,0,0,0.3250,0.024714


In [119]:
parameters = {
    Sibsp, Parch, Age, log_fare, Pclass1, Pclass2, EmbarkS, EmbarkC, Male, Const
    
    
}

In [21]:
df['Ones'] = 1

In [125]:
# for each row, calculate the sum product (linear)


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3,Age_N,log_Fare,Ones
0,0,3,male,22.0,1,0,7.2500,S,0,1,0,0,1,0.2750,1.006103,1
1,1,1,female,38.0,1,0,71.2833,C,1,0,1,0,0,0.4750,1.056575,1
2,1,3,female,26.0,0,0,7.9250,S,0,1,0,0,1,0.3250,1.006666,1
3,1,1,female,35.0,1,0,53.1000,S,0,1,1,0,0,0.4375,1.042829,1
4,0,3,male,35.0,0,0,8.0500,S,0,1,0,0,1,0.4375,1.006771,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,0,1,0,1,0,0.3375,1.010882,1
887,1,1,female,19.0,0,0,30.0000,S,0,1,1,0,0,0.2375,1.024714,1
888,0,3,female,,1,2,23.4500,S,0,1,0,0,1,,1.019437,1
889,1,1,male,26.0,0,0,30.0000,C,1,0,1,0,0,0.3250,1.024714,1


In [127]:
a = np.array([[1,2,1],
            [0,1,0],
            [2,3,4]])

In [128]:
a

array([[1, 2, 1],
       [0, 1, 0],
       [2, 3, 4]])

In [129]:
b = np.array([[2,5], [6,7], [1,8]])

In [130]:
b

array([[2, 5],
       [6, 7],
       [1, 8]])

In [131]:
c = np.dot(a,b)

In [132]:
c

array([[15, 27],
       [ 6,  7],
       [26, 63]])

In [133]:
parameters

{0.023121551427445874,
 0.13402581877095354,
 0.1460823666152794,
 0.22088891772092012,
 0.594795670221624,
 0.7961562768917094,
 0.8530100056367039,
 0.8613248152275507,
 0.8901214376590094,
 0.9785782868782011}

In [22]:
parameters = np.array([0.023121551427445874,
 0.13402581877095354,
 0.1460823666152794,
 0.22088891772092012,
 0.594795670221624,
 0.7961562768917094,
 0.8530100056367039,
 0.8613248152275507,
 0.8901214376590094,
 0.9785782868782011])

In [23]:
parameters

array([0.02312155, 0.13402582, 0.14608237, 0.22088892, 0.59479567,
       0.79615628, 0.85301001, 0.86132482, 0.89012144, 0.97857829])

In [136]:
type(df['log_Fare'])

pandas.core.series.Series

In [138]:
test_array = np.array([num for num in df['log_Fare']])

In [139]:
#collapse_output
test_array

array([1.00610265, 1.05657548, 1.00666649, 1.04282912, 1.00677082,
       1.00711144, 1.04187758, 1.01750732, 1.00933647, 1.02477057,
       1.01393053, 1.02194231, 1.00677082, 1.02573369, 1.00660738,
       1.01335551, 1.02401262, 1.01088243, 1.01499643, 1.00608175,
       1.02149883, 1.01088243, 1.00675346, 1.02909607, 1.01750732,
       1.02582356, 1.00608175, 1.17993703, 1.00662825, 1.00664211,
       1.02288486, 1.10923744, 1.00652038, 1.00881073, 1.06460275,
       1.04198341, 1.00608526, 1.00677082, 1.01499643, 1.00942639,
       1.00795846, 1.01744625, 1.00664211, 1.03388884, 1.00662825,
       1.00677082, 1.0129443 , 1.00652038, 1.01799898, 1.01483262,
       1.03240311, 1.00656213, 1.06060924, 1.02149883, 1.04959606,
       1.02909607, 1.00881073, 1.00608526, 1.02290834, 1.03804073,
       1.00608526, 1.06301403, 1.06555445, 1.02302895, 1.02288486,
       1.0127351 , 1.00881073, 1.00686119, 1.00666649, 1.0072817 ,
       1.00881073, 1.03804073, 1.05822191, 1.01208297, 1.04542

In [24]:
model_df = df

In [25]:
model_df = model_df.drop(columns=['Survived', 'Sex', 'Age', 'Fare', 'Embarked'])

In [26]:
model_df = model_df.drop(columns=['Pclass'])

In [27]:
model_df['Age_N'] = model_df['Age_N'].fillna(0)


In [148]:
model_df.isnull().values.any()

True

In [28]:
model_df

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3,Age_N,log_Fare,Ones
0,1,0,1,0,1,0,0,1,0.2750,0.006103,1
1,1,0,0,1,0,1,0,0,0.4750,0.056575,1
2,0,0,0,0,1,0,0,1,0.3250,0.006666,1
3,1,0,0,0,1,1,0,0,0.4375,0.042829,1
4,0,0,1,0,1,0,0,1,0.4375,0.006771,1
...,...,...,...,...,...,...,...,...,...,...,...
886,0,0,1,0,1,0,1,0,0.3375,0.010882,1
887,0,0,0,0,1,1,0,0,0.2375,0.024714,1
888,1,2,0,0,1,0,0,1,0.0000,0.019437,1
889,0,0,1,1,0,1,0,0,0.3250,0.024714,1


In [29]:
model_df = model_df.drop(columns=['Pclass3'])

In [30]:
model_df

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones
0,1,0,1,0,1,0,0,0.2750,0.006103,1
1,1,0,0,1,0,1,0,0.4750,0.056575,1
2,0,0,0,0,1,0,0,0.3250,0.006666,1
3,1,0,0,0,1,1,0,0.4375,0.042829,1
4,0,0,1,0,1,0,0,0.4375,0.006771,1
...,...,...,...,...,...,...,...,...,...,...
886,0,0,1,0,1,0,1,0.3375,0.010882,1
887,0,0,0,0,1,1,0,0.2375,0.024714,1
888,1,2,0,0,1,0,0,0.0000,0.019437,1
889,0,0,1,1,0,1,0,0.3250,0.024714,1


In [32]:
model_df['Linear'] = model_df.dot(parameters)

In [31]:
model_df['Embarked_S'] = model_df['Embarked_S'].replace({'Q': 0})
model_df['Embarked_C'] = model_df['Embarked_C'].replace({'Q': 0})

In [172]:
model_df

Unnamed: 0,SibSp,Parch,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Male
0,1,0,0.0,1.0,0,0,0.2750,1.006103,1,1
1,1,0,1.0,0.0,1,0,0.4750,1.056575,1,0
2,0,0,0.0,1.0,0,0,0.3250,1.006666,1,0
3,1,0,0.0,1.0,1,0,0.4375,1.042829,1,0
4,0,0,0.0,1.0,0,0,0.4375,1.006771,1,1
...,...,...,...,...,...,...,...,...,...,...
886,0,0,0.0,1.0,0,1,0.3375,1.010882,1,1
887,0,0,0.0,1.0,1,0,0.2375,1.024714,1,0
888,1,2,0.0,1.0,0,0,0.0000,1.019437,1,0
889,0,0,1.0,0.0,1,0,0.3250,1.024714,1,1


In [174]:
model_df

Unnamed: 0,SibSp,Parch,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Male,Linear
0,1,0,0.0,1.0,0,0,0.2750,1.006103,1,1,3.213869
1,1,0,1.0,0.0,1,0,0.4750,1.056575,1,0,2.969355
2,0,0,0.0,1.0,0,0,0.3250,1.006666,1,0,2.255305
3,1,0,0.0,1.0,1,0,0.4375,1.042829,1,0,3.000334
4,0,0,0.0,1.0,0,0,0.4375,1.006771,1,1,3.329937
...,...,...,...,...,...,...,...,...,...,...,...
886,0,0,0.0,1.0,0,1,0.3375,1.010882,1,1,4.044334
887,0,0,0.0,1.0,1,0,0.2375,1.024714,1,0,2.791007
888,1,2,0.0,1.0,0,0,0.0000,1.019437,1,0,2.280250
889,0,0,1.0,0.0,1,0,0.3250,1.024714,1,1,3.769418


In [175]:
 df

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Embarked_C,Embarked_S,Pclass1,Pclass2,Pclass3,Age_N,log_Fare,Ones,Male
0,0,3,male,22.0,1,0,7.2500,S,0,1,0,0,1,0.2750,1.006103,1,1
1,1,1,female,38.0,1,0,71.2833,C,1,0,1,0,0,0.4750,1.056575,1,0
2,1,3,female,26.0,0,0,7.9250,S,0,1,0,0,1,0.3250,1.006666,1,0
3,1,1,female,35.0,1,0,53.1000,S,0,1,1,0,0,0.4375,1.042829,1,0
4,0,3,male,35.0,0,0,8.0500,S,0,1,0,0,1,0.4375,1.006771,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,0,1,0,1,0,0.3375,1.010882,1,1
887,1,1,female,19.0,0,0,30.0000,S,0,1,1,0,0,0.2375,1.024714,1,0
888,0,3,female,,1,2,23.4500,S,0,1,0,0,1,,1.019437,1,0
889,1,1,male,26.0,0,0,30.0000,C,1,0,1,0,0,0.3250,1.024714,1,1


In [33]:
model_df['Survived'] = df['Survived']

In [34]:
model_df

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Linear,Survived
0,1,0,1,0.0,1.0,0,0,0.2750,0.006103,1,1.984874,0
1,1,0,0,1.0,0.0,1,0,0.4750,0.056575,1,2.478233,1
2,0,0,0,0.0,1.0,0,0,0.3250,0.006666,1,1.859239,1
3,1,0,0,0.0,1.0,1,0,0.4375,0.042829,1,2.807605,1
4,0,0,1,0.0,1.0,0,0,0.4375,0.006771,1,2.102313,0
...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,0,1,0.0,1.0,0,1,0.3375,0.010882,1,2.872850,0
887,0,0,0,0.0,1.0,1,0,0.2375,0.024714,1,2.596093,1
888,1,2,0,0.0,1.0,0,0,0.0000,0.019437,1,1.881848,0
889,0,0,1,1.0,0.0,1,0,0.3250,0.024714,1,2.443635,1


In [35]:
model_df['Loss'] = (model_df['Linear'] - 1)**2

In [36]:
model_df

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Linear,Survived,Loss
0,1,0,1,0.0,1.0,0,0,0.2750,0.006103,1,1.984874,0,0.969977
1,1,0,0,1.0,0.0,1,0,0.4750,0.056575,1,2.478233,1,2.185174
2,0,0,0,0.0,1.0,0,0,0.3250,0.006666,1,1.859239,1,0.738291
3,1,0,0,0.0,1.0,1,0,0.4375,0.042829,1,2.807605,1,3.267434
4,0,0,1,0.0,1.0,0,0,0.4375,0.006771,1,2.102313,0,1.215093
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,0,1,0.0,1.0,0,1,0.3375,0.010882,1,2.872850,0,3.507568
887,0,0,0,0.0,1.0,1,0,0.2375,0.024714,1,2.596093,1,2.547514
888,1,2,0,0.0,1.0,0,0,0.0000,0.019437,1,1.881848,0,0.777656
889,0,0,1,1.0,0.0,1,0,0.3250,0.024714,1,2.443635,1,2.084081


In [37]:
model_df['Linear'].mean()

2.255390616421957

In [38]:
survived_label = model_df['Survived'] == 1

In [39]:
survived_label

0      False
1       True
2       True
3       True
4      False
       ...  
886    False
887     True
888    False
889     True
890    False
Name: Survived, Length: 891, dtype: bool

In [40]:
death_label = model_df['Survived'] == 0

In [43]:
survived = model_df.loc[survived_label]

In [44]:
death = model_df.loc[death_label]

In [46]:
import fastbook
fastbook.setup_book()

In [47]:
from fastai.vision.all import *
from fastbook import *

matplotlib.rc('image', cmap='Greys')

In [48]:
death

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Linear,Survived,Loss
0,1,0,1,0.0,1.0,0,0,0.2750,0.006103,1,1.984874,0,0.969977
4,0,0,1,0.0,1.0,0,0,0.4375,0.006771,1,2.102313,0,1.215093
5,0,0,1,0.0,0.0,0,0,0.0000,0.007111,1,1.130991,0,0.017159
6,0,0,1,0.0,1.0,1,0,0.6750,0.041878,1,3.134283,0,4.555164
7,3,1,1,0.0,1.0,0,0,0.0250,0.017507,1,1.959964,0,0.921530
...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,0,0,1,0.0,1.0,0,0,0.3125,0.005935,1,1.993904,0,0.987844
885,0,5,0,0.0,0.0,0,0,0.4875,0.024013,1,2.089977,0,1.188051
886,0,0,1,0.0,1.0,0,1,0.3375,0.010882,1,2.872850,0,3.507568
888,1,2,0,0.0,1.0,0,0,0.0000,0.019437,1,1.881848,0,0.777656


In [49]:
survived

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Linear,Survived,Loss
1,1,0,0,1.0,0.0,1,0,0.4750,0.056575,1,2.478233,1,2.185174
2,0,0,0,0.0,1.0,0,0,0.3250,0.006666,1,1.859239,1,0.738291
3,1,0,0,0.0,1.0,1,0,0.4375,0.042829,1,2.807605,1,3.267434
8,0,2,0,0.0,1.0,0,0,0.3375,0.009336,1,2.140433,1,1.300588
9,1,0,0,1.0,0.0,0,1,0.1750,0.024771,1,2.248379,1,1.558451
...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,0,0,0,1.0,0.0,0,0,0.1875,0.006082,1,1.366379,1,0.134234
879,0,1,0,1.0,0.0,1,0,0.7000,0.065324,1,2.790723,1,3.206687
880,0,1,0,0.0,1.0,0,1,0.3125,0.021499,1,2.848710,1,3.417730
887,0,0,0,0.0,1.0,1,0,0.2375,0.024714,1,2.596093,1,2.547514


In [51]:
survived_tensor = tensor(survived)
death_tensor = tensor(death)

In [52]:
survived_tensor

tensor([[1.0000, 0.0000, 0.0000,  ..., 2.4782, 1.0000, 2.1852],
        [0.0000, 0.0000, 0.0000,  ..., 1.8592, 1.0000, 0.7383],
        [1.0000, 0.0000, 0.0000,  ..., 2.8076, 1.0000, 3.2674],
        ...,
        [0.0000, 1.0000, 0.0000,  ..., 2.8487, 1.0000, 3.4177],
        [0.0000, 0.0000, 0.0000,  ..., 2.5961, 1.0000, 2.5475],
        [0.0000, 0.0000, 1.0000,  ..., 2.4436, 1.0000, 2.0841]])

In [55]:
len(survived_tensor), len(death_tensor)

(342, 549)

In [56]:
stacked_survived = torch.stack(survived_tensor).float()
stacked_death = torch.stack(death_tensor).float()

TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor

In [67]:
num = range(len(survived))
stacked_survived = [tensor(survived.iloc[num]) for num in num]

In [65]:
num = range(len(survived))

In [66]:
num

range(0, 342)

In [69]:
copydf = model_df

In [71]:
copydf

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Age_N,log_Fare,Ones,Linear,Survived,Loss
0,1,0,1,0.0,1.0,0,0,0.2750,0.006103,1,1.984874,0,0.969977
1,1,0,0,1.0,0.0,1,0,0.4750,0.056575,1,2.478233,1,2.185174
2,0,0,0,0.0,1.0,0,0,0.3250,0.006666,1,1.859239,1,0.738291
3,1,0,0,0.0,1.0,1,0,0.4375,0.042829,1,2.807605,1,3.267434
4,0,0,1,0.0,1.0,0,0,0.4375,0.006771,1,2.102313,0,1.215093
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,0,1,0.0,1.0,0,1,0.3375,0.010882,1,2.872850,0,3.507568
887,0,0,0,0.0,1.0,1,0,0.2375,0.024714,1,2.596093,1,2.547514
888,1,2,0,0.0,1.0,0,0,0.0000,0.019437,1,1.881848,0,0.777656
889,0,0,1,1.0,0.0,1,0,0.3250,0.024714,1,2.443635,1,2.084081


In [70]:
print('test')

test


In [72]:
copydf = copydf.drop(columns=['Age_N', 'log_Fare', 'Ones', 'Linear', 'Loss'])

In [73]:
copydf

Unnamed: 0,SibSp,Parch,Male,Embarked_C,Embarked_S,Pclass1,Pclass2,Survived
0,1,0,1,0.0,1.0,0,0,0
1,1,0,0,1.0,0.0,1,0,1
2,0,0,0,0.0,1.0,0,0,1
3,1,0,0,0.0,1.0,1,0,1
4,0,0,1,0.0,1.0,0,0,0
...,...,...,...,...,...,...,...,...
886,0,0,1,0.0,1.0,0,1,0
887,0,0,0,0.0,1.0,1,0,1
888,1,2,0,0.0,1.0,0,0,0
889,0,0,1,1.0,0.0,1,0,1


In [81]:
survived_label = copydf['Survived'] == 1
death_label = copydf['Survived'] == 0
survived = copydf.loc[survived_label]
death = copydf.loc[death_label]
num = range(len(survived))
nums = range(len(death))
stacked_survived = [tensor(survived.iloc[num]) for num in num]
stacked_death = [tensor(death.iloc[nums]) for nums in nums]


In [82]:
survive_tensors_stacked = torch.stack(stacked_survived).float()
death_tensors_stacked = torch.stack(stacked_death).float()


# mean_survived = stacked_death.mean(0)
# mean_death = stacked_survived.mean(0)


In [83]:
survive_tensors_stacked.shape, death_tensors_stacked.shape

(torch.Size([342, 8]), torch.Size([549, 8]))

In [84]:
mean_survived = survive_tensors_stacked.mean(0)
mean_death = death_tensors_stacked.mean(0)

In [85]:
mean_survived

tensor([0.4737, 0.4649, 0.3187,    nan,    nan, 0.3977, 0.2544, 1.0000])

In [89]:
single_survivor = survive_tensors_stacked[1]
single_death = death_tensors_stacked[1]

In [90]:
import torch.nn.functional as F

In [91]:
# distance between survivor and death tensor
F.l1_loss(single_survivor.float(),mean_death), F.mse_loss(single_survivor,mean_death).sqrt()

(tensor(0.4271), tensor(0.5318))

In [92]:
# distance between survivor and survivor tensor
F.l1_loss(single_survivor.float(),mean_survived), F.mse_loss(single_survivor,mean_survived).sqrt()
# need RElu given that data contains many 0? -- > nan output 

(tensor(nan), tensor(nan))

In [93]:
def survive_distance(a,b): 
    return (a-b).abs().mean((-1,-2))
distance_all_survived = survive_distance(survive_tensors_stacked, mean_survived)

In [94]:
distance_all_survived

tensor(nan)