# Multi-Layer Perceptron

In [1]:
import pandas as pd 

data = pd.read_csv("../data/2008_births.csv")
data


Unnamed: 0,INST,RPLACE,RCOUNTY,PLURAL,BDATE,BMONTH,BDAY,BYEAR,SEX,RACE,...,MOTHERTR,IANEMIA,BINJURY,FAS,HYALINE,ASPIRATE,VENTLESS,VENTMORE,ISEIZURE,OTHINF
0,1,6800,68,1,2008-01-01,1,1,2008,2,1,...,2,0,0,0,0,0,0,0,0,0
1,1,160,1,1,2008-01-02,1,2,2008,2,2,...,2,0,0,0,0,0,0,0,0,0
2,1,190,1,1,2008-01-02,1,2,2008,1,1,...,2,0,0,0,0,0,0,0,0,0
3,1,4100,41,1,2008-01-03,1,3,2008,2,1,...,2,0,0,0,0,0,0,0,0,0
4,1,160,1,1,2008-01-03,1,3,2008,2,1,...,2,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
133417,1,2000,20,1,2008-12-19,12,19,2008,1,1,...,2,9,9,9,9,9,9,9,9,9
133418,1,2000,20,1,2008-12-22,12,22,2008,2,1,...,2,9,9,9,9,9,9,9,9,9
133419,1,2600,26,1,2008-12-26,12,26,2008,1,1,...,2,9,9,9,9,9,9,9,9,9
133420,1,2000,20,1,2008-12-30,12,30,2008,2,1,...,2,9,9,9,9,9,9,9,9,9


In [2]:
import numpy as np 
from sklearn.model_selection import train_test_split


In [3]:
#birthweight is what we want to predict - change this to single target 
birth_weight = data[['BPOUND', 'BOUNCE']] 
birth_weight = birth_weight.assign(total_weight = lambda x: birth_weight['BPOUND'] + (birth_weight['BOUNCE']/16))
birth_weight = birth_weight.drop(['BPOUND', 'BOUNCE'], axis = 1) 
birth_weight

Unnamed: 0,total_weight
0,4.0625
1,8.1875
2,9.0000
3,7.3750
4,9.4375
...,...
133417,6.5000
133418,9.1250
133419,8.4375
133420,5.8125


In [4]:
#PCA might be a good technique to select predictors 

#note that PCA performs best when data is normalized (range b/w 0 and 1)

#It is possible to use categorical and continuous predictors 
#for a regression problem. My understanding is you need to make 
#dummy variables for the binary predictors. 

#Variables that we will need to deal with: 
# BDATE, HISPMOM, HISPDAD

In [5]:
#Attempting PCA on data
#for now I drop the BDATE, HISPMOM AND HISPDAD
data_drop = data.drop(["BDATE", "HISPMOM", "HISPDAD", "BOUNCE", "BPOUND"], axis = 1) #axis = 1 means to drop column not row

In [6]:
#get a list of columns in pandas object 
names_of_data = data_drop.columns.tolist()

#shuffle = false prevents data split being different everytime
X_train, X_test, y_train, y_test = train_test_split(data_drop, birth_weight, test_size = 0.2, shuffle = False)

#split test into validate and test, again making sure the data is always the same for consistency
##X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, shuffle = False)

#Normalizing the data
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

#running the actual PCA
from sklearn.decomposition import PCA

pca = PCA()
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

#relief f algorithm - sorting features 

In [7]:
explained_variance = pca.explained_variance_ratio_
print(len(explained_variance))
print(explained_variance)

120
[3.89856404e-02 3.41089470e-02 3.05310536e-02 2.86690661e-02
 2.39828710e-02 2.07124228e-02 1.81565355e-02 1.70798239e-02
 1.68763289e-02 1.59961073e-02 1.56220702e-02 1.35585642e-02
 1.30251204e-02 1.13087563e-02 1.10457470e-02 1.09427186e-02
 1.06522571e-02 1.03091115e-02 1.02034145e-02 1.01603763e-02
 9.98586972e-03 9.86404012e-03 9.78778404e-03 9.61165628e-03
 9.46902421e-03 9.40630729e-03 9.25092446e-03 9.20797437e-03
 9.16882901e-03 9.11728971e-03 9.08471022e-03 9.05880935e-03
 8.91090184e-03 8.85632587e-03 8.83902308e-03 8.82366452e-03
 8.73228213e-03 8.70972804e-03 8.64475483e-03 8.63888132e-03
 8.60300393e-03 8.57402898e-03 8.54542908e-03 8.51710741e-03
 8.50298288e-03 8.46111398e-03 8.42122923e-03 8.39505222e-03
 8.37548109e-03 8.34046815e-03 8.29732609e-03 8.28947627e-03
 8.24895028e-03 8.22904830e-03 8.20238682e-03 8.12690154e-03
 8.11341630e-03 8.08291392e-03 8.07851589e-03 8.03763212e-03
 8.01473052e-03 7.96613523e-03 7.90999598e-03 7.89944166e-03
 7.83600377e-03 7.82

In [8]:
#Explained variance prints the variance each principal component contributes.
#As we can see, the last 5 contribute very little (maybe we can get rid of?)

#We also want to check for linearity between the input predictors and the output 
#If there is high colinearity, then we want to use ridge regression - A variant of lin regression that has regulatization

#Correlation indicates strength and direction of a linear relationship. let's use this on the predictors 

In [9]:
from sklearn.neural_network import MLPRegressor as reg
nn = reg(activation = 'relu', solver = 'adam', learning_rate_init = 0.01)

In [10]:
y_test[["total_weight"]]

Unnamed: 0,total_weight
106737,7.8125
106738,7.6875
106739,5.1250
106740,8.0000
106741,6.3750
...,...
133417,6.5000
133418,9.1250
133419,8.4375
133420,5.8125


In [11]:

nn.fit(X_train, np.ravel(y_train))

MLPRegressor(learning_rate_init=0.01)

In [12]:
y_pred = nn.predict(X_test)
y_pred

array([8.2060987 , 7.37414802, 5.11214767, ..., 5.43307517, 5.43307517,
       5.43307517])

In [13]:
from sklearn.metrics import mean_squared_error
mean_squared_error(y_pred, y_test)

1.3342447580055667