In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the modelling dataset from the notebook's working directory and
# preview the first rows.
df = pd.read_csv(
    "formodels.csv",
    low_memory=False,  # infer each column's dtype from the whole file, not per chunk
)
df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,False,3,M,22.0,1,0,7.25,S
1,True,1,F,38.0,1,0,71.2833,C
2,True,3,F,26.0,0,0,7.925,S
3,True,1,F,35.0,1,0,53.1,S
4,False,3,M,35.0,0,0,8.05,S


## Imports

In [4]:
from seaborn import heatmap

from sklearn.metrics import mean_squared_error, classification_report, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from numpy.random import seed
from tensorflow.keras.utils import set_random_seed

# Fix the random state so repeated runs give comparable results.
# The NumPy seed is set first, then the Keras-wide seed.
# NOTE(review): per the Keras docs, set_random_seed also seeds NumPy, which
# would supersede seed(1) -- confirm whether both calls are still wanted.
seed(1)
set_random_seed(42)

## Train/test split

In [5]:
# The survival flag is the prediction target; every other column is a feature.
y = df['Survived']
X = df.drop(columns='Survived')

In [6]:
# Hold out a test set; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

## Pipeline

In [7]:
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn import set_config
# Show estimators and pipelines as diagrams when a cell displays them.
set_config(display='diagram')

In [10]:
# Standardises the numeric features.
scaler = StandardScaler()

# Dense one-hot encoder for the categorical features; categories not seen
# during fit are ignored instead of raising.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new keyword first and fall back to the
# old one on installations that predate the rename.
try:
    ohe = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)

In [8]:
# Selectors choose columns by dtype; calling one on the training frame
# returns the matching column names.
num_selector = make_column_selector(dtype_include='number')
num_columns = num_selector(X_train)

cat_selector = make_column_selector(dtype_include='object')
cat_columns = cat_selector(X_train)

In [11]:
# Pipeline applied to the numeric columns: scaling only.
numeric_pipe = make_pipeline(scaler)
numeric_pipe

In [12]:
# Pipeline applied to the categorical columns: one-hot encoding only.
categorical_pipe = make_pipeline(ohe)
categorical_pipe

In [13]:
# Pair each sub-pipeline with the selector that picks its columns, then
# combine both pairs into a single column transformer.
number_tuple, category_tuple = (
    (numeric_pipe, num_selector),
    (categorical_pipe, cat_selector),
)
preprocessor = make_column_transformer(
    number_tuple,
    category_tuple,
)
preprocessor

In [16]:
# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share of the variance (here 95%).
pca = PCA(n_components=0.95)

# Full feature pipeline: preprocessing followed by PCA.
pcapipe = make_pipeline(preprocessor, pca)
pcapipe

In [17]:
# Fit the preprocessing + PCA pipeline on the training data only, then apply
# the already-fitted pipeline to the test data.
X_train_proc = pcapipe.fit_transform(X_train)
X_test_proc = pcapipe.transform(X_test)

# Report the change in shape. Reuse the transformed array rather than calling
# fit_transform a second time, which would needlessly refit the pipeline.
print('Shape before PCA:', X_train.shape)
print('Shape after PCA:', X_train_proc.shape)

Shape before PCA: (534, 7)
Shape after PCA: (534, 7)


## Model

In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import metrics

In [19]:
# Width of the processed feature matrix (its second dimension); used as the
# input size of the network's first layer.
input_shape = X_train_proc.shape[1]
input_shape

7

In [None]:
# Baseline network: two small ReLU hidden layers feeding a single output unit.
base_model = Sequential()
# hidden layer 1 (also declares the input width of the network)
base_model.add(Dense(10,
                     input_dim = input_shape,
                     activation = 'relu'))
# hidden layer 2
base_model.add(Dense(10,
                     activation = 'relu'))
# Output layer: the target `Survived` is a binary True/False label, so use a
# sigmoid to produce a value in (0, 1) instead of an unbounded linear output.
base_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy is the loss matching a sigmoid output on a binary
# target. The metrics list is kept unchanged so that the training-history
# keys and the number of values returned by evaluate() stay the same.
base_model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[metrics.MeanAbsoluteError(),
                           metrics.RootMeanSquaredError()])