<a href="https://colab.research.google.com/github/databyhuseyn/DeepLearning/blob/main/Titanic_SK_Learn_vs_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the train and test CSV files from the /content directory.
train_csv_path = '/content/train.csv'
test_csv_path = '/content/test.csv'
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [None]:
# Remove rows whose 'Survived' label is missing. dropna returns a new frame
# (it does not modify `train`), so the result has to be assigned back.
# When no label is missing this is a no-op, so no guard is needed.
train = train.dropna(subset=['Survived'])

In [None]:
# Columns removed from both frames before modelling.
unused_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']

# Rebind instead of mutating in place, and tolerate columns that are already
# gone, so re-running this cell does not raise KeyError.
train = train.drop(columns=unused_columns, errors='ignore')
test = test.drop(columns=unused_columns, errors='ignore')

In [None]:
# Separate the 'Survived' label from the remaining feature columns.
y_train_full = train['Survived']
X_train_full = train.drop(columns='Survived')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for validation, stratified on the label.
# random_state pins the shuffle: without it every run produces a different
# split, so the scores (and even which categories the one-hot encoder sees
# in the training rows) change from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.2,
    stratify=y_train_full,
    random_state=42,
)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Group the column labels by dtype: numeric columns versus everything else.
num_features = X_train.select_dtypes(include=np.number).columns
cat_features = X_train.select_dtypes(exclude=np.number).columns

In [None]:
# Non-numeric columns: replace missing values with the literal 'Missing'
# (which then becomes its own one-hot category) and one-hot encode densely.
# Categories unseen during fit are encoded as all zeros instead of raising.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Numeric columns: fill missing values with the column median, then standardise.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Apply each sub-pipeline to its column group; any column in neither group
# is passed through unchanged.
transformer = ColumnTransformer([
    ('cat', cat_pipeline, cat_features),
    ('num', num_pipeline, num_features)
], remainder='passthrough')

# Preprocessing followed by a random forest. The forest is seeded so that
# repeated fits (and the cross-validated search built on this pipeline)
# give the same result on the same data.
full_pipeline = Pipeline([
    ('preprocessing', transformer),
    ('rfc', RandomForestClassifier(random_state=42))
])

In [None]:
# Fit the preprocessing step and the random forest on the training split.
full_pipeline.fit(X_train, y_train)

In [None]:
# Display the raw (untransformed) training features.
X_train

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
552,3,male,,0,0,7.8292,Q
72,2,male,21.0,0,0,73.5000,S
537,1,female,30.0,0,0,106.4250,C
571,1,female,53.0,2,0,51.4792,S
73,3,male,26.0,1,0,14.4542,C
...,...,...,...,...,...,...,...
378,3,male,20.0,0,0,4.0125,C
173,3,male,21.0,0,0,7.9250,S
546,2,female,19.0,1,0,26.0000,S
282,3,male,16.0,0,0,9.5000,S


In [None]:
# Display the raw (untransformed) validation features.
X_valid

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
130,3,male,33.0,0,0,7.8958,C
258,1,female,35.0,0,0,512.3292,C
223,3,male,,0,0,7.8958,S
529,2,male,23.0,2,1,11.5000,S
611,3,male,,0,0,7.0500,S
...,...,...,...,...,...,...,...
721,3,male,17.0,1,0,7.0542,S
583,1,male,36.0,0,0,40.1250,C
317,2,male,54.0,0,0,14.0000,S
244,3,male,30.0,0,0,7.2250,C


In [None]:
# Score the fitted pipeline on the rows it was trained on.
full_pipeline.score(X_train, y_train)

0.9859550561797753

In [None]:
# Score the fitted pipeline on the held-out validation rows.
full_pipeline.score(X_valid, y_valid)

0.8156424581005587

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space for the 'rfc' step of the pipeline (the `rfc__` prefix routes
# each setting to that step).
# NOTE(review): scikit-learn documents 'entropy' and 'log_loss' as the same
# Shannon information-gain criterion, so those two entries may be fitting
# equivalent candidates twice - confirm before trimming the grid.
param_grid = dict(
    rfc__n_estimators=[500, 700, 900],
    rfc__criterion=['gini', 'entropy', 'log_loss'],
    rfc__n_jobs=[-1],
    rfc__max_features=['sqrt', 'log2'],
)

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split.
grid_cv = GridSearchCV(estimator=full_pipeline, param_grid=param_grid, cv=5)
grid_cv.fit(X_train, y_train)

In [None]:
# Parameter combination selected by the grid search.
grid_cv.best_params_

{'rfc__criterion': 'log_loss',
 'rfc__max_features': 'log2',
 'rfc__n_estimators': 500,
 'rfc__n_jobs': -1}

In [None]:
# No explicit refit is needed: GridSearchCV was created with its default
# refit=True, so best_estimator_ has already been refit on all of X_train
# with the winning parameters. Fitting it again only repeats that work and,
# for an unseeded forest, swaps the selected model for a different random draw.
grid_cv.best_estimator_

In [None]:
# Score the selected pipeline on the training and validation splits and
# show both values as a (train, validation) tuple.
best_model = grid_cv.best_estimator_
train_score = best_model.score(X_train, y_train)
valid_score = best_model.score(X_valid, y_valid)
train_score, valid_score

(0.9859550561797753, 0.8156424581005587)

In [None]:
import tensorflow as tf

In [None]:
# Number of columns in the raw feature frame. This is the width *before*
# one-hot encoding, so it is not the input width of the networks below.
X_train.shape[1]

7

In [None]:
# Take the input width from the preprocessing step instead of hard-coding 11:
# the number of one-hot columns depends on which categories (including the
# imputed 'Missing' one) appear in the training rows. `transformer` is the
# object used as the 'preprocessing' step of full_pipeline, so it has been
# fitted by the earlier full_pipeline.fit call.
n_features = len(transformer.get_feature_names_out())

# Two ReLU hidden layers followed by a single sigmoid unit for the binary label.
model = tf.keras.Sequential([
    tf.keras.Input(shape=[n_features]),
    tf.keras.layers.Dense(300, activation='relu'),
    tf.keras.layers.Dense(100, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
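# Left disabled: the raw X_train still contains string and NaN values (see the
# X_train preview above), so it has to be passed through `transformer` first.
# model.fit(tf.constant(X_train), tf.constant(y_train))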

In [None]:
from tensorflow.keras.metrics import F1Score

# Adam optimiser with a 1e-3 learning rate; binary cross-entropy loss and
# accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the column transformer on the training split and keep the transformed
# feature matrix for the Keras models.
X_train_transformed = transformer.fit_transform(X_train)

In [None]:
# Wrap the transformed matrix in a DataFrame labelled with the transformer's
# output feature names.
train_feature_names = transformer.get_feature_names_out()
X_train_transformed_df = pd.DataFrame(
    data=X_train_transformed,
    columns=train_feature_names,
)

In [None]:
# Display the encoded and scaled training features.
X_train_transformed_df

Unnamed: 0,cat__Sex_female,cat__Sex_male,cat__Embarked_C,cat__Embarked_Missing,cat__Embarked_Q,cat__Embarked_S,num__Pclass,num__Age,num__SibSp,num__Parch,num__Fare
0,0.0,1.0,0.0,0.0,1.0,0.0,0.840740,-0.103876,-0.460719,-0.459792,-0.487963
1,0.0,1.0,0.0,0.0,0.0,1.0,-0.368567,-0.649988,-0.460719,-0.459792,0.892853
2,1.0,0.0,1.0,0.0,0.0,0.0,-1.577873,0.052156,-0.460719,-0.459792,1.585146
3,1.0,0.0,0.0,0.0,0.0,1.0,-1.577873,1.846522,1.419121,-0.459792,0.429837
4,0.0,1.0,1.0,0.0,0.0,0.0,0.840740,-0.259908,0.479201,-0.459792,-0.348664
...,...,...,...,...,...,...,...,...,...,...,...
707,0.0,1.0,1.0,0.0,0.0,0.0,0.840740,-0.728004,-0.460719,-0.459792,-0.568214
708,0.0,1.0,0.0,0.0,0.0,1.0,0.840740,-0.649988,-0.460719,-0.459792,-0.485949
709,1.0,0.0,0.0,0.0,0.0,1.0,-0.368567,-0.806019,0.479201,-0.459792,-0.105898
710,0.0,1.0,0.0,0.0,0.0,1.0,0.840740,-1.040067,-0.460719,-0.459792,-0.452832


In [None]:
# Compile with the existing optimizer, then train for a fixed 200 epochs on
# the transformed training features (no validation data is passed here).
model.compile(
    optimizer=optimizer,
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)
model.fit(x=X_train_transformed_df, y=y_train, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7bacbbb03f70>

In [None]:
# Take the input width from the fitted transformer instead of hard-coding 11:
# the number of one-hot columns depends on the categories present in the
# training rows.
n_features = len(transformer.get_feature_names_out())

# Functional-API graph: three ReLU hidden layers of decreasing width feeding
# a single sigmoid unit for the binary label.
inputs = tf.keras.Input(shape=[n_features])
hidden1 = tf.keras.layers.Dense(1024, activation='relu')(inputs)
hidden2 = tf.keras.layers.Dense(512, activation='relu')(hidden1)
hidden3 = tf.keras.layers.Dense(256, activation='relu')(hidden2)
main_output = tf.keras.layers.Dense(1, activation='sigmoid')(hidden3)

In [None]:
# Build the model from the functional graph: `inputs` in, `main_output` out.
model = tf.keras.Model(inputs=inputs, outputs=main_output)

In [None]:
# Print the layer-by-layer summary with output shapes and parameter counts.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 11)]              0         
                                                                 
 dense_3 (Dense)             (None, 1024)              12288     
                                                                 
 dense_4 (Dense)             (None, 512)               524800    
                                                                 
 dense_5 (Dense)             (None, 256)               131328    
                                                                 
 dense_6 (Dense)             (None, 1)                 257       
                                                                 
Total params: 668673 (2.55 MB)
Trainable params: 668673 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Fresh Adam optimiser for the functional model.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Use the lowercase 'accuracy' alias, in a list, like the other compile calls
# in this notebook. The capitalised 'Accuracy' resolves to the metric class
# that counts exact equality between predictions and labels; sigmoid
# probabilities almost never equal 0 or 1 exactly, so the reported value
# would be misleading.
model.compile(loss='BinaryCrossentropy', optimizer=optimizer, metrics=['accuracy'])

In [None]:
# Train the functional model for a fixed 200 epochs on the transformed
# training features (no validation data is passed here).
model.fit(X_train_transformed_df, y_train, epochs=200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7bac6b9837f0>

In [None]:
# Apply the already-fitted transformer to the validation split (transform
# only, no refit) and label the columns with its output feature names.
valid_feature_names = transformer.get_feature_names_out()
X_valid_transformed = transformer.transform(X_valid)
X_valid_transformed_df = pd.DataFrame(
    data=X_valid_transformed,
    columns=valid_feature_names,
)
X_valid_transformed_df

Unnamed: 0,cat__Sex_female,cat__Sex_male,cat__Embarked_C,cat__Embarked_Missing,cat__Embarked_Q,cat__Embarked_S,num__Pclass,num__Age,num__SibSp,num__Parch,num__Fare
0,0.0,1.0,1.0,0.0,0.0,0.0,0.840740,0.286203,-0.460719,-0.459792,-0.486563
1,1.0,0.0,1.0,0.0,0.0,0.0,-1.577873,0.442235,-0.460719,-0.459792,10.119825
2,0.0,1.0,0.0,0.0,0.0,1.0,0.840740,-0.103876,-0.460719,-0.459792,-0.486563
3,0.0,1.0,0.0,0.0,0.0,1.0,-0.368567,-0.493956,1.419121,0.865601,-0.410780
4,0.0,1.0,0.0,0.0,0.0,1.0,0.840740,-0.103876,-0.460719,-0.459792,-0.504347
...,...,...,...,...,...,...,...,...,...,...,...
174,0.0,1.0,0.0,0.0,0.0,1.0,0.840740,-0.962051,0.479201,-0.459792,-0.504258
175,0.0,1.0,1.0,0.0,0.0,0.0,-1.577873,0.520251,-0.460719,-0.459792,0.191099
176,0.0,1.0,0.0,0.0,0.0,1.0,-0.368567,1.924538,-0.460719,-0.459792,-0.358214
177,0.0,1.0,1.0,0.0,0.0,0.0,0.840740,0.052156,-0.460719,-0.459792,-0.500667


In [None]:
# Display the raw (untransformed) validation features.
X_valid

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
130,3,male,33.0,0,0,7.8958,C
258,1,female,35.0,0,0,512.3292,C
223,3,male,,0,0,7.8958,S
529,2,male,23.0,2,1,11.5000,S
611,3,male,,0,0,7.0500,S
...,...,...,...,...,...,...,...
721,3,male,17.0,1,0,7.0542,S
583,1,male,36.0,0,0,40.1250,C
317,2,male,54.0,0,0,14.0000,S
244,3,male,30.0,0,0,7.2250,C


In [None]:
# Display the validation labels.
y_valid

130    0
258    1
223    0
529    0
611    0
      ..
721    0
583    0
317    0
244    0
554    1
Name: Survived, Length: 179, dtype: int64

In [None]:
import shutil

# Delete the "my_keras_model" directory tree; ignore_errors=True means a
# missing directory (or any other removal failure) is silently skipped.
# NOTE(review): no visible cell writes to this path - the checkpoint callback
# below saves to 'My checkpoints' - so confirm this is the intended target.
shutil.rmtree("my_keras_model", ignore_errors=True)

In [None]:
# Stop training once the monitored quantity has failed to improve for 10
# consecutive epochs, and restore the weights from the best epoch.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

# Save the model weights (not the full model) to 'My checkpoints'.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='My checkpoints',
    save_weights_only=True,
)

In [None]:
# Train for up to 1000 epochs while tracking the validation split; the
# early-stopping callback ends the run once validation stops improving.
validation_pair = (X_valid_transformed_df, y_valid)
training_callbacks = [checkpoint_cb, early_stopping_cb]
history = model.fit(
    x=X_train_transformed_df,
    y=y_train,
    epochs=1000,
    validation_data=validation_pair,
    callbacks=training_callbacks,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000


In [None]:
class shallowTitanic(tf.keras.Model):
  """Fully-connected binary classifier with three equally sized hidden layers.

  Args:
    unit: Number of neurons in each of the three hidden Dense layers.
    activation: Activation applied by the three hidden layers.
    **kwargs: Passed through to ``tf.keras.Model.__init__``.
  """

  def __init__(self, unit=30, activation='relu', **kwargs):
    super().__init__(**kwargs)
    # No tf.keras.Input is stored on the instance: call() receives its tensor
    # as an argument, and a fixed width of 11 would tie the class to a single
    # feature count.
    # The hidden layers honour the `activation` argument instead of always
    # using 'relu'.
    self.hidden1 = tf.keras.layers.Dense(unit, activation=activation)
    self.hidden2 = tf.keras.layers.Dense(unit, activation=activation)
    self.hidden3 = tf.keras.layers.Dense(unit, activation=activation)
    # NOTE(review): `outputs` is also an attribute name used by
    # tf.keras.Model itself; kept here for compatibility - consider renaming.
    self.outputs = tf.keras.layers.Dense(1, activation='sigmoid')

  def call(self, inputs):
    """Run the forward pass and return the sigmoid output of the last layer."""
    hidden = self.hidden1(inputs)
    hidden = self.hidden2(hidden)
    hidden = self.hidden3(hidden)
    return self.outputs(hidden)

# Seed TensorFlow's global random generator before creating the model, then
# instantiate the subclassed model with its default arguments.
tf.random.set_seed(42)
model = shallowTitanic()

In [None]:
# Reset the Keras backend's global state.
# NOTE(review): this runs after `model = shallowTitanic()` in the previous
# cell - confirm whether it was meant to run before the model is created.
tf.keras.backend.clear_session()

In [None]:
# Compile the subclassed model with a fresh Adam optimiser, then train for up
# to 1000 epochs with the checkpoint and early-stopping callbacks, tracking
# the validation split.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)
model.fit(
    x=X_train_transformed_df,
    y=y_train,
    epochs=1000,
    validation_data=(X_valid_transformed_df, y_valid),
    callbacks=[checkpoint_cb, early_stopping_cb],
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000


<keras.src.callbacks.History at 0x7bacb3899f60>