In [55]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade



In [56]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib



In [4]:
!pip install tensorflow



In [57]:
# Set the seed value for the notebook so the results are reproducible.
# This call seeds NumPy's global random generator only.
# NOTE(review): no TensorFlow seed is set in any visible cell, so Keras weight
# initialisation may still differ between runs -- confirm whether a
# tensorflow.random.set_seed(...) call should follow the TensorFlow import.
from numpy.random import seed
seed(40)

In [58]:
# Dependencies
import numpy as np
import pandas as pd

In [59]:
import tensorflow
tensorflow.keras.__version__

'2.2.4-tf'

In [60]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler ,OneHotEncoder
from tensorflow.keras.utils import to_categorical

# Read the CSV and Perform Basic Data Cleaning

In [61]:
# Data source: https://www.tensorflow.org/datasets/catalog/titanic
# Load the CSV, discard columns that are entirely null, then discard any row
# that still contains a null.
df_tensor = (
    pd.read_csv("data/TensorFlowtitanic.csv")
    .dropna(how='all', axis='columns')
    .dropna()
)
df_tensor.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,?,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11,?,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,?,135,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"


In [62]:
# Same two-step null clean-up again: first remove all-null columns, then
# remove every row that still has a null in it.
df_tensor = df_tensor.dropna(how='all', axis='columns').dropna()

In [63]:
df_tensor.count()

pclass       1309
survived     1309
name         1309
sex          1309
age          1309
sibsp        1309
parch        1309
ticket       1309
fare         1309
cabin        1309
embarked     1309
boat         1309
body         1309
home.dest    1309
dtype: int64

In [64]:
df_tensor.columns

Index(['pclass', 'survived', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket',
       'fare', 'cabin', 'embarked', 'boat', 'body', 'home.dest'],
      dtype='object')

In [65]:
# Feature matrix: everything except the target and the columns not used as
# model inputs. Target vector: the "survived" column.
X = df_tensor.drop(
    columns=["survived", 'parch', 'ticket', 'embarked', 'name',
             'home.dest', 'boat', 'body', 'cabin']
)
y = df_tensor["survived"]
print(X.shape, y.shape)

(1309, 5) (1309,)


In [66]:
X


Unnamed: 0,pclass,sex,age,sibsp,fare
0,1,female,29,0,211.3375
1,1,male,0.9167,1,151.55
2,1,female,2,1,151.55
3,1,male,30,1,151.55
4,1,female,25,1,151.55
...,...,...,...,...,...
1304,3,female,14.5,1,14.4542
1305,3,female,?,1,14.4542
1306,3,male,26.5,0,7.225
1307,3,male,27,0,7.225


In [67]:
y


0       1
1       1
2       0
3       0
4       0
       ..
1304    0
1305    0
1306    0
1307    0
1308    0
Name: survived, Length: 1309, dtype: int64

In [68]:
# Swap the '?' missing-value marker in "sex" for 0.
X['sex'] = X['sex'].replace('?', 0)

In [None]:
X


In [69]:
# Missing ages are marked with '?' in the raw data; substitute 0 for them.
X['age'] = X['age'].replace(to_replace='?', value=0)

# No equivalent step for 'cabin': that column is removed when X is built from
# df_tensor, so indexing X['cabin'] here raised KeyError on a fresh run.

In [70]:
# Swap the '?' missing-value marker in "fare" for 0.
X['fare'] = X['fare'].replace('?', 0)

In [71]:
# Swap the '?' missing-value marker in "sibsp" for 0.
X['sibsp'] = X['sibsp'].replace('?', 0)

In [72]:
# Swap the '?' missing-value marker in "pclass" for 0.
X['pclass'] = X['pclass'].replace('?', 0)

In [73]:
X


Unnamed: 0,pclass,sex,age,sibsp,fare
0,1,female,29,0,211.3375
1,1,male,0.9167,1,151.55
2,1,female,2,1,151.55
3,1,male,30,1,151.55
4,1,female,25,1,151.55
...,...,...,...,...,...
1304,3,female,14.5,1,14.4542
1305,3,female,0,1,14.4542
1306,3,male,26.5,0,7.225
1307,3,male,27,0,7.225


In [74]:
# Work on a copy so X itself is left as-is, and expand only the "sex" column
# into indicator columns (sex_female / sex_male).
data = X.copy()
data_binary_encoded = pd.get_dummies(data=data, columns=["sex"])
data_binary_encoded.head()

Unnamed: 0,pclass,age,sibsp,fare,sex_female,sex_male
0,1,29.0,0,211.3375,1,0
1,1,0.9167,1,151.55,0,1
2,1,2.0,1,151.55,1,0
3,1,30.0,1,151.55,0,1
4,1,25.0,1,151.55,1,0


In [75]:
# sex_female is the complement of sex_male, so keep just one indicator.
new_data = data_binary_encoded.drop(columns=["sex_female"])

In [76]:
new_data

Unnamed: 0,pclass,age,sibsp,fare,sex_male
0,1,29,0,211.3375,0
1,1,0.9167,1,151.55,1
2,1,2,1,151.55,0
3,1,30,1,151.55,1
4,1,25,1,151.55,0
...,...,...,...,...,...
1304,3,14.5,1,14.4542,0
1305,3,0,1,14.4542,0
1306,3,26.5,0,7.225,1
1307,3,27,0,7.225,1


In [20]:
# Split features and labels into train/test partitions; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(new_data, y, random_state=1)

In [47]:
y_train.value_counts()

0    614
1    367
Name: survived, dtype: int64

In [21]:
# Fit the min-max scaler on the training split only, then apply that same
# scaling to both splits.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# One-hot versions of the labels (marked "Del" in the original as candidates
# for removal).
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_test_categorical

In [78]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [79]:
# Small feed-forward classifier: one hidden ReLU layer and a 2-way softmax output.
model = Sequential()
# Take the input width from the scaled training matrix rather than hard-coding
# 5, so the layer stays correct if the feature selection changes.
model.add(Dense(units=2, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=2, activation='softmax'))

In [86]:
# Adam optimiser with the sparse cross-entropy loss, tracking accuracy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [87]:
# Train for 5 epochs on the scaled features and the integer labels,
# reshuffling each epoch and logging one line per epoch.
model.fit(X_train_scaled, y_train, epochs=5, shuffle=True, verbose=2)

Train on 981 samples
Epoch 1/5
981/981 - 0s - loss: 0.7027 - accuracy: 0.5892
Epoch 2/5
981/981 - 0s - loss: 0.6954 - accuracy: 0.6259
Epoch 3/5
981/981 - 0s - loss: 0.6889 - accuracy: 0.6259
Epoch 4/5
981/981 - 0s - loss: 0.6833 - accuracy: 0.6259
Epoch 5/5
981/981 - 0s - loss: 0.6779 - accuracy: 0.6259


<tensorflow.python.keras.callbacks.History at 0x2ab98a4ceb8>

In [91]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 2)                 12        
_________________________________________________________________
dense_15 (Dense)             (None, 2)                 6         
Total params: 18
Trainable params: 18
Non-trainable params: 0
_________________________________________________________________


In [88]:
#Quantify our Trained Model
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

328/328 - 0s - loss: 0.6821 - accuracy: 0.5945
Normal Neural Network - Loss: 0.6820901923063325, Accuracy: 0.5945122241973877


In [98]:
# Map the network's output for the first ten test rows back to label values.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
# Sequential.predict_classes is not available in newer TensorFlow releases;
# taking the argmax over the softmax output yields the same class indices and
# works on every version.
encoded_predictions = np.argmax(model.predict(X_test_scaled[:10]), axis=-1)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [99]:
# Show predictions next to the true labels for the same first ten test rows
# (positional slice of y_test).
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test.iloc[:10])}")

Predicted classes: [0 0 0 0 0 0 0 0 0 0]
Actual Labels: [0, 0, 1, 0, 1, 0, 0, 1, 1, 0]


In [None]:
# OLD -- section marker only. As a bare identifier in a code cell this raised
# NameError and stopped a Restart & Run All, so it is kept as a comment.

In [None]:
# Copy X and expand only the "sex" column into indicator columns.
data = X.copy()
data_binary_encoded = pd.get_dummies(data=data, columns=["sex"])
data_binary_encoded.head()

In [None]:
# Keep a single sex indicator (sex_male) by removing its complement.
new_data = data_binary_encoded.drop(columns=["sex_female"])

In [None]:
new_data

In [None]:
new_data

In [None]:
new_data['sex_male'].value_counts()

In [None]:
# 'cabin' is dropped before new_data is built, so new_data['cabin'] raised
# KeyError; inspect the column on the cleaned source frame instead.
df_tensor['cabin'].value_counts()


In [None]:
new_data['fare'].value_counts()

In [None]:
new_data['age'].value_counts()

In [None]:
# 'cabin' is dropped before new_data is built, so new_data['cabin'] raised
# KeyError; inspect the column on the cleaned source frame instead.
df_tensor['cabin'].value_counts()

In [None]:
# Swap the '?' missing-value marker in "age" for 0.
X['age'] = X['age'].replace('?', 0)


In [None]:
X['age'].value_counts()

In [None]:
# X is built without 'cabin', so an unconditional X['cabin'] lookup raised
# KeyError. Only clean the column when it is actually present.
if 'cabin' in X.columns:
    X['cabin'] = X['cabin'].replace(to_replace='?', value=0)

In [None]:
# X is built without 'cabin', so X['cabin'] raised KeyError; inspect the
# column on the cleaned source frame instead.
df_tensor['cabin'].value_counts()

In [None]:
new_data['sex_male'].value_counts()

# TODO: understand this step. Called without a `columns` argument,
# pd.get_dummies one-hot encodes every object/category (string) column of X,
# not just "sex" -- NOTE(review): confirm which columns of X are still
# string-typed at this point, since each would be expanded too.
X = pd.get_dummies(X)

In [None]:
y

In [None]:
# Split the re-encoded X and y into train/test partitions; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [None]:
X_train

In [None]:
X_test.shape


In [None]:
X_train.shape

In [None]:
y_test.shape

In [None]:
y_train.shape

In [None]:
# Fit the min-max scaler on the training split only, then apply it to both splits.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Step 2: turn the integer labels of each split into one-hot rows.
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (y_train, y_test)
)

CREATE DEEP LEARNING MODEL

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Feed-forward classifier: two hidden ReLU layers and a 2-way softmax output.
model = Sequential()
# Take the input width from the scaled training matrix rather than hard-coding
# 6, so the layer matches whatever columns the encoding step produced.
model.add(Dense(units=6, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=2, activation='relu'))
# The output layer needs one unit per class: softmax over a single unit is
# always 1.0, and a 1-wide output cannot be matched against the two-column
# one-hot labels this model is trained on.
model.add(Dense(units=2, activation='softmax'))

In [None]:
# Compile the model: Adam optimiser, categorical cross-entropy loss, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
model.summary()

In [None]:
# Train for 60 epochs on the scaled features and the one-hot labels,
# reshuffling each epoch and logging one line per epoch.
model.fit(X_train_scaled, y_train_categorical, epochs=60, shuffle=True, verbose=2)

# Select your features (columns)

# Save the Model

In [None]:
# Save the trained model: replace "your_name" in the filename with your name.
# Be sure to turn this in to BCS.
# The template's `your_model` placeholder was never defined (NameError), so the
# notebook's `model` variable is saved instead. Keras models are written with
# their own save() method; the .h5 suffix selects the HDF5 format.
filename = 'your_name.h5'
model.save(filename)