# Neural Network Model

## Step 1: Reading cleaned_cancer_data.csv from Resources folder to create a Pandas DataFrame.

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from pathlib import Path

#  Import and read the charity_data.csv.
file_path = Path("Resources/cleaned_cancer_data.csv")
data_df = pd.read_csv(file_path)
data_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Diagnosis Age,Fraction Genome Altered,Mutation Count,Overall Survival Status,TMB (nonsynonymous),Cancer Type Detailed,Prior Cancer Diagnosis Occurence,Smoking History,Sex,Person Cigarette Smoking History Pack Year Value
0,159,159,70,0.4565,189,0:LIVING,6.3,1,0,1,0,38.0
1,160,160,67,0.2221,288,0:LIVING,9.633333,1,0,1,0,52.0
2,161,161,79,0.2362,296,1:DECEASED,9.833333,1,0,1,1,47.0
3,162,162,68,0.0854,1625,0:LIVING,54.233333,1,1,1,0,62.0
4,163,163,66,0.0661,122,0:LIVING,4.066667,1,1,1,0,20.0


In [2]:
# Dropping unneccesary columns
clean_data_df = data_df.drop(["Unnamed: 0.1","Unnamed: 0"],axis=1)
clean_data_df

Unnamed: 0,Diagnosis Age,Fraction Genome Altered,Mutation Count,Overall Survival Status,TMB (nonsynonymous),Cancer Type Detailed,Prior Cancer Diagnosis Occurence,Smoking History,Sex,Person Cigarette Smoking History Pack Year Value
0,70,0.4565,189,0:LIVING,6.300000,1,0,1,0,38.0
1,67,0.2221,288,0:LIVING,9.633333,1,0,1,0,52.0
2,79,0.2362,296,1:DECEASED,9.833333,1,0,1,1,47.0
3,68,0.0854,1625,0:LIVING,54.233333,1,1,1,0,62.0
4,66,0.0661,122,0:LIVING,4.066667,1,1,1,0,20.0
...,...,...,...,...,...,...,...,...,...,...
977,75,0.2382,211,1:DECEASED,7.033333,0,0,1,1,1.0
978,63,0.5420,101,1:DECEASED,3.400000,0,1,1,0,2.5
979,71,0.4405,216,1:DECEASED,7.200000,0,0,1,1,2.5
980,68,0.0598,109,0:LIVING,3.633333,1,0,1,1,95.0


In [3]:
clean_data_df['Overall Survival Status'] = clean_data_df['Overall Survival Status'].replace(
    {'0:LIVING': 0,
     '1:DECEASED': 1
     })

  clean_data_df['Overall Survival Status'] = clean_data_df['Overall Survival Status'].replace(


In [4]:
# Checking data types
clean_data_df.nunique()
clean_data_df.dtypes

Diagnosis Age                                         int64
Fraction Genome Altered                             float64
Mutation Count                                        int64
Overall Survival Status                               int64
TMB (nonsynonymous)                                 float64
Cancer Type Detailed                                  int64
Prior Cancer Diagnosis Occurence                      int64
Smoking History                                       int64
Sex                                                   int64
Person Cigarette Smoking History Pack Year Value    float64
dtype: object

In [5]:
# Step 2: Creating and separating labels (y) and features (X) from dummies DataFrame.
X = clean_data_df.copy()
X.drop("Overall Survival Status",axis=1,inplace=True)
y = clean_data_df["Overall Survival Status"]

# Splitting into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=42)

## Step 3: Scaling the data features (X & y) for the model.

In [6]:
scaler = StandardScaler()

# Fitting StandardScaler
X_scaler = scaler.fit(X_train)

# Scaling the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [30]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=len(X_train_scaled[0])))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 4)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int(f'units_{i}',
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [29]:
# Import the kerastuner library
import keras_tuner as kt
# Define method to create tuner instance
def run_tuner(epochs):

    tuner = kt.Hyperband(
        create_model,
        objective="val_accuracy",
        max_epochs=30,
        hyperband_iterations=2)
    tuner.search(X_train_scaled,y_train,epochs=30,validation_data=(X_test_scaled,y_test))
    return tuner

In [31]:
# Run the kerastuner search for best hyperparameters
tuner1 = run_tuner(10)

Trial 180 Complete [00h 00m 10s]
val_accuracy: 0.7195122241973877

Best val_accuracy So Far: 0.7317073345184326
Total elapsed time: 00h 24m 12s


In [32]:
# Get best model hyperparameters
best_hyper = tuner1.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'relu',
 'first_units': 9,
 'num_layers': 3,
 'units_0': 5,
 'units_1': 3,
 'units_2': 9,
 'units_3': 3,
 'tuner/epochs': 30,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [68]:
best_model = tuner1.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

  saveable.load_own_variables(weights_store.get(inner_path))


8/8 - 0s - 41ms/step - accuracy: 0.7317 - loss: 0.6757
Loss: 0.6756824254989624, Accuracy: 0.7317073345184326


# Step 4: Compile and Train Model

In [67]:
# Create best model based on hyperparameters
def create_best_model():
    nn_model = tf.keras.models.Sequential()

    # Use the best hyperparameters
    activation = 'tanh'
    first_units = 9

    # First hidden layer
    nn_model.add(tf.keras.layers.Dense(units=first_units, activation=activation, input_dim=len(X_train_scaled[0])))

    # Adding the specified number of hidden layers with the given units
    hidden_units = [5, 3, 9]  # Corresponds to units_0, units_1, units_2
    for units in hidden_units:
        nn_model.add(tf.keras.layers.Dense(units=units, activation=activation))

    # Output layer
    nn_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile the model
    nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    return nn_model

# Create and train the model
best_model = create_best_model()

best_model.fit(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.4735 - loss: 0.7573 - val_accuracy: 0.6016 - val_loss: 0.6359
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5336 - loss: 0.7011 - val_accuracy: 0.6911 - val_loss: 0.6149
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6300 - loss: 0.6606 - val_accuracy: 0.7033 - val_loss: 0.6023
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6709 - loss: 0.6343 - val_accuracy: 0.7154 - val_loss: 0.5951
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7156 - loss: 0.6138 - val_accuracy: 0.7195 - val_loss: 0.5912
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6947 - loss: 0.6176 - val_accuracy: 0.7195 - val_loss: 0.5897
Epoch 7/20
[1m23/23[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e207b2eaa0>

In [69]:
# Evaluate the model using the test data
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - 2ms/step - accuracy: 0.7317 - loss: 0.6757
Loss: 0.6756824254989624, Accuracy: 0.7317073345184326


In [62]:
# Save a copy
best_model.save('Images/best_model.h5')

