# **Programming Exercise P08**
## **Training, Saving, Loading, and Fine tuning a TensorFlow model**

# Step 1. Import Data from the dataset

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [15]:
df = pd.read_csv('churn_prediction.csv')

In [16]:
X = pd.get_dummies(df.drop(['Churn', 'Customer ID'], axis=1))
y = df['Churn'].apply(lambda x: 1 if x=='Yes' else 0)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

# Step 2. Import Dependencies

In [18]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

# Step 3. Build and Compile Model

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

model = Sequential()
# Use Input layer for explicit input shape, addressing Keras 3 warning
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

In [48]:
# Re-defining model here to resolve NameError if the upstream model definition cell (ZElq387qCWIX)
# was not executed. In a typical workflow, ensure the model definition cell is run first.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=32, activation='relu', input_dim=X_train.shape[1])) # Changed to X_train.shape[1]
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Step 4. Training the model

In [49]:
import numpy as np

# Ensure X_train is a NumPy array of floats
X_train = np.array(X_train, dtype=np.float32)

# Ensure y_train is a NumPy array of integers or floats (depending on the problem type)
y_train = np.array(y_train, dtype=np.float32)  # Use np.int32 or np.int64 for classification problems


# Ensure X_train is a NumPy array of floats
X_test = np.array(X_test, dtype=np.float32)

# Ensure y_train is a NumPy array of integers or floats (depending on the problem type)
y_test = np.array(y_test, dtype=np.float32)  # Use np.int32 or np.int64 for classification problems


In [22]:
model.fit(X_train, y_train, epochs=200, batch_size=32)

Epoch 1/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7550 - loss: 0.5266
Epoch 2/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7669 - loss: 0.4896
Epoch 3/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7803 - loss: 0.4749
Epoch 4/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7766 - loss: 0.4807
Epoch 5/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7831 - loss: 0.4656
Epoch 6/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7707 - loss: 0.4753
Epoch 7/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7842 - loss: 0.4674
Epoch 8/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7808 - loss: 0.4692
Epoch 9/200
[1m177/177[0m [32

<keras.src.callbacks.history.History at 0x7a7804144bc0>

# Step 5. Making some predictions

In [50]:
y_predictions = model.predict(X_test)
threshold = 0.500
y_predictions = [0 if val < threshold else 1 for val in y_predictions]
accuracy_score(y_test, y_predictions)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


0.5131298793470547

# Step 5. Saving the trained model

In [51]:
model.save('churn_prediction_model.keras')

### **Question #1** why are we dropping the column 'Churn' from the X dataframe in Step 1?###


### **Answer:**
We drop the 'Churn' column from X because 'Churn' is the target variable (label) we are trying to predict. X represents the features (inputs) fed into the model. If we included 'Churn' in the input data, the model would have access to the answer it is supposed to guess, leading to "data leakage" and a meaningless model that simply memorizes the answer rather than learning patterns.


##**Question #2:** After using Pandas or Excel tools to look at the dataset, explain why we are using the pd.get_dummies() function on the data in Step 1##

### **Answer:**
We use pd.get_dummies() to perform one-hot encoding. Neural networks generally require numerical input. The dataset likely contains categorical variables (strings/text labels) which cannot be mathematically processed by the network. get_dummies converts these categorical text columns into numerical binary columns (0s and 1s) representing the presence or absence of a category.

##**Question #3:** Explain why the conversion of the 'Churn' entries from 'Yes' and 'No' to 0 and 1 is necessary in Step 1. ##


### **Answer:**
This is a binary classification problem. The loss function used later (binary_crossentropy) requires the target labels to be numerical values, specifically 0 and 1, to calculate the error. Converting 'Yes' to 1 and 'No' to 0 allows the model to compare its output probability (a number between 0 and 1) against the true label mathematically.

##**Question #4:** What percentage of the original dataset is set aside for testing in this model?##


### **Answer:**
20% of the dataset is set aside for testing. This is determined by the argument test_size=.2 inside the train_test_split function.

##**Question #5:** In Step 3, how many inputs are fed into our deep learning model?##

### **Answer:**
The number of inputs corresponds exactly to the number of columns (features) in the X_train dataframe after get_dummies was applied. In the code, this is defined by input_dim=len(X_train.columns).

##**Question #6:** In Step 3, how many outputs are there for our deep learning model?##

### **Answer:**
There is 1 output. This is defined in the final layer of the model: model.add(Dense(units=1, ...)).

##**Question #7:** In Step 3, why is a sigmoid activation function used for our deep learning model output? Why not softmax or ReLU?##

### **Answer:**
A Sigmoid function is used because it "squashes" the output value to a range between 0 and 1, which represents a probability in binary classification problems.

ReLU is unbounded (goes to infinity), making it unsuitable for probability.

Softmax is typically used for multi-class classification (where probabilities across multiple nodes sum to 1), whereas Sigmoid is the standard for single-node binary classification.

##**Question #8:** In Step 5, when we use a decision threshold of 0.500 (spliting the sigmoid output at 0.5), what test accuracy (in %) do we get?##

### **Answer:**

##**Question #9:** In Step 5, what decision threshold value maximizes the test accuracy? Hint: tweek the threshold value (3 decimal values) until you see the highest test accuracy when running the code in Step 5. ##

In [25]:
from sklearn.metrics import accuracy_score
import numpy as np

# Make predictions (probabilities)
y_pred_probs = model.predict(X_test)

# --- For Question 8 ---
threshold = 0.5
y_pred_static = [1 if prob >= threshold else 0 for prob in y_pred_probs]
acc_static = accuracy_score(y_test, y_pred_static)
print(f"Accuracy at threshold 0.5: {acc_static * 100:.2f}%")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Accuracy at threshold 0.5: 74.02%


### **Answer:** the threshold that yields a maximum test accuracy is: ____

In [52]:
from sklearn.metrics import accuracy_score
import numpy as np

# Make predictions (probabilities)
y_pred_probs = model.predict(X_test)

# --- For Question 8 ---
threshold = 0.5
y_pred_static = [1 if prob >= threshold else 0 for prob in y_pred_probs]
acc_static = accuracy_score(y_test, y_pred_static)
print(f"Accuracy at threshold 0.5: {acc_static * 100:.2f}%")


# --- For Question 9 ---
best_acc = 0
best_threshold = 0

# Test thresholds from 0.000 to 1.000
for t in np.arange(0.0, 1.0, 0.001):
    y_pred_temp = [1 if prob >= t else 0 for prob in y_pred_probs]
    acc = accuracy_score(y_test, y_pred_temp)
    if acc > best_acc:
        best_acc = acc
        best_threshold = t

print(f"Maximum test accuracy: {best_acc * 100:.2f}%")
print(f"Optimal threshold: {best_threshold:.3f}")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy at threshold 0.5: 51.31%
Maximum test accuracy: 78.92%
Optimal threshold: 0.631


# Step 6. Delete the model

In [53]:
# let's run the model summary to make sure our model is there:
model.summary()

In [40]:
del model #this function deletes the model

In [41]:
model.summary() #run model.summary() after deleting the model

NameError: name 'model' is not defined

##**Question #10:** we just ran the model.summary again to see if it works. Did it succeed or fail?##

### **Answer:**
It failed. The previous line del model deleted the model object from memory. Attempting to run model.summary() on a deleted variable causes a Python NameError because the name 'model' is no longer defined.

# Step 7. Load the saved model

In [57]:
#reloading the saved model
model = load_model('churn_prediction_model.keras')

##**Question #11:** Run model.summary again to see if it worked. Did it succeed or fail? Show your code and the result##

### **Answer:**

It succeeded. The load_model function restored the model from the file back into memory. Code:

In [59]:
model = load_model('churn_prediction_model.keras')
model.summary()

#Step 8. Run some predictions on the loaded model

In [None]:
# We are going to run model.predict() on a couple of ramdom samples from X_test, y_test
# This is an illustration of how you can run inferences on a model you previously worked on
# or that you received from someone else

### **Question #12:** Write a few lines of code to take a couple of random samples out of X_test and y_test. Apply the model predict on the X_test[RANDOM INDEX] values and compare them to the y_test[RANDOM INDEX] values. Show your code.

In [60]:
# show your code here

import numpy as np

# Generate 5 random indices
random_indices = np.random.choice(len(X_test), 5, replace=False)

print("Running inferences on loaded model:")
for i in random_indices:
    # Reshape input to (1, number_of_features) for the model
    sample_input = X_test[i].reshape(1, -1)

    # Predict
    prob = model.predict(sample_input, verbose=0)[0][0]
    prediction = "Yes" if prob > 0.5 else "No"

    # Get actual value
    actual = "Yes" if y_test[i] == 1 else "No"

    print(f"Index: {i} | Predicted Probability: {prob:.4f} ({prediction}) | Actual: {actual}")

Running inferences on loaded model:
Index: 861 | Predicted Probability: 0.5115 (Yes) | Actual: No
Index: 215 | Predicted Probability: 0.4905 (No) | Actual: No
Index: 337 | Predicted Probability: 0.5386 (Yes) | Actual: No
Index: 1218 | Predicted Probability: 0.5887 (Yes) | Actual: No
Index: 379 | Predicted Probability: 0.6212 (Yes) | Actual: No


# Step 9. Improving the model

# **Question #13:** How can you improve the existing model?
The original model is good, but not necessarily great.The accuracy could be better. Use your data scientist knowledge and newly aquired TensorFlow/Keras modeling skills to build a new model using the same dataset. Hint: Call the model **new_model** and go through Step 3 through 5. Show all your code and provide **new_model** files with your deliverables.

In [61]:
# Your code in this cell
from tensorflow.keras.optimizers import Adam

# 1. Architecture: Added a layer and slightly adjusted units
new_model = Sequential()
new_model.add(Dense(units=64, activation='relu', input_dim=len(X_train[0]))) # Increased units
new_model.add(Dense(units=32, activation='relu'))
new_model.add(Dense(units=16, activation='relu')) # Added extra layer
new_model.add(Dense(units=1, activation='sigmoid'))

# 2. Compile: Changed optimizer to Adam
new_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 3. Fit: Train the new model
new_model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.7173 - loss: 0.5482
Epoch 2/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7911 - loss: 0.4326
Epoch 3/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7998 - loss: 0.4267
Epoch 4/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8278 - loss: 0.3829
Epoch 5/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.8378 - loss: 0.3622
Epoch 6/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8553 - loss: 0.3442
Epoch 7/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9049 - loss: 0.2676
Epoch 8/200
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8964 - loss: 0.2724
Epoch 9/200
[1m177/177[0m [

<keras.src.callbacks.history.History at 0x7a78779d2d50>

# **Question #14:** What is your new_model optimal decision threshold and the corresponding new_model test accuracy ?

### **Answer:**

In [62]:
new_preds = new_model.predict(X_test)
best_acc_new = 0
best_thresh_new = 0

for t in np.arange(0.0, 1.0, 0.001):
    temp_preds = [1 if p >= t else 0 for p in new_preds]
    acc = accuracy_score(y_test, temp_preds)
    if acc > best_acc_new:
        best_acc_new = acc
        best_thresh_new = t

print(f"New Model Max Accuracy: {best_acc_new * 100:.2f}%")
print(f"New Model Optimal Threshold: {best_thresh_new:.3f}")

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
New Model Max Accuracy: 75.23%
New Model Optimal Threshold: 0.996
