In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.datasets import load_digits
import shap
import matplotlib.pyplot as plt 
%matplotlib inline  
import tensorflow as tf# tensor flow


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Display the version of the TensorFlow package imported as `tf`.
tf.__version__

## Deep Learning Introduction
We want to learn a mapping from our inputs to some outputs (the output neurons), e.g.<br>
![alt text](images/deep_learning.jpeg) <br>
- Each of those connections has its own **unique weight**
- 1 hidden layer $\rightarrow$ **Neural Network**
- 2 or more hidden layers $\rightarrow$ **Deep Neural Network** <br>
![alt text](images/deep_learning2.png) <br>
- The output neurons have an activation function as well (**softmax** in the model below), so the outputs are probabilities that add up to 1
- We take the arg max of those probabilities and that is the output


# Load Digits Dataset (MNIST)
- Data: images of handwritten digits, 28×28 pixels each
    - Grayscale image: input 0-255 (white-black)
- Target: number 0-9

In [None]:
# Load the MNIST dataset bundled with Keras.
# load_data() is unpacked into two (images, labels) pairs: the training split
# and the test split.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [None]:
# Preview the first training image using a grayscale colormap.
plt.imshow(x_train[0], cmap="gray")

In [None]:
# Normalize the pixel values of both splits along axis 1 so the inputs are
# small numbers (between 0 and 1), which makes it easier for the network to learn.
# NOTE(review): tf.keras.utils.normalize applies an L2 norm by default rather
# than min-max scaling (dividing by 255) — confirm this is the intended
# preprocessing.
x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_test, axis=1)

In [None]:
# Sanity check: report the dimensions of every train/test array.
for _label, _array in (
    ("x_train: ", x_train),
    ("y_train: ", y_train),
    ("x_test: ", x_test),
    ("y_test: ", y_test),
):
    print(_label, np.shape(_array))

In [None]:
# Give every image an explicit single-channel axis, placed where the Keras
# backend expects it ('channels_first' -> (1, rows, cols), otherwise
# (rows, cols, 1)). `input_shape` is the per-sample shape for the first layer.
img_rows, img_cols = 28, 28
channels_first = tf.keras.backend.image_data_format() == 'channels_first'
input_shape = (1, img_rows, img_cols) if channels_first else (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

In [None]:
# Build, compile and train a small convolutional digit classifier.
# Keras has two main model APIs; Sequential (a plain stack of layers) is the
# most common one.
seq_model = tf.keras.models.Sequential()
# Convolutional input layer. `input_shape` comes from the reshape cell above,
# so the channel axis always matches the backend's image data format instead
# of being hard-coded to channels-last.
seq_model.add(tf.keras.layers.Conv2D(32, kernel_size=(3, 3),
                                     activation='relu',
                                     input_shape=input_shape))
# Flatten the convolutional feature maps into one vector per image.
# Without this, Dense only acts on the last axis and the model would emit a
# (26, 26, 10) tensor per image, which cannot be scored against the scalar
# digit labels by sparse_categorical_crossentropy.
seq_model.add(tf.keras.layers.Flatten())
# Dense(number of neurons in the layer, activation function)
# Two hidden layers
seq_model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
seq_model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
# Output layer: one softmax probability per digit class (0-9)
seq_model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

# A NN does not aim to maximize accuracy directly;
# it always tries to minimize the loss (degree of error),
# so the way the loss is calculated greatly impacts the
# NN's performance.
# The labels are plain integers, hence the *sparse* categorical cross-entropy.
seq_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
seq_model.fit(x_train, y_train, epochs=3)

In [None]:
# Score the trained model on the held-out test split. evaluate() is unpacked
# into the loss and the accuracy metric the model was compiled with.
# NOTE: despite the `val_` prefix, these figures come from the test set.
val_loss, val_acc = seq_model.evaluate(x_test, y_test)
val_loss, val_acc

In [None]:
# Compare the model's prediction for one test image with its true label.
# Optional persistence, left disabled:
# seq_model.save('epic_num_reader.model')
# load_model = tf.keras.models.load_model('epic_num_reader.model')
example = 15
# Pass the array itself: wrapping it in a list makes Keras treat it as a list
# of separate model inputs, which is not what this single-input model expects.
pred = seq_model.predict(x_test)
# argmax because each prediction is a probability distribution over the digits
print("Model predicted: ", np.argmax(pred[example]))
print("Actual number:", y_test[example])
# Drop the single-channel axis added for the Conv2D layer so imshow receives
# a plain 2-D image, wherever the channel axis sits.
plt.imshow(np.squeeze(x_test[example]), cmap="gray")

In [None]:
# SHAP explanation
# Use a random sample of 100 training images as the background set for
# DeepExplainer. The generator is seeded so the same background (and hence the
# same attributions) is obtained on every Restart & Run All.
background_rng = np.random.default_rng(0)
background = x_train[background_rng.choice(x_train.shape[0], 100, replace=False)]
explainer = shap.DeepExplainer(seq_model, background)

In [None]:
# Compute SHAP attributions for the first five test images.
shap_values = explainer.shap_values(x_test[:5])

In [None]:
# Plot the attributions over the same five images they were computed for
# (x_test[:5]). Using a different slice here would pair each image with
# another sample's attributions.
# The images are negated, presumably so the digits render dark on a light
# background — confirm against the rendered plot.
shap.image_plot(shap_values, -x_test[:5])

In [None]:
# Record the library versions this notebook was run with.
print(f"SHAP version is: {shap.__version__}")
print(f"Tensorflow version is: {tf.__version__}")

# Load Digits Dataset (sklearn)
- Data: images of digits, 8×8 pixels each
    - Grayscale image: input 0-16 (white-black)
- Target: number 0-9
- Samples: 1797 ($\approx$180 samples per class)

In [None]:
# Load the sklearn digits dataset as a (features, target) pair; as_frame=True
# requests pandas objects instead of NumPy arrays.
X, y = load_digits(return_X_y=True, as_frame=True)

In [None]:
# Peek at the first rows of the feature table.
X.head()

In [None]:
# Each row of X stores one 8x8 image flattened into 64 pixel columns.
# Reshape the first sample back to 8x8 so imshow draws the digit rather than
# a 1x64 strip, and use the same grayscale colormap as the MNIST previews.
first_digit = X.iloc[0].to_numpy().reshape(8, 8)
plt.imshow(first_digit, cmap="gray")