# Introduction
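This notebook evaluates our work on the Breast Cancer Wisconsin (Diagnostic) Data Set (WDBCD) (https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data). A baseline ("teacher") 1D-CNN classifier is trained and compared with a magnitude-pruned version of the same architecture.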

In [None]:
#Kaggle is already using the latest tensorflow
#!pip uninstall -y tensorflow
#!pip install tensorflow==2.13.0
!pip install tensorflow-model-optimization #==0.7.3

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns

#import os
#print(os.listdir("../input"))
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

%matplotlib inline 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPool1D,Flatten,Dense,Dropout,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import utils

import tensorflow_model_optimization as tfmot


#Import models from scikit learn module:
from sklearn import datasets,metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold

#Measuring Time
import time


#Packages that help save pre-trained weight
import tempfile
import os


# Load Tensor Board for visualization of ML
%load_ext tensorboard

# Parameters

In [None]:
# Number of training epochs shared by the pruning run and the teacher model
epoch=50

# Loading Dataset

In [None]:
# Read the WDBC table from the Kaggle input directory;
# row 0 of the file supplies the column names.
data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/wdbc-kd-data/data.csv",
    header=0,
)
print(type(data))
data.head(5)

# Data Preprocessing
## Get classes

In [None]:
# The 'diagnosis' column ('B' / 'M') is the classification target
Y = data["diagnosis"]

# Class balance, followed by a preview of the first labels
print(Y.value_counts())
Y.head(5)

In [None]:
# Bar chart of the number of samples per diagnosis class
sns.countplot(data=data, x="diagnosis")
plt.title('Count of cancer type')
plt.ylabel('Count')
plt.show()

## Prepare and clean the dataset

In [None]:
# Per-column "contains a missing value" flags, summarised with describe()
data.isnull().any().describe()

In [None]:
# Column overview: names, non-null counts and dtypes
data.info()

## Remove unwanted info

In [None]:
# Drop the unwanted 'id' and 'Unnamed: 32' columns.
# 'diagnosis' is still needed for the per-class statistics below and is
# dropped later.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run.
data = data.drop(columns=['id', 'Unnamed: 32'], errors='ignore')
data.info()

In [None]:
# Summary statistics of the numeric columns
data.describe()

## Analyse usable data

In [None]:
# For columns 1..5 of the table, draw the per-class histograms (left)
# next to a per-class box plot (right).
is_benign = data['diagnosis'] == 'B'
is_malignant = data['diagnosis'] == 'M'

for feature in data.columns[1:6]:
    # Left panel: overlaid histograms, benign in green and malignant in red
    plt.subplot(1, 2, 1)
    data.loc[is_benign, feature].plot.hist(alpha=0.5, title=feature, color='green')
    data.loc[is_malignant, feature].plot.hist(alpha=0.5, color='red')
    plt.legend(['B', 'M'], loc='upper right')

    # Right panel: box plot of the same feature grouped by diagnosis
    plt.subplot(1, 2, 2)
    sns.boxplot(x="diagnosis", y=feature, data=data)
    plt.show()

## Observations
1. The mean values of cell properties such as radius, perimeter, area, compactness, concavity, and concave points can be used to classify the cancer: larger values of these parameters tend to correlate with malignant tumors.
2. The mean values of columns such as texture, smoothness, symmetry, or fractal dimension do not show a particular preference for one diagnosis over the other. None of the histograms shows noticeably large outliers that would warrant further cleanup.

## Remove the target (diagnosis) from the data

In [None]:
# 'diagnosis' is the target (already stored in Y), not a feature, so it is
# removed here; only feature columns remain in `data` afterwards.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run.
data = data.drop(columns='diagnosis', errors='ignore')
data.head(5)

## Correlation matrix

In [None]:
# Pairwise correlation matrix of the columns left in `data`
data.corr()

In [None]:
# Pairwise correlation between the columns of `data`
correlation = data.corr()

# Boolean mask covering the upper triangle (diagonal included) so that each
# pair is drawn only once. The mask is built from ones rather than from the
# correlation values, so a correlation of exactly 0 is masked as well.
matrix = np.triu(np.ones_like(correlation, dtype=bool))
plt.figure(figsize=(40,16))
sns.heatmap(correlation, vmax=1, square=True, annot=True, cmap='copper', mask=matrix)
plt.title('Correlation between different features')
#plt.savefig("cor.svg")

In [None]:
# Check the container type of `data`
type(data)

## Get the input (X)

In [None]:
# X is a shorter name for the feature table (plain assignment, no copy)
X = data

# One-row frame built from the first sample, kept for emulating a user input
temp = pd.DataFrame(data=[data.values[0].tolist()], columns=list(data.columns))
del data

# Verify that X still refers to the table
print(type(X))
X.head(5)

## PCA

In [None]:
from sklearn.decomposition import PCA

# Fit a PCA with all components to inspect how the variance is distributed.
# NOTE(review): the PCA is fitted on the unscaled features and on all rows,
# i.e. before the StandardScaler and train/test split cells further down -
# confirm this ordering is intended.
pca = PCA()
# fit() is sufficient: only the fitted model is inspected in the next cells,
# and the trailing semicolon keeps the cell output empty.
pca.fit(X);

In [None]:
# Covariance matrix as estimated by the fitted PCA model
pca.get_covariance()

In [None]:
# Fraction of the total variance explained by each principal component
explained_variance=pca.explained_variance_ratio_
explained_variance

In [None]:
# A logarithmic y-axis is used because the first value is much higher than the rest

with plt.style.context('dark_background'):
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot()

    # One bar per component; the count comes from the data instead of a
    # hard-coded 30, so the cell keeps working if the feature count changes.
    ax.bar(range(len(explained_variance)), explained_variance, alpha=0.5, align='center',
           label='individual explained variance')
    ax.set_yscale('log')

    ax.set_ylabel('Explained variance ratio')
    ax.set_xlabel('Principal components')
    ax.legend(loc='best')
    ax.grid(visible=True)
    fig.tight_layout()

From the plot above we can see that the first three components account for almost 73% of the variance, and the fourth to twenty-sixth components account for about 25% of the data spread. The remaining components have less than 0.001% of the variance. Hence we can drop components 27 to 30 (a total of 4 components).



In [None]:
# Keep the first 26 principal components (the last 4 are dropped).
# NOTE(review): fitted on all rows, before the train/test split - confirm intended.
pca = PCA(n_components=26)
X_new = pca.fit_transform(X)

# Preview the first rows only instead of displaying the whole projected array
X_new[:5]

In [None]:
# Covariance matrix as estimated by the 26-component PCA model
pca.get_covariance()

In [None]:
# Explained-variance ratios of the 26 retained components
explained_variance=pca.explained_variance_ratio_
explained_variance

In [None]:
# A logarithmic y-axis is used because the first value is much higher than the rest

with plt.style.context('dark_background'):
    fig = plt.figure(figsize=(20, 12))
    ax = fig.add_subplot()

    # One bar per retained component; the count comes from the data instead of
    # a hard-coded 26, so it follows whatever n_components the PCA was given.
    ax.bar(range(len(explained_variance)), explained_variance, alpha=0.5, align='center',
           label='individual explained variance')
    ax.set_yscale('log')

    ax.set_ylabel('Explained variance ratio')
    ax.set_xlabel('Principal components')
    ax.legend(loc='best')
    ax.grid(visible=True)
    fig.tight_layout()

In [None]:
# From here on X is the PCA-projected feature matrix
X=X_new
# Sanity check: shapes of features and labels
(X.shape, Y.shape)

## Convert to one-hot

In [None]:
# Report the label container before the conversion
print("Converting to categorical")
print("Before Conversion:")
print(Y.shape)
print(type(Y))

# Encode B -> 0 and M -> 1, then expand the codes into two one-hot columns
Y = utils.to_categorical(Y.map({'B':0,'M':1}), num_classes=2)

# Report the label container after the conversion
print("After Conversion:")
print(Y.shape)
type(Y)

## Splitting data into training and testing

In [None]:
# Shapes of the features and one-hot labels before splitting
(X.shape, Y.shape)

In [None]:
# Stratified 85/15 split. The fixed random_state makes the split - and every
# metric computed on it - reproducible across runs.
X_train,X_test,y_train,y_test= train_test_split(X,Y,test_size=0.15,stratify=Y,random_state=42)

In [None]:
# Shape of the test labels
y_test.shape

## Scaling

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and the same transform is then applied to the test split.
scaler=StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Display the shapes only, rather than the full X_test array
(X_train.shape, X_test.shape)

## Reshaping

In [None]:
# Shapes before adding the trailing channel axis
X_train.shape,X_test.shape,y_train.shape,y_test.shape

In [None]:
# Append a trailing axis of size 1: (samples, features) -> (samples, features, 1)
X_train = X_train.reshape(*X_train.shape[:2], 1)
X_test = X_test.reshape(*X_test.shape[:2], 1)

In [None]:
# Shapes after adding the trailing channel axis
X_train.shape,X_test.shape,y_train.shape,y_test.shape

In [None]:
# Size of axis 1 of X_train; used as the input length of the models below
X_train.shape[1]

# Create Baseline model

## Creating the layers of the neural network
We build the layers first because we want to pre-train the network and save its weights before pruning.

In [None]:
# Network that is pre-trained and then pruned: two Conv1D blocks
# (Conv1D -> BatchNormalization -> Dropout) followed by a dense head.
# The model is built from a layer list; order and settings match the
# step-by-step construction.
teacher_layers = keras.Sequential([
    keras.Input(shape=(X_train.shape[1], 1)),

    # First convolutional block
    keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),

    # Second convolutional block
    keras.layers.Conv1D(448, 2, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),

    # Dense head
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),

    # One sigmoid unit per one-hot class column
    keras.layers.Dense(2, activation='sigmoid'),
])

## Pre-training the weights

In [None]:
# `pre_trained_model` is another name for the layers built above
pre_trained_model = teacher_layers

# Adam with a small learning rate
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0001)

# Binary cross-entropy over the two sigmoid outputs, tracking accuracy
pre_trained_model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=adam_optimizer,
)

In [None]:
# Layer-by-layer overview of the compiled model
pre_trained_model.summary()

## Training the model

In [None]:
# Pre-training run; `epochs` is not passed, so the Keras default is used.
# NOTE(review): confirm that a default-length run is intended here.
pre_trained_model.fit(X_train,y_train,verbose=1)

## Save the pre-trained weight

In [None]:
# mkstemp creates the temporary file securely and returns an open OS-level
# file descriptor together with the path. Only the path is needed here, so
# the descriptor is closed right away instead of being leaked.
fd, pretrained_weights = tempfile.mkstemp('.tf')
os.close(fd)

# Save the learned parameters so the same weights can be reloaded before pruning
pre_trained_model.save_weights(pretrained_weights)

# Pruning

## Load the pre-trained model

In [None]:
# Base model for pruning.
# NOTE: plain assignment - `base_model` is the same object as
# `teacher_layers` / `pre_trained_model`, not a copy.
base_model = teacher_layers
# Reload the weights saved after pre-training
base_model.load_weights(pretrained_weights) # optional but recommended.
base_model.summary()

## Add the pruning wrapping parameters

In [None]:
#To make the whole model train with pruning, 
#apply tfmot.sparsity.keras.prune_low_magnitude to the model.
# No pruning schedule is passed, so the library default is used.
model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(base_model)

## Create the pruning callback function

In [None]:
# Callbacks for the pruning run; summaries go to a fresh temporary directory
log_dir = tempfile.mkdtemp()

callbacks = []
# Required while training a model wrapped for pruning
callbacks.append(tfmot.sparsity.keras.UpdatePruningStep())
# Log sparsity and other metrics for TensorBoard
callbacks.append(tfmot.sparsity.keras.PruningSummaries(log_dir=log_dir))

## Compile the modified model

In [None]:
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0001)

# Use the same loss as the pre-training and teacher compile steps
# (binary cross-entropy on the two sigmoid outputs), so the pruned model is
# fine-tuned on the same objective as the model it is compared against.
model_for_pruning.compile(
      loss='binary_crossentropy',
      optimizer=adam_optimizer,
      metrics=['accuracy']
)

## Train the model (with pre-trained weights)

In [None]:
# Fine-tune the wrapped model with the pruning callbacks attached.
# No validation data is passed, so the history holds training metrics only.
prune_history = model_for_pruning.fit(
    x=X_train,
    y=y_train,
    epochs=epoch,
    callbacks=callbacks,
)

In [None]:
# Summary while the pruning wrappers are still attached
print("Model after prunning, before stripping")
model_for_pruning.summary()

## Remove the pruning wrapper parameters

In [None]:
# Remove the wrapper variables that were only needed during pruning
model_for_pruning = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# Summary of the model after pruning and stripping
print("\n\nModel after prunning and stripping")
model_for_pruning.summary()

## Compile the pruned model (without the wrapper parameters)

In [None]:
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0001)

# Compile the stripped model so it can be evaluated.
# The loss matches the one the teacher model is compiled with
# (binary cross-entropy), so the evaluated losses are directly comparable.
model_for_pruning.compile(
      loss='binary_crossentropy',
      optimizer=adam_optimizer,
      metrics=['accuracy']
)

## Evaluate the performance

In [None]:
# Loss and accuracy of the pruned, stripped model on the test split
prune_loss, prune_acc = model_for_pruning.evaluate(X_test,y_test)

In [None]:
# Save the pruned model in .keras format (its file size is reported later)
model_for_pruning.save("pruned_model.keras") 

In [None]:
# Export the pruned model to the "pruned_model" directory
model_for_pruning.export("pruned_model")

# Convert the exported model to TFLite
converter = tf.lite.TFLiteConverter.from_saved_model("pruned_model")
my_pruned_model = converter.convert()

# Write the converted model to disk
with open("pruned_model.tflite", "wb") as tflite_file:
    tflite_file.write(my_pruned_model)

# Teacher Model 
Let's make the control model. We will use it to compare with the pruned model.

## Create a model: Making the layers

In [None]:
# Control ("teacher") model: same layer stack as the network that is pruned,
# built here as a separate model from a layer list.
teacher = keras.Sequential([
    keras.Input(shape=(X_train.shape[1], 1)),

    # First convolutional block
    keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),

    # Second convolutional block
    keras.layers.Conv1D(448, 2, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),

    # Dense head
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),

    # One sigmoid unit per one-hot class column
    keras.layers.Dense(2, activation='sigmoid'),
])

## Compile the teacher Model

In [None]:
# Adam with a small learning rate
adam_optimizer = keras.optimizers.Adam(learning_rate=0.0001)

# Binary cross-entropy over the two sigmoid outputs, tracking accuracy
teacher.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=adam_optimizer,
)

## Train the Teacher model

In [None]:
# Time the teacher's training run. The test split is passed as validation
# data so that per-epoch val_accuracy / val_loss are recorded for the
# learning curves plotted later.
start_time = time.time()
teacher_history=teacher.fit(X_train,y_train,epochs=epoch,validation_data=(X_test,y_test),verbose=1)
end_time = time.time()

# Report the measured wall-clock training time
print(f"Teacher training time: {end_time - start_time:.2f} s")

In [None]:
# Loss and accuracy of the teacher model on the test split
teacher_loss, teacher_acc = teacher.evaluate(X_test,y_test)

# Check Performance

## Pruned Performance Visualization

In [None]:
#docs_infra: no_execute
# Open TensorBoard on the directory the pruning callbacks logged to
%tensorboard --logdir={log_dir}

In [None]:
#==========================================================================
# Function for counting weights that were zeroed
#==========================================================================
def count_zeros(model):
    """Count all weight values of ``model`` and how many of them are exactly zero.

    Every array returned by ``layer.get_weights()`` is counted, for every layer
    in ``model.layers``. A short report (total, zeros, sparsity in percent) is
    printed.

    Args:
        model: Object exposing ``layers``; each layer must provide
            ``get_weights()`` returning a list of array-likes.

    Returns:
        tuple[int, int]: ``(total, zeros)`` as plain Python integers.
    """
    total = 0
    zeros = 0
    for layer in model.layers:
        for w in layer.get_weights():
            w = np.asarray(w)
            total += int(w.size)
            zeros += int(w.size - np.count_nonzero(w))

    # A model without any weights has no meaningful ratio; report 0% instead
    # of failing with a division by zero.
    sparsity = 100 * zeros / total if total else 0.0

    print(f"Total weights: {total}")
    print(f"Zero weights: {zeros}")
    print(f"Sparsity: {sparsity:.2f}%")
    return total, zeros
    
# Weight statistics of the (unpruned) teacher model
print("Teacher model:")
teacher_param_total, teacher_param_zeros = count_zeros(teacher)

# Weight statistics of the pruned model after the wrappers were stripped
print("\n\nPruned & stripped model:")
prune_param_total, prune_param_zeros = count_zeros(model_for_pruning)

## Confusion Matrix

In [None]:
#--------------Pruned Model--------------
# Per-class sigmoid scores of the pruned model on the test split
y_pred_probs = model_for_pruning.predict(X_test)

# Predicted class = index of the highest score; true class = index of the
# 1 in the one-hot label
y_pred_prune = y_pred_probs.argmax(axis=1)
y_true_prune = y_test.argmax(axis=1)


#------------Original Model--------------
# Per-class sigmoid scores of the teacher model on the test split
y_pred_probs = teacher.predict(X_test)

# Same conversion as above for the teacher's predictions and the true labels
y_pred = y_pred_probs.argmax(axis=1)
y_true = y_test.argmax(axis=1)

## Plot Confusion Matrix

In [None]:
def plot_confusion_matrix(y_true, y_pred, class_names, title="Confusion Matrix"):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as an annotated heatmap.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        class_names: Tick labels used on both axes.
        title: Title shown above the figure.
    """
    counts = confusion_matrix(y_true, y_pred)

    # Integer-annotated heatmap with the class names on both axes
    heatmap_options = dict(
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.figure(figsize=(5, 4))
    sns.heatmap(counts, **heatmap_options)

    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.tight_layout()
    plt.show()

# Class labels in index order: 0 -> 'B', 1 -> 'M'
class_names = ['B', 'M']


#=================
# Teacher Model
#=================
plot_confusion_matrix(y_true, y_pred, class_names, "Base Model (Teacher Model) Confusion Matrix")


#=================
# Pruned Model
#=================
plot_confusion_matrix(y_true_prune, y_pred_prune, class_names, "Pruned Model Confusion Matrix")

In [None]:
# Save the teacher model in .keras format (its file size is reported later)
teacher.save("teacher_model.keras") 

In [None]:
# Export the teacher model to the "teacher" directory
teacher.export("teacher")

# Convert the exported model to TFLite
converter = tf.lite.TFLiteConverter.from_saved_model("teacher")
my_teacher_model = converter.convert()

# Write the converted model to disk
with open("teacher.tflite", "wb") as tflite_file:
    tflite_file.write(my_teacher_model)

In [None]:
def evaluate_tflite_model(tflite_path, X_test, y_test):
    """Run a TFLite model sample by sample and report its classification accuracy.

    Uses the notebook-level ``np`` and ``tf`` imports.

    Args:
        tflite_path: Path of the ``.tflite`` file to load.
        X_test: Indexable collection of input samples. Each sample gets a
            leading batch axis of size 1 and is cast to float32 before inference.
        y_test: Labels aligned with ``X_test``; the true class of a sample is
            the ``argmax`` of its entry.

    Returns:
        tuple: ``(accuracy, y_true, y_pred)`` - the accuracy as a fraction and
        the per-sample true / predicted class indices as lists.

    Raises:
        ValueError: If ``X_test`` is empty, since the accuracy would be undefined.
    """
    n_samples = len(X_test)
    if n_samples == 0:
        raise ValueError("X_test is empty; cannot compute TFLite model accuracy")

    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    # Only the first input and first output tensor are used
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    correct = 0
    y_pred = []
    y_true = []
    for i in range(n_samples):
        # Add the batch axis and cast to float32 before feeding the interpreter
        input_data = np.expand_dims(X_test[i], axis=0).astype(np.float32)

        interpreter.set_tensor(input_index, input_data)
        interpreter.invoke()
        output = interpreter.get_tensor(output_index)

        predicted = np.argmax(output[0])       # class with the highest score
        actual = np.argmax(y_test[i])          # one-hot to label
        y_pred.append(predicted)
        y_true.append(actual)

        if predicted == actual:
            correct += 1

    accuracy = correct / n_samples
    print(f"TFLite model accuracy: {accuracy * 100:.2f}%")
    return accuracy, y_true, y_pred

In [None]:
# Accuracy and per-sample labels of the TFLite teacher model on the test split
teacher_tflite_acc, teacher_tflite_y_true, teacher_tflite_y_pred= evaluate_tflite_model("teacher.tflite", X_test, y_test)

In [None]:
# Accuracy and per-sample labels of the TFLite pruned model on the test split
pruned_tflite_acc, pruned_tflite_y_true, pruned_tflite_y_pred= evaluate_tflite_model("pruned_model.tflite", X_test, y_test)

## Accuracy and loss of the Teacher model

In [None]:
def plotLearningCurve(history,epochs):
    """Plot training vs. validation accuracy, then training vs. validation loss.

    Args:
        history: Object whose ``history`` mapping holds the per-epoch lists
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
        epochs: Number of epochs; the x-axis runs from 1 to ``epochs``.
    """
    epochRange = range(1, epochs + 1)

    # (metric key, figure title, y-axis label, legend position) per figure
    panels = (
        ('accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Model Loss', 'Loss', 'upper right'),
    )
    for metric, title, ylabel, legend_loc in panels:
        plt.plot(epochRange, history.history[metric])
        plt.plot(epochRange, history.history['val_' + metric])
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend(['Train','Validation'], loc=legend_loc)
        plt.grid(visible=True)
        plt.show()

In [None]:
# Learning curves of the teacher model (training vs. validation)
plotLearningCurve(teacher_history,epoch)

## Accuracy and loss of the Pruned model

In [None]:
def plotPruneCurveD(history,epochs):
    """Plot training accuracy and training loss of a run without validation data.

    Only one curve is drawn per figure, so the legend names the training
    series only.

    Args:
        history: Object whose ``history`` mapping holds the per-epoch lists
            ``accuracy`` and ``loss``.
        epochs: Number of epochs; the x-axis runs from 1 to ``epochs``.
    """
    epochRange = range(1, epochs + 1)

    # (metric key, figure title, y-axis label, legend position) per figure
    panels = (
        ('accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Model Loss', 'Loss', 'upper left'),
    )
    for metric, title, ylabel, legend_loc in panels:
        plt.plot(epochRange, history.history[metric])
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend(['Train'], loc=legend_loc)
        plt.grid(visible=True)
        plt.show()

In [None]:
# Metrics recorded during the pruning run
print(prune_history.history.keys())

In [None]:
# Training curves of the pruning run
plotPruneCurveD(prune_history,epoch)

## File sizes

In [None]:
import os

# Sizes of the saved artefacts on disk, converted from bytes to KB.
# The four files are read in the order: teacher .keras, teacher .tflite,
# pruned .keras, pruned .tflite.
original_size, original_tflite_size, pruned_size, pruned_tflite_size = (
    os.path.getsize(path) / 1024
    for path in (
        "teacher_model.keras",
        "teacher.tflite",
        "pruned_model.keras",
        "pruned_model.tflite",
    )
)

# Teacher (original) model
print(f"Keras Original size: {original_size:.2f} KB")
print(f"TFLite Original size: {original_tflite_size:.2f} KB")

# Pruned model
print(f"Keras Prune size: {pruned_size:.2f} KB")
print(f"TFLite Prune size: {pruned_tflite_size:.2f} KB")

# Result Summary

In [None]:
# Side-by-side summary of the teacher and the pruned model.
# Columns are aligned with tabs; every metric label is padded so that the
# "Teacher" value starts at the same tab stop.
print("Metric\t\t\tTeacher\t\tPrune")
print("="*50)

print("tflite File size (kb)\t%.2f\t\t%.2f"%(original_tflite_size,pruned_tflite_size))
print("Accuracy (%%)\t\t%.2f\t\t%.2f"%(teacher_acc*100,prune_acc*100))
print("Accuracy (tflite)(%%)\t%.2f\t\t%.2f"%(teacher_tflite_acc*100,pruned_tflite_acc*100))
# Two tabs after the label, like the other short labels, so the row lines up
print("Total Weights\t\t%d\t\t%d"%(teacher_param_total, prune_param_total))
print("Zero Weights\t\t%d\t\t%d"%(teacher_param_zeros, prune_param_zeros))
print("Sparsity\t\t%.2f\t\t%.2f"%(teacher_param_zeros/teacher_param_total*100, prune_param_zeros/prune_param_total*100))
print("="*50)
# Footnotes: sizes of the .keras files
print("*File size for Baseline Keras format %.2f kB"%(original_size))
print("*File size for Prune Keras format %.2f kB"%(pruned_size))