<h1>CNN</h1>

References:  
https://www.tensorflow.org/tutorials/images/cnn  
https://www.datacamp.com/tutorial/cnn-tensorflow-python  

Convolutional Layers:  
https://www.sciencedirect.com/topics/engineering/convolutional-layer#:~:text=2.3.,-1%20Convolutional%20layer&text=A%20convolutional%20layer%20is%20the,and%20creates%20an%20activation%20map.  
https://towardsdatascience.com/convolutional-neural-networks-explained-9cc5188c4939  


Model:  
https://www.tensorflow.org/api_docs/python/tf/keras/Model  
https://keras.io/api/layers/convolution_layers/convolution2d/  

Architecture:

Input: 28 x 28 image (Pixel Size)   
-> Convolution Layer 1 -> ReLU Activation -> Max Pooling  
-> Convolution Layer 2 -> ReLU Activation -> Max Pooling  
-> Convolution Layer 3 -> ReLU Activation -> Max Pooling <br/>
-> Flatten -> Fully Connected Layer (Dense Layer) -> ReLU Activation  
-> Output Layer (17 units)

Output Classes:  
['(',')','+','-','0','1','2','3','4','5','6','7','8','9','=','div','times'] <br/>
Total: 17

# Global Variable (Edit Before you Run on your own)
- Important: Change `MODEL_SAVE_NAME` before training a new model so that a previously saved model is not overwritten.

In [None]:
# Training hyper-parameters (epoch count kept small for trial runs)
EPOCHS_SIZE = 20
BATCH_SIZE = 128

# File stem of the saved model; pick a new name per run so an earlier model is not overwritten
MODEL_SAVE_NAME = ""

# The 17 symbol classes; a symbol's position in this list is its integer class index
training_class_labels = [
    '(', ')', '+', '-',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    '=', 'div', 'times',
]

# Symbol -> class index lookup, derived from the list above
class_labels_dict = {symbol: index for index, symbol in enumerate(training_class_labels)}

# Pre requisites that you need to install before use

#Just Run Once
!pip install tensorflow
!pip install scikit-learn
!pip install keras
!pip install keras-tuner
!pip install matplotlib
!pip install opencv-python
!pip install scipy

## Imports

In [None]:
import os
import glob
import cv2
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import keras_tuner as kt
import seaborn as sns
from keras import metrics

# Training

### 1. Function to Load The Images from Folder

In [None]:
def pre_processing_from_dir(dataset_dir, class_labels_dict, training=False):
    """Load a folder-per-class image dataset as normalized 28x28 grayscale arrays.

    Every sub-directory of ``dataset_dir`` is one class (the directory name is
    the label) and every ``*.jpg`` file inside it is one sample. Images are read
    in grayscale, resized to 28x28 and scaled by 1/255. Labels are one-hot
    encoded with ``to_categorical``.

    Args:
        dataset_dir: Directory containing one sub-directory per class.
        class_labels_dict: Mapping of class name -> integer class index.
            With ``training=True`` it is filled in place from the sorted class
            directory names. With ``training=False`` it is only read, so that
            the labels get the same indices that were assigned during training.
        training: When True, the samples are shuffled and the class index
            mapping is (re)built.

    Returns:
        training=True:  ``(images, labels, class_labels, class_labels_dict)``
            where ``class_labels[i]`` is the name of class index ``i``.
        training=False: ``(images, labels, class_labels)`` where
            ``class_labels`` lists the classes that have at least one loaded
            image, ordered by class index.

    Raises:
        ValueError: ``training=True`` but no image could be loaded.
        KeyError: ``training=False`` and a class directory with images is not
            a key of ``class_labels_dict``.
    """
    images = []
    labels = []

    # Only real directories are classes (ignores stray files such as .DS_Store).
    # Sorting removes the dependency on the arbitrary order of os.listdir, so
    # class names and class indices always line up.
    class_directories = sorted(
        entry for entry in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, entry))
    )

    for class_label in class_directories:
        class_path = os.path.join(dataset_dir, class_label)

        # You may need to adjust the file extension
        image_files = glob.glob(os.path.join(class_path, "*.jpg"))

        for image_file in image_files:
            image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread returns None instead of raising for unreadable files
                print(f"Skipping unreadable image: {image_file}")
                continue

            image = cv2.resize(image, (28, 28))
            image = image / 255.0  # Normalize pixel values

            images.append(image)
            labels.append(class_label)

    if training:
        if not images:
            raise ValueError(f"No .jpg images could be loaded from {dataset_dir!r}")

        # class_labels[i] <-> class index i, for labels and for model outputs alike
        class_labels = class_directories
        for index, class_label in enumerate(class_labels):
            class_labels_dict[class_label] = index

        # Shuffle images and labels together so the pairs stay aligned
        data = list(zip(images, labels))
        np.random.shuffle(data)
        shuffled_images, shuffled_labels = zip(*data)

        images = np.array(shuffled_images)
        labels = np.array([class_labels_dict[name] for name in shuffled_labels], dtype="int64")

        # comment the below 2 lines if doing label-encoding
        # One-hot encode labels (needed to fit the categorical_crossentropy model)
        num_classes = len(class_labels)
        labels = to_categorical(labels, num_classes=num_classes)

        return images, labels, class_labels, class_labels_dict

    # Evaluation data: reuse the training indices, because the test set may
    # contain fewer classes than the training set.
    images = np.array(images)

    unknown = sorted(set(labels) - set(class_labels_dict))
    if unknown:
        raise KeyError(f"Classes missing from class_labels_dict: {unknown}")

    # Classes actually present, ordered by class index so that they line up
    # with np.unique() of the integer labels.
    class_labels = sorted(set(labels), key=class_labels_dict.get)

    labels = np.array([class_labels_dict[name] for name in labels], dtype="int64")

    # comment the below 2 lines if doing label-encoding
    # One-hot width follows the training mapping, not the classes seen here
    num_classes = len(class_labels_dict)
    labels = to_categorical(labels, num_classes=num_classes)

    return images, labels, class_labels

### 2. Data Augmentation Function for Training Data 
- random rotation
- random noise

In [None]:
def data_augmentation(image):
    """Return a randomly rotated and noise-corrupted version of one image.

    The image is rotated about its centre by an angle drawn uniformly from
    [-30, 30] degrees, with uncovered border pixels filled with 255. Zero-mean
    Gaussian noise, whose standard deviation is drawn uniformly from [0, 0.1],
    is then added and the result is clipped to [0, 1].

    Args:
        image: 2-D array; assumed to hold pixel values in [0, 1].

    Returns:
        The output of ``tf.clip_by_value`` for the augmented image.
    """
    # --- Rotation: mimic slanted handwriting --------------------------------
    # OpenCV operates on the 8-bit version of the image here
    pixels_u8 = (image * 255).astype(np.uint8)

    width_height = pixels_u8.shape[1::-1]
    pivot = tuple(np.array(width_height) / 2)
    angle_degrees = random.uniform(-30, 30)

    rotated = cv2.warpAffine(
        pixels_u8,
        cv2.getRotationMatrix2D(pivot, angle_degrees, 1.0),
        width_height,
        flags=cv2.INTER_LINEAR,
        borderValue=(255, 255, 255),
    )

    # Back to floats in [0, 1]
    rotated = rotated.astype(np.float32) / 255.0

    # --- Noise: mimic low quality images ------------------------------------
    noise_stddev = random.uniform(0, 0.1)
    noisy = rotated + tf.random.normal(shape=tf.shape(rotated), stddev=noise_stddev)

    return tf.clip_by_value(noisy, 0.0, 1.0)


### 3. Function for Defining The Model

In [None]:
def math_model(X_train, X_test, y_train, y_test, num_classes, model_name):
    """Train the three-block CNN on augmented training images and save it.

    Architecture: 3 x (Conv2D 3x3 + ReLU -> MaxPooling 2x2) with 32, 64 and 128
    filters, then Flatten -> Dense(128, ReLU) -> Dense(num_classes, softmax).
    Batch size and epoch count come from the module-level ``BATCH_SIZE`` and
    ``EPOCHS_SIZE``.

    Args:
        X_train, y_train: Training images and their one-hot labels.
        X_test, y_test: Data passed to ``fit`` as ``validation_data``.
        num_classes: Number of units in the softmax output layer.
        model_name: File stem; the model is saved as ``"<model_name>.keras"``.

    Returns:
        ``(saved_file_name, history)``. ``history`` is returned only so the
        caller can inspect under-/over-fitting after training.
    """
    # Every training image is replaced by its augmented version
    augmented_samples = [data_augmentation(sample) for sample in X_train]
    X_train = np.array(augmented_samples)

    image_shape = (28, 28, 1)     # decision point: the fixed size of our images
    conv_filters = (32, 64, 128)  # number of filters in each convolutional layer
    kernel_shape = (3, 3)         # size of the filter
    pool_shape = (2, 2)           # size of the pooling layer
    dense_units = 128             # number of neurons in the fully connected layer

    model = Sequential()
    model.add(layers.Conv2D(conv_filters[0], kernel_shape, activation='relu', input_shape=image_shape))
    model.add(layers.MaxPooling2D(pool_shape))
    for filter_count in conv_filters[1:]:
        model.add(layers.Conv2D(filter_count, kernel_shape, activation='relu'))
        model.add(layers.MaxPooling2D(pool_shape))
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    # categorical_crossentropy matches one-hot labels; switch the loss to
    # 'sparse_categorical_crossentropy' if integer (label-encoded) targets are used.
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', metrics.Recall(name = "Recall"), metrics.Precision(name = "Precision")],
    )

    # Batch size is typically one of 32, 64 or 128
    history = model.fit(
        X_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS_SIZE,
        validation_data=(X_test, y_test),
    )

    # Persist the trained model for later use
    saved_file_name = f"{model_name}.keras"
    model.save(saved_file_name)

    return saved_file_name, history

### 4. Function for Evaluating Training Model
- Confusion Matrix, 
- Classification Report, 
- Number of Correct, Wrong and % Match

In [None]:
def math_reports(model, X_test, y_test, test_class_labels, train_class_labels):
    """Evaluate a saved model and build textual reports of its predictions.

    Args:
        model: Path of the saved model. If no file or directory exists at that
            path, ``".keras"`` is appended, so both ``"name"`` and
            ``"name.keras"`` are accepted.
        X_test: Images to predict on.
        y_test: One-hot encoded true labels for ``X_test``.
        test_class_labels: Display names passed to ``classification_report``
            as ``target_names``; must line up with the sorted unique true labels.
        train_class_labels: Sequence mapping a class index to its name.

    Returns:
        ``(confusion, cf_report, report, predicted_report, predicted_labels)``:
        the confusion matrix, the classification report text, a per-class
        correct/total summary, a per-sample "Predicted/Actual" listing and the
        predicted class indices.
    """
    # The original appended ".keras" but then loaded the unmodified name;
    # resolve the path once and actually use it.
    model_path = model if os.path.exists(model) else f"{model}.keras"
    loaded_model = tf.keras.models.load_model(model_path)

    predicted_y = loaded_model.predict(X_test)

    # Convert one-hot encoded labels back to integer labels
    # (use y_test directly instead if label-encoding was used)
    y_test_labels = np.argmax(y_test, axis=1)
    predicted_labels = np.argmax(predicted_y, axis=1)

    confusion = confusion_matrix(y_test_labels, predicted_labels)

    cf_report = classification_report(y_test_labels, predicted_labels, labels=np.unique(y_test_labels), target_names=test_class_labels)

    # Per-class tallies, keyed by the true class name
    correct_instances_per_class = {}
    total_instances_per_class = {}
    predicted_lines = []

    for predicted_index, true_index in zip(predicted_labels, y_test_labels):
        predicted = train_class_labels[predicted_index]
        test_label = train_class_labels[true_index]
        is_correct = predicted == test_label

        result = "correct" if is_correct else "wrong"
        predicted_lines.append(f"Predicted: {predicted}, Actual: {test_label}, Result: {result}\n")

        total_instances_per_class[test_label] = total_instances_per_class.get(test_label, 0) + 1
        correct_instances_per_class[test_label] = correct_instances_per_class.get(test_label, 0) + (1 if is_correct else 0)

    # Summary of correct/total for each class, ordered by class name
    report_lines = []
    for label in sorted(total_instances_per_class):
        correct_count = correct_instances_per_class[label]
        total_count = total_instances_per_class[label]
        report_lines.append(
            f"Class {label}: Correct {correct_count}/{total_count} | Wrong: {total_count - correct_count} | % Match: {round((correct_count/total_count) * 100,2)}\n"
        )

    report = "".join(report_lines)
    predicted_report = "".join(predicted_lines)

    return confusion, cf_report, report, predicted_report, predicted_labels

# Running model on Training Data (Digits & Symbols Images only (17 classes))

In [None]:
# the directory final_82 is too large, thus it is not stored in the github repo
images, labels, training_class_labels, class_labels_dict = pre_processing_from_dir("final_82/train_images", {}, True)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [None]:
# MODEL_SAVE_NAME is defined in global variable
model, history = math_model(X_train, X_test, y_train, y_test, len(training_class_labels), MODEL_SAVE_NAME)

### Check for under/overfitting & deciding on epoch

In [None]:
# Reload the model saved by the training step. Point this at the model that was
# trained on the current split, because X_test / y_test differ between runs.
loaded_model = tf.keras.models.load_model(MODEL_SAVE_NAME + ".keras")

loss, accuracy, recall, precision = loaded_model.evaluate(X_test, y_test)
print(f"Loss : {round(loss * 100, 2)}, Accuracy : {round(accuracy * 100, 2)}, Recall : {round(recall * 100, 2)}, Precision : {round(precision * 100, 2)}")

# Two side-by-side panels: loss on the left, accuracy on the right
fig, (loss_axis, accuracy_axis) = plt.subplots(1, 2, figsize=(10, 5))

# Training vs. validation loss per epoch
loss_axis.plot(history.history['loss'], label='Training Loss')
loss_axis.plot(history.history['val_loss'], label='Validation Loss')
loss_axis.set_xlabel('Epochs')
loss_axis.set_ylabel('Loss')
loss_axis.legend()

# Training vs. validation accuracy per epoch
accuracy_axis.plot(history.history['accuracy'], label='Training Accuracy')
accuracy_axis.plot(history.history['val_accuracy'], label='Validation Accuracy')
accuracy_axis.set_xlabel('Epochs')
accuracy_axis.set_ylabel('Accuracy')
accuracy_axis.legend()

plt.show()

### Reports (Training data)

In [None]:
# change this variable to get reports on previously trained model
loaded_model = MODEL_SAVE_NAME + ".keras"

In [None]:
# test class labels & training class labels is the same here
confusion, cf_report, report, predicted_report, predicted_labels = math_reports(loaded_model, X_test, y_test, training_class_labels, training_class_labels)

In [None]:
print("Confusion Matrix")
print(confusion)

In [None]:
print("Classification Report")
print(cf_report)

In [None]:
print("Confusion Matrix Report")
print(report)

In [None]:
print("Predicted vs Actual")
print(predicted_report)

# Running Model with Unseen Test Data (Digits & Symbols Images only (17 classes))

In [None]:
# the directory final_82 is too large, thus it is not stored in the github repo
images_test, labels_test, test_class_labels = pre_processing_from_dir("final_82/test_images", class_labels_dict)

In [None]:
loaded_model = MODEL_SAVE_NAME + ".keras"
loaded_model = tf.keras.models.load_model(loaded_model)

loss, accuracy, recall, precision = loaded_model.evaluate(images_test, labels_test)
print(f"Loss : {round(loss * 100, 2)}, Accuracy : {round(accuracy * 100, 2)}, Recall : {round(recall * 100, 2)}, Precision : {round(precision * 100, 2)}")


In [None]:
# change this variable to get reports on previously trained model
loaded_model = MODEL_SAVE_NAME + ".keras"

### Reports (Unseen Test Data (17 classes))

In [None]:
#Predict using loaded model
confusion_test, cf_report_test, report_test, predicted_report_test, predicted_labels_test = math_reports(loaded_model, images_test, labels_test, test_class_labels, training_class_labels)

In [None]:
print("Classification Report Test")
print(cf_report_test)

In [None]:
print("Confusion Matrix Test")
print(confusion_test)

In [None]:
# Visualize the confusion matrix as a heatmap.

# confusion_matrix() (called without `labels`) has one row/column for every
# class index that occurs in the true OR the predicted labels, in ascending
# order. The tick labels must therefore cover that same union; using only the
# predicted classes mislabels the axes whenever a true class is never predicted.
true_labels_test = np.argmax(labels_test, axis=1)
present_indices = set(np.union1d(true_labels_test, predicted_labels_test).tolist())
class_labels = [
    name
    for name, index in sorted(class_labels_dict.items(), key=lambda item: item[1])
    if index in present_indices
]

# Create a heatmap
plt.figure(figsize=(10, 8))
sns.set(font_scale=1.2)
sns.heatmap(confusion_test, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)

# Set labels and title
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix Heatmap')

# Show the plot
plt.show()

In [None]:
print("Confusion Matrix Report (20 epochs)")
print(report_test)

In [None]:
print("Confusion Matrix Report (20t2 epochs)")
print(report_test)

In [None]:
print("Confusion Matrix Report (30t2 epochs)")
print(report_test)

In [None]:
print("Predicted vs Actual Test")
print(predicted_report_test)

# Running model with Unseen Test Data (from Equation Images)

In [None]:
def pre_processing_from_test(dataset_dir, class_label):
    """Load the images of one sub-directory as normalized 28x28 grayscale arrays.

    Looks for a sub-directory of ``dataset_dir`` named ``class_label`` and loads
    every ``.jpg`` / ``.jpeg`` / ``.png`` file in it (grayscale, resized to
    28x28, scaled by 1/255).

    The files are returned in natural file-name order ("2.png" before
    "10.png"), independent of their extension. The caller compares the
    predictions position by position with a ground-truth string, so a
    deterministic order is required.
    NOTE(review): assumes the file names encode the symbol order - confirm
    against the segmentation step that writes these folders.

    Args:
        dataset_dir: Directory that contains one sub-directory per equation.
        class_label: Name of the sub-directory to load.

    Returns:
        ``(images, class_label)``; ``images`` is an empty array when the
        sub-directory does not exist or holds no readable image.
    """
    import re  # local import: only needed for the natural-sort key

    def _natural_key(path):
        # re.split with a capture group alternates text / digit-run, so the
        # odd positions are always the numeric parts.
        parts = re.split(r"([0-9]+)", os.path.basename(path))
        key = [int(part) if position % 2 else part.lower() for position, part in enumerate(parts)]
        return key, path

    images = []

    # Only the directory that matches class_label is of interest
    if class_label in os.listdir(dataset_dir):
        class_path = os.path.join(dataset_dir, class_label)

        image_files = []
        for extension in ("*.jpg", "*.jpeg", "*.png"):
            image_files.extend(glob.glob(os.path.join(class_path, extension)))
        image_files.sort(key=_natural_key)

        for image_file in image_files:
            image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread returns None instead of raising for unreadable files
                print(f"Skipping unreadable image: {image_file}")
                continue

            image = cv2.resize(image, (28, 28))
            images.append(image / 255.0)  # Normalize pixel values

    return np.array(images), class_label

## Retrieve Ground Truth from csv

In [None]:
import pandas as pd

# Read the CSV file 
# change to csv that you stored the ground truth
filtered_data = pd.read_csv("./ground_truth_csv/ten_paths.csv")

filtered_data

Convert to dictionary for easy reference

In [None]:
# Convert to dictionary for easy reference
filtered_data_dictionary = filtered_data.set_index('path')['gt'].to_dict()
print(filtered_data_dictionary)

Convert predicted labels to gt notation

In [None]:
# Convert predicted output to gt notation
def combine_all_predict(predicted_labels):
    """Render predicted class indices as a ground-truth-style symbol string.

    Each index is looked up in the module-level ``training_class_labels``,
    converted with ``convert_into_gt`` and followed by a single space, so a
    non-empty result always ends with a trailing space.

    Args:
        predicted_labels: Array of class indices (its ``.size`` and integer
            indexing are used).

    Returns:
        The space-terminated symbols joined into one string.
    """
    return "".join(
        convert_into_gt(training_class_labels[predicted_labels[position]]) + " "
        for position in range(predicted_labels.size)
    )

def convert_into_gt(input_symbol):
    r"""Translate a class name into the notation used by the ground truth.

    In the ground-truth strings the operators are written as LaTeX commands:
    "div" becomes "\div" and "times" becomes "\times". Every other symbol is
    returned unchanged.
    """
    for class_name, gt_token in (("div", "\\div"), ("times", "\\times")):
        if input_symbol == class_name:
            return gt_token
    return input_symbol

Function to compare with ground truth and calculate % similarity

In [None]:
import re
def compare_w_ground(path, combined_predicted, filtered_data_dictionary):
    """Score a predicted symbol string against the ground truth stored for ``path``.

    The ground truth is fetched with ``dict.get`` (``None`` when ``path`` is
    not a key) and both strings are handed to ``string_similarity``.

    Returns:
        Whatever ``string_similarity`` returns for the pair.
    """
    reference = filtered_data_dictionary.get(path)
    similarity = string_similarity(combined_predicted, reference)
    return similarity

def string_similarity(str1, str2):
    """Percentage of ground-truth symbols matched position by position.

    Args:
        str1: Predicted symbols separated by whitespace, e.g. ``"1 + 2 = 3"``.
        str2: Ground-truth equation. It is tokenized into single symbols:
            parentheses, ``\\div``, ``\\times``, individual digits and
            individual ``+ - = /`` operators.

    Returns:
        ``matches / number_of_ground_truth_symbols * 100`` rounded to two
        decimals, where symbol ``i`` of the prediction is compared with symbol
        ``i`` of the ground truth. Returns 0.0 when the ground truth contains
        no recognizable symbol.
    """
    predicted_tokens = str1.split()

    # One token per symbol, exactly like the classifier output. (A "+"
    # quantifier on the operator class would merge "=-" into a single token
    # and shift every later comparison.)
    pattern = r"(\(|\)|\\div|\\times|[0-9]|[\+\-\=/])"
    truth_tokens = re.findall(pattern, str2.strip())

    if not truth_tokens:
        # Nothing to compare against; avoid dividing by zero
        return 0.0

    # zip stops at the shorter sequence, so extra symbols on either side
    # simply never count as matches.
    matching_count = sum(
        1 for predicted, truth in zip(predicted_tokens, truth_tokens) if predicted == truth
    )

    # no. of correct / no. of symbols the ground truth has
    similarity_percentage = (matching_count / len(truth_tokens)) * 100

    return round(similarity_percentage, 2)

Retrieve the predicted output and its similarity percentage for each equation

In [None]:
# Class names in class-index order; combine_all_predict() reads this global,
# so it must match the labels the loaded model was trained with.
training_class_labels = ['(',')','+','-','0','1','2','3','4','5','6','7','8','9','=','div','times']

# Load the saved model once, outside the loop (change the file name to load a different model)
loaded_model = tf.keras.models.load_model("20.keras")

# equation path -> [predicted symbols, % similarity to the ground truth]
predicted_1 = {}

for equation_path in filtered_data_dictionary:
    # change folder path here to where the folders of the images are saved in
    # e.g., all_equations
    ez_test, ez_label = pre_processing_from_test("./processed_data/ten_eq", equation_path)

    if ez_test.size == 0:
        # No images were found for this equation
        continue

    predicted_y = loaded_model.predict(ez_test)
    predicted_labels = np.argmax(predicted_y, axis=1)

    # Symbols in ground-truth notation, each followed by a space
    combine = combine_all_predict(predicted_labels)

    # combine[:-1] drops the trailing space before storing
    predicted_1[ez_label] = [combine[:-1], compare_w_ground(equation_path, combine, filtered_data_dictionary)]

print(predicted_1)

Store Predicted Output & Percentage Similarity in dataframe

In [None]:
filtered_data['predicted output'] = filtered_data['path'].map(lambda x: predicted_1.get(x, [None, None])[0])
filtered_data['% similarity'] = filtered_data['path'].map(lambda x: predicted_1.get(x, [None, None])[1])

In [None]:
filtered_data

Calculate Word Error Rate

In [None]:
import re
def word_error_rate(reference, hypothesis):
    """Word error rate between a ground-truth equation and a prediction.

    Both inputs are turned into symbol sequences and the Levenshtein distance
    (insertions, deletions and substitutions all cost 1) between them is
    divided by the number of reference symbols.

    Args:
        reference: Ground-truth equation. It is tokenized into single symbols:
            parentheses, ``\\div``, ``\\times``, individual digits and
            individual ``+ - = /`` operators.
        hypothesis: Predicted symbols separated by whitespace. Any non-string
            value (e.g. ``None`` for an equation without a prediction) is
            treated as an empty prediction.

    Returns:
        ``edit_distance / max(number_of_reference_symbols, 1)`` as a float.
        With an empty reference this is the number of predicted symbols.
    """
    # One token per symbol, exactly like the classifier output. (A "+"
    # quantifier on the operator class would merge "=-" into a single token.)
    pattern = r"(\(|\)|\\div|\\times|[0-9]|[\+\-\=/])"
    ref_words = re.findall(pattern, reference.strip())

    hyp_words = hypothesis.split() if isinstance(hypothesis, str) else []

    # Edit distance by dynamic programming, keeping only the previous row.
    # previous_row[j] = distance between the reference prefix processed so far
    # and the first j hypothesis symbols.
    previous_row = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        current_row = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            cost = 0 if ref_word == hyp_word else 1
            current_row.append(
                min(
                    previous_row[j] + 1,         # deletion
                    current_row[j - 1] + 1,      # insertion
                    previous_row[j - 1] + cost,  # match / substitution
                )
            )
        previous_row = current_row

    edit_distance = previous_row[-1]

    # max(..., 1) avoids dividing by zero for an empty reference
    return edit_distance / max(len(ref_words), 1)

In [None]:
# Compute the WER of every row from the 'gt' (ground truth) and
# 'predicted output' columns. Columns are addressed by name rather than by
# position, and rows are taken from the frame itself so the result always has
# one entry per row.
wer_list = []
for truth, pred in zip(filtered_data['gt'], filtered_data['predicted output']):
    # Rows without a prediction hold None/NaN; score them as an empty prediction
    hypothesis = pred if isinstance(pred, str) else ""
    wer = word_error_rate(truth, hypothesis)
    wer_list.append(0 if wer == 0 else round(wer, 2))

filtered_data['WER'] = wer_list

In [None]:
filtered_data

Save results to evaluation csv

In [None]:
# change accordingly to how you would like your file name to be named
output_csv_path = "./evaluations_csv/fair_ten.csv"

# to_csv does not create missing folders, so make sure the target folder exists
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
filtered_data.to_csv(output_csv_path, index=False)