# 📖 Loading Necessary Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import interpolation as inter
from PIL import Image as im
import pickle
import cv2
from tqdm import tqdm
from scipy.ndimage import rotate
from sklearn.metrics import accuracy_score,f1_score,classification_report
import os
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from scipy.signal import convolve2d
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelBinarizer
from skimage.feature import hog
from copy import deepcopy
from sklearn.feature_selection import VarianceThreshold
import plotly.express as px
from utils import *
from preprocess import *

# 🔧 Utility Functions

In [2]:
def show_images(images,titles=None):
    """
    Draw the given images side by side in one row, each under its own title.

    images: sequence of image arrays; 2-D arrays switch the colormap to gray.
    titles: optional sequence of titles, titles[k] is drawn above images[k].
            When omitted, the images are labelled '(1)', '(2)', ...
    """
    count = len(images)
    if titles is None:
        titles = ['(%d)' % k for k in range(1, count + 1)]

    figure = plt.figure()
    for position, (picture, caption) in enumerate(zip(images, titles), start=1):
        axes = figure.add_subplot(1, count, position)
        if picture.ndim == 2:
            plt.gray()
        plt.imshow(picture)
        axes.set_title(caption)
        plt.axis('off')

    # Scale the default figure size by the number of images shown
    figure.set_size_inches(np.array(figure.get_size_inches()) * count)
    plt.show()

In [3]:
def load_images():
    """
    Load the grayscale training images from the fonts-dataset folder.

    Each font has its own sub-folder named after the font. Files listed as known
    empty images of the "Lemonada" font are skipped, and so is any file that
    OpenCV cannot decode (cv2.imread returns None in that case).

    Returns:
    images_train: list of grayscale image arrays
    labels_train: list with the font name of each image
    filenames: list with the file name of each image
    """
    images_train = []
    labels_train = []
    filenames = []
    labels = ['Scheherazade New', 'Marhey', 'Lemonada', 'IBM Plex Sans Arabic']
    empty_images_filenames = {"360.jpeg", "627.jpeg", "853.jpeg"}
    for label in tqdm(labels):
        for filename in os.listdir(f'fonts-dataset/{label}'):
            # Decide whether to skip before paying for the decode
            if label == "Lemonada" and filename in empty_images_filenames:
                print(f"{filename} is empty image!")
                continue
            img = cv2.imread(f'fonts-dataset/{label}/{filename}', cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Not an image (or unreadable): keeping None would crash preprocessing later
                print(f"{filename} could not be read as an image!")
                continue
            images_train.append(img)
            labels_train.append(label)
            filenames.append(filename)
    return images_train, labels_train, filenames

# ⚙️ Preprocessing Functions

In [6]:
def find_score(arr, angle):
    """
    Rate a candidate rotation angle by how sharply the row sums change.

    The image is rotated by `angle` (same output shape, nearest-neighbour
    sampling, zeros outside the image), its rows are summed, and the score is
    the sum of squared differences between consecutive row sums.

    Args:
    arr: the image array
    angle: the angle to rotate the image by

    Returns:
    hist: the per-row sums of the rotated image
    score: the score of the angle
    """
    rotated = rotate(arr, angle, reshape=False, order=0, mode='constant', cval=0, prefilter=False)
    row_profile = rotated.sum(axis=1)
    score = np.square(np.diff(row_profile)).sum()
    return row_profile, score

In [None]:
def rotate_image(image, angle):
    """
    Rotates an image about its centre by a given angle, keeping the original size.

    Pixels that the rotation leaves uncovered are filled with the border value
    (0, 0, 0), i.e. black.

    Args:
        image: A NumPy array representing the input image.
        angle: The rotation angle in degrees.

    Returns:
        A new NumPy array representing the rotated image.
    """
    # Get image height and width
    height, width = image.shape[:2]

    # Compute the rotation matrix (rotation about the image centre, scale factor 1)
    rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)

    # Perform the rotation and fill the uncovered pixels with 0 (black)
    rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0))

    return rotated_image

In [None]:
def deskew(binary_img):
    """
    Rotate the image to the best-scoring of eight fixed orientations.

    The image is floor-divided by 255.0 (so 255 becomes 1.0 and smaller values
    become 0.0), every multiple of 45 degrees is scored with find_score, and the
    image is rotated by the first angle that reaches the highest score.

    Args:
    binary_img: the binary image

    Returns:
    pix: the deskewed image as a uint8 array scaled back to 0/255
    """
    normalized = binary_img // 255.0
    candidate_angles = np.array([0, 45, 90, 135, 180, 225, 270, 315])

    # Keep the first angle with the strictly highest score
    best_angle = None
    best_score = None
    for candidate in candidate_angles:
        _, candidate_score = find_score(normalized, candidate)
        if best_score is None or candidate_score > best_score:
            best_angle, best_score = candidate, candidate_score

    rotated = rotate_image(normalized, best_angle)
    as_uint8 = (255 * rotated).astype("uint8")
    return np.array(im.fromarray(as_uint8))

In [8]:
def preprocess(img):
    """
    Preprocess the image: denoise, sharpen, binarize, normalise polarity,
    deskew and resize to 600x600.
    
    Args:
    img: the image
    
    Returns:
    img: the preprocessed image
    """
    image_size = 600
    sharpen_kernel = np.array([[0,-1, 0], [-1,5,-1], [0,-1,0]])
    img = cv2.medianBlur(img, 3) # To remove Salt and Pepper noise
    img = cv2.filter2D(img, -1, sharpen_kernel)  # Sharpen the image
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] # Convert the image to binary
    img = cv2.bitwise_not(img) if np.mean(img) > 127 else img # Invert the image if the mean is greater than 127 (mostly white image)
    img = deskew(img) # Deskew the image
    final_img = cv2.resize(img, (image_size, image_size)) # Resize the image
    # final_img = variance_threshold(final_img)
    return final_img

##  ✘ Unsuccessful Preprocessing Techniques

### Segmentation technique (Inspired by Variance Threshold by scikit-learn)

In [None]:
def remove_rows(image, threshold=0.008):
    """
    Drop the rows that contain too few white (255) pixels.

    Args:
    image: 2-D image array
    threshold: a row is removed when its count of white pixels is below
               threshold * image width

    Returns:
    image: the image without the removed rows
    """
    min_white = image.shape[1] * threshold
    white_counts = (image == 255).sum(axis=1)
    too_sparse = white_counts < min_white
    return image[np.logical_not(too_sparse)]

def remove_columns(image, threshold = 0.1):
    """
    Drop the columns that contain too few white (255) pixels.

    Args:
    image: 2-D image array
    threshold: a column is removed when its count of white pixels is below
               threshold * image height

    Returns:
    image: the image without the removed columns
    """
    min_white = image.shape[0] * threshold
    white_counts = (image == 255).sum(axis=0)
    too_sparse = white_counts < min_white
    return image[:, np.logical_not(too_sparse)]

In [None]:
def pad_image(image):
    """
    Zero-pad the image on the right and bottom up to 515 columns by 270 rows.

    Each dimension is handled independently; a dimension that is already large
    enough is left untouched.

    Args:
    image: the image

    Returns:
    image: the padded image
    """
    target_width, target_height = 515, 270

    missing_cols = target_width - image.shape[1]
    if missing_cols > 0:
        image = np.pad(image, ((0, 0), (0, missing_cols)), 'constant', constant_values=(0, 0))

    missing_rows = target_height - image.shape[0]
    if missing_rows > 0:
        image = np.pad(image, ((0, missing_rows), (0, 0)), 'constant', constant_values=(0, 0))

    return image

def crop_image(image):
    """
    Centre-crop the image towards 515x270 and resize it to exactly that size.

    For each dimension that is too large, half of the excess (rounded down) is
    cut from both sides. The final resize absorbs an odd leftover pixel and also
    stretches images that are smaller than the target.

    Args:
    image: the image

    Returns:
    image: the cropped image, 515 columns wide and 270 rows high
    """
    target_width, target_height = 515, 270

    width = image.shape[1]
    if width > target_width:
        margin = (width - target_width) // 2
        # Explicit end index: with margin == 0 a "-margin" end would give an empty slice
        image = image[:, margin:width - margin]

    height = image.shape[0]
    if height > target_height:
        margin = (height - target_height) // 2
        image = image[margin:height - margin, :]

    return cv2.resize(image, (target_width, target_height))

In [None]:
def preprocess_new(img, columns_threshold=0.001, rows_threshold=0.005):
    """
    Preprocess the image and strip its nearly empty rows and columns.

    Runs the same denoise / sharpen / binarize / invert / deskew / resize steps
    as preprocess, then removes sparse columns and rows and brings the result
    to 515x270 by zero-padding and centre-cropping.

    Args:
    img: the image
    columns_threshold: threshold passed to remove_columns
    rows_threshold: threshold passed to remove_rows

    Returns:
    img: the preprocessed image
    """
    image_size = 600
    sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    img = cv2.medianBlur(img, 3)  # To remove Salt and Pepper noise
    img = cv2.filter2D(img, -1, sharpen_kernel)  # Sharpen the image
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]  # Convert the image to binary
    # Invert the image if the mean is greater than 127 (mostly white image)
    if np.mean(img) > 127:
        img = cv2.bitwise_not(img)
    img = deskew(img)  # Deskew the image
    img = cv2.resize(img, (image_size, image_size))  # Resize the image
    img = remove_columns(img, columns_threshold)
    img = remove_rows(img, rows_threshold)
    # The padding helper in this notebook is pad_image; a bare `pad` is not defined here
    img = pad_image(img)
    return crop_image(img)

### Variance Thresholding by scikit-learn

In [None]:
def variance_threshold(image):
    """
    Filter the image's columns, then its rows, with scikit-learn's VarianceThreshold(10).

    The selector is first fitted on the image as-is (columns are the features),
    then refitted on the transposed result so the rows are filtered too; the
    output is transposed back to the original orientation.
    """
    selector = VarianceThreshold(10)
    columns_filtered = selector.fit_transform(image)
    rows_filtered = selector.fit_transform(columns_filtered.T)
    return rows_filtered.T

### Line & Word Segmentation

In [None]:
def save_image(img, folder, title):
    """Write `img` as `<title>.png` inside `folder`, relative to the working directory."""
    destination = f'./{folder}/{title}.png'
    cv2.imwrite(destination, img)
    
def projection(gray_img, axis:str='horizontal'):
    """ 
    Compute the horizontal or the vertical projection of a gray image 

    Args:
        gray_img: 2-D image array
        axis (str): 'horizontal' sums every row (one value per row),
            'vertical' sums every column (one value per column)

    Returns:
        The sums as an int32 array.

    Raises:
        ValueError: if axis is neither 'horizontal' nor 'vertical'.
    """
    if axis == 'horizontal':
        summed_axis = 1
    elif axis == 'vertical':
        summed_axis = 0
    else:
        # Fail with a clear message instead of an unbound local variable
        raise ValueError(f"axis must be 'horizontal' or 'vertical', got {axis!r}")

    return np.sum(gray_img, summed_axis).astype('int32')

In [11]:
def projection_segmentation(clean_img, axis, cut=15, min_width=20, min_height=30):
    """Segment the image based on the projection profile

    A segment starts at the first non-zero projection bin and is closed once
    `cut` consecutive zero bins follow it. A segment that is still open when the
    profile ends (fewer than `cut` zero bins after it) is emitted as well.

    Args:
        clean_img : Preprocessed image
        axis (str): 'horizontal' (split into lines) or 'vertical' (split into words)
        cut (int, optional): Number of consecutive empty bins that ends a segment. Defaults to 15.
        min_width (int, optional): Currently unused (the width filter is disabled). Defaults to 20.
        min_height (int, optional): Currently unused (the height filter is disabled). Defaults to 30.

    Returns:
        list: the segments as slices of clean_img, in profile order
    """
    def cut_out(first, stop):
        # One bin of margin before the segment, clamped to the image border
        first = max(first - 1, 0)
        if axis == 'horizontal':
            return clean_img[first:stop, :]  # Line segmentation
        if axis == 'vertical':
            return clean_img[:, first:stop]  # Word segmentation
        return None

    segments = []
    start = -1
    gap = 0

    projection_bins = projection(clean_img, axis)
    for idx, projection_bin in enumerate(projection_bins):
        if projection_bin != 0:
            gap = 0
            if start == -1:
                start = idx
        elif start != -1:
            gap += 1
            if gap >= cut:
                segment = cut_out(start, idx)
                if segment is not None:
                    segments.append(segment)
                gap = 0
                start = -1

    # Flush a segment that runs up to (or close to) the end of the image
    if start != -1:
        segment = cut_out(start, len(projection_bins))
        if segment is not None:
            segments.append(segment)

    return segments

#### Line Segmentation

In [12]:
def line_horizontal_projection(image, cut=3): 
    """Split the image into line segments using its horizontal projection profile."""
    return projection_segmentation(image, axis='horizontal', cut=cut)

#### Word Segmentation

In [13]:
def word_vertical_projection(line_image, cut=3):
    """
    Split a line image into word segments using its vertical projection profile.

    The segments are returned in reverse column order, i.e. rightmost first
    (presumably to follow right-to-left reading order).
    """
    left_to_right = projection_segmentation(line_image, axis='vertical', cut=cut)
    return left_to_right[::-1]

In [14]:
def extract_words(img, visual=0):
    """
    Segment the image into lines and each line into words.

    Args:
    img: the preprocessed image
    visual: when truthy, every line is written to the 'lines' folder and every
            word to the 'words' folder

    Returns:
    words: list of (word_image, line_image) tuples, one per detected word
    """
    words = []
    for line_no, line in enumerate(line_horizontal_projection(img)):
        if visual:
            save_image(line, 'lines', f'line{line_no}')
        # Keep the parent line next to every word
        words.extend((word, line) for word in word_vertical_projection(line))

    if visual:
        for word_no, (word_img, _) in enumerate(words):
            save_image(word_img, 'words', f'word{word_no}')
    return words

# 🛠️ Feature Extraction

### HOG

In [15]:
def apply_hog(X_train_preprocess):
    """
    Compute a HOG descriptor for every preprocessed image.

    Uses 16 orientations, 32x32-pixel cells, 4x4-cell blocks and L2-Hys block
    normalisation. Returns the descriptors stacked in one NumPy array.
    """
    descriptors = [
        hog(image, orientations= 16, pixels_per_cell=(32, 32), cells_per_block=(4, 4), block_norm='L2-Hys')
        for image in tqdm(X_train_preprocess)
    ]
    return np.array(descriptors)

### SIFT

In [17]:
def apply_sift(X_train_preprocess):
    """
    Compute the SIFT descriptors of every preprocessed image.

    Returns a list with one flattened descriptor array per image. Images for
    which SIFT yields no descriptors get a single all-zero 128-value descriptor.
    """
    extractor = cv2.SIFT_create()

    flattened = []
    for image in tqdm(X_train_preprocess):
        _, descriptors = extractor.detectAndCompute(image, None)
        if descriptors is None:
            # No keypoints found: stand in with one zero descriptor
            descriptors = np.zeros((1, 128))
        flattened.append(descriptors.flatten())
    return flattened
    
# Bring every flattened SIFT descriptor to exactly `fixed_len` values
def pad_sift_descriptors(X_train_sift, fixed_len):
    """
    Zero-pad short descriptor vectors and truncate long ones to `fixed_len`.

    X_train_sift: iterable of 1-D arrays
    fixed_len: number of values each output row must have

    Returns one NumPy array with a row per input descriptor.
    """
    rows = []
    for des in X_train_sift:
        shortfall = fixed_len - des.shape[0]
        if shortfall > 0:
            des = np.pad(des, (0, shortfall))
        rows.append(des[:fixed_len])
    return np.array(rows)

In [18]:
# X_train_sift = apply_sift(X_train_preprocess)

# # Get the maximum keypoint length of the SIFT descriptors
# max_kp = max(len(kp)/128 for kp in X_train_sift)

# # Get the average keypoint length of the SIFT descriptors
# avg_kp = np.mean([len(kp)/128 for kp in X_train_sift])

# # Get the minimum keypoint length of the SIFT descriptors
# min_kp = min(len(kp)/128 for kp in X_train_sift)

# # Print the maximum keypoint length
# print(max_kp)

# # Print the average keypoint length
# print(avg_kp)

# # Print the minimum keypoint length
# print(min_kp)

##  ✘ Unsuccessful Feature Extraction Techniques

### EDM

#### EDM1

In [21]:
def Laplacian_filter(img):
    """
    Build a two-level edge map from an 8-neighbour Laplacian response.

    The image is convolved with the kernel (full convolution, so the output is
    larger than the input); positions whose response exceeds 0.5 become 0 and
    every other position becomes 255.
    """
    kernel = np.array([
        [-1, -1, -1],
        [-1, 8, -1],
        [-1, -1, -1],
    ])
    response = convolve2d(img, kernel)
    edge_image = np.full_like(response, 255)
    edge_image[response > 0.5] = 0
    return edge_image

In [22]:
def get_edm1_matrix(edge_image):
    """
    Count, for each of the eight directions, the edge pixels with an edge neighbour there.

    Edge pixels are those equal to 0. The image is padded with a one-pixel
    border of 1s so border pixels have non-edge neighbours outside the image.
    Cell [1 + d_row, 1 + d_col] of the 3x3 result holds the number of edge
    pixels whose neighbour at offset (d_row, d_col) is also an edge pixel; the
    centre cell is therefore the total number of edge pixels.
    """
    padded = np.pad(edge_image, 1, mode='constant', constant_values=1)
    is_edge = padded == 0
    n_rows, n_cols = is_edge.shape
    centre = is_edge[1:n_rows - 1, 1:n_cols - 1]

    edm_matrix = np.zeros((3, 3))
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            # The same window as `centre`, shifted by the offset
            neighbour = is_edge[1 + d_row:n_rows - 1 + d_row, 1 + d_col:n_cols - 1 + d_col]
            edm_matrix[1 + d_row, 1 + d_col] = np.count_nonzero(centre & neighbour)
    return edm_matrix

#### EDM2

In [23]:
def sort_values(edm1_matrix):
    """
    Rank the EDM1 direction pairs by occurrence count, highest first.

    The flattened 3x3 matrix is read at indices 5, 2, 1 and 0; each is grouped
    with its opposite index (3, 6, 7 and 8). Pairs that share the same count
    keep that reading order. Returns the flat list of indices in ranked order.
    """
    values = edm1_matrix.flatten()
    direction_pairs = ((5, 3), (2, 6), (1, 7), (0, 8))

    by_count = {}
    for first, opposite in direction_pairs:
        by_count.setdefault(values[first], []).extend([first, opposite])

    ranked = []
    for count in sorted(by_count, reverse=True):
        ranked.extend(by_count[count])
    return ranked

In [24]:
def get_first_occurrence(neighboring_indices, edm1_occurrences_sorted_list):
    """
    Return the first index of the ranked list that is among the neighbouring indices.

    Returns None when none of the ranked indices is a neighbour.
    """
    matches = (idx for idx in edm1_occurrences_sorted_list if idx in neighboring_indices)
    return next(matches, None)

In [25]:
def get_edm2_matrix(edge_image, edm1_matrix):
    """
    Build the EDM2 matrix: one vote per edge pixel for its highest-ranked direction.

    For every edge pixel (value 0) the directions in which it has an edge
    neighbour are collected, and the one ranked first by sort_values(edm1_matrix)
    gets one count. The centre cell is copied from edm1_matrix[1, 1]. An edge
    pixel with no edge neighbour casts no vote.

    Args:
    edge_image: 2-D edge map in which edge pixels are 0
    edm1_matrix: the 3x3 EDM1 matrix of the same edge map

    Returns:
    edm2_matrix: the 3x3 EDM2 matrix
    """
    # (flattened 3x3 index, row offset, column offset) of the eight neighbours
    neighbour_offsets = (
        (5, 0, 1), (2, -1, 1), (1, -1, 0), (0, -1, -1),
        (3, 0, -1), (6, 1, -1), (7, 1, 0), (8, 1, 1),
    )

    edm2_matrix_flattened = np.zeros(9)
    edm1_occurrences_sorted_list = sort_values(edm1_matrix)
    edm2_matrix_flattened[4] = edm1_matrix[1,1]

    # The border of 1s guarantees every edge pixel has all eight neighbours in range
    padded = np.pad(edge_image, 1, mode='constant', constant_values=1)
    for i, j in zip(*np.nonzero(padded == 0)):
        neighboring_indices = [
            index for index, d_row, d_col in neighbour_offsets
            if padded[i + d_row, j + d_col] == 0
        ]
        first_occurrence = get_first_occurrence(neighboring_indices, edm1_occurrences_sorted_list)
        # Indexing with None would add an axis and increment all nine cells at once
        if first_occurrence is not None:
            edm2_matrix_flattened[first_occurrence] += 1

    return edm2_matrix_flattened.reshape(3,3)

In [26]:
def apply_edm(X_train_preprocess):
    """
    Compute the EDM1 and EDM2 features of every preprocessed image.

    Each image is turned into an edge map with Laplacian_filter, from which the
    3x3 EDM1 and EDM2 matrices are built. Returns two arrays with nine columns
    each, holding the flattened EDM1 and EDM2 matrices.
    """
    edge_images = [Laplacian_filter(img) for img in tqdm(X_train_preprocess)]

    edm1_list = [get_edm1_matrix(edge_img) for edge_img in tqdm(edge_images)]
    edm2_list = [
        get_edm2_matrix(edge_img, edm1)
        for edge_img, edm1 in tqdm(zip(edge_images, edm1_list), total=len(edge_images))
    ]

    edm1_matrices = np.array(edm1_list).reshape(-1, 9)
    edm2_matrices = np.array(edm2_list).reshape(-1, 9)
    return edm1_matrices, edm2_matrices

In [27]:
# X_edm1, X_edm2 = apply_edm(X_train_preprocess)
# print(X_edm1.shape)
# print(X_edm2.shape)

In [28]:
# EDM_features = np.concatenate((X_edm1, X_edm2), axis=1)
# print(EDM_features.shape)

In [29]:
def apply_additional_edm_features(X_edm1, X_edm2):
    """
    Derive four feature groups from the flattened EDM matrices.

    Returns:
    edges_direction: largest EDM1 value of each row, as a column vector
    homogeneity: each EDM1 row divided by its own sum
    pixel_regularity: each EDM1 row divided by its centre value (index 4)
    edges_regularity: each EDM2 row divided by its centre value (index 4)
    """
    def scale_rows(rows, divisor_of):
        # Divide every row by a scalar computed from that same row
        return np.array([row / divisor_of(row) for row in rows])

    def centre_value(row):
        return row[4]

    edges_direction = np.max(X_edm1, axis=1).reshape(-1, 1)
    homogeneity = scale_rows(X_edm1, np.sum)
    pixel_regularity = scale_rows(X_edm1, centre_value)
    edges_regularity = scale_rows(X_edm2, centre_value)
    return edges_direction, homogeneity, pixel_regularity, edges_regularity

In [30]:
# edges_direction, homogeneity, pixel_regularity, edges_regularity = apply_additional_edm_features(X_edm1, X_edm2)
# print(edges_direction.shape)
# print(homogeneity.shape)
# print(pixel_regularity.shape)
# print(edges_regularity.shape)

In [31]:
# edm_features = np.concatenate((edges_direction, homogeneity, pixel_regularity, edges_regularity), axis=1)

# ⚙️ Preprocessing Module

In [32]:
class Preprocessing():
    """
    Turn raw images into model-ready feature vectors.

    Every image goes through preprocess(), then HOG and SIFT features are
    extracted, concatenated and passed through the given scaler/PCA pipeline.
    """
    # Number of values every flattened SIFT descriptor vector is padded or truncated to
    _SIFT_FIXED_LEN = 128 * 350

    def __init__(self, preprocess_pipe):
        self.preprocess_pipe = preprocess_pipe

    def _raw_features(self, X_preprocess):
        """Concatenate the HOG and the fixed-length SIFT features of preprocessed images."""
        X_hog = apply_hog(X_preprocess)
        X_sift = apply_sift(X_preprocess)
        X_sift_padded = pad_sift_descriptors(X_sift, self._SIFT_FIXED_LEN)
        return np.concatenate((X_hog, X_sift_padded), axis=1)

    def preprocess_data(self, X, test=False):
        """
        Extract and transform the features of a collection of images.

        With test=False the pipeline is fitted on these features and then saved
        to 'preprocess_pipe.pkl'; with test=True the already-fitted pipeline is
        only applied.
        """
        X_preprocess = np.array([preprocess(i) for i in tqdm(X)])
        X_features = self._raw_features(X_preprocess)
        if test:
            return self.preprocess_pipe.transform(X_features)

        X_features_transformed = self.preprocess_pipe.fit_transform(X_features)
        with open('preprocess_pipe.pkl', 'wb') as f:
            pickle.dump(self.preprocess_pipe, f)
        return X_features_transformed

    def preprocess_test_data(self, X):
        """Extract and transform the features of a single image with the fitted pipeline."""
        X_preprocess = [np.array(preprocess(X))]
        X_features = self._raw_features(X_preprocess)
        return self.preprocess_pipe.transform(X_features)

# 🚀 Model Development

##
<a id='4.11'></a>
<p style="font-size: 34px; color: #FFFFFF; /* Set text color to white */ 
  font-family: 'Roboto'; 
  text-align: center; 
  padding: 10px 20px; /* Add padding for spacing */
  background-image: linear-gradient(to right, #9746ff, #000000); 
  border-radius: 5px 5px;"><strong>PyTorch Model</strong></p>

In [33]:
class PyTorchClassifier(nn.Module):
    """
    Fully connected classifier: two hidden ReLU layers followed by a linear output layer.

    The network itself lives in `self.model`, so its parameters appear in this
    module's state dict under the "model." prefix. That is the layout written
    to disk by save_best_model and expected by load_state_dict.
    """
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim, learning_rate=0.0002 , epoch=50):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.output_dim = output_dim
        self.best_accuracy = -1  # Initialize with a value that will definitely be improved upon
        self.best_model_state = None  # Set by fit() whenever the validation accuracy improves
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.model = self.create_model()

    def create_model(self):
        """Build the Linear-ReLU-Linear-ReLU-Linear network from the stored layer sizes."""
        model = nn.Sequential(
            nn.Linear(self.input_dim, self.hidden_dim1),
            nn.ReLU(),
            nn.Linear(self.hidden_dim1, self.hidden_dim2),
            nn.ReLU(),
            nn.Linear(self.hidden_dim2, self.output_dim),
        )
        return model

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of feature vectors."""
        return self.model(x)

    def fit(self, X_train_features, X_val_features, y_train_labels, y_val_labels, labels):
        """
        Train with Adam and cross-entropy, keeping the weights of the best validation epoch.

        Args:
            X_train_features: training feature matrix
            X_val_features: validation feature matrix
            y_train_labels: training labels, each an element of `labels`
            y_val_labels: validation labels, each an element of `labels`
            labels: list of class names; a label's position is its class index

        After the last epoch the best weights are loaded back into the model and
        written to 'best_model.pth'.
        """
        # CrossEntropyLoss takes class indices directly, so no one-hot encoding is needed
        y_train = [labels.index(i) for i in y_train_labels]
        y_val = [labels.index(i) for i in y_val_labels]

        X_train_tensor = torch.FloatTensor(X_train_features)
        y_train_tensor = torch.LongTensor(y_train)

        X_val_tensor = torch.FloatTensor(X_val_features)
        y_val_tensor = torch.LongTensor(y_val)

        # Create the datasets and dataloaders
        train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=16, shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=16, shuffle=False)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

        # Create a tqdm object
        progress_bar = tqdm(range(self.epoch), desc="Epoch", leave=False)

        # Loop over a throwaway variable: iterating with `self.epoch` would
        # overwrite the configured number of epochs with the loop counter
        for _ in progress_bar:
            total_loss = 0
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()
                loss = criterion(self.model(X_batch), y_batch)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            # Calculate accuracy on the validation set
            correct = 0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    predicted = self.model(X_batch).argmax(dim=1)
                    correct += (predicted == y_batch).sum().item()
            accuracy = correct / len(y_val_tensor)

            # If the current model has better accuracy, keep a copy of its parameters
            if accuracy > self.best_accuracy:
                self.best_accuracy = accuracy
                self.best_model_state = deepcopy(self.state_dict())

            # Update the progress bar
            progress_bar.set_postfix({'Loss': f'{total_loss:.4f}', 'Accuracy': f'{self.best_accuracy:.4f}'})

        # Restore the best parameters so predict() matches best_accuracy, then save them
        if self.best_model_state is not None:
            self.load_state_dict(self.best_model_state)
            self.save_best_model('best_model.pth')

    def predict(self, X):
        """Return the predicted class index of every row of X as a NumPy array."""
        X_tensor = torch.FloatTensor(X)
        with torch.no_grad():
            predictions = self.model(X_tensor)
        _, predicted = torch.max(predictions, 1)
        return predicted.numpy()

    def save_best_model(self, filepath):
        """
        Save the best state dict recorded by fit() to `filepath`.

        Raises:
            RuntimeError: if no best state has been recorded yet.
        """
        if self.best_model_state is None:
            raise RuntimeError("No best model state recorded yet; call fit() first.")
        torch.save(self.best_model_state, filepath)

## 💪 Model Training

In [36]:
# Font class names; the rest of the notebook turns a label into its class index with labels.index(...)
labels = ['Scheherazade New', 'Marhey', 'Lemonada', 'IBM Plex Sans Arabic']

# Feature post-processing: standardise the features, then apply PCA configured with n_components=0.99
preprocess_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.99)),
])

In [None]:
# Read every training image from disk; the file names are not needed here, hence `_`
X_data, y_labels, _ = load_images()

# Cache the raw images and their labels as pickle files
with open('X_data.pkl', 'wb') as f:
    pickle.dump(X_data, f)

with open('y_labels.pkl', 'wb') as f:
    pickle.dump(y_labels, f)

In [34]:
# with open('X_data.pkl', 'rb') as f:
#     X_data = pickle.load(f)

# with open('y_labels.pkl', 'rb') as f:
#     y_labels = pickle.load(f)

In [112]:
# Split the data into training and validation sets
# (20% validation, stratified by label, fixed random_state so the split is reproducible)
X_train, X_val, y_train, y_val = train_test_split(X_data, y_labels, test_size=0.20, random_state=42, stratify=y_labels)

In [37]:
# Sanity check: the number of images and the number of labels must match within each split
print(len(X_train), len(y_train))
print(len(X_val), len(y_val))

3197 3197
800 800


In [114]:
# Persist the train/validation split as pickle files (one file per array)
with open('X_train.pkl', 'wb') as f:
    pickle.dump(X_train, f)
        
with open('X_val.pkl', 'wb') as f:
    pickle.dump(X_val, f)
        
with open('y_train.pkl', 'wb') as f:
    pickle.dump(y_train, f)
        
with open('y_val.pkl', 'wb') as f:
    pickle.dump(y_val, f)

In [35]:
# with open('X_train.pkl', 'rb') as f:
#     X_train = pickle.load(f)
    
# with open('X_val.pkl', 'rb') as f:
#     X_val = pickle.load(f)
    
# with open('y_train.pkl', 'rb') as f:
#     y_train = pickle.load(f)
    
# with open('y_val.pkl', 'rb') as f:
#     y_val = pickle.load(f)

In [None]:
# Extract the training features; test defaults to False, so the scaler/PCA pipeline
# is fitted here and saved to preprocess_pipe.pkl by preprocess_data
preprocess_module = Preprocessing(preprocess_pipe)
X_train_features = preprocess_module.preprocess_data(X_train)
with open('X_train_features.pkl', 'wb') as f:
    pickle.dump(X_train_features, f)
    
# Number of features after the pipeline; used below as the network's input size
input_dim = X_train_features.shape[1]
print(input_dim)

In [None]:
# Validation features use the pipeline fitted on the training data (test=True -> transform only)
X_val_features = preprocess_module.preprocess_data(X_val, test=True)
with open('X_val_features.pkl', 'wb') as f:
    pickle.dump(X_val_features, f)

In [None]:
# with open('preprocess_pipe.pkl', 'rb') as f:
#     preprocess_pipe = pickle.load(f)
    

In [None]:
# with open('X_train_features.pkl', 'rb') as f:
#     X_train_features = pickle.load(f)
    
# with open('X_val_features.pkl', 'rb') as f:
#     X_val_features = pickle.load(f)
    
# with open('y_train.pkl', 'rb') as f:
#     y_train = pickle.load(f)
        
# with open('y_val.pkl', 'rb') as f:
#     y_val = pickle.load(f)

In [None]:
# Network with hidden layers of 512 and 256 units and one output per font class;
# fit() trains it and writes the best weights to best_model.pth
pytorch_model = PyTorchClassifier(input_dim, 512, 256, len(labels), learning_rate=0.00025, epoch=50)
pytorch_model.fit(X_train_features, X_val_features, y_train, y_val, labels)

# 🚨 Testing

In [47]:
def evaluate(y_pred, y_test):
    """
    Print the accuracy of the predictions as a percentage and return it.

    Args:
    y_pred: the predicted labels
    y_test: the true labels

    Returns:
    accuracy: the accuracy as a fraction between 0 and 1
    """
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy*100:.4f}%')
    # Returned so callers that assign the result get the value rather than None
    return accuracy

In [None]:
# Load the evaluation images as grayscale, one sub-folder per font label.
# Note that they are read from content/train/<label>.
# NOTE(review): cv2.imread returns None for files it cannot decode — confirm the folders contain only images
X_test = []
y_test = []
for i in tqdm(labels):
    for filename in os.listdir(f'content/train/{i}'):
        img = cv2.imread(f'content/train/{i}/{filename}', cv2.IMREAD_GRAYSCALE)
        X_test.append(img)
        y_test.append(i)

In [None]:
# Convert the font names to integer class indices (their position in `labels`)
y_test =  [labels.index(i) for i in y_test]

In [49]:
# Reload the fitted scaler/PCA pipeline that Preprocessing.preprocess_data saved during training.
# NOTE: unpickling can execute arbitrary code — only load pickle files you created yourself.
with open('preprocess_pipe.pkl', 'rb') as f:
    preprocess_pipe = pickle.load(f)

PyTorchClassifier(
  (model): Sequential(
    (0): Linear(in_features=2981, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=4, bias=True)
  )
)

In [None]:
# Build the test features with the already-fitted pipeline (test=True -> transform only, no refit)
preprocess_module = Preprocessing(preprocess_pipe)
X_test_features_transformed = preprocess_module.preprocess_data(X_test, test=True)

# Recreate the network with the same layer sizes as in training and load the weights from best_model.pth
# NOTE(review): `input_dim` is only assigned in the training cell — confirm it is defined when this cell runs on its own
pytorch_classifier = PyTorchClassifier(input_dim, 512, 256, len(labels), learning_rate=0.00025, epoch=50)
pytorch_classifier.load_state_dict(torch.load("best_model.pth"))
pytorch_classifier.eval()

In [None]:
# List every parameter tensor of the loaded model together with its shape
print("Model's state_dict:")
for tensor_name, tensor in pytorch_classifier.state_dict().items():
    print(tensor_name, "\t", tensor.size())

In [None]:
# Predicted class index for every test image
y_pred = pytorch_classifier.predict(X_test_features_transformed)

In [55]:
# Print the accuracy of the predictions against the integer test labels
accuracy = evaluate(y_pred, y_test)

Accuracy: 96.5000%


#  📊 Performance Analysis

In [56]:
# Accuracy (in percent) recorded for each model tried; the values are entered by hand
models_accuracy = {
    "PyTorch Neural Network": 96.5,
    "Stacking Classifier": 96,
    "Logistic Regression": 95.5,
    "MLP Classifier": 92.75,
    "SVM": 91.25,
}

In [57]:
# One row per model, sorted from the highest to the lowest score
models = pd.DataFrame(models_accuracy.items(), columns=['Model', 'Score'])
models = models.sort_values(by = 'Score', ascending = False)

# Horizontal bar chart comparing the models
px.bar(data_frame = models, x = 'Score', y = 'Model', color = 'Score', template = 'plotly_dark', title = 'Models Comparison')

### Multilayer Perceptron

In [None]:
# # Build a shallow neural network model of 1 hidden layer with 256 neurons and relu activation function
# # Create a model of 1 hidden layer with 256 neurons and relu activation function and adam solver and softmax output layer
# model = MLPClassifier(hidden_layer_sizes=(256,), activation='relu', solver='adam', verbose=True)

# # Fit the model
# model.fit(X_train_transformed, y_train)

# # Predict the training data
# y_train_pred = model.predict(X_train_transformed)

# # Predict the testing data
# y_test_pred = model.predict(X_test_transformed)

# # Print the accuracy of the model
# print('Train accuracy: ', accuracy_score(y_train, y_train_pred)*100)

# print('Test accuracy: ', accuracy_score(y_test, y_test_pred)*100)

# # Print the classification report
# print('Train classification report: ', classification_report(y_train, y_train_pred, target_names=labels))

# print('Test classification report: ', classification_report(y_test, y_test_pred, target_names=labels))

# # Accuracy: 92.75%

### Logistic Regression

In [None]:
# lin_model = LogisticRegression()

# # Define the hyperparameters
# param_grid = {
#     'C': np.logspace(-4, 4, 30),
#     'penalty': ['l2'],
#     'solver': ['liblinear', 'saga', 'lbfgs'],
#     'warm_start': [True, False]
# }

# # Initialize the RandomizedSearchCV
# random_search = RandomizedSearchCV(lin_model, param_distributions=param_grid, n_iter=100, cv=5, verbose=2, random_state=42, n_jobs=-1)

# # Fit the model
# random_search.fit(X_train_transformed, y_train)
# relevant_columns = ['param_C', 'param_penalty', 'param_solver', 'param_warm_start', 'mean_test_score', 'std_test_score', 'rank_test_score']
# cv_results_df = pd.DataFrame(random_search.cv_results_)[relevant_columns].round(decimals=3).sort_values(by='rank_test_score')
# cv_results_df.head(10)

# # Print the best parameters
# print(random_search.best_params_)

# Best parameters of Logistic Regression: 
# {'warm_start': True, 'solver': 'saga', 'penalty': 'l2', 'C': 0.001}
# {'warm_start': True, 'solver': 'lbfgs', 'penalty': 'l2', 'C': 0.0006723357536499335}

In [None]:
# # Initialize the Logistic Regression model
# model = LogisticRegression(warm_start=True, solver='saga', penalty='l2', C=0.8, random_state=42)

# # Fit the model
# model.fit(X_train_transformed, y_train)

# # Predict the test data
# y_pred = model.predict(X_test_transformed)

# # Predict the train data
# y_pred_train = model.predict(X_train_transformed)

# # Print the training accuracy
# print(f"Training Accuracy: {accuracy_score(y_train, y_pred_train)*100}")

# # Print the testing accuracy
# print(f"Testing Accuracy: {accuracy_score(y_test, y_pred)*100}")

# # Print the f1 score of the training data
# print(f"Training F1 Score: {f1_score(y_train, y_pred_train, average='weighted')}")

# # Print the f1 score of the testing data
# print (f"Testing F1 Score: {f1_score(y_test, y_pred, average='weighted')}")

# # Print the classification report of the training data
# print(classification_report(y_train, y_pred_train, target_names=labels))

# # Print the classification report of the testing data
# print(classification_report(y_test, y_pred, target_names=labels))

# # Save the model
# joblib.dump(model, 'logistic_model.pkl')

# # Accuracy: 95.5%

### Support Vector Machine

In [None]:
# svm = SVC()

# param_dist = {
#     'C': np.logspace(-3, 3, 15), 
#     'kernel': ['poly', 'rbf'], 
#     'degree': [2, 3],
#     'gamma': ['scale', 'auto']
# }

# clf_searched = RandomizedSearchCV(svm, param_dist, n_iter=100, cv=5, random_state=42, n_jobs=-1, verbose=1)

# clf_searched.fit(X_train_transformed, y_train)
# relevant_columns = ['param_C', 'param_kernel', 'param_gamma', 'mean_test_score', 'std_test_score', 'rank_test_score']
# cv_results_df = pd.DataFrame(clf_searched.cv_results_)[relevant_columns].round(decimals=3).sort_values(by='rank_test_score')
# cv_results_df.head(10)

# Best parameters of the SVM
# {C : 7.196857 , kernel : 'rbf', gamma : 'scale'}

In [None]:
# # Initialize the SVM model
# svm = SVC(C=7.196857 , kernel='rbf', gamma='scale', random_state=42)

# # Fit the SVM on the training data
# svm.fit(X_train_transformed, y_train)

# # Predict the labels of the test set
# y_pred = svm.predict(X_test_transformed)
# # Predict the labels of the training set
# y_pred_train = svm.predict(X_train_transformed)

# # Print the accuracy of the SVM model on the training set
# print(f"Accuracy of SVM model on the training set: {accuracy_score(y_train, y_pred_train)*100}")

# # Print the accuracy of the SVM model on the test set
# print(f"Accuracy of SVM model: {accuracy_score(y_test, y_pred)*100}")

# # Print the classification report of the SVM model
# print(classification_report(y_train, y_pred_train, target_names=labels))

# # Print the classification report of the SVM model
# print(classification_report(y_test, y_pred, target_names=labels))

# # Save the SVM model as a pickle (.pkl) file
# joblib.dump(svm, 'svm_model.pkl')

# # Accuracy: 91.25%