In [5]:
# Load the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image as im
from tqdm import tqdm
import cv2
from tqdm import tqdm
from scipy.ndimage import rotate
import joblib
from sklearn.metrics import accuracy_score,f1_score,classification_report
import os

In [6]:
# Font class names. Each name is used as a sub-folder of `fonts-dataset/` by
# `load_images`, and its position in this list is used as the numeric class id.
labels = [ 'Scheherazade New' , 'Marhey' , 'Lemonada' , 'IBM Plex Sans Arabic']
# Side length (in pixels) of the square image that `preprocess` resizes to.
image_size = 600
def show_images(images, titles=None):
    """
    Display a row of images side by side, each with its own title.

    Args:
    images: sequence of image arrays; images[k] is drawn in the k-th column
    titles: optional sequence of titles; titles[k] is used for images[k].
            When omitted, the images are titled "(1)", "(2)", ...

    Returns:
    None. The figure is shown with plt.show().
    """
    count = len(images)
    if titles is None:
        titles = ['(%d)' % k for k in range(1, count + 1)]

    figure = plt.figure()
    for position, (picture, caption) in enumerate(zip(images, titles), start=1):
        axes = figure.add_subplot(1, count, position)
        # 2-D arrays have no colour channels, so switch to the gray colormap
        if picture.ndim == 2:
            plt.gray()
        plt.imshow(picture)
        axes.set_title(caption)
        plt.axis('off')

    # Widen the figure in proportion to the number of images shown
    figure.set_size_inches(np.array(figure.get_size_inches()) * count)
    plt.show()

In [7]:
# Load the images from fonts-dataset folder
def load_images():
    """
    Load every image of every font class from the `fonts-dataset` folder.

    Each entry of the module-level `labels` list is treated as a sub-folder of
    `fonts-dataset/`. Files are read as single-channel grayscale images.
    Files listed as empty images in the "Lemonada" folder, and files that
    OpenCV cannot decode, are reported and skipped.

    Returns:
    images_train: list of grayscale image arrays
    labels_train: list of font names (one per image, taken from `labels`)
    filenames: list of file names (one per image)
    """
    images_train = []
    labels_train = []
    filenames = []
    # Files in the "Lemonada" folder that are known to be empty images
    empty_images_filenames = {"360.jpeg", "627.jpeg", "853.jpeg"}
    # Use tqdm to show a progress bar over the font classes
    for i in tqdm(labels):
        folder = os.path.join('fonts-dataset', i)
        # Sort the listing so the order of the returned lists is reproducible
        for filename in sorted(os.listdir(folder)):
            # Check the skip-list before decoding so empty files are never read
            if i == "Lemonada" and filename in empty_images_filenames:
                print(filename)
                print("empty image")
                continue
            img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
            # cv2.imread returns None (instead of raising) when a file cannot be decoded
            if img is None:
                print(filename)
                print("unreadable image")
                continue
            images_train.append(img)
            labels_train.append(i)
            filenames.append(filename)
    return images_train, labels_train, filenames



In [8]:
# Read the whole dataset into memory: images, their font names and their file names
X_train, y_train_org, filenames = load_images()
# Replace every font name with its position in `labels` (its numeric class id)
y_train_org = list(map(labels.index, y_train_org))

 50%|█████     | 2/4 [00:15<00:15,  7.78s/it]

360.jpeg
empty image
627.jpeg
empty image
853.jpeg
empty image


100%|██████████| 4/4 [00:29<00:00,  7.46s/it]


In [9]:
def find_score(arr, angle):
    """
    Score a candidate rotation angle by how sharply the row sums change.

    Args:
    arr: the image array
    angle: the angle to rotate the image by

    Returns:
    hist: the per-row sums of the rotated image
    score: the sum of squared differences between consecutive row sums
    """
    # Nearest-neighbour rotation (order=0) that keeps the input shape;
    # positions rotated in from outside the image are filled with 0.
    rotated = rotate(arr, angle, reshape=False, order=0, mode='constant', cval=0, prefilter=False)
    hist = rotated.sum(axis=1)
    row_steps = np.diff(hist, axis=0)
    score = np.sum(row_steps ** 2)
    return hist, score

def rotate_image(image, angle, border_value=(1, 1, 1)):
    """
    Rotate an image about its centre, keeping the original width and height.

    Pixels that fall outside the source image after rotation are filled with
    `border_value`. The default of 1 is white only for images scaled to the
    [0, 1] range (which is what `deskew` passes in); pass 255 to get a white
    fill on 8-bit images.

    Args:
        image: A NumPy array representing the input image.
        angle: The rotation angle in degrees.
        border_value: Value used to fill pixels that have no source pixel.

    Returns:
        A new NumPy array representing the rotated image, with the same
        width and height as the input.
    """
    # Get image height and width
    height, width = image.shape[:2]

    # Rotation about the image centre with no scaling
    rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)

    # Bilinear resampling into a canvas of the original size
    rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=border_value)

    return rotated_image

def deskew(binary_img):
    """
    Rotate a binary image to the candidate angle with the highest skew score.

    Args:
    binary_img: the binary image (expected to contain only 0 and 255, as
                produced by the Otsu threshold in `preprocess`)

    Returns:
    pix: the rotated image as a uint8 array
    """
    # Floor division maps 255 -> 1.0 and every smaller value -> 0.0
    bin_img = (binary_img // 255.0)
    # Candidate rotations: every multiple of 45 degrees over a full turn
    angles = np.array([0, 45, 90, 135, 180, 225, 270, 315])
    # Only the score is needed here; the histogram returned by find_score is ignored
    scores = [find_score(bin_img, angle)[1] for angle in angles]

    # np.argmax returns the first maximum, so ties resolve to the earliest angle in `angles`
    best_angle = angles[int(np.argmax(scores))]

    # Apply the winning rotation, then scale the [0, 1] image back to 8-bit
    data = rotate_image(bin_img, best_angle)
    pix = (255 * data).astype("uint8")
    return pix

def preprocess(img):
    """
    Clean up, binarise, deskew and resize an image.

    Args:
    img: the image

    Returns:
    img: the preprocessed image, resized to (image_size, image_size)
    """
    # Median filter to remove salt-and-pepper noise
    denoised = cv2.medianBlur(img, 3)
    # 3x3 sharpening kernel: centre weight 5, the four direct neighbours -1
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(denoised, -1, kernel)
    # Otsu thresholding; the second element of the result is the binary image
    _, binary = cv2.threshold(sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    deskewed_img = deskew(binary)
    # Invert the image when its mean intensity is above 127
    if np.mean(deskewed_img) > 127:
        final_img = cv2.bitwise_not(deskewed_img)
    else:
        final_img = deskewed_img
    return cv2.resize(final_img, (image_size, image_size))

In [10]:
import torch
# Load the feature-transformation pipeline (applied with pipeline.transform in forward()).
# NOTE: joblib.load unpickles arbitrary objects - only load files from a trusted source.
pipeline = joblib.load('pipeline.pkl')

# Load the network weights onto the CPU. PyTorchClassifier.predict works on CPU
# tensors, and map_location lets a checkpoint that was saved on a GPU load on a
# machine without one.
# NOTE: torch.load is pickle-based as well - only load checkpoints from a trusted source.
model_state_dict = torch.load('best_model.pth', map_location='cpu')

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import LabelBinarizer

class PyTorchClassifier(BaseEstimator, ClassifierMixin):
    """
    Scikit-learn style wrapper around a small fully connected PyTorch network
    (two hidden ReLU layers followed by a linear output layer).

    Args:
    input_dim: number of input features
    hidden_dim1: width of the first hidden layer
    hidden_dim2: width of the second hidden layer
    output_dim: number of outputs (one per class)
    learning_rate: learning rate passed to the Adam optimizer in `fit`
    epoch: number of passes over the training data in `fit`
    """
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim , learning_rate=0.0002 , epoch=15):
        self.input_dim = input_dim
        self.hidden_dim1 = hidden_dim1
        self.hidden_dim2 = hidden_dim2
        self.output_dim = output_dim
        self.best_accuracy = -1  # Initialize with a value that will definitely be improved upon
        # Snapshot of the weights that reached `best_accuracy`; filled in by fit()
        self.best_model_params = None
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.model = self.create_model()

    def create_model(self):
        """Build and return a new, untrained network from the configured layer sizes."""
        model = nn.Sequential(
            nn.Linear(self.input_dim, self.hidden_dim1),
            nn.ReLU(),
            nn.Linear(self.hidden_dim1, self.hidden_dim2),
            nn.ReLU(),
            nn.Linear(self.hidden_dim2, self.output_dim),
        )
        return model

    def fit(self, X_train, y_train):
        """
        Train the network with Adam and cross-entropy loss.

        Args:
        X_train: training features, one row per sample
        y_train: training labels, one per sample

        Returns:
        self, so calls can be chained in the scikit-learn style
        """
        # Encode labels as indices into the sorted unique classes. Unlike a
        # one-hot + argmax round trip, this also works for two-class data
        # (where LabelBinarizer yields a single column and argmax is always 0).
        self.classes_, y_indices = np.unique(np.asarray(y_train).ravel(), return_inverse=True)
        y_indices = y_indices.reshape(-1)

        X_train_tensor = torch.FloatTensor(X_train)
        y_train_tensor = torch.from_numpy(y_indices.astype(np.int64))

        # Mini-batches of 16, reshuffled every epoch
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)

        # Create a tqdm object
        progress_bar = tqdm(range(self.epoch), desc="Training", leave=False)

        for _ in progress_bar:
            total_loss = 0
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()
                outputs = self.model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            # No validation split is available here, so this is accuracy on the training data
            accuracy = accuracy_score(y_indices, self.predict(X_train))

            # Keep a copy of the best weights seen so far. The tensors are cloned
            # because state_dict() returns references that later optimizer steps overwrite.
            if accuracy > self.best_accuracy:
                self.best_accuracy = accuracy
                self.best_model_params = {
                    name: tensor.detach().clone()
                    for name, tensor in self.model.state_dict().items()
                }

            # Update the progress bar
            progress_bar.set_postfix({'Loss': f'{total_loss:.4f}', 'Accuracy': f'{accuracy:.4f}'})

        return self

    def predict(self, X):
        """
        Predict a class index for every row of X.

        Args:
        X: features, one row per sample

        Returns:
        NumPy array holding, for each row, the index of the largest network output
        """
        X_tensor = torch.FloatTensor(X)
        with torch.no_grad():
            predictions = self.model(X_tensor)
        _, predicted = torch.max(predictions, 1)
        return predicted.numpy()

    def save_best_model(self, filepath):
        """
        Save the best weights recorded by `fit` to `filepath`.

        When no snapshot has been recorded (for example `fit` was never called),
        the model's current weights are saved instead of raising AttributeError.
        """
        params = getattr(self, 'best_model_params', None)
        if params is None:
            params = self.model.state_dict()
        torch.save(params, filepath)

    def load_model(self, model_state_dict):
        """Load the given state dict into the underlying network."""
        self.model.load_state_dict(model_state_dict)


In [37]:
from skimage.feature import hog
def _extract_features(img):
    """Build the raw feature vector of a preprocessed image: HOG features followed by fixed-length SIFT descriptors."""
    hog_features = np.array(
        hog(img, orientations=16, pixels_per_cell=(32, 32), cells_per_block=(4, 4), block_norm='L2-Hys')
    )

    sift_obj = cv2.SIFT_create()
    _, des = sift_obj.detectAndCompute(img, None)

    # 128 values per SIFT descriptor, at most 350 descriptors kept.
    # Shorter results are zero-padded, longer ones are truncated; an image
    # with no keypoints (des is None) yields an all-zero SIFT part.
    sift_length = 128 * 350
    sift_features = np.zeros(sift_length)
    if des is not None:
        flat = des.flatten()[:sift_length]
        sift_features[:len(flat)] = flat

    return np.concatenate((hog_features, sift_features))


def _get_classifier(input_dim):
    """Return a PyTorchClassifier loaded with `model_state_dict`, rebuilt only when the weights object or the input width changes."""
    cached = getattr(_get_classifier, "_cached", None)
    # Identity check on the state dict: re-assigning `model_state_dict` invalidates the cache
    if cached is None or cached[0] is not model_state_dict or cached[1] != input_dim:
        classifier = PyTorchClassifier(input_dim=input_dim, hidden_dim1=512, hidden_dim2=256, output_dim=4)
        classifier.load_model(model_state_dict)
        cached = (model_state_dict, input_dim, classifier)
        _get_classifier._cached = cached
    return cached[2]


def forward(img):
    """
    Forward the image through the pipeline

    The image is preprocessed, turned into HOG + SIFT features, transformed by
    the loaded `pipeline`, and classified by the network whose weights are in
    `model_state_dict`.

    Args:
    img: the image

    Returns:
    pred: the prediction of the image (array holding one class index)
    """
    img = preprocess(img)
    # The pipeline expects a 2-D input, hence the single-row list
    features = pipeline.transform([_extract_features(img)])
    # Reuse one loaded network across calls instead of rebuilding it per image
    model = _get_classifier(features.shape[1])
    pred = model.predict(features)
    return pred

In [38]:
# Get a random batch of images

def get_random_batch(X_train, y_train, batch_size=200, replace=True):
    """
    Get a random batch of images

    Indices are drawn with np.random.choice, so the result follows NumPy's
    global random state (call np.random.seed beforehand for a reproducible
    batch).

    Args:
    X_train: the images
    y_train: the labels
    batch_size: the batch size
    replace: when True (the default) indices are drawn with replacement, so the
             same image can appear more than once in the batch; pass False for
             a batch of distinct images (batch_size must then not exceed
             len(X_train))

    Returns:
    X_batch: the batch of images
    y_batch: the batch of labels
    """
    indices = np.random.choice(len(X_train), batch_size, replace=replace)
    X_batch = [X_train[i] for i in indices]
    y_batch = [y_train[i] for i in indices]
    return X_batch, y_batch

In [39]:
# Draw one random batch (default batch_size of get_random_batch) from the loaded
# images and their numeric labels; the cells below evaluate and time this batch
X_batch, y_batch = get_random_batch(X_train, y_train_org)

In [40]:
# Run the full preprocessing + prediction path on every image of the batch and count hits and misses.
# NOTE(review): the batch is sampled from X_train; if the model was trained on these
# images this is training accuracy, not a held-out estimate - confirm.
wrong_predictions = 0
num_images = 0
correct_predictions = 0

# Loop through all the images
for i in tqdm(range(len(X_batch))):
    # forward() returns an array holding one class index; take the scalar
    # explicitly instead of relying on the truth value of an array comparison
    pred = forward(X_batch[i])
    predicted_label = int(pred[0])
    # Get the actual label of the image
    actual_label = y_batch[i]
    num_images += 1
    if predicted_label == actual_label:
        correct_predictions += 1
    else:
        wrong_predictions += 1

# Print the accuracy
print(f'Accuracy: {correct_predictions / num_images * 100}%')

# Print the number of wrong predictions
print(f'Number of wrong predictions: {wrong_predictions}')

# Print the number of correct predictions
print(f'Number of correct predictions: {correct_predictions}')

100%|██████████| 200/200 [02:13<00:00,  1.49it/s]

Accuracy: 100.0%
Number of wrong predictions: 0
Number of correct predictions: 200





In [41]:
# Measure the average wall-clock time of forward() (preprocessing + prediction) per image over X_batch
import time
times = []
for i in tqdm(range(len(X_batch))):
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump when the system clock is adjusted
    start = time.perf_counter()
    forward(X_batch[i])
    end = time.perf_counter()
    times.append(end - start)

# Calculate the average time
average_time = np.mean(times)
print(f'Average time taken to preprocess an image and make a prediction: {average_time} seconds')

100%|██████████| 200/200 [02:12<00:00,  1.51it/s]

Average time taken to preprocess an image and make a prediction: 0.6602267432212829 seconds



