In [None]:
import os
import numpy as np
import cv2
import preprocessing
import feature_extraction 
from sklearn.neighbors import KNeighborsClassifier
from joblib import dump, load  # For model persistence
import random

In [None]:
def load_images_from_folder(folder, num_images_per_class=50):
    """Load and preprocess a random sample of images from each class folder.

    Every sub-directory of ``folder`` is treated as one class. Up to
    ``num_images_per_class`` files are drawn from it without replacement,
    read with ``cv2.imread`` and passed through ``preprocessing.preprocess``.

    Args:
        folder: Path of the dataset root, containing one sub-directory per class.
        num_images_per_class: Maximum number of files to sample per class.

    Returns:
        dict mapping class name -> list of preprocessed images in sampled
        order. A class may hold fewer images than requested when the folder
        is small or some files cannot be decoded.
    """
    images = {}
    # os.listdir order is arbitrary; sorting keeps the sampling repeatable
    # when the caller seeds the `random` module.
    for class_name in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, class_name)
        # Stray files in the dataset root (e.g. .DS_Store, README) are not classes.
        if not os.path.isdir(class_path):
            continue

        # Only regular files are candidate images; nested directories are ignored.
        all_files = [
            filename
            for filename in sorted(os.listdir(class_path))
            if os.path.isfile(os.path.join(class_path, filename))
        ]
        # Choose random filenames from the list
        random_files = random.sample(all_files, min(num_images_per_class, len(all_files)))
        print(class_name)

        category_images = []
        for filename in random_files:
            image_path = os.path.join(class_path, filename)
            image = cv2.imread(image_path)
            # cv2.imread signals an unreadable / non-image file by returning None
            # instead of raising; skip it rather than feed None to the preprocessor.
            if image is None:
                print("Skipping unreadable image:", image_path)
                continue
            category_images.append(preprocessing.preprocess(image))

        images[class_name] = category_images

    return images

# Dataset configuration
data_dir = "fonts-dataset"
num_images_per_class = 50
num_test_images_per_class = 10
classes_dic = {"IBM Plex Sans Arabic": 0, "Lemonada": 1, "Marhey": 2, "Scheherazade New": 3}

# Seed the sampler so a rerun draws the same images (given the same directory listing order).
random.seed(42)

# Draw train and test images in ONE sampling pass and split the result, so no
# image can end up in both sets. (Two independent calls on the same folder can
# pick the same files.) random.sample returns items in selection order, so any
# slice of the result is itself a random sample.
sampled = load_images_from_folder(data_dir, num_images_per_class + num_test_images_per_class)
test = {name: imgs[:num_test_images_per_class] for name, imgs in sampled.items()}
images = {name: imgs[num_test_images_per_class:] for name, imgs in sampled.items()}
print("Loading data is done ........")

In [None]:
# Load kmeans centers
centers = feature_extraction.kmeans_centers()


def build_feature_set(images_by_class, centers):
    """Turn a {class_name: [image, ...]} dict into parallel feature/label lists.

    For every image, the feature is whatever
    ``feature_extraction.histogram_from_sift(image, centers)`` returns and the
    label is ``classes_dic[class_name]``.

    Args:
        images_by_class: dict mapping class name -> list of preprocessed images.
        centers: Cluster centers passed through to ``histogram_from_sift``.

    Returns:
        (features, labels): two lists of equal length, in dict/list iteration order.

    Raises:
        KeyError: if a class name that has images is missing from ``classes_dic``.
    """
    features = []
    labels = []
    for class_name, class_images in images_by_class.items():
        for image in class_images:
            features.append(feature_extraction.histogram_from_sift(image, centers))
            labels.append(classes_dic[class_name])
    return features, labels


# Create histograms for each class_name for each image in images
X_train, Y_train = build_feature_set(images, centers)

# Create histograms for each class_name for each image in test
X_test, Y_test = build_feature_set(test, centers)

In [None]:
# Step 1: Sanity-check the first training feature vector
print(X_train[0])

# Step 2: Train kNN Model
knn_model = KNeighborsClassifier(n_neighbors=5)  # Example: k=5
knn_model.fit(X_train, Y_train)

# Step 3: Save Trained Model
# dump() writes to the given path as-is, so make sure the target directory
# exists first; otherwise a fresh checkout fails here with FileNotFoundError.
os.makedirs('models', exist_ok=True)
dump(knn_model, 'models/knn_model.pkl')

In [None]:
# Step 4: Make Predictions on the test split
# Scoring on X_train would only measure how well kNN recalls the samples it
# was fitted on; X_test / Y_test were built for evaluation.
predicted_labels = knn_model.predict(X_test)

# Step 5: Calculate Accuracy (fraction of test samples predicted correctly)
accuracy = np.mean(predicted_labels == np.asarray(Y_test))
print("Accuracy:", accuracy)

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train SVM model
svm_model = SVC(kernel='rbf')  # You can adjust the kernel as needed
svm_model.fit(X_train, Y_train)

# Make predictions on test data (X_test, not the samples the model was fitted on)
svm_predictions = svm_model.predict(X_test)

# Calculate accuracy against the test labels
svm_accuracy = accuracy_score(Y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)

# Persist the fitted model; create the output directory if it is missing.
os.makedirs('models', exist_ok=True)
dump(svm_model, 'models/svm_model.pkl')

In [None]:
from sklearn.naive_bayes import GaussianNB

# Train Naive Bayes model
nb_model = GaussianNB()
nb_model.fit(X_train, Y_train)

# Make predictions on test data (X_test, not the samples the model was fitted on)
nb_predictions = nb_model.predict(X_test)

# Calculate accuracy against the test labels
nb_accuracy = accuracy_score(Y_test, nb_predictions)
print("Naive Bayes Accuracy:", nb_accuracy)

In [None]:
# Shallow neural network classifier (2 hidden layers)
from sklearn.neural_network import MLPClassifier
# 2 hidden layers with 100 and 50 neurons.
# random_state fixes weight initialisation and shuffling so reruns give the same model.
nn_model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)
nn_model.fit(X_train, Y_train)

# Save NN Model (create the output directory if it is missing)
os.makedirs('models', exist_ok=True)
dump(nn_model, 'models/nn_model.pkl')

# Load NN Model back to confirm the saved file round-trips.
# The file was written just above by this notebook; do not load() pickles
# from untrusted sources.
nn_model = load('models/nn_model.pkl')

# Predict Test Data (X_test, not the samples the model was fitted on)
Y_pred = nn_model.predict(X_test)

# Calculate Accuracy against the test labels and report it as a percentage
accuracy = accuracy_score(Y_test, Y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100))