In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, classification_report

# Load the images and labels
images_file = 'images.csv'
labels_file = 'labels.csv'

x = pd.read_csv(images_file, header=None).values
y = pd.read_csv(labels_file, header=None).values.flatten()  # Flatten to 1D array

# Adjust labels if needed (ensure labels are in range [0, 27])
if np.max(y) >= 28:
    y -= 1  

# EDA: Visualize the distribution of the labels
plt.figure(figsize=(10, 6))
sns.countplot(y)
plt.title('Distribution of Labels')
plt.xlabel('Label')
plt.ylabel('Count')
plt.show()

# EDA: Display a few sample images from each class
plt.figure(figsize=(12, 12))
for i in range(0, 28):
    plt.subplot(7, 4, i + 1)
    sample_image = x[y == i][0].reshape(32, 32)
    plt.imshow(sample_image, cmap='gray')
    plt.title(f'Label: {i}')
    plt.axis('off')
plt.tight_layout()
plt.show()

# Normalize images to [0, 1]
x = x / 255.0

# Reshape images to 32x32x1 (grayscale)
x = x.reshape(-1, 32, 32, 1)

# Split data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False,
    fill_mode='nearest'
)
datagen.fit(x_train)

# Start Building the model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(28, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('arabic_letters_model.keras', save_best_only=True)
]

# Train the model
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=100,  # Adjust number of epochs as needed
    validation_data=(x_test, y_test),
    callbacks=callbacks
)

# Load the best model
model = tf.keras.models.load_model('arabic_letters_model.keras')

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

# Generate predictions and create the confusion matrix
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Create confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# Optionally, print a classification report for more detailed metrics
print(classification_report(y_test, y_pred_classes))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Loss: 0.1817522644996643
Accuracy: 0.9430803656578064
