In [1]:
import numpy as np
import pandas as pd
import cv2
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [2]:
# Location of the CSV index file for the dataset. Despite its name, this
# variable holds the path of a single CSV file, not a directory.
csv_directory = 'D:/Documents/Thesis/Code/DataSet/english.csv'

# Load the CSV index into a DataFrame; the 'image' and 'label' columns are
# read from it further down.
data = pd.read_csv(filepath_or_buffer=csv_directory)

In [3]:
# Pull the image paths and their labels out of the DataFrame as NumPy arrays.
# Series.to_numpy() is used rather than .values because it always returns a
# real ndarray, whereas .values may return a pandas ExtensionArray depending
# on the dtype pandas inferred for the column.
image_paths = data['image'].to_numpy()
labels = data['label'].to_numpy()

In [4]:
# Build a lookup table from every distinct raw label to a class index.
# np.unique returns the distinct values in sorted order, so the indices
# follow that ordering.
unique_labels = np.unique(labels)
label_mapping = dict(zip(unique_labels, range(len(unique_labels))))


In [5]:
# Translate each raw label to its class index, keeping the image path of
# every sample that could be translated. Samples whose label is missing from
# label_mapping are reported and left out of both lists.
clean_labels = []
clean_image_paths = []
for i, label in enumerate(labels):
    if label not in label_mapping:
        print(f"Ignoring sample: {image_paths[i]} with label: {label}")
        continue
    clean_labels.append(label_mapping[label])
    clean_image_paths.append(image_paths[i])


In [6]:
# Turn the cleaned Python lists into NumPy arrays. From here on `labels`
# holds the integer class indices rather than the raw label values.
labels = np.asarray(clean_labels)
image_paths = np.asarray(clean_image_paths)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split the same from run to run.
(train_paths, test_paths,
 train_labels, test_labels) = train_test_split(
    image_paths,
    labels,
    test_size=0.2,
    random_state=42,
)

# Accumulator for the preprocessed training images
train_images = []

In [7]:
# Load and preprocess the training images.
#
# Images that cannot be read are skipped, so the position of every image that
# IS kept is recorded and used after the loop to drop the matching entries
# from train_paths and train_labels. Without that step a single skipped file
# leaves train_images shorter than train_labels and pairs every later image
# with the wrong label.
train_images = []  # reset here so re-running this cell does not append twice
kept_train_indices = []
for index, image_path in enumerate(train_paths):
    # Read the image; a failed read is detected by the None check below
    image = cv2.imread(image_path)

    # Validate the image
    if image is None:
        print(f"Unable to read image: {image_path}")
        continue

    # Resize the image to the fixed 32x32 size the model's input_shape expects
    image = cv2.resize(image, (32, 32))

    # Validate the image size
    if image.size == 0:
        print(f"Empty image: {image_path}")
        continue

    # Normalize the pixel values to [0, 1]
    image = image / 255.0

    train_images.append(image)
    kept_train_indices.append(index)

# Keep only the paths and labels whose image was actually loaded. An explicit
# integer dtype keeps the indexing valid even when nothing was loaded.
kept_train_indices = np.array(kept_train_indices, dtype=np.intp)
train_paths = np.asarray(train_paths)[kept_train_indices]
train_labels = np.asarray(train_labels)[kept_train_indices]

In [8]:
# Combine the list of preprocessed training images into one NumPy array
train_images = np.asarray(train_images)


In [9]:
# Load and preprocess the testing images.
#
# Images that cannot be read are skipped, so the position of every image that
# IS kept is recorded and used after the loop to drop the matching entries
# from test_paths and test_labels. Without that step a single skipped file
# leaves test_images shorter than test_labels and pairs every later image
# with the wrong label.
test_images = []
kept_test_indices = []
for index, image_path in enumerate(test_paths):
    # Read the image; a failed read is detected by the None check below
    image = cv2.imread(image_path)

    # Validate the image
    if image is None:
        print(f"Unable to read image: {image_path}")
        continue

    # Resize the image to the fixed 32x32 size the model's input_shape expects
    image = cv2.resize(image, (32, 32))

    # Validate the image size
    if image.size == 0:
        print(f"Empty image: {image_path}")
        continue

    # Normalize the pixel values to [0, 1]
    image = image / 255.0

    test_images.append(image)
    kept_test_indices.append(index)

# Keep only the paths and labels whose image was actually loaded. An explicit
# integer dtype keeps the indexing valid even when nothing was loaded.
kept_test_indices = np.array(kept_test_indices, dtype=np.intp)
test_paths = np.asarray(test_paths)[kept_test_indices]
test_labels = np.asarray(test_labels)[kept_test_indices]

In [10]:
# Combine the list of preprocessed testing images into one NumPy array
test_images = np.asarray(test_images)

In [11]:
# Number of classes = number of distinct values in `labels`
num_classes = np.unique(labels).size


In [12]:
# Convert the integer class labels to one-hot encoded vectors.
#
# The result is written back to the same names, so running this cell a second
# time would one-hot encode the already-encoded arrays and add a third
# dimension. Encoding is therefore applied only while the labels are still
# one-dimensional, which makes the cell safe to re-run.
if np.ndim(train_labels) == 1:
    train_labels = to_categorical(train_labels, num_classes)
if np.ndim(test_labels) == 1:
    test_labels = to_categorical(test_labels, num_classes)


In [13]:
# Build the model: two convolution + max-pooling stages, then a flattened
# dense layer and a softmax output with one unit per class
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])


In [14]:
# Configure training: categorical cross-entropy matches the one-hot labels
# and softmax output; accuracy is tracked as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [15]:
# Train for 10 epochs in mini-batches of 32. The test split is passed as
# validation data, so its loss and metrics are computed after every epoch.
model.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=10,
    validation_data=(test_images, test_labels),
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1632471c040>

In [16]:
# Score the trained model on the test split. The model was compiled with the
# single 'accuracy' metric, so the result unpacks into loss and accuracy.
loss, accuracy = model.evaluate(x=test_images, y=test_labels)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

Test Loss: 1.7094470262527466
Test Accuracy: 0.6348973512649536


In [17]:
# Run the model on the test images to get its output for every sample
predictions = model.predict(x=test_images)



In [18]:
# Reduce each row of softmax outputs to the index of its largest entry
predicted_labels = predictions.argmax(axis=1)

# Recover the class index from each one-hot encoded test label
actual_labels = test_labels.argmax(axis=1)


In [19]:
# Print the predicted class index next to the actual class index, one line
# per test sample
for i, predicted_label in enumerate(predicted_labels):
    actual_label = actual_labels[i]
    print(f"Predicted: {predicted_label}, Actual: {actual_label}")

Predicted: 53, Actual: 57
Predicted: 22, Actual: 32
Predicted: 45, Actual: 3
Predicted: 38, Actual: 12
Predicted: 0, Actual: 50
Predicted: 55, Actual: 55
Predicted: 32, Actual: 32
Predicted: 26, Actual: 26
Predicted: 51, Actual: 41
Predicted: 26, Actual: 26
Predicted: 53, Actual: 29
Predicted: 40, Actual: 59
Predicted: 54, Actual: 3
Predicted: 46, Actual: 20
Predicted: 18, Actual: 1
Predicted: 54, Actual: 54
Predicted: 32, Actual: 32
Predicted: 0, Actual: 24
Predicted: 17, Actual: 17
Predicted: 29, Actual: 29
Predicted: 48, Actual: 48
Predicted: 54, Actual: 28
Predicted: 48, Actual: 48
Predicted: 48, Actual: 48
Predicted: 0, Actual: 0
Predicted: 12, Actual: 16
Predicted: 39, Actual: 41
Predicted: 53, Actual: 42
Predicted: 16, Actual: 16
Predicted: 61, Actual: 59
Predicted: 47, Actual: 18
Predicted: 7, Actual: 19
Predicted: 24, Actual: 50
Predicted: 35, Actual: 2
Predicted: 50, Actual: 50
Predicted: 0, Actual: 0
Predicted: 7, Actual: 7
Predicted: 53, Actual: 53
Predicted: 59, Actual: 59