# Libraries

In [3]:
## Directories
import random
import shutil
##
import pandas as pd
import numpy as np 
import os
## Graphs
import matplotlib.pyplot as plt
import seaborn as sns
# TensorFlow
import tensorflow as tf
from tensorflow.keras import models, layers
# Keras
from keras.callbacks import EarlyStopping
## from keras.applications.vgg16 import preprocess_input, decode_predictions
# Image processing
from keras.preprocessing import image
# Metrics
from sklearn.metrics import classification_report, confusion_matrix
# Time
import time

## Import Functions

%run Functions.ipynb

## Move photos to Train, Validation and Test

### Define directories

In [4]:
# Root folder that holds one sub-folder of images per letter.
# Built with os.path.join so the string needs no backslash escape and the
# separator always matches the host operating system.
main_directory = os.path.join("Esquerda", "Images_Folder")

# Class labels: the 26 letters of the alphabet (one sub-folder each)
letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
           'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

# The data set is divided in 3 splits: Train, Validation, and Test
datasets = ['Train', 'Test', 'Validation']

# Lookup from a predicted class index (0 to 25) to its letter.
# Derived from `letters` so the two can never drift apart.
mapping_dict = dict(enumerate(letters))

### 70% - Train, 15% - Validation, 15% - Test

In [5]:
# Fraction of each letter's images assigned to each split:
# 70% training, 15% validation, 15% test.
train_percentage, validation_percentage, test_percentage = 0.7, 0.15, 0.15

### Distribution

#### Make directories

In [10]:
# Make sure every <split>/<letter> folder exists under main_directory.
# exist_ok=True makes the cell safe to re-run.
split_letter_pairs = ((dataset, letter) for dataset in datasets for letter in letters)
for dataset, letter in split_letter_pairs:
    destination_dir = os.path.join(main_directory, dataset, letter)
    os.makedirs(destination_dir, exist_ok=True)

#### Image Distribution

In [12]:
# Split every letter's images into the Train / Validation / Test folders.
#
# One pass over the letters is enough: once a letter's files have been moved,
# its source folder is empty, so repeating the work once per dataset would
# only process empty folders.
test_letter_count = []   # test-set size per letter, in the same order as `letters`
total_letter_count = 0   # number of images found across all letters

# Dedicated, seeded generator so the split is reproducible between runs
split_rng = random.Random(42)

for letter in letters:

    # Source directory for this letter
    source_dir = os.path.join(main_directory, letter)

    # os.listdir returns entries in arbitrary order; sort first so the seeded
    # shuffle below always starts from the same sequence
    files = sorted(os.listdir(source_dir))
    num_files = len(files)

    # Train and test sizes are rounded down; validation takes the remainder,
    # so the three counts always add up to num_files
    train_count = int(num_files * train_percentage)
    test_count = int(num_files * test_percentage)
    validation_count = num_files - train_count - test_count
    total_letter_count += num_files

    split_rng.shuffle(files)

    # First train_count files -> Train, next validation_count -> Validation,
    # everything left (test_count files) -> Test
    for i, file in enumerate(files):
        if i < train_count:
            split_name = 'Train'
        elif i < train_count + validation_count:
            split_name = 'Validation'
        else:
            split_name = 'Test'
        dest = os.path.join(main_directory, split_name, letter, file)
        shutil.move(os.path.join(source_dir, file), dest)

    test_letter_count.append(test_count)
    print('------ LETTER', letter, '------')
    print('Total Number:',num_files, 'Train count:', train_count, 'Validation count:', validation_count, 'Test count:', test_count)

------ LETTER A ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER B ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER C ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER D ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER E ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER F ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER G ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER H ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER I ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER J ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER K ------
Total Number: 0 Train count: 0 Validation count: 0 Test count: 0
------ LETTER L ------
Total Num

### Total number of photos

In [None]:
# Display the total number of image files counted by the distribution cell
total_letter_count

# Augmentation

In [None]:
# Augmentation settings applied on the fly to the training images only.
augmentation_options = {
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    # Shear ("lay down") transform. TODO from the original author: write up this parameter.
    'shear_range': 0.2,
    'zoom_range': 0.2,
    # Strategy used for filling in newly created pixels, which can appear
    # after a rotation or a width/height shift.
    'fill_mode': 'nearest',
    # The original images hold RGB coefficients in the 0-255 range; such values
    # would be too high for the model to process (given a typical learning
    # rate), so they are scaled by 1/255 to fall between 0 and 1.
    'rescale': 1./255,
    # horizontal_flip is not enabled.
}
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

### Train

In [None]:
# Stream augmented training batches from the Train split.
# The folder is derived from main_directory rather than hard-coded, so it
# cannot drift from the location the distribution step moves the images into.
train_generator = train_datagen.flow_from_directory(
    directory=os.path.join(main_directory, "Train"),
    target_size=(150, 150),     # must match the model's input_shape
    batch_size=20,
    class_mode="categorical",   # one-hot encoded labels, one column per class folder
    shuffle=True,
    seed=42,
)

In [None]:
# count_images_generator is not defined in this section (presumably loaded by
# `%run Functions.ipynb`); presumably it reports how many images the training
# generator found — TODO confirm.
count_images_generator(train_generator)

In [None]:
# Sanity check: pull one augmented batch (images and labels) and show its second image
images_train, labels_traind = next(train_generator)
train_preview = image.array_to_img(images_train[1])
plt.imshow(train_preview)

### Validate

In [None]:
# Validation and test images get no augmentation, only the same
# 1/255 pixel rescaling that is applied to the training images.
pixel_rescale = 1./255
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=pixel_rescale)

In [None]:
# Stream validation batches from the Validation split.
# The folder is derived from main_directory rather than hard-coded, so it
# cannot drift from the location the distribution step moves the images into.
validation_generator = test_datagen.flow_from_directory(
    directory=os.path.join(main_directory, "Validation"),
    target_size=(150, 150),     # must match the model's input_shape
    batch_size=20,
    class_mode="categorical",   # one-hot encoded labels, one column per class folder
    shuffle=False,              # keep a fixed order for evaluation
    seed=42,
)

In [None]:
# count_images_generator is not defined in this section (presumably loaded by
# `%run Functions.ipynb`); presumably it reports how many images the validation
# generator found — TODO confirm.
count_images_generator(validation_generator)

# Modeling

In [None]:
# One output class per entry in `letters`; used as the size of the model's softmax layer
num_of_classes = len(letters)
num_of_classes

In [None]:
# Convolutional classifier: three Conv2D + MaxPooling2D stages that extract
# image features, followed by a dense head with one softmax output per class.
model = models.Sequential([
    # Feature extraction on 150x150 RGB inputs
    layers.Conv2D(32, (3, 3), input_shape=(150, 150, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    # Fully connected layers
    layers.Dense(512, activation='relu'),
    layers.Dense(num_of_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels produced by the generators
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])


#### Since our data set is small

In [None]:
# len() of a generator is its number of batches, i.e. the steps needed
# to go through the whole split once.
num_train_spets, num_val_spets = len(train_generator), len(validation_generator)

for split_label, step_count in (('Train', num_train_spets), ('Validation', num_val_spets)):
    print('Number of ' + split_label + ' steps:',  step_count)

Why use `fit` and not `fit_generator`? `fit_generator` is deprecated in TensorFlow 2.x; `Model.fit` accepts generators directly.

In [None]:
# Start of the training timer (tic/toc bracket the whole fit call)
tic = time.perf_counter()

# Early stopping: halt once the validation loss has not improved for 5 epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model keeps the weights of the last epoch, i.e. 5 epochs past the best one.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=5,
    restore_best_weights=True,
)

# epochs=100 is only an upper bound; early stopping normally ends training sooner
history = model.fit(
    train_generator,
    steps_per_epoch=num_train_spets,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=num_val_spets,
    callbacks=[es],
)

# End of the training timer
toc = time.perf_counter()

# Prediction

In [None]:
# Stream test batches from the Test split.
# The folder is derived from main_directory rather than hard-coded, so it
# cannot drift from the location the distribution step moves the images into.
test_generator = test_datagen.flow_from_directory(
    directory=os.path.join(main_directory, "Test"),
    target_size=(150, 150),     # must match the model's input_shape
    batch_size=20,
    class_mode="categorical",   # one-hot encoded labels, one column per class folder
    shuffle=False,              # fixed order so predictions line up with the true labels
    seed=42,
)

In [None]:
# count_images_generator is not defined in this section (presumably loaded by
# `%run Functions.ipynb`); presumably it reports how many images the test
# generator found — TODO confirm.
count_images_generator(test_generator)

In [None]:
# Sanity check: pull one test batch (images and labels) and show its tenth image
images_pred, labels_pred = next(test_generator)
test_preview = image.array_to_img(images_pred[9])
plt.imshow(test_preview)

In [None]:
# Run the model on the test generator, take the index of the highest score
# along the last axis for each prediction, then translate each index to its letter.
pred = model.predict(test_generator)
results = pred.argmax(axis=-1)
mapped_results = list(map(mapping_dict.get, results))

In [None]:
# True letter of every test image, in the order the generator feeds the images
# to model.predict (the test generator is created with shuffle=False).
# The labels come from the generator itself rather than from counts recorded by
# the distribution cell, so they stay correct when that cell was not run in
# this session or found no files to move.
# Each class index goes through mapping_dict, the same lookup used for the
# predictions, so both sides are compared on an identical index -> letter basis.
expected = [mapping_dict[int(class_index)] for class_index in test_generator.classes]

## Classification Report

In [None]:
# Per-letter precision / recall / F1 on the test set.
# labels=letters pins the full class list, so target_names always lines up
# even when a letter appears in neither the expected nor the predicted labels.
print(classification_report(expected, mapped_results, labels=letters, target_names=letters))

## Confusion Matrix

In [None]:
def draw_confusion_matrix(true, preds):
    """Draw the confusion matrix of true vs. predicted labels as a heatmap.

    Args:
        true: Sequence of ground-truth labels (values taken from ``letters``).
        preds: Sequence of predicted labels, aligned one-to-one with ``true``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # labels=letters forces one row/column per letter, in that order, so the
    # matrix always matches the tick labels even if some letter never occurs
    conf_matx = confusion_matrix(true, preds, labels=letters)

    # 26 x 26 annotated cells need more room than the default figure size
    plt.figure(figsize=(14, 12))
    ax = sns.heatmap(
        conf_matx,
        annot=True,
        annot_kws={"size": 12},
        fmt='g',
        cbar=False,
        cmap="viridis",
        xticklabels=letters,
        yticklabels=letters,
    )
    # confusion_matrix puts true labels on rows and predictions on columns
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Expected')
    plt.show()

In [None]:
# Plot the confusion matrix of expected vs. predicted letters
draw_confusion_matrix(expected, mapped_results)

## Prediction Analysis

In [None]:
# Side-by-side table of the expected and predicted letter for each test image:
# the two lists become the two columns of an (n, 2) array.
array_prediction = np.column_stack([expected, mapped_results])
prediction_columns = ['Expected', 'Predicted']
df_prediction = pd.DataFrame(data=array_prediction, columns=prediction_columns)
df_prediction

# Save log
May not save the model due to its size

In [None]:
# Note from the author: the model may not be saved due to its size.
# save_model_log is not defined in this section (presumably loaded by
# `%run Functions.ipynb`); presumably it records a log of this run — TODO confirm.
save_model_log()

In [None]:
# save_model is not defined in this section (presumably loaded by
# `%run Functions.ipynb`); presumably it persists the trained model — TODO confirm.
save_model()

# Move back

In [13]:
# Undo the split: return every file from the Train / Test / Validation folders
# to its letter's original folder directly under main_directory.
for letter in letters:
    original_dir = os.path.join(main_directory, letter)
    for dataset in datasets:
        split_dir = os.path.join(main_directory, dataset, letter)
        for file_name in os.listdir(split_dir):
            shutil.move(
                os.path.join(split_dir, file_name),
                os.path.join(original_dir, file_name),
            )