In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
import shutil
import random
import pathlib
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator

We'll create a function that will:

List all the class folders and Create 'train' and 'test' directories

For each class folder:
- List all files
- Shuffle the files
- Split the files into train and test sets (e.g., 80% train, 20% test)
- Copy the files to the respective train and test directories, maintaining the class structure

In [3]:
def split_data(source_dir, train_dir, test_dir, split_ratio=0.8):
    # Create train and test directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Get all class folders
    class_folders = [f for f in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, f))]

    for class_folder in class_folders:
        # Create class folders in train and test directories
        os.makedirs(os.path.join(train_dir, class_folder), exist_ok=True)
        os.makedirs(os.path.join(test_dir, class_folder), exist_ok=True)

        # Get all files in the class folder
        class_path = os.path.join(source_dir, class_folder)
        files = [f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]

        # Shuffle the files
        random.shuffle(files)

        # Split the files
        split_index = int(len(files) * split_ratio)
        train_files = files[:split_index]
        test_files = files[split_index:]

        # Copy files to train directory
        for file in train_files:
            src = os.path.join(class_path, file)
            dst = os.path.join(train_dir, class_folder, file)
            shutil.copy2(src, dst)

        # Copy files to test directory
        for file in test_files:
            src = os.path.join(class_path, file)
            dst = os.path.join(test_dir, class_folder, file)
            shutil.copy2(src, dst)

    print("Data split complete!")

In [4]:
source_directory = "/content/drive/MyDrive/AMHCD_64"
train_directory = "/content/drive/MyDrive/AMHCD_64/training"
test_directory = "/content/drive/MyDrive/AMHCD_64/testing"

split_data(source_directory, train_directory, test_directory)

Data split complete!


In [6]:
def remove_folder(path):
    # Check if the folder exists
    if os.path.exists(path):
        # Remove the folder and all its contents
        shutil.rmtree(path)
        print(f"Folder '{path}' has been removed successfully.")
    else:
        print(f"Folder '{path}' does not exist.")

# Removing the ipynb checkpoint folder

remove_folder('/content/drive/MyDrive/AMHCD_64/training/test')
remove_folder('/content/drive/MyDrive/AMHCD_64/testing/test')
remove_folder('/content/drive/MyDrive/AMHCD_64/training/testing')
remove_folder('/content/drive/MyDrive/AMHCD_64/testing/testing')
remove_folder('/content/drive/MyDrive/AMHCD_64/training/train')
remove_folder('/content/drive/MyDrive/AMHCD_64/testing/train')
remove_folder('/content/drive/MyDrive/AMHCD_64/training/training')
remove_folder('/content/drive/MyDrive/AMHCD_64/testing/training')


Folder '/content/drive/MyDrive/AMHCD_64/training/test' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/testing/test' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/training/testing' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/testing/testing' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/training/train' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/testing/train' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/training/training' has been removed successfully.
Folder '/content/drive/MyDrive/AMHCD_64/testing/training' has been removed successfully.


In [7]:
# Check all the classes

data_dir = pathlib.Path('/content/drive/MyDrive/AMHCD_64/training')
class_names = np.array(sorted([item.name for item in data_dir.glob('*')]))
print(class_names)

['ya' 'yab' 'yach' 'yad' 'yadd' 'yae' 'yaf' 'yag' 'yagh' 'yagw' 'yah'
 'yahh' 'yaj' 'yak' 'yakw' 'yal' 'yam' 'yan' 'yaq' 'yar' 'yarr' 'yas'
 'yass' 'yat' 'yatt' 'yaw' 'yax' 'yay' 'yaz' 'yazz' 'yey' 'yi' 'yu']


In [8]:
# Create augmented data generator instance

train_datagen_augmented = ImageDataGenerator(rescale=1/255.,
                                             rotation_range=20,
                                             width_shift_range=0.2,
                                             height_shift_range=0.2,
                                             zoom_range=0.2,
                                             horizontal_flip=True)

# Create batches of augmented data for training

train_data_augmented = train_datagen_augmented.flow_from_directory(train_directory,
                                                                  target_size=(224, 224),
                                                                  batch_size=32,
                                                                  class_mode='categorical')

# Create batches of augmented data for testing

test_datagen = ImageDataGenerator(rescale=1/255.)
test_data = test_datagen.flow_from_directory(test_directory,
                                              target_size=(224, 224),
                                              batch_size=32,
                                              class_mode='categorical')

Found 20608 images belonging to 33 classes.
Found 5152 images belonging to 33 classes.


In [9]:
# Create our first model

model_1 = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(10, 3, activation='relu', input_shape=(224, 224, 3)),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.Conv2D(10, 3, activation='relu'),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(33, activation='softmax')
])

model_1.compile(loss = tf.keras.losses.CategoricalCrossentropy(),
               optimizer = tf.keras.optimizers.Adam(),
               metrics = ['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Checking the model

model_1.summary()

In [11]:
# We fit our model

history_1 = model_1.fit(train_data_augmented,
                        epochs=10,
                        steps_per_epoch=len(train_data_augmented))

Epoch 1/10


  self._warn_if_super_not_called()


[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 496ms/step - accuracy: 0.1471 - loss: 3.1977
Epoch 2/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m329s[0m 505ms/step - accuracy: 0.3903 - loss: 2.2205
Epoch 4/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 497ms/step - accuracy: 0.5455 - loss: 1.6287
Epoch 6/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 489ms/step - accuracy: 0.5953 - loss: 1.4159
Epoch 8/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m317s[0m 480ms/step - accuracy: 0.6370 - loss: 1.2708
Epoch 10/10
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10us/step - accuracy: 0.0000e+00 - loss: 0.0000e+0

We have an error we will need to investigate: One of two epoches is skipped

It's not a concerning error, we managed to train, but it's something in the design of the code

In [24]:
test_results = model_1.evaluate(test_data)

[1m161/161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 138ms/step - accuracy: 0.9284 - loss: 0.2911


92% accuracy on the test test, very good !