<h1> Choose an image to test the model </h1>

In [6]:

import json
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from PIL import Image
import pandas as pd
import numpy as np
from tkinter import filedialog
from tkinter import Tk
import os

# Paths of the trained .keras models that the prediction loop further down
# iterates over. Commented-out entries are alternative models that are
# currently disabled; only the uncommented path is evaluated.
models_paths = [
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\16\model img_size_16.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\32\model img_size_32.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\64\model img_size_64.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\128\model img_size_128.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\256\model img_size_256.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\1800 dataset\300\model img_size_300.keras",

    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\16\model img_size_16.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\32\model img_size_32.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\64\model img_size_64.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\128\model img_size_128.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\256\model img_size_256.keras",
    # r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\2500 dataset\300\model img_size_300.keras",
                r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\visual clustering\baa taa thaa\model_5616_images_img_size_128.keras"
                ]

# Load the class labels
# model_info is a JSON file stored next to the model; the same path is read
# again later in this cell to obtain the 'image_size' entry.
model_info = r"C:\Users\Dell\Desktop\Bachelor\image-classification\misc\runned\visual clustering\baa taa thaa\model_info.json"
with open(model_info, 'r', encoding='utf-8') as f:
    # this line was to create the json : json.dump({'index_to_label': index_to_label, 'model_name': model_name}, f, ensure_ascii=False, indent=4)
    # we only want the index_to_label
    # class_labels is later indexed with str(class_index), i.e. its keys are
    # the class indices as strings and its values are the label names.
    class_labels = json.load(f)['index_to_label']

# Open a file chooser and get the image path
print("Please select an image to classify...")
root = Tk()
try:
    root.attributes('-topmost', True) # bring the window to the front
    root.withdraw() # we don't want a full GUI, so keep the root window from appearing
    root.update()
    image_path = filedialog.askopenfilename() # show an "Open" dialog box and return the path to the selected file
finally:
    # Always tear the hidden Tk root down, even when the dialog raised, so no
    # orphaned Tk instance is left behind in the kernel.
    root.destroy()

# The dialog yields an empty value when it is dismissed without a selection;
# stop here with a clear message instead of failing later on an empty path.
if not image_path:
    raise FileNotFoundError("No image was selected in the file dialog.")

# Preview the chosen image in the system viewer using Pillow. The context
# manager closes the underlying file handle once the preview has been started;
# the prediction code reloads the image from image_path on its own.
with Image.open(image_path) as img:
    img.show()

# One header per model path: "<grandparent folder> , <parent folder>" of the
# model file (paths are split on backslashes, i.e. Windows-style paths).
table_headers = [model_path.split("\\")[-3] + " , " + model_path.split("\\")[-2] for model_path in models_paths]

# label -> {column name -> probability}; turned into the results table below.
predictions_dict = {label: {} for label in class_labels.values()}
printed_text = ""

# NOTE(review): the training-set size is a fixed label and is not derived from
# the model being loaded -- confirm it matches the models listed in models_paths.
trained_data_size = 1800

# The image size comes from the single model_info file and is the same for
# every model, so it is read once instead of once per loop iteration.
# NOTE(review): the dump call quoted next to the label loading only writes
# 'index_to_label' and 'model_name' -- confirm the JSON also has 'image_size'.
with open(model_info, 'r', encoding='utf-8') as f:
    img_size = int(json.load(f)['image_size'])
current_model_name = str(trained_data_size) + " ," + str(img_size)

# Column names already written to predictions_dict. Every model shares
# current_model_name, so without this a second model would silently overwrite
# the probabilities of the first one.
used_column_names = set()

# Use the model to predict the class of the image
for model_path, table_header in zip(models_paths, table_headers):
    print("\nloading model that was trained on dataset : ", str(trained_data_size) , ", with image size : ", str(img_size) , " ...")

    model = tf.keras.models.load_model(model_path)
    img = image.load_img(image_path, target_size=(img_size, img_size))
    # Preprocess the image: array -> batch of one -> floats scaled by 1/255
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = img.astype('float32')/255

    model_prediction = model.predict(img)
    # The batch holds exactly one image, so row 0 is its probability vector.
    class_probabilities = model_prediction[0]
    highest_probability_class_index = np.argmax(class_probabilities)
    sorted_predictions = sorted(enumerate(class_probabilities), key=lambda x: x[1], reverse=True)

    # Keep the original column name for the first model; later models that
    # would collide get the folder-based header appended to stay distinct.
    column_name = current_model_name
    if column_name in used_column_names:
        column_name = f"{current_model_name} ({table_header})"
    used_column_names.add(column_name)

    printed_text += f"Predictions for model trained on dataset of size {trained_data_size} and image size {img_size}:\n"
    for i, probability in sorted_predictions:
        entity_name = class_labels[str(i)]
        predictions_dict[entity_name][column_name] = probability
        printed_text += f'({entity_name}) :  {probability:.4f}'
        if i == highest_probability_class_index:
            # Arrow marks the most probable class in the text report.
            printed_text += ' <---------------------------------------------------'
        printed_text += '\n'

# Rows are labels, columns are models; rendered as a colour-graded table.
predictions_df = pd.DataFrame(predictions_dict).transpose()
predictions_table = predictions_df.style.background_gradient(cmap='tab20_r').set_table_styles(
    [dict(selector="th", props=[("text-align", "center")])]
)
display(predictions_table)
print(printed_text)

Please select an image to classify...


TypeError: can only concatenate str (not "int") to str

<h1> Choose a Dataset to test the model </h1>

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score
import pandas as pd
import shutil
import os
import numpy as np

def copy_models_to_kaggle(model_paths, dest_dir='/kaggle/working/models'):
    """Copy model files into a writable directory under a flat, descriptive name.

    Each source path is split on "/" and two of its components are used to
    build the new file name: component 3 (e.g. "1800-model") and component 5
    (e.g. "16"), giving "<component 3>_dataset_imgsize_<component 5>_model.keras".

    Args:
        model_paths: iterable of "/"-separated model file paths with at least
            six components.
        dest_dir: directory that receives the copies; created if missing.
            Defaults to the Kaggle working directory used by this notebook.

    Returns:
        List of the new file paths, in the same order as ``model_paths``.

    Raises:
        IndexError: if a path has fewer than six "/"-separated components.
        OSError: if a file cannot be copied.
    """
    os.makedirs(dest_dir, exist_ok=True)
    # Join with a literal "/" (not os.path.join) so the returned paths keep the
    # exact "/"-separated shape that callers split on afterwards.
    dest_prefix = dest_dir.rstrip("/") + "/"
    new_paths = []
    for model_path in model_paths:
        path_parts = model_path.split("/")
        model_trained_data_size = path_parts[3]
        model_trained_img_size = path_parts[5]
        new_model_path = dest_prefix + model_trained_data_size + "_dataset" + "_imgsize_" + model_trained_img_size + "_model.keras"
        new_paths.append(new_model_path)
        if os.path.exists(new_model_path):
            # raise Exception(f"model already exists at {new_model_path}")
            # Only a notice: the existing copy is overwritten by the copy below.
            print(f"model already exists at {new_model_path}")
        shutil.copy(model_path, new_model_path)
    return new_paths
 

# Number of images per batch produced by the test generators below.
batch_size = 32
# Directories that are each evaluated against every model; they are passed to
# flow_from_directory further down.
test_dataset_paths = [
    '/kaggle/input/sample-1800',
    '/kaggle/input/sample-2500',
    '/kaggle/input/clean-2500-letters-only',
    '/kaggle/input/clean-1800-letters-only'
]
# Model files as mounted under /kaggle/input. copy_models_to_kaggle reads
# path component 3 (e.g. "1800-model") and component 5 (e.g. "16") from these
# paths, so their layout must stay in this shape.
models_paths = [
        '/kaggle/input/1800-model/keras/16/1/model img_size_16.keras',
        '/kaggle/input/1800-model/keras/32/1/model img_size_32.keras',
        '/kaggle/input/1800-model/keras/64/1/model img_size_64.keras',
        '/kaggle/input/1800-model/keras/128/1/model img_size_128.keras',
        '/kaggle/input/1800-model/keras/256/1/model img_size_256.keras',
        '/kaggle/input/1800-model/keras/300/1/model img_size_300.keras',

        '/kaggle/input/2500-model/keras/16/1/model img_size_16.keras',
        '/kaggle/input/2500-model/keras/32/1/model img_size_32.keras',
        '/kaggle/input/2500-model/keras/64/1/model img_size_64.keras',
        '/kaggle/input/2500-model/keras/128/1/model img_size_128.keras',
        '/kaggle/input/2500-model/keras/256/1/model img_size_256.keras',
        '/kaggle/input/2500-model/keras/300/1/model img_size_300.keras'
]
print("copying models to kaggle to be able to work with WRITE ACESS ...")
copied_models_paths = copy_models_to_kaggle(models_paths)
print(f"loading models in RAM...")
# Load every model once up front, keyed by its copied path, so the evaluation
# loops below can look models up instead of reloading them per dataset.
model_path_to_model = {}
for model_path in copied_models_paths:
    print(f"loading model in : {model_path} ...")
    model = load_model(model_path)
    model_path_to_model[model_path] = model

# dataset name -> {model name -> accuracy}
all_datasets_accuracies = {}
# The generator only rescales pixel values and carries no per-dataset or
# per-model settings, so one instance is shared by every evaluation below.
test_datagen = ImageDataGenerator(rescale=1./255)
for dataset_path in test_dataset_paths:
    # Last path component; rstrip keeps the name non-empty for a trailing "/".
    dataset_name  = dataset_path.rstrip("/").split("/")[-1]
    print(f"Testing dataset : {dataset_path} ...")
    dataset_test_accuracies = {}
    for model_path in copied_models_paths:
        # /kaggle/working/models/2500-model_dataset_imgsize_300_model.keras
        # Parse the file name itself rather than a fixed path index, so the
        # parsing does not depend on how deep the models directory is.
        model_file_name = os.path.basename(model_path)
        trained_data_size = int(model_file_name.split("-")[0])
        print(f"trained data size : {trained_data_size}")
        img_size = int(model_file_name.split("_")[-2])
        print(f"image size : {img_size}")
        current_model_name =  str(trained_data_size) + " ," + str(img_size) + "_model"
        print(f"using : {current_model_name} ...")
        model = model_path_to_model[model_path]

        print(f"Testing model : {current_model_name} ...")
        # shuffle=False keeps the prediction order aligned with
        # test_generator.classes, which the accuracy computation relies on.
        test_generator = test_datagen.flow_from_directory(
            dataset_path,
            target_size=(img_size, img_size),
            batch_size=batch_size,
            class_mode='categorical',
            shuffle=False)
        print()

        # Predict the output
        # ceil so the final, possibly partial, batch is included.
        steps = int(np.ceil(test_generator.samples / test_generator.batch_size))
        predictions = model.predict(test_generator, steps=steps)
        predictions = predictions.argmax(axis=-1)
        # NOTE(review): assumes the class indices the generator derives from
        # the dataset folders match the indices used in training -- confirm.
        true_classes = test_generator.classes
        accuracy = round(accuracy_score(true_classes, predictions), 3)
        dataset_test_accuracies[current_model_name] = accuracy
        print(f"Accuracy of model {current_model_name} on dataset {dataset_name} : {accuracy}")
    all_datasets_accuracies[dataset_name] = dataset_test_accuracies

print(f"all datasets accuracies : {all_datasets_accuracies}")
# Rows are datasets, columns are models; rendered as a colour-graded table.
accuracies_df = pd.DataFrame(all_datasets_accuracies).transpose()
accuracies_df = accuracies_df.style.background_gradient(cmap='tab20_r').set_table_styles([dict(selector="th", props=[("text-align", "center")])])
display(accuracies_df)


<h1> Move images from one folder to another</h1>


In [None]:
import os
import shutil

def move_files_with_rename(src_folder, dst_folder):
    """Move every entry of ``src_folder`` into ``dst_folder`` without overwriting.

    When the destination already holds an entry with the same name, the moved
    entry is renamed to ``<base>_changed<ext>``. If that name is taken as well,
    a counter is appended (``<base>_changed_2<ext>``, ``_3`` ...) until a free
    name is found, so existing destination files are never replaced.

    Args:
        src_folder: directory whose entries are moved.
        dst_folder: directory receiving the entries; created if missing.

    Raises:
        FileNotFoundError: if ``src_folder`` does not exist.
    """
    os.makedirs(dst_folder, exist_ok=True)

    # Iterate over all entries in the source folder
    for filename in os.listdir(src_folder):
        src_file = os.path.join(src_folder, filename)
        dst_file = os.path.join(dst_folder, filename)

        # If the name is taken in the destination, pick a free "_changed" name
        if os.path.exists(dst_file):
            base, extension = os.path.splitext(filename)
            dst_file = os.path.join(dst_folder, f"{base}_changed{extension}")
            counter = 2
            while os.path.exists(dst_file):
                dst_file = os.path.join(dst_folder, f"{base}_changed_{counter}{extension}")
                counter += 1

        # Move the file
        shutil.move(src_file, dst_file)

# Use the function
# Moves everything from the "test" folder into the "train" folder; entries whose
# names already exist in the destination are renamed by move_files_with_rename.
src_folder = "C:/Users/Dell/Desktop/test/500/test"
dst_folder = "C:/Users/Dell/Desktop/test/500/train"
move_files_with_rename(src_folder, dst_folder)

<h1> Divide a dataset into train & test folders </h1>

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define the source directory and the target directories
source_dir = './data set/all/'
train_dir = './data set/train/'
test_dir = './data set/test/'

# Fixed seed so that re-running the cell on the same files yields the same split.
SPLIT_SEED = 42

# Create the target directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get a list of all the dog and cat image filenames.
# The directory is listed once and sorted: os.listdir returns entries in
# arbitrary order, which would otherwise make the seeded split vary anyway.
all_images = sorted(os.listdir(source_dir))
dog_images = [name for name in all_images if 'dog' in name]
cat_images = [name for name in all_images if 'cat' in name]

# A name containing both substrings would land in both lists and be moved
# twice; fail before anything is moved rather than halfway through.
ambiguous_images = set(dog_images) & set(cat_images)
if ambiguous_images:
    raise ValueError(f"file names match both 'dog' and 'cat': {sorted(ambiguous_images)}")

# Split the filenames into training and testing sets (75 % / 25 %)
train_dogs, test_dogs = train_test_split(dog_images, test_size=0.25, random_state=SPLIT_SEED)
train_cats, test_cats = train_test_split(cat_images, test_size=0.25, random_state=SPLIT_SEED)

# Function to move files
def move_files(files, target_dir, src_dir=None):
    """Move the named files from the source directory into ``target_dir``.

    Paths are built with ``os.path.join`` so the directories work with or
    without a trailing separator.

    Args:
        files: iterable of file names (not paths) to move.
        target_dir: directory that receives the files.
        src_dir: directory the files currently live in. When omitted, the
            module-level ``source_dir`` is used, as before.
    """
    if src_dir is None:
        src_dir = source_dir
    for file in files:
        shutil.move(os.path.join(src_dir, file), os.path.join(target_dir, file))

# Send each split to the folder it belongs in (dogs first, then cats;
# training files before test files within each animal).
for file_group, destination in (
    (train_dogs, train_dir),
    (test_dogs, test_dir),
    (train_cats, train_dir),
    (test_cats, test_dir),
):
    move_files(file_group, destination)


<h1> Join folders containing different letter positions into one folder </h1>

In [None]:
import os
import shutil

root_dir = r"C:\Users\Dell\Desktop\datasets\1500 (also contains paragraphs)\1500 (also contains paragraphs)\isolated_alphabets_per_alphabet"
# dest is beside the root directory
dest_dir = os.path.join(os.path.dirname(root_dir), "joined")

# Get a list of all subdirectories in the root directory
subdirs = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
print("started")
# Start from a clean destination so results of an earlier run do not mix in.
if(os.path.exists(dest_dir)):
    print("deleting existing root directory...")
    shutil.rmtree(dest_dir)
for subdir in subdirs:
    print("woring in ", subdir)
    src_subdir = os.path.join(root_dir, subdir)
    if '_' in subdir: # because some subdirectories (numbers) don't have an underscore in their naems and dont need joining
        # Everything before the first underscore names the letter; all folders
        # sharing that prefix are merged into one destination folder.
        letter_name = subdir.split('_')[0]
        new_dir = os.path.join(dest_dir, letter_name)
        os.makedirs(new_dir, exist_ok=True)
        files = os.listdir(src_subdir)
        for file in files:
            dest_file = os.path.join(new_dir, file)
            # Several source folders feed the same destination folder, so a
            # file name can repeat. Tag the newcomer with its source folder
            # name instead of silently overwriting the file copied earlier.
            if os.path.exists(dest_file):
                base, extension = os.path.splitext(file)
                dest_file = os.path.join(new_dir, f"{base}_{subdir}{extension}")
            shutil.copy(os.path.join(src_subdir, file), dest_file)
    else:
        # Folders without an underscore are copied over unchanged.
        shutil.copytree(src_subdir, os.path.join(dest_dir, subdir))
print("done")        

<h1> Same as the cell above, but copy each position folder as a whole into its letter folder instead of merging its files </h1>

In [None]:
import os
import shutil

root_dir = r"C:\Users\Dell\Desktop\datasets\1800 - position categorized\1800 - position categorized\HMBD-v1-master\Dataset"
# The output tree "joined" is created next to root_dir
dest_dir = os.path.join(os.path.dirname(root_dir), "joined")

# Names of the immediate child directories of root_dir (plain files are ignored)
subdirs = [
    name
    for name in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, name))
]
print("started")
# Remove the output of an earlier run before rebuilding it
if os.path.exists(dest_dir):
    print("deleting existing root directory...")
    shutil.rmtree(dest_dir)
for subdir in subdirs:
    print("woring in ", subdir)
    # Letter name = text before the first underscore; folders without an
    # underscore (the number folders) keep their full name.
    letter_name = subdir.split('_')[0] if '_' in subdir else subdir
    new_dir = os.path.join(dest_dir, letter_name)
    os.makedirs(new_dir, exist_ok=True)
    # The folder is copied as a whole, nested under its letter folder.
    source_path = os.path.join(root_dir, subdir)
    target_path = os.path.join(new_dir, subdir)
    shutil.copytree(source_path, target_path)
print("done")        

<h1> Flatten each top-level folder of the dataset: copy every file from its nested subdirectories into one folder </h1>

In [None]:
# <h1> extract all files from directory to its parent directory , for all subdirectories in the dataset </h1>

import os
import shutil
def join_files_in_subdirs(dest,current_dir):
    """Recursively copy every file found under ``current_dir`` into ``dest``.

    ``current_dir`` may itself be a file, in which case it is copied directly.
    For a directory, each child is processed recursively; once that is done, a
    progress line is printed if the directory contains nothing but files.
    All files end up directly inside ``dest`` (the tree is flattened).
    """
    # Leaf case: not a directory, so copy it and stop.
    if not os.path.isdir(current_dir):
        shutil.copy(current_dir, dest)
        return

    for child_name in os.listdir(current_dir):
        join_files_in_subdirs(dest, os.path.join(current_dir, child_name))

    # Report only the innermost directories, i.e. those without any non-file child.
    has_non_file_child = any(
        not os.path.isfile(os.path.join(current_dir, child_name))
        for child_name in os.listdir(current_dir)
    )
    if not has_non_file_child:
        print ("done with ", current_dir)
           

    
# Input: the "joined" tree. Output: a sibling folder "joined & merged".
root_dir = r"C:\Users\Dell\Desktop\datasets\1800 - position categorized\joined"
dest_dir = os.path.join(os.path.dirname(root_dir), "joined & merged")
# Rebuild the output from scratch so files from an earlier run do not linger.
if(os.path.exists(dest_dir)):
    print("deleting existing root directory...")
    shutil.rmtree(dest_dir)
os.makedirs(dest_dir, exist_ok=True)

# Each top-level entry of root_dir gets its own output folder, into which all
# files below that entry are copied flat by join_files_in_subdirs.
for subdir in os.listdir(root_dir):
    merging_dest = os.path.join(dest_dir, subdir)
    os.makedirs(merging_dest, exist_ok=True)
    join_files_in_subdirs(merging_dest, os.path.join(root_dir, subdir))
print("done merging ")
# open the parent directory of the output directory upon finishing
# NOTE(review): os.startfile is documented as Windows-only -- this line will
# fail on other platforms.
os.startfile(os.path.dirname(dest_dir))

    

<h1> Clear kaggle output folder </h1>

In [None]:
import os
import shutil

def clear_directory(dir_path):
    """Delete everything inside ``dir_path`` while keeping the directory itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. Entries of any other kind are left untouched.
    """
    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)

        # Files and links (including links that point at directories) are
        # removed with unlink, so a link's target is never descended into.
        is_file_or_link = os.path.isfile(entry_path) or os.path.islink(entry_path)
        if is_file_or_link:
            os.unlink(entry_path)
            continue

        if os.path.isdir(entry_path):
            shutil.rmtree(entry_path)

# Usage:
# Empties the Kaggle working directory: everything inside it is deleted, the
# directory itself is kept (see clear_directory).
dir_path = "/kaggle/working/"
clear_directory(dir_path)

<h1> Create a 1% sample of the dataset in another folder, just to check that the code runs correctly </h1>

In [None]:
import random
import os
import shutil


# Source dataset (one sub-folder per class) and the folder receiving the sample.
test_dataset_paths = r"C:\Users\Dell\Desktop\Bachelor\datasets\26 letters\1800\1800 letters only"
dest_dir = r"C:\Users\Dell\Desktop\Sample_dataset_1800"

print("started")
# Rebuild the sample from scratch on every run.
if(os.path.exists(dest_dir)):
    print("deleting existing root directory...")
    shutil.rmtree(dest_dir)
# Get a list of all subdirectories in the root directory; stray files at the
# top level are skipped because they cannot be listed as a class folder.
subdirs = [d for d in os.listdir(test_dataset_paths) if os.path.isdir(os.path.join(test_dataset_paths, d))]
percent_to_copy = 0.01
for subdir in subdirs:
    src_entity_dir = os.path.join(test_dataset_paths, subdir)
    dest_entity_dir = os.path.join(dest_dir, subdir)
    os.makedirs(dest_entity_dir, exist_ok=True)
    # Only regular files can be copied with shutil.copy; nested folders are ignored.
    files = [f for f in os.listdir(src_entity_dir) if os.path.isfile(os.path.join(src_entity_dir, f))]
    num_files = len(files)
    num_files_to_copy = int(num_files * percent_to_copy)
    # Classes with fewer than 100 files would round down to zero and leave an
    # empty class folder in the sample; keep at least one file when available.
    if num_files > 0:
        num_files_to_copy = max(1, num_files_to_copy)
    # Draw the files at random without replacement.
    for file_name in random.sample(files, num_files_to_copy):
        shutil.copy(os.path.join(src_entity_dir, file_name), dest_entity_dir)
    print("done with ", subdir , " , copied only", num_files_to_copy, " files")
print("done")


  

<h1> Merge 2 datasets </h1>

In [None]:
# merge 2 datasets into one ,

import os
import shutil

# The two datasets to merge and the folder that will hold the merged result.
first_dataset_path = r"C:\Users\Dell\Desktop\Bachelor\datasets\26 letters\1800\1800 letters only"
second_dataset_path = r"C:\Users\Dell\Desktop\Bachelor\datasets\26 letters\2500\2500 letters only"
dest_dir = r"C:\Users\Dell\Desktop\Bachelor\datasets\26 letters\Two datasets merged"

# Collect the entry (sub-folder) names of both datasets as sets and compare them
first_dataset_entity_names = set(os.listdir(first_dataset_path))
second_dataset_entity_names = set(os.listdir(second_dataset_path))
# A mismatch is only reported; execution continues with the code below.
if first_dataset_entity_names != second_dataset_entity_names:
    print("datasets don't have the same entities")
    print("")

for subdir in os.listdir(first_dataset_path):
    current_entity_dir = os.path.join(first_dataset_path, subdir)
