# Image Recognition and Classification AI

In [None]:
# Required libraries:
# keras, tensorflow, pillow, numpy, pandas, matplotlib, tabulate, PyQt5 (install each with pip)

from keras.models import load_model
from PIL import Image, ImageOps
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import datetime
import shutil

## User specified directories

In [None]:
### User-specified settings: paths, confidence threshold and sorting ###

# Path of the trained Keras model; passed as-is to keras `load_model`.
# Example of an alternative model: 'Animal_AI_model/animal_keras_model.h5'
model_dir = '3yp/trained_classifier'

# Folder containing the images to classify. The results folder and the
# categorised-images folder are created inside this folder as well.
image_dir = '3yp/classifier_testing'

# Text file with one class label per line. Each line is expected to look
# like "<index> <label>"; the leading index is stripped when it is read.
# Example of an alternative labels file: 'Animal_AI_model/labels.txt'
labels_file_dir = '3yp/classifier.txt'

# Minimum prediction score (compared with >=) for a class to be recorded
# in the results and to be used as the destination folder when sorting.
conf_threshold = 0.9

# When True, every analysed image is copied into a sub-folder named after
# its top class (or 'Not Sure' when the top score is below conf_threshold).
auto_sort_images = True

##################

In [None]:
# Load the trained Keras model once; run_one_inference reuses this
# module-level object for every image.
model = load_model(model_dir)



## Data preparation:
- Creates results dataframe
- Reads labels file

In [None]:
def create_df_load_classes(labels_path=None):
    """Create the empty results table and read the class labels.

    Args:
        labels_path: Optional path of the labels text file. When omitted,
            the module-level ``labels_file_dir`` setting is used.

    Returns:
        tuple: ``(results_df, final_clss_list)``. ``results_df`` is an empty
        DataFrame with the image-name column followed by a (class,
        confidence) column pair for each of the top three classes.
        ``final_clss_list`` holds the class names in file order, so its
        positions line up with the indices of the model output.
    """
    if labels_path is None:
        labels_path = labels_file_dir

    # One column for the file name, then (class, confidence) for the top 3.
    results_df = pd.DataFrame(columns=[
        'Image_Name',
        'Class_1', 'CONF_class_1',
        'Class_2', 'CONF_class_2',
        'Class_3', 'CONF_class_3',
    ])

    final_clss_list = []
    # utf-8-sig reads plain UTF-8 and also drops a leading byte-order mark.
    with open(labels_path, encoding='utf-8-sig') as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Blank (e.g. trailing) lines carry no label.
                continue

            # Lines look like "<index> <label>"; keep only the label.
            # A line without a separator is taken as the label itself
            # instead of raising IndexError.
            _, separator, label = entry.partition(' ')
            final_clss_list.append(label.strip() if separator else entry)

    return results_df, final_clss_list

## Run one inference and record the results

- Prepares the image for inference
- Runs the inference
- Retrieves the top 3 classes whose confidence is at or above the threshold
- Records each class name and its confidence (CONF) level in the results dataframe

In [None]:
def _load_image_batch(image_path):
    """Load one image file and return it as a normalised (1, 224, 224, 3) batch."""
    size = (224, 224)

    # The context manager closes the file handle once the pixels are read.
    with Image.open(image_path) as raw_image:
        # Force three channels: greyscale, palette and RGBA images would
        # otherwise not fit the (224, 224, 3) slot of the batch below.
        rgb_image = raw_image.convert('RGB')

    # Resize so the image covers 224x224, then crop from the centre.
    # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
    # which no longer exists in Pillow 10+.
    fitted_image = ImageOps.fit(rgb_image, size, Image.LANCZOS)
    image_array = np.asarray(fitted_image, dtype=np.float32)

    # The first dimension is the number of images in the batch (one here).
    data = np.empty((1, 224, 224, 3), dtype=np.float32)
    # NOTE(review): some Teachable Machine exports divide by 127.5 instead;
    # kept at 127.0 as in the original - confirm against the training setup.
    data[0] = (image_array / 127.0) - 1
    return data


def run_one_inference(results_df, final_clss_list, img, curr_time):
    """Classify one image and append its result row to ``results_df``.

    Reads the module-level ``model``, ``image_dir``, ``conf_threshold`` and
    ``auto_sort_images`` settings.

    Args:
        results_df: DataFrame of results collected so far; not modified.
        final_clss_list: Class names, indexed like the model output.
        img: File name of the image inside ``image_dir``.
        curr_time: Timestamp string used in the name of the
            ``Categorised_Images_<curr_time>`` folder.

    Returns:
        A new DataFrame equal to ``results_df`` plus one row: the file name
        followed by (class name, rounded score) for each of the top three
        classes whose score is at least ``conf_threshold``; remaining cells
        are empty strings.
    """
    data = _load_image_batch(os.path.join(image_dir, img))

    #### Run the inference & get the scores of the single image ####
    prediction = model.predict(data)[0].tolist()

    # Rank class *indices* by score. Looking the index up from the score
    # value (list.index) would return the same class twice for tied scores.
    ranked_indices = sorted(
        range(len(prediction)), key=prediction.__getitem__, reverse=True
    )
    top_indices = ranked_indices[:3]

    #### Match CONF level to class name & build the row ####
    new_df_entry = [img]
    for idx in top_indices:
        score = prediction[idx]
        # Only record classes whose score reaches the threshold.
        if score >= conf_threshold:
            new_df_entry.extend([final_clss_list[idx], round(score, 4)])

    # Copy the image into the folder of its top class if asked to.
    if auto_sort_images:
        categorised_dir = os.path.join(image_dir, f'Categorised_Images_{curr_time}')
        os.makedirs(categorised_dir, exist_ok=True)

        if top_indices and prediction[top_indices[0]] >= conf_threshold:
            main_class_name = final_clss_list[top_indices[0]]
        else:
            main_class_name = 'Not Sure'

        copy_one_image(img, main_class_name, categorised_dir)

    # Pad the row with blanks up to the width of the results table.
    missing_cells = len(results_df.columns) - len(new_df_entry)
    new_df_entry += [''] * missing_cells

    # DataFrame.append no longer exists in pandas 2.x; concatenate instead.
    # dtype=object keeps mixed text/number cells as they are.
    new_row = pd.DataFrame([new_df_entry], columns=results_df.columns, dtype=object)
    return pd.concat([results_df, new_row], ignore_index=True)

## Function that copies images into the correct folders

In [None]:
def copy_one_image(img, main_class_name, categorised_dir):
    """Copy one image from the image folder into its class sub-folder.

    Args:
        img: File name of the image inside the module-level ``image_dir``.
        main_class_name: Name of the sub-folder to copy the image into.
        categorised_dir: Parent folder that holds the class sub-folders.
    """
    source_file = os.path.join(image_dir, img)
    class_folder = os.path.join(categorised_dir, main_class_name)

    # The class folder is created the first time an image lands in it.
    folder_missing = not os.path.exists(class_folder)
    if folder_missing:
        os.makedirs(class_folder)

    # The original stays in place; only a copy goes into the class folder.
    shutil.copy2(source_file, class_folder)

## Run inference for all images

In [None]:
def run_inference(results_df, final_clss_list):
    """Classify every suitable image in ``image_dir`` and save the results.

    Reads the module-level ``image_dir`` and ``auto_sort_images`` settings.

    Args:
        results_df: Empty results DataFrame to fill.
        final_clss_list: Class names, indexed like the model output.

    Returns:
        tuple: ``(results_df, results_dir)`` - the filled DataFrame and the
        ``Analysis_Results_<timestamp>`` folder (inside ``image_dir``) that
        now contains ``results.csv``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.tiff')

    # os.listdir returns names in arbitrary order; sorting keeps the CSV row
    # order reproducible between runs.
    img_list = sorted(
        name for name in os.listdir(image_dir)
        if name.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(image_dir, name))
    )
    total = len(img_list)

    print(f"Analysing {total} suitable images... Pls wait\n")

    # Create output folder
    curr_time = datetime.now().strftime("%m_%d-%H.%M.%S")
    results_dir = os.path.join(image_dir, f"Analysis_Results_{curr_time}")
    os.makedirs(results_dir, exist_ok=True)

    # Repeat the inference for all images in the folder. Counting from 1
    # reports the number of images actually finished, and the final N/N
    # line is always printed regardless of how N relates to 5.
    for done, img in enumerate(img_list, start=1):
        results_df = run_one_inference(results_df, final_clss_list, img, curr_time)
        if done % 5 == 0 or done == total:
            print(f"{done}/{total} images analysed...")

    # Save to CSV
    results_df.to_csv(os.path.join(results_dir, 'results.csv'), encoding='utf-8')
    print("\n>> Analysis complete! Generating plots...")

    # Tell the user where the sorted copies ended up.
    if auto_sort_images:
        print("\n>> Sorting images into the correct folders... See the 'Categorised_Images' folder in your images folder for results.")

    return results_df, results_dir

## Generates plots

In [None]:
def generate_plots(results_df, results_dir):
    """Save a bar chart and a pie chart of the top class of every image.

    Args:
        results_df: Results table with a ``Class_1`` column; blank entries
            are counted as 'Not identified'. The table is not modified.
        results_dir: Folder in which ``bar_chart.jpg`` and ``pie_chart.jpg``
            are written.

    Returns:
        None. When there are no rows to count, nothing is plotted.
    """
    # Series.replace returns a new Series, so the caller's DataFrame keeps
    # its blank cells.
    top_classes = results_df['Class_1'].replace('', 'Not identified')
    freq_table = top_classes.value_counts()

    if freq_table.empty:
        # Plotting an empty table fails, so skip the charts altogether.
        print("\n>> No analysed images to plot - skipping plot generation.")
        return

    # Bar chart
    plt.figure(figsize=(10, 6), dpi=80)
    freq_table.plot.bar()
    plt.xticks(rotation=30, ha='right', wrap=True)
    plt.title('Bar Chart - Number of images per type', fontsize=18)
    plt.xlabel('Image Type', fontsize=16)
    plt.ylabel('Frequency', fontsize=16)
    # Leave room at the bottom for the rotated class names.
    plt.subplots_adjust(bottom=0.3)
    plt.savefig(os.path.join(results_dir, "bar_chart.jpg"))
    plt.close()

    # Pie chart
    plt.figure(figsize=(10, 6), dpi=80)
    freq_table.plot.pie(autopct='%1.2f%%')
    plt.title('Pie Chart - Type Composition', fontsize=18)
    plt.subplots_adjust(bottom=0.1, left=0.2, right=0.8)
    plt.savefig(os.path.join(results_dir, "pie_chart.jpg"))
    plt.close()

## Main Function

In [None]:
def main():
    """Run the full pipeline: read labels, classify, plot, then explain the CSV."""
    empty_results, class_names = create_df_load_classes()
    filled_results, output_dir = run_inference(empty_results, class_names)
    generate_plots(filled_results, output_dir)

    # Closing summary followed by a short guide to the results CSV;
    # each entry is printed on its own, in this order.
    closing_messages = (
        "\n>> All done. Results CSV and Plots saved in the 'Analysis_Results' folder with your test images.",
        "\n-----------------------------------------\n",
        "**HOW TO UNDERSTAND THE RESULTS CSV:**",
        "Class_1 refers to most confident type of class identified, with the associated confidence score in CONF_class_1 column. The next confident class is Class_2 and so on.",
        "If certain columns are blank, it means the confidence level is below the threshold you set. Decrease the threshold to get more data, if needed.",
    )
    for message in closing_messages:
        print(message)

In [None]:
# main()

Analysing 27 suitable images... Pls wait

0/27 images analysed...
5/27 images analysed...
10/27 images analysed...
15/27 images analysed...
20/27 images analysed...
25/27 images analysed...
27/27 images analysed...

Analysis complete! Generating plots...

Sorting images into the correct folders... See the 'Categorised_Images' folder in your images folder for results.

All done. Results CSV and Plots saved in the 'Analysis_Results' folder with your test images.

-----------------------------------------

**HOW TO UNDERSTAND THE RESULTS CSV:**
Class_1 refers to most confident type of class identified, with the associated confidence score in CONF_class_1 column. The next confident class is Class_2 and so on.
If certain columns are blank, it means the confidence level is below the threshold you set. Decrease the threshold to get more data, if needed.
