In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import confusion_matrix
import seaborn as sn; sn.set(font_scale=1.4)
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from tqdm import tqdm

# Constants
# Target size handed to cv2.resize for every loaded image.
# cv2.resize reads this tuple as (width, height); the two are equal here.
IMAGE_SIZE = (150, 150)

In [None]:
# Load every readable image from the "images" folder, keeping both the
# untouched original and an RGB copy resized to IMAGE_SIZE.
images = []         # RGB copies resized to IMAGE_SIZE, aligned with `files_names`
files_names = []    # file name of every successfully loaded image
orig_images = []    # originals exactly as returned by cv2.imread (BGR order)
skipped_files = []  # directory entries OpenCV could not decode

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of everything built from these lists identical on every run.
for file in tqdm(sorted(os.listdir("images"))):
    # Get the path name of the image
    img_path = os.path.join("images", file)

    # cv2.imread does not raise on an unreadable file (sub-directory, hidden
    # file, corrupt image): it returns None, which would crash cvtColor below.
    image = cv2.imread(img_path)
    if image is None:
        skipped_files.append(file)
        continue

    orig_images.append(image)  # the original file
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, IMAGE_SIZE)
    images.append(image)
    files_names.append(file)

if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable file(s): {skipped_files}")

In [None]:
# One row per loaded image: its file name next to the resized pixel array.
asset_columns = {
    "Filename": files_names,
    "images": images,
}
df_assets = pd.DataFrame(asset_columns)
df_assets

In [None]:
# Add category data
# Import CSV file
df_label = pd.read_csv("labels.csv")

# Merge from the two image columns only. This cell overwrites df_assets, so
# without the column selection a second run would merge the label columns
# again and produce suffixed duplicates (Label_x / Label_y).
df_assets = df_assets[["Filename", "images"]].merge(df_label, how="left", on="Filename")

# The left join leaves NaN in "Label" for images that have no row in
# labels.csv; NaN cannot be cast to an integer class id for training,
# so report those images and drop them here.
unlabelled = df_assets["Label"].isna()
if unlabelled.any():
    print(f"Dropping {int(unlabelled.sum())} image(s) with no entry in labels.csv:",
          df_assets.loc[unlabelled, "Filename"].tolist())
    df_assets = df_assets.loc[~unlabelled].reset_index(drop=True)

df_assets

In [None]:
# Number of images in each (Species, Label) combination
category_keys = ["Species", "Label"]
category_counts = df_assets.groupby(category_keys).size()
category_counts

In [None]:
# Split the dataframe into two random samples (80% and 20%) for training and testing.
# A fixed random_state makes the shuffle, and therefore every metric computed
# downstream, reproducible across kernel restarts.
train, test = train_test_split(df_assets, test_size=0.2, random_state=42)
train.shape

## Steps are:

1. Build the model,
2. Compile the model,
3. Train / fit the data to the model,
4. Evaluate the model on the testing set.

In [None]:

# `os` and `tf` come from the import cell at the top of the notebook.
# Expose only GPU 0 to this process.
# NOTE(review): this presumably only takes effect if it runs before TensorFlow
# initialises CUDA; confirm, or move it above the TensorFlow import.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

# Size of the softmax output layer.
# NOTE(review): assumed to equal the number of distinct values in the
# "Label" column; verify against labels.csv.
NUM_CLASSES = 9

# cv2.resize takes (width, height) while image arrays are laid out as
# (height, width, channels), so derive the input shape from IMAGE_SIZE
# instead of repeating the numbers.
INPUT_SHAPE = (IMAGE_SIZE[1], IMAGE_SIZE[0], 3)

# Two conv + max-pool stages followed by a dense classifier head.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), data_format="channels_last", activation='relu', input_shape=INPUT_SHAPE),
    tf.keras.layers.MaxPooling2D(2, 2, data_format="channels_last"),
    tf.keras.layers.Conv2D(32, (3, 3), data_format="channels_last", activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2, data_format="channels_last"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(NUM_CLASSES, activation=tf.nn.softmax)
])

In [None]:
# Integer class ids are used as targets, hence the sparse variant of the loss.
compile_options = {
    "optimizer": 'adam',
    "loss": 'sparse_categorical_crossentropy',
    "metrics": ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# Stack the per-row image arrays of the training split into one float32 array
# and scale the pixel values from 0-255 down to 0-1.
# Any data later passed to model.evaluate / model.predict must be scaled the same way.
# Dedicated names are used so the `images` list built while loading the files
# is not overwritten, which would break re-running the cell that builds df_assets.
train_images = np.array(list(train.loc[:, "images"]), dtype='float32') / 255.0
train_labels = np.array(list(train.loc[:, "Label"]), dtype='int32')

# validation_split holds out 20% of the training rows for per-epoch validation.
model_fit = model.fit(train_images, train_labels, batch_size=128, epochs=20, validation_split=0.2)

In [None]:
def plot_accuracy_loss(history):
    """
    Plot accuracy and loss per epoch, side by side, for a finished training run.

    Parameters
    ----------
    history : object exposing a ``history`` dict (as returned by ``model.fit``).
        The dict must contain the keys ``accuracy`` and ``loss``. The curves
        for ``val_accuracy`` and ``val_loss`` are added when those keys are present.

    Returns
    -------
    None. The figure is rendered with ``plt.show()``.
    """
    # One row, two columns: both panels use the full figure height.
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))

    # (key in history.history, legend label, panel title)
    panels = (
        ("accuracy", "acc", "train_acc vs val_acc"),
        ("loss", "loss", "train_loss vs val_loss"),
    )

    for ax, (metric, short_name, title) in zip(axes, panels):
        ax.plot(history.history[metric], 'bo--', label=short_name)

        # Validation curves only exist when fit() was given validation data.
        val_key = "val_" + metric
        if val_key in history.history:
            ax.plot(history.history[val_key], 'ro--', label="val_" + short_name)

        ax.set_title(title)
        ax.set_ylabel(metric)
        ax.set_xlabel("epochs")
        ax.legend()

    fig.tight_layout()
    plt.show()

plot_accuracy_loss(model_fit)

# Steps to do:
1. Run the model against the test data (continue with https://www.kaggle.com/code/vincee/intel-image-classification-cnn-keras/notebook)
2. Confusion matrix (measure of fit) (see https://www.w3schools.com/python/python_ml_confusion_matrix.asp)
3. Display results