# Setting Google environment and importing libraries

In [1]:
# mount Google Drive inside the Colab runtime so the pictures and the saved model can be read
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [2]:
# importing libraries
import pandas as pd
import numpy as np
import cv2
import os
import datetime

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, losses, optimizers
from tensorflow.keras.models import Sequential, Model

# Exploration and data preparation

### Image sizes and file paths

In [3]:
# list of folders with pictures (one sub-folder per kind of apple)
# os.listdir returns entries in arbitrary order, so the names are sorted to keep the
# class order reproducible between runs; entries that are not directories are skipped
# because every name in this list is later opened as a folder of images
apples_root = 'drive/MyDrive/Apples/'
list_of_apples = sorted(
    entry for entry in os.listdir(apples_root)
    if os.path.isdir(os.path.join(apples_root, entry))
)
list_of_apples

['Apple_B', 'Apple_C', 'Apple_F']

In [4]:
# build the path of every class folder from the common root and the folder name
folder_path = 'drive/MyDrive/Apples/'
list_of_folders = []
for apple_kind in list_of_apples:
  list_of_folders.append(os.path.join(folder_path, apple_kind))
list_of_folders

['drive/MyDrive/Apples/Apple_B',
 'drive/MyDrive/Apples/Apple_C',
 'drive/MyDrive/Apples/Apple_F']

### Data preparation

In [5]:
def create_subset_of_class(folder_path:str, fixed_size:tuple) -> np.ndarray:
  """Build the resized, normalized and de-duplicated image array of one class folder.

  The function assumes the folder holds pictures of a single kind of apple.
  It does not split the data into training and test sets.

  Args:
    folder_path: directory whose entries are read as images.
    fixed_size: target size passed to ``cv2.resize`` as ``dsize``, i.e.
      ``(width, height)``.

  Returns:
    float32 array of shape ``(n_unique, height, width, 3)`` with RGB values
    scaled to ``[0, 1]``. Duplicated pictures are removed with ``np.unique``,
    which also returns the pictures in sorted (not directory) order. An empty
    array of shape ``(0, height, width, 3)`` is returned when no picture could
    be read, so the result can always be sliced and concatenated.
  """
  import warnings

  picture_data_list = []

  for pic in os.listdir(folder_path):

    pic_path = os.path.join(folder_path, pic)
    img = cv2.imread(pic_path)
    if img is None:
      # cv2.imread does not raise on unreadable input (sub-folder, hidden file,
      # corrupted picture) - it returns None, which would crash cvtColor below
      warnings.warn(f"skipping entry that could not be read as an image: {pic_path}")
      continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype('float32')
    img = img / 255
    img = cv2.resize(src=img, dsize=fixed_size, interpolation=cv2.INTER_AREA)

    picture_data_list.append(img)

  if not picture_data_list:
    # np.unique on an empty list would give a 1-D float64 array that cannot be
    # concatenated with the 4-D arrays of the other classes
    width, height = fixed_size
    return np.empty((0, height, width, 3), dtype='float32')

  picture_data_arr = np.unique(picture_data_list, axis=0)

  return picture_data_arr

In [6]:
# training and test split
# NOTE: the split is positional, not random - the first `train_size` pictures returned by
# create_subset_of_class go to training and everything after them goes to test
train_size = 500
picture_size = (255, 255)

dataset = {}

for class_id, class_folder in enumerate(list_of_folders):
  class_pictures = create_subset_of_class(class_folder, fixed_size=picture_size)
  training_part = class_pictures[:train_size]
  test_part = class_pictures[train_size:]

  class_entry = {}
  class_entry['class_number'] = class_id
  class_entry['class_name'] = class_folder.rsplit("/", 1)[-1]  # last path component
  class_entry['training_data_shape'] = training_part.shape
  class_entry['training_data'] = training_part
  class_entry['test_data_shape'] = test_part.shape
  class_entry['test_data'] = test_part
  dataset[class_id] = class_entry


In [7]:
# initialize the training and test image arrays with the pictures of class 0
first_class = dataset.get(0)
X_train = first_class.get("training_data")
X_test = first_class.get("test_data")
print(f"class 0 training shape: {X_train.shape}, class 0 test shape: {X_test.shape}")

class 0 training shape: (500, 255, 255, 3), class 0 test shape: (126, 255, 255, 3)


In [8]:
# creating final training and test dataset
# Every class (including class 0) is stacked in one np.concatenate call built straight
# from `dataset`. Appending to the existing X_train / X_test instead would add classes
# 1..n again each time the cell is re-run and would copy the growing arrays once per class.
max_class = max(dataset.keys())+1
X_train = np.concatenate(
    [dataset.get(apple_class).get("training_data") for apple_class in range(max_class)],
    axis=0)
X_test = np.concatenate(
    [dataset.get(apple_class).get("test_data") for apple_class in range(max_class)],
    axis=0)
print(f"full training shape: {X_train.shape}, full test shape: {X_test.shape}")

full training shape: (1500, 255, 255, 3), full test shape: (2157, 255, 255, 3)


In [9]:
# initialize the training and test targets with the label of class 0,
# repeated once per picture of that class in the respective split
first_class = dataset.get(0)
first_class_label = np.array([first_class.get("class_name")])

y_train = np.repeat(first_class_label, first_class.get("training_data_shape")[0])
y_test = np.repeat(first_class_label, first_class.get("test_data_shape")[0])

In [10]:
# creating final training and test targets
# The label vectors are rebuilt from `dataset` for every class (including class 0):
# each class name is repeated once per picture of that class, in class-number order,
# which matches the row order of the stacked image arrays. Building them from scratch
# keeps the cell idempotent - appending to the existing y_train / y_test would add the
# labels of classes 1..n again on every re-run.
max_class = max(dataset.keys())+1
class_names = [dataset.get(apple_class).get("class_name") for apple_class in range(max_class)]
train_sizes = [dataset.get(apple_class).get("training_data_shape")[0] for apple_class in range(max_class)]
test_sizes = [dataset.get(apple_class).get("test_data_shape")[0] for apple_class in range(max_class)]

y_train = np.repeat(class_names, train_sizes)
y_test = np.repeat(class_names, test_sizes)

In [11]:
# check the number of targets per class in each split
unique_train, counts_train = np.unique(y_train, return_counts=True)
unique_test, counts_test = np.unique(y_test, return_counts=True)

train_summary = {label: count for label, count in zip(unique_train, counts_train)}
test_summary = {label: count for label, count in zip(unique_test, counts_test)}

print("Target for training:", train_summary)
print("Target for test:", test_summary)

Target for training: {'Apple_B': 500, 'Apple_C': 500, 'Apple_F': 500}
Target for test: {'Apple_B': 126, 'Apple_C': 502, 'Apple_F': 1529}


In [12]:
# encoding y data
# The encoder is fitted on the training labels only and reused for the test labels, so
# both one-hot matrices share the same column order. Fitting a second encoder on the test
# labels would silently produce a different column mapping whenever the two splits do not
# contain exactly the same classes; with a shared encoder an unseen test label raises.
target_encoder = OneHotEncoder()

y_train_encoded = target_encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_train_encoded = y_train_encoded.astype(np.float32)

y_test_encoded = target_encoder.transform(y_test.reshape(-1, 1)).toarray()
y_test_encoded = y_test_encoded.astype(np.float32)

print("y_train.shape: ", y_train_encoded.shape)
print("y_test.shape: ", y_test_encoded.shape)

y_train.shape:  (1500, 3)
y_test.shape:  (2157, 3)


# Model evaluation

In [13]:
# load the previously saved CNN from Google Drive
model_path = '/content/drive/MyDrive/modelCNN.h5'
modelCNN = keras.models.load_model(model_path)

In [14]:
# integer class index of every test sample: position of the largest value in its one-hot row
y_true = np.argmax(y_test_encoded, axis=1)

In [15]:
# predicted class index = position of the highest model output for every test picture
test_outputs = modelCNN.predict(X_test)
y_pred = np.argmax(test_outputs, axis=1)



In [16]:
# confusion matrix of the test set (rows: true class, columns: predicted class)
test_confusion = confusion_matrix(y_true, y_pred)
print(test_confusion)

[[ 126    0    0]
 [  96  405    1]
 [   0    0 1529]]


In [17]:
# per-class precision, recall and f1-score on the test set
test_report = classification_report(y_true, y_pred)
print(test_report)

              precision    recall  f1-score   support

           0       0.57      1.00      0.72       126
           1       1.00      0.81      0.89       502
           2       1.00      1.00      1.00      1529

    accuracy                           0.96      2157
   macro avg       0.86      0.94      0.87      2157
weighted avg       0.97      0.96      0.96      2157

