# Load the librarires

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import cv2
import os

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

# random seeds for reproducibility
tf.random.set_seed(123)



2023-10-21 21:17:51.325103: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load Data

In [2]:
folder_path_ripe = './data/images/train/ripe'
folder_path_raw = './data/images/train/raw'

image_files_ripe = [f for f in os.listdir(folder_path_ripe) if f.endswith(('.jpg', '.jpeg', '.png'))]
image_files_raw = [f for f in os.listdir(folder_path_raw) if f.endswith(('.jpg', '.jpeg', '.png'))]

# Loop through the image files and load each image

#RIPE
images_ripe = []
for file_name in image_files_ripe:
    image_path = os.path.join(folder_path_ripe, file_name)
    img = cv2.imread(image_path)

    if img is not None:
        #Convert the BGR image to RGB 
        images_ripe.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    else:
        print(f"Failed to load {file_name}")

#RAW
images_raw = []
for file_name in image_files_raw:
    image_path = os.path.join(folder_path_raw, file_name)
    img = cv2.imread(image_path)

    if img is not None:
        #Convert the BGR image to RGB 
        images_raw.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    else:
        print(f"Failed to load {file_name}")

# Prep the dataframe and shuffle
### Images -> X; Labels -> y
raw : 0 <br>
ripe : 1

In [13]:
#concat raw and ripe
y = [0 for x in range(len(images_raw))] + [1 for x in range(len(images_ripe))]
X=images_raw+images_ripe

#convert from list to np.array
X=np.array(X)
y=np.array(y)

#Shuffle
X_shuffled, y_shuffled = shuffle(X, y)

#Train Validation split
X_train, X_val, y_train, y_val = train_test_split(X_shuffled, y_shuffled, test_size=0.2, random_state=42, stratify=y)

## Image Dimensions

In [6]:
# Check out the data
print(f'X_train shape: {X_train.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'X_Validation shape: {X_val.shape}')
print(f'y_Validation shape: {y_val.shape}')

X_train shape: (916, 640, 640, 3)
y_train shape: (916,)
X_Validation shape: (230, 640, 640, 3)
y_Validation shape: (230,)


In [7]:
# range of x values
print(f'X range: {X_train.min()}-{X_train.max()}')
# y unique values
print(f'y values: {np.unique(y_train)}')
num_classes = len(np.unique(y_train))
print(f'Number of classes: {num_classes}')

X range: 0-255
y values: [0 1]
Number of classes: 2


In [8]:
# Define input image dimensions
img_rows, img_cols, colours = 640, 640, 3

# Reshape for Keras model types
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, colours)
X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, colours)

print(f'X_train shape: {X_train.shape}')
print(f'X_val shape: {X_val.shape}')

X_train shape: (916, 640, 640, 3)
X_val shape: (230, 640, 640, 3)


## Normalise: 0-1

In [9]:
# Scale from 0-1 to 0-255
X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_train /= 255
X_val /= 255
print(f'X_train range: {X_train.min()}-{X_train.max()}')

X_train range: 0.0-1.0


---
# Model


In [15]:
from tensorflow import keras
from keras.layers import BatchNormalization, Activation

CNN_model = Sequential()

# Layer 1
CNN_model.add(Conv2D(32, kernel_size=(5, 5), padding='same', input_shape=(640, 640, 3)))
CNN_model.add(BatchNormalization())
CNN_model.add(Activation('relu'))
CNN_model.add(MaxPooling2D(pool_size=(2, 2)))
CNN_model.add(Dropout(0.25))

# Layer 2
CNN_model.add(Conv2D(64, kernel_size=(3, 3), padding='same'))
CNN_model.add(BatchNormalization())
CNN_model.add(Activation('relu'))
CNN_model.add(MaxPooling2D(pool_size=(2, 2)))
CNN_model.add(Dropout(0.25))

# Flatten
CNN_model.add(Flatten())

# Dense layers
CNN_model.add(Dense(128))
CNN_model.add(BatchNormalization())
CNN_model.add(Activation('relu'))
CNN_model.add(Dropout(0.5))
CNN_model.add(Dense(64, activation='relu'))
CNN_model.add(Dense(1, activation='sigmoid'))

CNN_model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 640, 640, 32)      2432      
                                                                 
 batch_normalization_1 (Batc  (None, 640, 640, 32)     128       
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 640, 640, 32)      0         
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 320, 320, 32)     0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 320, 320, 32)      0         
                                                                 
 conv2d_4 (Conv2D)           (None, 320, 320, 64)     

In [16]:
CNN_model.compile(optimizer='adam', 
                  loss='binary_crossentropy', 
                  metrics=['accuracy'])


In [17]:
history = CNN_model.fit(X_train, y_train,
                        batch_size=32,
                        epochs=10,
                        validation_data=(X_val, y_val),
                        shuffle=True)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Evaluating on the validation set
test_loss, test_accuracy = CNN_model.evaluate(X_val, y_val)
print("Validation accuracy: ", test_accuracy)
print("Validation loss: ", test_loss)

Validation accuracy:  0.9826086759567261
Validation loss:  0.04678649082779884


----

## Test Set

In [20]:
folder_path_ripe = './data/images/test/ripe'
folder_path_raw = './data/images/test/raw'

image_files_ripe = [f for f in os.listdir(folder_path_ripe) if f.endswith(('.jpg', '.jpeg', '.png'))]
image_files_raw = [f for f in os.listdir(folder_path_raw) if f.endswith(('.jpg', '.jpeg', '.png'))]

# Loop through the image files and load each image

#RIPE
images_ripe = []
for file_name in image_files_ripe:
    image_path = os.path.join(folder_path_ripe, file_name)
    img = cv2.imread(image_path)

    if img is not None:
        #Convert the BGR image to RGB 
        images_ripe.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    else:
        print(f"Failed to load {file_name}")

#RAW
images_raw = []
for file_name in image_files_raw:
    image_path = os.path.join(folder_path_raw, file_name)
    img = cv2.imread(image_path)

    if img is not None:
        #Convert the BGR image to RGB 
        images_raw.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    else:
        print(f"Failed to load {file_name}")

In [21]:
#concat raw and ripe
y_test = [0 for x in range(len(images_raw))] + [1 for x in range(len(images_ripe))]
X_test=images_raw+images_ripe

#convert from list to np.array
X_test=np.array(X_test)
y_test=np.array(y_test)

#Shuffle
X_shuffled_test, X_shuffled_test = shuffle(X_test, y_test)

In [24]:
print(f'X_train shape: {X_test.shape}')
print(f'y_train shape: {y_test.shape}')

X_train shape: (231, 640, 640, 3)
y_train shape: (231,)


In [23]:
# Evaluating on the validation set
test_loss, test_accuracy = CNN_model.evaluate(X_test, y_test)
print("Test accuracy: ", test_accuracy)
print("Test loss: ", test_loss)

Test accuracy:  1.0
Test loss:  0.007888706400990486


In [25]:
# Save the entire model
CNN_model.save('CNN_model.h5')

----