In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2 as cv #data processing
from sklearn.model_selection import train_test_split #splits data into testing and training datasets
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras import models, layers #used to make CNN model and provide layers
from tensorflow.keras.optimizers import RMSprop #provides root mean squared propagation
import tensorflow.keras.losses
import tensorflow.keras.metrics
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os 
# Recursively list every file that Kaggle mounted under the input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

This code is heavily influenced by the work of Arthur Arnx. His tutorial for preparing your own dataset, along with the original source code, can be found here: [Towards Data Science](https://towardsdatascience.com/all-the-steps-to-build-your-first-image-classifier-with-code-cf244b015799)

None of this would have been accomplished without the guidance of Dr. Tomitaka.

The architecture of the CNN is based on the LeNet-5 architecture. 

Prepare data:

In [None]:


# Dataset selection -- keep exactly one DATADIR assignment active:
#   goldnps   : non-augmented data
#   goldnps2  : non-augmented data with the poorest quality photos removed
#   midjaunps : data augmented with AI-generated images
#DATADIR = "/kaggle/input/goldnps2/"
#DATADIR = "/kaggle/input/goldnps/"
DATADIR = "/kaggle/input/midjaunps/"

# Categories the CNN will classify by: "0", "10", ..., "100".
# Each entry is also the name of the sub-folder of DATADIR holding that class.
CATEGORIES = [str(value) for value in range(0, 101, 10)]

# Side length, in pixels, that every image is resized to before training
IMG_SIZE = 100

# Containers for the original images/labels and for the augmented copies
FEATURES, LABELS = [], []
AUGMENTED_FEATURES, AUGMENTED_LABELS = [], []

# FOR DATA AUGMENTATION: image data generator that randomly rotates, shifts,
# shears, zooms and horizontally flips the images it is given
gen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    zoom_range=0.1,
    horizontal_flip=True,
)


# Load every image of every category folder into FEATURES / LABELS.
# The label stored for an image is the index of its folder name in CATEGORIES.
for classNum, c in enumerate(CATEGORIES):  # access all folders
    dataPath = os.path.join(DATADIR, c)  # create a path for each folder
    # sorted() gives a deterministic load order; os.listdir order is arbitrary
    for fileName in sorted(os.listdir(dataPath)):   # access all images in each folder
        imgPath = os.path.join(dataPath, fileName)
        #preprocessing
        #read image, in color
        img = cv.imread(imgPath, cv.IMREAD_COLOR)
        if img is None:
            # cv.imread returns None (it does not raise) when a file cannot be
            # decoded; without this guard cv.resize would crash on the first
            # stray non-image file in the folder.
            print(f"Skipping unreadable file: {imgPath}")
            continue
        #Ensure every image is the same size
        img = cv.resize(img, (IMG_SIZE, IMG_SIZE))
        #convert from BGR to YCrCb color space
        img = cv.cvtColor(img, cv.COLOR_BGR2YCrCb)

        #RGB performed terrible compared to YCrCb
        #img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        #normalize pixel values to the range [0, 1]
        img = img.astype(np.float32)/255
        #add image & label to arrays
        FEATURES.append(img)
        LABELS.append(classNum)

        #Generate Augmented --- to break
        #img = img.reshape((1,) + img.shape)
         # Apply augmentation to generate additional images
        #for batch in gen.flow(img, batch_size=1):

            # Incrementally append augmented images to the arrays
            #aug_img = batch[0].astype(np.float32)

            #AUGMENTED_FEATURES.append(aug_img)
            #AUGMENTED_LABELS.append(classNum)

            # Control the number of augmented images per original image
            #if len(AUGMENTED_FEATURES) % 6 == 0:  # Change 6 to control augmentation factor
                #break  # Stop after generating 6 augmented images per original image

#FOR UNAUGMENTED DATA
# Stack the per-image arrays/labels collected in FEATURES and LABELS
X = np.array(FEATURES)
y = np.array(LABELS)

#FOR AUGMENTED DATA
#put pictures into numpy array
#X_OG = np.array(FEATURES) #original image data
#y_OG = np.array(LABELS)   #original labels
#X_augmented = np.array(AUGMENTED_FEATURES)#augmented images
#y_augmented = np.array(AUGMENTED_LABELS) #augmented images labels
#X =np.concatenate((X_OG, X_augmented))
#y =np.concatenate((y_OG, y_augmented))

# Fixed seed so the train/test partition (and therefore every reported
# metric) is the same on each "Restart & Run All"; change it to resample.
SPLIT_SEED = 42

#split data into testing(20%) and training(80%) datasets
FEATURES_train, FEATURES_test, LABELS_train, LABELS_test = train_test_split(
    X, y, train_size=.8, random_state=SPLIT_SEED)




Data Augmentation

In [None]:
#Will be used to plot generated images
def plotImages(images_arr):
    """Show up to ten images side by side in a single row.

    Parameters
    ----------
    images_arr : iterable
        Images in any form accepted by ``Axes.imshow``. Only the first ten
        are drawn, because the figure has ten subplots and ``zip`` stops at
        the shorter of the two sequences.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
        # Hide ticks and frame. The original called ax.axes('off'), but
        # `axes` is an attribute, not a method, so that raised TypeError.
        ax.axis('off')
    plt.tight_layout()
    plt.show()
    


Convolutional Neural Network Model based on early LeNet-5 architecture. 

In [None]:
# LeNet-style CNN, assembled layer by layer:
# two convolution + average-pooling stages feeding a dense classifier.
model = models.Sequential()
model.add(layers.Conv2D(6, (5, 5), activation='relu',
                        input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Conv2D(16, (5, 5), activation='relu'))
model.add(layers.AveragePooling2D(pool_size=(2, 2), strides=2))
model.add(layers.Flatten())
model.add(layers.Dense(400, activation='relu'))
model.add(layers.Dense(180, activation='relu'))
# One softmax output per entry in CATEGORIES
model.add(layers.Dense(len(CATEGORIES), activation='softmax'))

# Root Mean Square Propagation optimizer: learning rate and decay factor (rho)
rmsprop = RMSprop(learning_rate=0.001, rho=0.9)

# Compile with the optimizer, loss function and metric to track.
# The sparse loss is used because the labels are integer class indices.
model.compile(
    optimizer=rmsprop,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print a layer-by-layer summary of the model
model.summary()

# Number of passes over the training data
epochs = 15

# Train the model; 10% of the training data is held out for validation
history = model.fit(
    x=FEATURES_train,
    y=LABELS_train,
    batch_size=10,
    epochs=epochs,
    validation_split=.1,
    shuffle=True,
)

# Chart training accuracy/loss against validation accuracy/loss per epoch
# to catch things like overfitting.
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs_range = range(epochs)
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

# Left panel: accuracy curves
ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

# Right panel: loss curves
ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

plt.show()

# Score the trained model on the held-out test split and
# report the resulting accuracy as a percentage.
test_loss, test_acc = model.evaluate(x=FEATURES_test, y=LABELS_test)
print(f'Test accuracy: {test_acc * 100:.2f}%')



Evaluate model:

In [None]:



#Get true labels & predictions for classification report/confusion matrix
# model.predict returns one probability per category for every test image;
# argmax over the last axis turns that into the predicted class index.
pred = model.predict(FEATURES_test, batch_size = 10)
pred = np.argmax(pred, axis = -1)

# LABELS_test already holds integer class indices, so no argmax is needed
# (it would only be required if the labels were one-hot encoded).
#labels = np.argmax(LABELS_test, axis = -1)
labels = LABELS_test

# Pass every class index explicitly so that a category missing from the test
# split still gets a row, and so rows are labelled with the category names
# from CATEGORIES instead of bare indices.
class_indices = list(range(len(CATEGORIES)))

# Report precision, recall and F1-score for each category
print("Classification Report:")
print(classification_report(labels, pred,
                            labels=class_indices,
                            target_names=CATEGORIES))

# Generate confusion matrix (rows = true category, columns = predicted).
# The result is stored under its own name so the imported confusion_matrix
# function is not overwritten when this cell is re-run.
conf_matrix = confusion_matrix(labels, pred, labels=class_indices)

print("Confusion Matrix:")
print(conf_matrix)

