Add imports

In [1]:
import os
from ast import literal_eval

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import tensorflow as tf
from sklearn import metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.applications.vgg16 import VGG16

Set up the GPU

In [2]:
# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
try:
    # The memory-growth setting has to be identical across all GPUs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        # Report how many physical and logical GPU devices are available.
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
    # Memory growth must be configured before the GPUs are initialized;
    # if it is too late, just report the error instead of aborting.
    print(e)

1 Physical GPUs, 1 Logical GPUs


2022-05-25 20:14:34.826532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-25 20:14:34.853961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-25 20:14:34.854128: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-25 20:14:34.875601: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Set paths to data

In [3]:
# Locations of the training data, relative to the notebook's working directory.
# CSV file with one row per image: the image file name and its labels.
TRAIN_CSV = '../data/train.csv'
# Directory that holds the image files referenced by the CSV.
TRAIN_IMAGES_DIR = '../data/train_images/'

Load the CSV using pandas

In [4]:
# Read every column as a string, then turn the whitespace-separated label
# string of each row into a list of label names (an image can carry several).
traindf = (
    pd.read_csv(TRAIN_CSV, dtype=str)
    .assign(labels=lambda frame: frame['labels'].str.split())
)

# Frequency of each distinct label combination.
traindf['labels'].value_counts()

[scab]                                 4826
[healthy]                              4624
[frog_eye_leaf_spot]                   3181
[rust]                                 1860
[complex]                              1602
[powdery_mildew]                       1184
[scab, frog_eye_leaf_spot]              686
[scab, frog_eye_leaf_spot, complex]     200
[frog_eye_leaf_spot, complex]           165
[rust, frog_eye_leaf_spot]              120
[rust, complex]                          97
[powdery_mildew, complex]                87
Name: labels, dtype: int64

In [5]:
# Display the loaded frame (pandas truncates the rendering to the first and
# last rows); the `labels` column holds a list of label names per image.
traindf

Unnamed: 0,image,labels
0,800113bb65efe69e.jpg,[healthy]
1,8002cb321f8bfcdf.jpg,"[scab, frog_eye_leaf_spot, complex]"
2,80070f7fb5e2ccaa.jpg,[scab]
3,80077517781fb94f.jpg,[scab]
4,800cbf0ff87721f8.jpg,[complex]
...,...,...
18627,fffb900a92289a33.jpg,[healthy]
18628,fffc488fa4c0e80c.jpg,[scab]
18629,fffc94e092a59086.jpg,[rust]
18630,fffe105cf6808292.jpg,"[scab, frog_eye_leaf_spot]"


Load images using ImageDataGenerator, copied from [StackOverflow](https://stackoverflow.com/questions/59464409/loading-images-in-keras-for-cnn-from-directory-but-label-in-csv-file)

In [6]:
# Input size expected by the model: 224x224 RGB.
shape = (224, 224, 3)
BATCH_SIZE = 16
VALIDATION_SPLIT = 0.2

# Training pipeline: scale pixel values by 1/255 and apply random geometric
# augmentation (shifts, rotation, zoom).
datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255.,
    validation_split=VALIDATION_SPLIT,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=40,
    zoom_range=0.2,
)

# Validation pipeline: same rescaling but NO augmentation, so validation
# metrics are measured on unmodified images. It uses the same
# `validation_split` on the same dataframe, so the 'training' and
# 'validation' subsets remain complementary slices of `traindf`.
val_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255.,
    validation_split=VALIDATION_SPLIT,
)


def make_generator(image_datagen, subset):
    """Build a batch iterator over `traindf` for one subset.

    Args:
        image_datagen: the `ImageDataGenerator` that defines preprocessing.
        subset: 'training' or 'validation' (as split by `validation_split`).

    Returns:
        An iterator yielding `(images, labels)` batches; because the
        `labels` column holds lists, `class_mode='categorical'` produces
        multi-hot label vectors.
    """
    # NOTE: `featurewise_std_normalization` was previously passed here, but it
    # is an `ImageDataGenerator` constructor option, not a
    # `flow_from_dataframe` argument, so it was silently ignored.
    return image_datagen.flow_from_dataframe(
        dataframe=traindf,
        directory=TRAIN_IMAGES_DIR,
        x_col='image',
        y_col='labels',
        subset=subset,
        batch_size=BATCH_SIZE,
        seed=1,
        shuffle=True,
        class_mode='categorical',
        target_size=shape[:2],
    )


train_generator = make_generator(datagen, 'training')
validation_generator = make_generator(val_datagen, 'validation')

Found 14906 validated image filenames belonging to 6 classes.
Found 3726 validated image filenames belonging to 6 classes.


In [7]:
# Draw a single (images, labels) batch from each generator, training first,
# for inspection and for the quick prediction check further down.
(train_images, train_labels), (val_images, val_labels) = (
    next(iter(batches)) for batches in (train_generator, validation_generator)
)

In [8]:
# Number of distinct labels, taken from the generator's class mapping rather
# than hard-coded, so the model's output size always matches the data
# (6 for the current dataset).
classes_amount = len(train_generator.class_indices)

### Third model: transfer learning with a frozen VGG16 base

In [15]:
# Pretrained VGG16 convolutional base (ImageNet weights) without its
# fully-connected classifier head.
# NOTE(review): the generators rescale pixels by 1/255, whereas the ImageNet
# VGG16 weights are normally paired with `vgg16.preprocess_input` - confirm
# which preprocessing is intended.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=shape)

# Freeze the base so that only the new head below is trained.
base_model.trainable = False

model3 = keras.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.2),
    # An image can carry several labels at once (the targets are multi-hot),
    # so each class gets an independent sigmoid instead of a shared softmax.
    layers.Dense(classes_amount, kernel_initializer='uniform', activation='sigmoid'),
])

# Binary cross-entropy scores every label independently, matching the
# multi-hot targets. With this loss Keras resolves 'accuracy' to per-label
# binary accuracy.
# `learning_rate` replaces the deprecated `lr` argument.
model3.compile(loss='binary_crossentropy',
               optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),
               metrics=['accuracy'])

model3.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten_3 (Flatten)         (None, 25088)             0         
                                                                 
 dense_6 (Dense)             (None, 1024)              25691136  
                                                                 
 dropout_3 (Dropout)         (None, 1024)              0         
                                                                 
 dense_7 (Dense)             (None, 6)                 6150      
                                                                 
Total params: 40,411,974
Trainable params: 25,697,286
Non-trainable params: 14,714,688
_________________________________________________________________


  super(RMSprop, self).__init__(name, **kwargs)


In [17]:
# Train the new head. `steps_per_epoch=200` limits every epoch to 200 batches
# from the training generator rather than a full pass over it; validation
# runs over `validation_generator` after each epoch.
history3 = model3.fit(
    train_generator,
    epochs=5,
    steps_per_epoch=200,
    validation_data=validation_generator,
    verbose=1,
)

# Persist the trained model (path is relative to the working directory).
model3.save("model/m3")

Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [None]:
print("Evaluate")

# Score the model on the validation generator and pair every returned value
# with the name of the metric it belongs to.
result = model3.evaluate(validation_generator)
{metric_name: value for metric_name, value in zip(model3.metrics_names, result)}

In [None]:
# Predict on the single validation batch drawn earlier (a quick sanity check,
# not a full validation-set evaluation).
model_predictions3 = model3.predict(val_images)

# Class names ordered by the index the generator assigned to them.
class_indices = validation_generator.class_indices
class_names = sorted(class_indices, key=class_indices.get)

# Reduce every row to one class index. For multi-label rows argmax keeps
# only the first set label, so this is a single-label approximation.
y_true = np.argmax(val_labels, axis=1)
y_pred3 = np.argmax(model_predictions3, axis=1)

# Pass the full label list explicitly: a small batch rarely contains every
# class, and without `labels` the matrix would shrink to the classes present
# and its axes would no longer correspond to the class indices.
matrix = ConfusionMatrixDisplay(
    confusion_matrix(y_true, y_pred3, labels=np.arange(len(class_names))),
    display_labels=class_names,
)

matrix.plot(xticks_rotation=45);