In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

from keras.layers import Input,Flatten,Dense,Conv2D,BatchNormalization,LeakyReLU,Dropout,MaxPooling2D,Activation
from keras.models import Model
from keras.utils import  to_categorical
from keras.optimizers import Adam,SGD
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [11]:
# Extract the dataset .zip archive into a working folder

import zipfile
import os

def extract_zip(file_path, output_dir):
    """Unpack every member of a zip archive into a directory.

    Args:
        file_path: Path of the .zip archive to read.
        output_dir: Directory that receives the extracted files. It is
            created (including missing parents) if it does not exist yet.
    """
    # Create the target first so extraction never fails on a missing folder.
    os.makedirs(output_dir, exist_ok=True)

    with zipfile.ZipFile(file_path, mode='r') as archive:
        archive.extractall(path=output_dir)

# Archive location and extraction target (adjust both for your machine).
file_path = 'D:/Project_1/cassava-leaf-disease-classification.zip'  # path of the .zip file to extract
output_dir = 'D:/Project_1/cassava'  # folder the archive is unpacked into

extract_zip(file_path=file_path, output_dir=output_dir)


In [5]:
# Read train.csv (image file names and their labels) from the extracted dataset folder.
train_df = pd.read_csv('D:/Project_1/cassava/train.csv')

In [6]:
# Show the first rows to check which columns were loaded.
train_df.head()

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


In [7]:
# Folder holding the original training images, and the file names found in it.
train_imgs = 'D:/Project_1/cassava/train_images'
img_files = os.listdir(train_imgs)

# Root folder for the per-class copies of the images.
diseases = 'D:/Project_1/cassava/diseases'

# One sub-folder per class underneath the root above.
healthy = 'D:/Project_1/cassava/diseases/healthy'
disease_0 = 'D:/Project_1/cassava/diseases/disease_0'
disease_1 = 'D:/Project_1/cassava/diseases/disease_1'
disease_2 = 'D:/Project_1/cassava/diseases/disease_2'
disease_3 = 'D:/Project_1/cassava/diseases/disease_3'

In [8]:
# Echo the directory listing gathered by os.listdir above.
# NOTE(review): this displays every file name; consider img_files[:10] to keep the output short.
img_files

['1000015157.jpg',
 '1000201771.jpg',
 '100042118.jpg',
 '1000723321.jpg',
 '1000812911.jpg',
 '1000837476.jpg',
 '1000910826.jpg',
 '1001320321.jpg',
 '1001723730.jpg',
 '1001742395.jpg',
 '1001749118.jpg',
 '100204014.jpg',
 '1002088496.jpg',
 '1002255315.jpg',
 '1002394761.jpg',
 '1003218714.jpg',
 '1003298598.jpg',
 '1003442061.jpg',
 '1003888281.jpg',
 '1003987001.jpg',
 '1004105566.jpg',
 '1004163647.jpg',
 '1004389140.jpg',
 '1004672608.jpg',
 '100472565.jpg',
 '1004826518.jpg',
 '1004881261.jpg',
 '1005138819.jpg',
 '1005200906.jpg',
 '100533489.jpg',
 '100560400.jpg',
 '1005695738.jpg',
 '1005739807.jpg',
 '100609661.jpg',
 '1007196516.jpg',
 '1007246985.jpg',
 '100731318.jpg',
 '1007533812.jpg',
 '1007700625.jpg',
 '1007891044.jpg',
 '1008126487.jpg',
 '1008142548.jpg',
 '1008244905.jpg',
 '1008284502.jpg',
 '1008532311.jpg',
 '1009037539.jpg',
 '1009049118.jpg',
 '1009126931.jpg',
 '1009148537.jpg',
 '1009268848.jpg',
 '1009322597.jpg',
 '1009361983.jpg',
 '1009431532.jpg',


In [10]:
import shutil

In [11]:
# Sort a copy of every training image into the folder of its class.
#
# Fixes relative to the previous version of this cell:
#   * shutil.copy sat inside the `elif labels == 4` branch, so only images
#     with label 4 were ever copied; now every labelled image is copied.
#   * the destination folders are created first (shutil.copy does not create
#     missing parent directories).
#   * labels are looked up in a dict built once instead of filtering the whole
#     DataFrame for each image.
#   * a file without a row in train.csv is reported instead of raising IndexError.

# Destination folder for each numeric class label.
label_dirs = {
    0: disease_0,
    1: disease_1,
    2: disease_2,
    3: disease_3,
    4: healthy,
}

for class_dir in label_dirs.values():
    os.makedirs(class_dir, exist_ok=True)

# image_id -> int label. The int cast keeps this cell working even when it is
# re-run after the label column has been converted to strings.
label_by_image = dict(zip(train_df['image_id'],
                          train_df['label'].astype(int).tolist()))

skipped = []
for i in img_files:
    labels = label_by_image.get(i)
    if labels not in label_dirs:
        # No row in train.csv (or an unexpected label): nowhere to put this file.
        skipped.append(i)
        continue

    src_file = os.path.join(train_imgs, i)
    dst_file = os.path.join(label_dirs[labels], i)

    # Copy from src_file to dst_file.
    shutil.copy(src_file, dst_file)

if skipped:
    print(f"Skipped {len(skipped)} file(s) without a known label, e.g. {skipped[:5]}")

In [12]:
# Store the labels as strings before the generators are built from train_df
# with class_mode="sparse" - presumably required by flow_from_dataframe; confirm in the Keras docs.
train_df['label'] = train_df['label'].astype(str)

In [13]:
# Augmentation settings for the training images. validation_split=0.2 reserves
# a fifth of the rows for validation, so this generator serves the remaining part.
# NOTE(review): preprocessing_function is None, so no model-specific input
# preprocessing is applied here - confirm that is intended for the pretrained backbone.
train_augmenter = ImageDataGenerator(
    validation_split=0.2,
    preprocessing_function=None,
    rotation_range=5,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='constant',  # pixels exposed by a transform are filled with cval
    cval=0.2,
)

# Batches of 32 images resized to 256x256, with integer ("sparse") class targets.
train_generator = train_augmenter.flow_from_dataframe(
    dataframe=train_df,
    directory=train_imgs,
    x_col="image_id",
    y_col="label",
    subset="training",
    target_size=(256, 256),
    batch_size=32,
    class_mode="sparse",
)

Found 17118 validated image filenames belonging to 5 classes.


In [14]:
# Validation images are only split off (same 0.2 fraction as for training);
# no augmentation options are set for them.
validation_source = ImageDataGenerator(validation_split=0.2)

# Same frame, folder, image size, batch size and target encoding as the
# training generator, but reading the "validation" subset.
validation_generator = validation_source.flow_from_dataframe(
    dataframe=train_df,
    directory=train_imgs,
    x_col="image_id",
    y_col="label",
    subset="validation",
    target_size=(256, 256),
    batch_size=32,
    class_mode="sparse",
)

Found 4279 validated image filenames belonging to 5 classes.


In [15]:
# Print the mapping from label string to the integer class index used by the generator.
print(train_generator.class_indices)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4}


In [22]:
# Input layer
input_layer = Input(shape=(256, 256, 3))

# ResNet50 base model
base_model = ResNet50(include_top=False,
                      weights="imagenet",
                      input_tensor=input_layer,
                      pooling='avg')  # Use 'avg' pooling to get a 2D feature vector

# Adding custom layers on top of the ResNet50 base model
x = base_model.output
x = BatchNormalization()(x)
x = LeakyReLU()(x)

# Flatten the output
x = Flatten()(x)

# Fully connected layers
x = Dense(256)(x)
x = BatchNormalization()(x)
x = LeakyReLU()(x)
x = Dropout(rate=0.5)(x)

x = Dense(5)(x)
output_layer = Activation('softmax')(x)

# Define the model
model = Model(inputs=input_layer, outputs=output_layer)

# Model summary
model.summary()

In [23]:
# Adam optimizer with a learning rate of 5e-4.
opt = Adam(learning_rate=0.0005)

# Both data generators are created with class_mode="sparse", i.e. they yield
# integer class ids rather than one-hot vectors. The matching loss is
# therefore sparse_categorical_crossentropy; plain categorical_crossentropy
# expects one-hot targets and does not fit these labels.
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [26]:
# Define checkpoint 
checkpoint = ModelCheckpoint(filepath='D:/Project_1/cassava/saved_models/weights_best.hdf5',
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')

callbacks_list = [checkpoint]

ValueError: The filepath provided must end in `.keras` (Keras model format). Received: filepath=D:/Project_1/cassava/saved_models/weights_best.hdf5