In [None]:
# import required libraries
import os
import random
from shutil import copyfile
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

In [None]:
# mount google drive
# Exposes the Drive contents under /content/drive so that the dataset and output
# paths configured in the following cells resolve through the regular filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset locations on the mounted Drive:
#   train_data_dir - source directory holding one sub-directory of images per class
#   base_dir       - root under which the train / validation / test directories are created
train_data_dir, base_dir = (
    '/content/drive/MyDrive/train_images',
    '/content/drive/MyDrive/Plant Disease Classification/data_splits',
)

# The shuffle / split section of the notebook starts here

In [None]:
# Per-class file counts, printed one "<class> : <count>" line per class directory.
#
# NOTE(review): `subdir_len` was not defined anywhere in the saved notebook, so this
# cell raised NameError on a fresh kernel. The list of single-entry dicts built here
# is reconstructed from how the loop below consumes it ({name: value} mappings);
# confirm that it matches what the original (deleted) cell produced.
subdir_len = [
    {name: len(os.listdir(os.path.join(train_data_dir, name)))}
    for name in sorted(os.listdir(train_data_dir))
    if os.path.isdir(os.path.join(train_data_dir, name))
]

for subdir in subdir_len:
    for key, value in subdir.items():
        print(f'{key} : {value}')

In [None]:
# create the train, validation, and test directories
# Each directory is created independently with exist_ok=True: with a single shared
# try/except, an already-existing `train` directory raised on the first call and the
# `validation` and `test` directories were then silently never created.
for split_name in ('train', 'validation', 'test'):
    os.makedirs(os.path.join(base_dir, split_name), exist_ok=True)

In [None]:
# Fraction of each class assigned to the train / validation / test splits.
# train_size and validation_size drive the slicing in the copy loop; the test
# split receives whatever remains after those two slices.
train_size, validation_size, test_size = 0.6, 0.2, 0.2

In [None]:
# Group the class directories by plant.
# Keys are the part of each directory name before the first '___'; values are the
# lists of full directory names that share that prefix. Plain files are skipped.
train_subdirs = {}
for entry in os.listdir(train_data_dir):
    if not os.path.isdir(os.path.join(train_data_dir, entry)):
        continue
    plant, _, _ = entry.partition('___')
    train_subdirs.setdefault(plant, []).append(entry)

In [None]:
# Count the files in every class directory and print a per-class and overall total.
total_image_size = 0
for subdir in os.listdir(train_data_dir):
    class_dir = os.path.join(train_data_dir, subdir)
    # Skip stray files at the top level (same filter used when building
    # train_subdirs); listing a non-directory would raise NotADirectoryError.
    if not os.path.isdir(class_dir):
        continue
    # List each directory once and reuse the count for both the total and the printout.
    image_count = len(os.listdir(class_dir))
    total_image_size += image_count
    print(f'{subdir}: {image_count} images')

print(total_image_size)

In [None]:
# Display the plant -> class-directory mapping as this cell's output.
train_subdirs

In [None]:
# Create base_dir/<split>/<class> for every split and every class directory.
# The loop variable `folder` now selects the split (previously it was unused, so all
# three directories were attempted three times over), and exist_ok=True replaces the
# shared try/except that skipped `validation` and `test` whenever `train/<class>`
# already existed.
for folder in ['train', 'validation', 'test']:
    for diseases in train_subdirs.values():
        for disease in diseases:
            os.makedirs(os.path.join(base_dir, folder, disease), exist_ok=True)

In [None]:
# State shared with the copy loop:
#   total_cp - running count of files copied
#   index    - 1-based counter used in the progress messages
#   log      - list that is later turned into a DataFrame and written to CSV
total_cp, index, log = 0, 1, []

In [None]:
# Split every class directory into train / validation / test and copy the files to
# base_dir/<split>/<class>/.
#
# Changes relative to the previous version of this cell:
#   * `if !flag:` and the colon-less `if ... in os.listdir(...)` lines were not valid
#     Python; the cell could not run at all.
#   * The "already copied" check compared a full destination path against bare file
#     names, so it never matched. Existing file names are now collected once per
#     destination directory and looked up in a set.
#   * The "fully copied" check compared the destination size with the size of the
#     whole source directory; the per-file check above makes it unnecessary.
#   * The PyDrive authentication / folder creation inside the loop is gone: the Drive
#     is accessed through the mounted filesystem, so os.makedirs is sufficient. The
#     old code also hid every failure behind a bare `except: pass` and rebound the
#     names `drive` and `folder`.
#   * The shuffle is now deterministic (sorted listing + fixed seed). With an unseeded
#     shuffle, re-running the cell to resume an interrupted copy produced a different
#     split and leaked the same image into more than one of train/validation/test.
#     NOTE(review): destination folders populated by an earlier unseeded run should
#     be emptied before relying on this.
#   * `log` receives one record per class so the exported CSV is no longer empty.

SPLIT_SEED = 42  # fixed so that re-running the cell reproduces the same split


def _copy_missing(filenames, src_dir, dst_dir):
    """Copy `filenames` from `src_dir` into `dst_dir`, skipping names already there.

    Creates `dst_dir` when it does not exist.

    Returns:
        The number of files copied by this call.
    """
    os.makedirs(dst_dir, exist_ok=True)
    # One directory listing per destination instead of one per file.
    already_present = set(os.listdir(dst_dir))
    copied = 0
    for filename in filenames:
        if filename in already_present:
            continue
        copyfile(os.path.join(src_dir, filename), os.path.join(dst_dir, filename))
        copied += 1
    return copied


for subdirs in train_subdirs.values():
    for subdir in subdirs:
        print(f'{index}-Copying contents of {subdir} directory')
        subdir_path = os.path.join(train_data_dir, subdir)

        # Keep only regular, non-empty files. Sorting gives the seeded shuffle a
        # stable starting order regardless of the order os.listdir returns.
        file_list = []
        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
                file_list.append(filename)

        # shuffle the list of files (reproducibly)
        random.Random(SPLIT_SEED).shuffle(file_list)

        # slice boundaries: [0, train_len) -> train, [train_len, val_len) -> validation,
        # [val_len, end) -> test
        train_len = int(len(file_list) * train_size)
        val_len = train_len + int(len(file_list) * validation_size)
        print(f'train: {train_len}, val: {val_len-train_len}')

        split_files = {
            'train': file_list[:train_len],
            'validation': file_list[train_len:val_len],
            'test': file_list[val_len:],
        }

        # copy the images to the appropriate directories
        copied_for_class = 0
        for folder, files in split_files.items():
            destination = os.path.join(base_dir, folder, subdir)
            copied_for_class += _copy_missing(files, subdir_path, destination)
        total_cp += copied_for_class

        log.append({
            'class': subdir,
            'train': len(split_files['train']),
            'validation': len(split_files['validation']),
            'test': len(split_files['test']),
            'copied': copied_for_class,
        })

        print(f'Finished copying contents of {subdir} directory')
        index += 1

print('Data split completed successfully.')

In [None]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pickle

In [None]:
# Root directory of each data split, passed to the generators as `directory`
train_dir, val_dir, test_dir = (
    '/content/drive/My Drive/Plant Disease Classification/data_splits/train/',
    '/content/drive/My Drive/Plant Disease Classification/data_splits/validation/',
    '/content/drive/My Drive/Plant Disease Classification/data_splits/test/',
)

In [None]:
# Image generators for the three splits.
# All of them multiply pixel values by 1/255; only the training generator is also
# configured with random augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(**{
    'rescale': 1.0/255.0,
    'rotation_range': 50,
    'width_shift_range': 0.25,
    'height_shift_range': 0.25,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
})

# validation and test use the same rescale-only configuration, one instance each
val_datagen, test_datagen = (ImageDataGenerator(rescale=1.0/255.0) for _ in range(2))

In [None]:
# Directory iterators for the three splits; every split is read with the same
# batch size, label encoding and target image size.
_flow_options = {
    'batch_size': 32,
    'class_mode': 'categorical',
    'target_size': (256, 256),
}

train_gen = train_datagen.flow_from_directory(directory=train_dir, **_flow_options)
val_gen = val_datagen.flow_from_directory(directory=val_dir, **_flow_options)
test_gen = test_datagen.flow_from_directory(directory=test_dir, **_flow_options)

In [None]:
import pandas as pd

# Tabulate the entries accumulated in `log` so they can be exported
df = pd.DataFrame(data=log)

In [None]:
# Write the log table to log.csv in the current working directory
df.to_csv(path_or_buf='log.csv')