<a href="https://colab.research.google.com/github/Brymer-Meneses/Plant-Doctor/blob/main/model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Prerequisites**

In [69]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Dropout, Flatten, \
                                    GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tqdm
import shutil
import random
import math
import os 

from os.path import join
from zipfile import ZipFile


In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

# **Download the Datasets**

In [2]:
raw = {
        'raw_main': '/content/raw',
        'raw_ds1': '/content/raw/ds1',
        'raw_ds2': '/content/raw/ds2',
        'raw_ds3' : '/content/raw/ds3',
        'raw_all' : '/content/raw/all'
    }

main = {
        'main' : '/content/datasets',
        'apple' : '/content/datasets/apple',
        'cherry' : '/content/datasets/cherry',
        'citrus' : '/content/datasets/citrus',
        'corn' : '/content/datasets/corn',
        'grape' : '/content/datasets/grape',
        'peach' : '/content/datasets/peach',
        'pepper' : '/content/datasets/pepper',
        'potato' : '/content/datasets/potato',
        'rice': '/content/datasets/rice',
        'strawberry': '/content/datasets/strawberry',
        'tomato': '/content/datasets/tomato'
    }


In [3]:
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/MyDrive/External Datasets/Kaggle"


In [4]:
# Make Directories for datasets
for dir in raw:
  try:
    os.mkdir(raw[dir])
  except Exception as e:
    print(e)

for dir in main:
  try:
    os.mkdir(main[dir])
  except Exception as e:
    print(e)



In [None]:
os.chdir(raw['raw_ds1'])
!wget https://data.mendeley.com/public-files/datasets/tywbtsjrjv/files/d5652a28-c1d8-4b76-97f3-72fb80f94efc/file_downloaded
os.rename('/content/raw/ds1/file_downloaded', '/content/raw/ds1/plantvillage-dataset.zip')

os.chdir(raw['raw_ds2'])
!kaggle datasets download -d minhhuy2810/rice-diseases-image-dataset

os.chdir(raw['raw_ds3'])
!wget https://md-datasets-cache-zipfiles-prod.s3.eu-west-1.amazonaws.com/3f83gxmv57-2.zip

In [6]:
# Extract the first dataset

os.chdir('/content/raw/ds1')
zip_ds1_dir ='/content/raw/ds1/plantvillage-dataset.zip'
zip_ds1 = ZipFile(zip_ds1_dir, 'r')
zip_ds1.extractall()

os.remove('/content/raw/ds1/plantvillage-dataset.zip')

# Extract the second dataset

os.chdir('/content/raw/ds2')
zip_ds2_dir = '/content/raw/ds2/rice-diseases-image-dataset.zip'
zip_ds2 = ZipFile(zip_ds2_dir, 'r')

for file in zip_ds2.namelist():
  if file.startswith('LabelledRice/'):
    zip_ds2.extract(file, '/content/raw/ds2')

os.remove('/content/raw/ds2/rice-diseases-image-dataset.zip')

# Extract the third dataset

os.chdir('/content/raw/ds3')
zip_ds3_dir = '/content/raw/ds3/3f83gxmv57-2.zip'
zip_ds3 = ZipFile(zip_ds3_dir, 'r')
zip_ds3.extractall()

zip_ds3_dir1 = '/content/raw/ds3/Citrus Plant Dataset/Citrus.zip'
zip_ds3_1 = ZipFile(zip_ds3_dir1, 'r')
zip_ds3_1.extractall()

In [7]:
ds2_dir = '/content/raw/ds2/LabelledRice/Labelled'
for folder in os.listdir(ds2_dir):

  folder_path = join(ds2_dir, folder)
  folder_new = join(ds2_dir, f'Rice__{folder}')

  os.rename(folder_path, folder_new)


In [8]:
ds3_dir = '/content/raw/ds3/Citrus/Leaves'

for folder in os.listdir(ds3_dir):
  folder_path = join(ds3_dir, folder)
  folder_new = join(ds3_dir, f'Citrus__{folder}')

  os.rename(folder_path, folder_new)

In [9]:
ds1_dir = '/content/raw/ds1/Plant_leave_diseases_dataset_without_augmentation'
ds2_dir = '/content/raw/ds2/LabelledRice/Labelled'
ds3_dir = '/content/raw/ds3/Citrus/Leaves'

ds_dirs = [ds1_dir, ds2_dir, ds3_dir]

for dir in ds_dirs:
  for folder in os.listdir(dir):
    folder_source = join(dir, folder)
    folder_des = join(raw['raw_all'], folder)

    shutil.move(folder_source, folder_des)

In [10]:
# Delete unnecessary folders 

del_paths = [
             '/content/raw/all/Background_without_leaves',
             '/content/raw/all/Blueberry___healthy',
             '/content/raw/all/Orange___Haunglongbing_(Citrus_greening)',
             '/content/raw/all/Raspberry___healthy',
             '/content/raw/all/Soybean___healthy',
             '/content/raw/all/Squash___Powdery_mildew',
          
]

for path in del_paths:
  try: shutil.rmtree(path)
  except Exception as e:
    print(e)

In [13]:
raw_all = raw['raw_all']

for plant in os.listdir(main['main']):
  for folder in os.listdir(raw_all):
    if plant in folder.lower():
      plant_source = join(raw_all, folder)
      plant_des = main[plant]
      try:
        shutil.move(plant_source, plant_des)
      except Exception as e:
        print(e)

In [14]:
shutil.rmtree('/content/raw')

# **Load the dataset in a dataframe**

In [15]:
ds_main = main['main']
dataset = pd.DataFrame()

for plant in os.listdir(ds_main):
  plant_folder = join(ds_main, plant)

  for classification in os.listdir(plant_folder):
    classification_folder = join(plant_folder, classification)

    dataset[classification] = os.listdir(classification_folder)
    


In [77]:
ds_main = main['main']

In [62]:
def gen_df(dir):
  temp = []
  for root, sub_dir, files in os.walk(dir):
    if files == []: 
      continue

    new_files = []
    for file in files:
      file = root + file
      new_files.append(file)

    plant_class = root.split('/')[4] 
    temp_df = pd.DataFrame()
    temp_df['filename'] = new_files
    temp_df['class'] = [plant_class for file in new_files]
    temp.append(temp_df)

  df = pd.concat(temp, axis = 0, join = 'outer')

  # Shuffles the dataframe
  df = df.sample(frac=1).reset_index(drop=True) 
  return df
  
    


In [71]:
def gen_df(dir):
  temp = []
  for root, sub_dir, files in os.walk(dir):
    if files == []: 
      continue

    new_files = []
    for file in files:
      file = root + file
      new_files.append(file)

    plant_class = root.split('/')[4] 
    temp_df = pd.DataFrame()
    temp_df['filename'] = new_files
    temp_df['class'] = [plant_class for file in new_files]
    temp.append(temp_df)

  df = pd.concat(temp, axis = 0, join = 'outer')

  # Shuffles the dataframe
  df = df.sample(frac=1).reset_index(drop=True) 
  return df
  

In [94]:

def split(dir, split = [0.6, 0.2, 0.2],):
  datasets = []
  train_split = split[0]
  val_split = split[1]
  test_split = split[2]
  
  for plant in os.listdir(dir):
    plant_path = join(ds_main, plant)


    temp_df = gen_df(plant_path)

    x, y = temp_df['filename'], temp_df['class']

    x_temp, x_test, y_temp, y_test = train_test_split(
        x, y, train_size = (train_split + val_split), test_size = test_split)
    
    x_train, x_val, y_train, y_val = train_test_split(
        x_temp, y_temp, train_size = (train_split), test_size = val_split)
    
    train_data = pd.DataFrame()
    test_data = pd.DataFrame()
    val_data = pd.DataFrame()

    train_data['filename'] = x_train
    train_data['target'] = y_train

    test_data['filename'] = x_test
    test_data['target'] = y_test
    
    val_data['filename'] = x_val
    val_data['target'] = y_val


    data = [train_data, test_data, val_data]
    datasets.append(data)

  return datasets

In [105]:
for plant in os.listdir(ds_main):
  print(plant)

strawberry
peach
rice
cherry
grape
pepper
corn
potato
tomato
citrus
apple


In [98]:
ds = split(ds_main)

In [106]:
strawberry, peach, rice, cherry, grape, pepper, corn, potato, tomato, citrus, apple = ds

In [116]:
train, test, val = rice
print(len(train))
print(len(test))
print(len(val))

1610
671
537
