In [1]:
import os, shutil
import pandas as pd
import tensorflow as tf

# Root folder for the re-organised copy of the dataset; the train/validation/test
# folders are created underneath it later on. Make sure it exists up front.
base_dir = 'C:/Users/fabio/Documents/PlantPathology/images_cnn'
os.makedirs(base_dir, exist_ok=True)

# Folder the images are copied from.
orig_dataset_dir = 'C:/Users/fabio/Documents/PlantPathology/images'

In [2]:
# Configure GPU memory usage.
# With memory growth enabled TensorFlow allocates GPU memory as it is needed
# instead of reserving it all when the device is initialised.
physical_device = tf.config.experimental.list_physical_devices('GPU')
print("Num of GPU's available: ",len(physical_device))
# Apply the option to every detected GPU rather than indexing element 0:
#  - on a CPU-only machine the list is empty and `physical_device[0]` raised IndexError;
#  - with several GPUs the option has to be the same on all of them.
for gpu in physical_device:
    tf.config.experimental.set_memory_growth(gpu, True)

Num of GPU's available:  1


In [3]:
# Load the training metadata (the cells below use its 'image' and 'labels' columns).
images_csv = pd.read_csv('C:/Users/fabio/Documents/PlantPathology/train.csv')

# Keep only the rows whose 'labels' value is exactly one of these names.
one_condition = ['scab', 'healthy', 'frog_eye_leaf_spot', 'rust', 'complex', 'powdery_mildew']
keep_rows = images_csv['labels'].isin(one_condition)
images_csv_one_condition = images_csv.loc[keep_rows].reset_index(drop=True)

# Rows per retained label (displayed as the cell output).
images_csv_one_condition['labels'].value_counts()

scab                  4826
healthy               4624
frog_eye_leaf_spot    3181
rust                  1860
complex               1602
powdery_mildew        1184
Name: labels, dtype: int64

In [4]:
# Group the image filenames by label, keeping the row order of the filtered table.
# One bucket is created per label in `one_condition` (the same list used to
# filter the table), so every label that survived the filter has a bucket and a
# label with no rows still maps to an empty list.
images_dictionary = {label: [] for label in one_condition}

# Walk the two columns in parallel instead of doing a `.loc[index, ...]` lookup
# per row: the resulting lists are the same, it needs far fewer pandas calls,
# and it no longer depends on the index being exactly 0..n-1.
for label, image_name in zip(images_csv_one_condition['labels'],
                             images_csv_one_condition['image']):
    images_dictionary[label].append(image_name)

In [5]:
# Spot check: display the second filename stored under the 'scab' label.
images_dictionary['scab'][1]

'80077517781fb94f.jpg'

In [6]:
#CREATE TRAIN, VALIDATION AND TEST FOLDERS
train_dir = os.path.join(base_dir,'train')
validation_dir = os.path.join(base_dir,'validation')
test_dir = os.path.join(base_dir,'test')

# os.makedirs(..., exist_ok=True) instead of os.mkdir: os.mkdir raises
# FileExistsError when the folder is already there, so the cell failed on every
# execution after the first one. This also matches how base_dir is created.
for split_dir in (train_dir, validation_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

In [7]:
def _make_class_dir(split_dir, class_name):
    """Create (if missing) and return the sub-folder of `split_dir` named `class_name`.

    Args
    split_dir: [path], folder of one split (train, validation or test)
    class_name: [string], name of the class sub-folder to create
    return: [path] of the class sub-folder
    """
    class_dir = os.path.join(split_dir, class_name)
    # exist_ok=True keeps the cell re-runnable; the previous os.mkdir calls
    # raised FileExistsError as soon as the folders existed.
    os.makedirs(class_dir, exist_ok=True)
    return class_dir


####Training folders
#CREATE SUBFOLDERS FOR EACH CLASS
train_complex_dir = _make_class_dir(train_dir, 'complex')
train_frog_eye_leaf_spot_dir = _make_class_dir(train_dir, 'frog_eye_leaf_spot')
train_powdery_mildew_dir = _make_class_dir(train_dir, 'powdery_mildew')
train_rust_dir = _make_class_dir(train_dir, 'rust')
train_healthy_dir = _make_class_dir(train_dir, 'healthy')
train_scab_dir = _make_class_dir(train_dir, 'scab')

####Validation folders
#CREATE SUBFOLDERS FOR EACH CLASS
validation_complex_dir = _make_class_dir(validation_dir, 'complex')
validation_frog_eye_leaf_spot_dir = _make_class_dir(validation_dir, 'frog_eye_leaf_spot')
validation_powdery_mildew_dir = _make_class_dir(validation_dir, 'powdery_mildew')
validation_rust_dir = _make_class_dir(validation_dir, 'rust')
validation_healthy_dir = _make_class_dir(validation_dir, 'healthy')
validation_scab_dir = _make_class_dir(validation_dir, 'scab')

####Test folders
#CREATE SUBFOLDERS FOR EACH CLASS
test_complex_dir = _make_class_dir(test_dir, 'complex')
test_frog_eye_leaf_spot_dir = _make_class_dir(test_dir, 'frog_eye_leaf_spot')
test_powdery_mildew_dir = _make_class_dir(test_dir, 'powdery_mildew')
test_rust_dir = _make_class_dir(test_dir, 'rust')
test_healthy_dir = _make_class_dir(test_dir, 'healthy')
test_scab_dir = _make_class_dir(test_dir, 'scab')

In [8]:
#Function to join images to training folder
def image_join(disease,limit,source_path,dest_path):
    """ Copy the first `limit` images of one label into a folder
    Args
    disease: [string], key of images_dictionary whose filenames are copied
    limit: [int], end of the slice taken from the start of that filename list
    source_path: [path], folder the files are read from
    dest_path: [path], folder the files are written to (same filenames)
    return: None
    """
    def _path_in(folder, file_name):
        # Join, then normalise Windows separators to forward slashes.
        return os.path.join(folder, file_name).replace("\\","/")

    for file_name in images_dictionary[disease][:limit]:
        shutil.copyfile(_path_in(source_path, file_name), _path_in(dest_path, file_name))

In [9]:
#TRAIN FOLDER
# One entry per class: (label, number of images, training sub-folder).
# For each label the first `n_images` filenames are copied from the original
# image folder into that label's training folder.
train_plan = (
    ('complex', 1281, train_complex_dir),
    ("frog_eye_leaf_spot", 1280, train_frog_eye_leaf_spot_dir),
    ("powdery_mildew", 947, train_powdery_mildew_dir),
    ("rust", 1488, train_rust_dir),
    ("healthy", 1280, train_healthy_dir),
    ('scab', 1280, train_scab_dir),
)
for disease_name, n_images, target_dir in train_plan:
    image_join(disease_name, n_images, orig_dataset_dir, target_dir)

In [10]:
def image_join_with_limits(disease,lower_limit,upper_limit,source_path,dest_path):
    """ Copy a slice of the images of one label into a folder
    Args
    disease: [string], key of images_dictionary whose filenames are copied
    lower_limit: [int], index of the first filename to copy
    upper_limit: [int], how many filenames to copy starting at lower_limit
                 (the slice ends at lower_limit + upper_limit)
    source_path: [path], folder the files are read from
    dest_path: [path], folder the files are written to (same filenames)
    return: None
    """
    stop = lower_limit + upper_limit
    chosen = images_dictionary[disease][lower_limit:stop]
    for file_name in chosen:
        # Build both paths with forward slashes only.
        src_file = os.path.join(source_path, file_name).replace("\\","/")
        dst_file = os.path.join(dest_path, file_name).replace("\\","/")
        shutil.copyfile(src_file, dst_file)

In [11]:
#VALIDATION FOLDER
# One entry per class: (label, start index, number of images, validation sub-folder).
# The start index is the number of images copied to the training folder for
# that label, so the validation slice begins right after the training slice.
validation_plan = (
    ("complex", 1281, 160, validation_complex_dir),
    ("frog_eye_leaf_spot", 1280, 160, validation_frog_eye_leaf_spot_dir),
    ("powdery_mildew", 947, 118, validation_powdery_mildew_dir),
    ("rust", 1488, 186, validation_rust_dir),
    ("healthy", 1280, 160, validation_healthy_dir),
    ('scab', 1280, 160, validation_scab_dir),
)
for disease_name, start_index, n_images, target_dir in validation_plan:
    image_join_with_limits(disease_name, start_index, n_images, orig_dataset_dir, target_dir)

In [12]:
#TEST FOLDER
# One entry per class:
# (label, images used for training, images used for validation, number of test images, test sub-folder).
# The test slice starts after the training and validation slices of that label.
test_plan = (
    ("complex", 1281, 160, 160, test_complex_dir),
    ("frog_eye_leaf_spot", 1280, 160, 160, test_frog_eye_leaf_spot_dir),
    ("powdery_mildew", 947, 118, 118, test_powdery_mildew_dir),
    ("rust", 1488, 186, 186, test_rust_dir),
    ("healthy", 1280, 160, 160, test_healthy_dir),
    ('scab', 1280, 160, 160, test_scab_dir),
)
for disease_name, n_train, n_validation, n_test, target_dir in test_plan:
    image_join_with_limits(disease_name, n_train + n_validation, n_test, orig_dataset_dir, target_dir)

In [17]:
# Sanity check: count the files in the three 'complex' folders and compare the
# counts with the numbers of images requested for each split.
dir_train_complex, dir_val_complex, dir_test_complex = (
    len(os.listdir(base_dir + suffix))
    for suffix in ("/train/complex", "/validation/complex", "/test/complex")
)
for observed, expected in (
    (dir_train_complex, 1281),
    (dir_val_complex, 160),
    (dir_test_complex, 160),
):
    assert observed == expected