In [6]:
# imports
import os
import csv
import torchvision.io as io
import torchvision.transforms as T

## Create Training Set

In [12]:
# Resolve every dataset location relative to the directory the notebook runs in.
cur_dir = os.getcwd()

# Source images: each subfolder corresponds to species + healthy/disease.
train_src_dir = os.path.join(cur_dir, 'Plant_leave_diseases_dataset_with_augmentation')

# Resized training images are written to <cwd>/load_dataset/dataset/train.
train_dst_dir = os.path.join(cur_dir, 'load_dataset', 'dataset', 'train')

In [13]:
# Build the training set: resize every source image, re-encode it as
# img<N>.jpg in train_dst_dir, and record [filename, isHealthy] in train_set.
train_set = []
img_index = 0
img_size = (256, 256) # minimum size for ImageNet is 224x224, but default dataset is mostly 256x256; might tweak
transform = T.Resize(size=img_size)

# write_jpeg does not create missing parent folders, so make sure the
# destination exists before the first image is written.
os.makedirs(train_dst_dir, exist_ok=True)

# iterate through all the subfolders (where each subfolder corresponds to species + healthy/disease)
# os.listdir returns entries in arbitrary order; sorting keeps the
# img<N> <-> source-file mapping identical from run to run.
for subdir in sorted(os.listdir(train_src_dir)):
    if subdir == 'Background_without_leaves':
        # images without a leaf carry no healthy/diseased label; ignore them
        continue

    superdir = os.path.join(train_src_dir, subdir)
    if not os.path.isdir(superdir):
        # stray files in the dataset root (e.g. a readme) are not class folders
        continue

    # 1 to indicate it is healthy, 0 to indicate it is diseased;
    # the label depends only on the folder name, so compute it once per folder
    label = 1 if subdir.endswith('healthy') else 0

    # iterate through all the files within the subfolder
    for src_filename in sorted(os.listdir(superdir)):
        # destination filename
        dst_filename = 'img{}.jpg'.format(img_index)

        # decode as 3-channel RGB, resize, and re-encode as JPEG
        img = io.read_image(os.path.join(superdir, src_filename), mode=io.ImageReadMode.RGB)
        img = transform(img)
        io.write_jpeg(img, os.path.join(train_dst_dir, dst_filename), quality=90)

        train_set.append([dst_filename, label])
        img_index += 1

In [18]:
# write train labels to csv

# field names
csv_fields = ['filename', 'isHealthy']

# name of csv file: <cwd>/load_dataset/dataset/train_labels.csv
csv_train_filename = os.path.join(cur_dir, 'load_dataset', 'dataset', 'train_labels.csv')

# writing to csv file
# newline='' is required by the csv module: the writer emits its own '\r\n'
# line endings, and without it Windows adds a second '\r', producing a blank
# row after every record.
with open(csv_train_filename, 'w', newline='') as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)

    # writing the fields (header row)
    csvwriter.writerow(csv_fields)

    # writing the data rows: one [filename, isHealthy] pair per image
    csvwriter.writerows(train_set)

## Create Test Set

In [14]:
# The test set is drawn from two separate source datasets located under cur_dir.
test_src_dirs = [
    os.path.join(cur_dir, dataset_name)
    for dataset_name in (
        'A Database of Leaf Images_ Practice towards Plant Conservation with Plant Pathology',
        'PlantaeK_ A leaf database of native plants of Jammu and Kashmir',
    )
]

# Resized test images are written to <cwd>/load_dataset/dataset/test.
test_dst_dir = os.path.join(cur_dir, 'load_dataset', 'dataset', 'test')

In [16]:
# Build the test set: resize every source image, re-encode it as
# img<N>.jpg in test_dst_dir, and record [filename, isHealthy] in test_set.
test_set = []
img_index = 0
img_size = (256, 256) # minimum size for ImageNet is 224x224, but default dataset is mostly 256x256; might tweak
transform = T.Resize(size=img_size)

# write_jpeg does not create missing parent folders, so make sure the
# destination exists before the first image is written.
os.makedirs(test_dst_dir, exist_ok=True)

# folder name (lower-cased) -> isHealthy label
status_labels = {'healthy': 1, 'diseased': 0}

# iterate through all the subfolders (where each subfolder corresponds to species + healthy/disease)
for test_src_dir in test_src_dirs:

    # each subdir corresponds to a species
    # (sorted: os.listdir order is arbitrary, and the img<N> numbering
    # should be reproducible from run to run)
    for species_sub_dir in sorted(os.listdir(test_src_dir)):
        species_dir = os.path.join(test_src_dir, species_sub_dir)
        if not os.path.isdir(species_dir):
            # stray files in the dataset root are not species folders
            continue

        # each subsubdir corresponds to diseased or healthy
        for status_sub_dir in sorted(os.listdir(species_dir)):
            species_status_dir = os.path.join(species_dir, status_sub_dir)
            if not os.path.isdir(species_status_dir):
                continue

            # Decide the label BEFORE writing anything: previously, images in
            # a folder that was neither 'healthy' nor 'diseased' were written
            # to disk and consumed an index but never got a row in test_set.
            label = status_labels.get(status_sub_dir.lower())
            if label is None:
                print('Skipping folder with unknown status: {}'.format(species_status_dir))
                continue

            # iterate through all the files within the subfolder
            for src_filename in sorted(os.listdir(species_status_dir)):

                # destination filename
                dst_filename = 'img{}.jpg'.format(img_index)

                # decode as 3-channel RGB, resize, and re-encode as JPEG
                img = io.read_image(os.path.join(species_status_dir, src_filename), mode=io.ImageReadMode.RGB)
                img = transform(img)
                io.write_jpeg(img, os.path.join(test_dst_dir, dst_filename), quality=90)

                test_set.append([dst_filename, label])
                img_index += 1

In [19]:
# write test labels to csv

# field names
csv_fields = ['filename', 'isHealthy']

# name of csv file: <cwd>/load_dataset/dataset/test_labels.csv
csv_test_filename = os.path.join(cur_dir, 'load_dataset', 'dataset', 'test_labels.csv')

# writing to csv file
# newline='' is required by the csv module: the writer emits its own '\r\n'
# line endings, and without it Windows adds a second '\r', producing a blank
# row after every record.
with open(csv_test_filename, 'w', newline='') as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)

    # writing the fields (header row)
    csvwriter.writerow(csv_fields)

    # writing the data rows: one [filename, isHealthy] pair per image
    csvwriter.writerows(test_set)