# Plant Seedlings Classification
Determine the species of a seedling from an image

# Create Validation Set from Training Set

In [1]:
import os, shutil
from random import shuffle

In [2]:
# path to the original (unsplit) training images, one sub-directory per class
original_dataset_dir = 'data/original/train'
# directory that will hold the curated train/validation split
base_dir = 'data/curated'

# makedirs(..., exist_ok=True) creates missing parent directories and does not
# raise FileExistsError when this cell is re-run
os.makedirs(base_dir, exist_ok=True)

In [3]:
# make the training and validation set directories under base_dir
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

# exist_ok=True keeps this cell re-runnable once the directories exist
for split_dir in (train_dir, validation_dir):
    os.makedirs(split_dir, exist_ok=True)

## Create directories for each class

In [4]:
# Class names; each is used as a sub-directory name in the dataset folders.
classes = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]

In [5]:
# Create one sub-directory per class inside both the training and the
# validation directory.
for directory in [train_dir, validation_dir]:
    for clas in classes:
        clas_dir = os.path.join(directory, clas)
        # exist_ok=True so re-running the cell does not raise FileExistsError
        os.makedirs(clas_dir, exist_ok=True)

## Copy images

Training set - 70%

Validation set - 30%

### Get number of images in each class

In [6]:
# Print the number of entries found in each class folder of the original dataset.
for species in classes:
    species_dir = os.path.join(original_dataset_dir, species)
    image_count = len(os.listdir(species_dir))
    print(f"{species} :  {image_count}")

Black-grass :  263
Charlock :  390
Cleavers :  287
Common Chickweed :  611
Common wheat :  221
Fat Hen :  475
Loose Silky-bent :  654
Maize :  221
Scentless Mayweed :  516
Shepherds Purse :  231
Small-flowered Cranesbill :  496
Sugar beet :  385


In [7]:
from random import Random

# fraction of each class that is held out for validation
rate = 0.3
# fixed seed so that re-running this cell reproduces exactly the same split
# (a different split on re-run would copy the same image into both the
# training and the validation folders)
seed = 42
rng = Random(seed)

for clas in classes:
    original_directory = os.path.join(original_dataset_dir, clas)

    # list of all the images in the current class directory; os.listdir
    # returns entries in arbitrary order, so sort before the seeded shuffle
    files = sorted(os.listdir(original_directory))
    num_of_val_images = int(rate * len(files))
    # randomly (but reproducibly) shuffle the list
    rng.shuffle(files)

    val_files = files[:num_of_val_images]
    trn_files = files[num_of_val_images:]

    for folder, label, fnames in (
        (train_dir, "Training", trn_files),
        (validation_dir, "Validation", val_files),
    ):
        destination_dir = os.path.join(folder, clas)

        for fname in fnames:
            src = os.path.join(original_directory, fname)
            dst = os.path.join(destination_dir, fname)
            shutil.copyfile(src, dst)

        # count what is actually in the destination folder after copying
        print(clas, f"--> {label}:  ", len(os.listdir(destination_dir)))

Black-grass --> Training:   185
Black-grass --> Validation:   78
Charlock --> Training:   273
Charlock --> Validation:   117
Cleavers --> Training:   201
Cleavers --> Validation:   86
Common Chickweed --> Training:   428
Common Chickweed --> Validation:   183
Common wheat --> Training:   155
Common wheat --> Validation:   66
Fat Hen --> Training:   333
Fat Hen --> Validation:   142
Loose Silky-bent --> Training:   458
Loose Silky-bent --> Validation:   196
Maize --> Training:   155
Maize --> Validation:   66
Scentless Mayweed --> Training:   362
Scentless Mayweed --> Validation:   154
Shepherds Purse --> Training:   162
Shepherds Purse --> Validation:   69
Small-flowered Cranesbill --> Training:   348
Small-flowered Cranesbill --> Validation:   148
Sugar beet --> Training:   270
Sugar beet --> Validation:   115
