In [None]:
import os
import random
from shutil import copyfile

def img_train_test_split(img_source_dir, train_size,
                         output_dir="/content/drive/My Drive/Neural_Project/data",
                         image_extensions=(".jpg", ".png")):
    """
    Randomly split images over a train and a validation folder, preserving the
    per-class sub-folder structure of the source directory.

    Every immediate sub-directory of ``img_source_dir`` is treated as one class.
    Each image in it is copied (not moved) to ``<output_dir>/train/<class>`` with
    probability ``train_size`` and to ``<output_dir>/validation/<class>``
    otherwise. Copies are renamed to a running counter per destination folder
    (``0.jpg``, ``1.png``, ...).

    Note: counters restart at 0 on every call and existing files are
    overwritten, so files left over from an earlier run into the same
    ``output_dir`` are not removed.

    Parameters
    ----------
    img_source_dir : string
        Path to the folder with the images to be split. Can be absolute or relative path

    train_size : float
        Proportion of the original images that need to be copied in the subdirectory in the train folder

    output_dir : string, optional
        Folder that receives the ``train`` and ``validation`` sub-folders.
        Created if it does not exist.

    image_extensions : iterable of string, optional
        File name suffixes (including the dot, case-sensitive) that count as images.

    Raises
    ------
    AttributeError
        If ``img_source_dir`` is not a string or ``train_size`` is not a float.
    OSError
        Propagated from ``os.listdir`` if ``img_source_dir`` cannot be listed.
    """
    if not isinstance(img_source_dir, str):
        raise AttributeError('img_source_dir must be a string')

    if not isinstance(train_size, float):
        raise AttributeError('train_size must be a float')

    # Always create both destination roots, whether or not output_dir existed before.
    train_root = os.path.join(output_dir, 'train')
    validation_root = os.path.join(output_dir, 'validation')
    os.makedirs(train_root, exist_ok=True)
    os.makedirs(validation_root, exist_ok=True)

    # Get the class sub-directories; sorted so the processing order does not
    # depend on the filesystem's listing order.
    subdirs = sorted(
        subdir for subdir in os.listdir(img_source_dir)
        if os.path.isdir(os.path.join(img_source_dir, subdir))
    )
    print("Subdirect", subdirs)

    for subdir in subdirs:
        subdir_fullpath = os.path.join(img_source_dir, subdir)
        # Sorted so that, with a seeded RNG, the assignment is repeatable.
        filenames = sorted(os.listdir(subdir_fullpath))
        if not filenames:
            print(subdir_fullpath + ' is empty')
            # Skip only this class; the remaining classes must still be processed.
            continue

        train_subdir = os.path.join(train_root, subdir)
        validation_subdir = os.path.join(validation_root, subdir)
        os.makedirs(train_subdir, exist_ok=True)
        os.makedirs(validation_subdir, exist_ok=True)

        train_counter = 0
        validation_counter = 0

        # Randomly assign each image to the train or the validation folder.
        for filename in filenames:
            # Take the extension from the matching suffix rather than splitting
            # on '.', which breaks for names such as "img.v2.jpg".
            extension = next(
                (ext for ext in image_extensions if filename.endswith(ext)), None)
            if extension is None:
                continue

            source_path = os.path.join(subdir_fullpath, filename)
            if not os.path.isfile(source_path):
                # e.g. a directory whose name happens to end in ".jpg"
                continue

            if random.uniform(0, 1) <= train_size:
                copyfile(source_path,
                         os.path.join(train_subdir, str(train_counter) + extension))
                train_counter += 1
            else:
                copyfile(source_path,
                         os.path.join(validation_subdir, str(validation_counter) + extension))
                validation_counter += 1

        # Report the folders that were actually written to.
        print('Copied ' + str(train_counter) + ' images to ' + train_subdir)
        print('Copied ' + str(validation_counter) + ' images to ' + validation_subdir)

In [None]:
# Seed the global RNG used by img_train_test_split so the random train/validation
# assignment is repeatable across runs (for an unchanged directory listing).
random.seed(42)
#path to classified images
img_train_test_split("/content/drive/My Drive/Neural_ProjectC1/classified_images", train_size=0.7)

Subdirect ['0', '3', '2', '4', '1']
Copied 262 images to /content/drive/My Drive/Neural_ProjectC1/data/train/0
Copied 132 images to /content/drive/My Drive/Neural_ProjectC1/data/validation/0
Copied 71 images to /content/drive/My Drive/Neural_ProjectC1/data/train/3
Copied 46 images to /content/drive/My Drive/Neural_ProjectC1/data/validation/3
Copied 77 images to /content/drive/My Drive/Neural_ProjectC1/data/train/2
Copied 32 images to /content/drive/My Drive/Neural_ProjectC1/data/validation/2


KeyboardInterrupt: ignored

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# `validation_split` partitions the files of ONE directory into a training and
# a validation subset, so both generators must read exactly the same path.
# NOTE(review): confirm this is the intended Drive folder for the C2 training data.
train_data_dir = '/content/drive/My Drive/Neural_Project/TrainData-C2'

# Hold out 20% of the images in train_data_dir for validation.
train_datagen = ImageDataGenerator(
        validation_split=0.2)

#training data
train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        subset='training')
#validation data
validation_generator = train_datagen.flow_from_directory(
        train_data_dir, #same as in train generator
        subset='validation')