# Creating folders with synthetic data.

In this notebook https://github.com/albertovpd/viu_tfm-deep_vision_classification/blob/kfolds_validation/src/creating_5_subfolders_for_kfoldslike_validation.ipynb we did the following:
- All available data was shuffled, and 150 pics of each class were taken for the test set.
- The remaining pics were shuffled again and then distributed into train/validation folders (80-20%).

Now, 3 new folders will be created with 50, 250 and 480 synthetic pics for each class. So:
- The same 150 pics are taken for the test set.
- The same train/validation distribution (80-20%) is taken.
- 50 synth pics of each class are randomly chosen, added to the real ones to create the folder *"synth50_train_val_ds"*. The same process is repeated with 250 and 480 synth pics.

In [1]:
# local path
from dotenv import load_dotenv
import os
import numpy as np
import shutil
import random
import shutil
from PIL import Image

In [2]:
# Configuration: read INPUT_PATH from the local .env file / environment and
# derive every input and output folder used by the rest of the notebook.
load_dotenv()
base_folder = os.environ.get("INPUT_PATH")
if not base_folder:
    # Without this check the concatenations below fail with an opaque
    # "unsupported operand type(s) for +: 'NoneType' and 'str'".
    raise RuntimeError(
        "INPUT_PATH is not set; define it in the environment or in a .env file."
    )
# All paths below are built by plain string concatenation, so make sure the
# base folder ends with a path separator (no-op when it already does).
base_folder = os.path.join(base_folder, "")

real_input  = base_folder + "House_Room_Dataset-5_rooms/"
synth_input = base_folder + "common_misclassifications/mosaics/"
synth_input_base = base_folder + "common_misclassifications/"
synth_output= base_folder + "dataset_synth_data-1test_3trainval_20220525/"

# Maps each real-data class folder (key) to the folder holding the synthetic
# pics of that class (value). Both carry a trailing "/" because they are
# concatenated directly into paths.
classes = {
    'Dinning/' : 'dinning_fakes/',
    'Bedroom/' : 'bedroom_fakes/',
    'Livingroom/' : 'livingroom_fakes/',
    'Kitchen/' : 'kitchen_fakes/',
    'Bathroom/': 'bathroom_fakes/'
}

# Maps each output train/validation folder (key) to the number of synthetic
# pics per class that are added to its train set (value).
new_folders = {
    "no_synth_train_val_ds/": 0, # without fake pics
    "synth50_train_val_ds/" : 50,
    "synth250_train_val_ds/": 250,
    "synth480_train_val_ds/": 480
}

- create folders for train, test and validation
- shuffle the input pics and divide them into those folders

In [3]:
# Split the real pics of every class into test / train / validation folders.
#
# The split is driven by a single seeded generator over a *sorted* file
# listing, so re-running this cell reproduces exactly the same partition.
# With an unseeded shuffle, a re-run would copy a different split on top of
# the existing folders and leak test pics into train/val.
# NOTE: if the output folders already hold files from an earlier, unseeded
# run, empty them before executing this cell.
SPLIT_SEED = 42        # fixed seed -> reproducible split
N_TEST_PICS = 150      # pics per class reserved for the test set
TRAIN_FRACTION = 0.8   # share of the remaining pics that goes to train

split_rng = np.random.default_rng(SPLIT_SEED)

for c in classes:
    # create the per-class subfolder of each split
    for split_folder in ("train_ds/", "val_ds/", "test_ds/"):
        os.makedirs(synth_output + split_folder + c, exist_ok = True)

    # os.listdir returns entries in arbitrary order; sort for determinism
    real_pics = sorted(os.listdir(real_input + c))
    if len(real_pics) <= N_TEST_PICS:
        # Otherwise every pic would land in test_ds and train/val stay empty.
        raise ValueError(
            f"Class {c!r} has only {len(real_pics)} pics; more than "
            f"{N_TEST_PICS} are needed to build test, train and validation sets."
        )

    shuffled_pics = split_rng.permutation(real_pics)

    # first 150 shuffled pics of each class go to the test ds
    test_ds = shuffled_pics[:N_TEST_PICS]
    train_val_ds = shuffled_pics[N_TEST_PICS:]

    # 80% of the remaining pics for the train ds, the other 20% for the val ds
    n_train = int(TRAIN_FRACTION * len(train_val_ds))
    train_ds = train_val_ds[:n_train]
    val_ds = train_val_ds[n_train:]

    # copying real pics to new folders
    for split_folder, split_pics in (
        ("train_ds/", train_ds),
        ("val_ds/", val_ds),
        ("test_ds/", test_ds),
    ):
        for pic in split_pics:
            shutil.copyfile(real_input + c + pic, synth_output + split_folder + c + pic)

-------------------------------------------------

- now we have all real pics shuffled into the *test_ds, train_ds, val_ds* folders.
- let's create the folders listed in the *new_folders* dictionary and copy train_ds and val_ds to all of them (*test_ds* will remain unique)

In [4]:
# Replicate the real train/validation pics into every folder listed in
# new_folders, keeping the per-class subfolder layout.
for experiment_dir in new_folders:
    for class_dir in classes:
        for split_dir in ("train_ds/", "val_ds/"):
            source_dir = synth_output + split_dir + class_dir
            target_dir = synth_output + experiment_dir + split_dir + class_dir

            # make sure the destination class folder exists
            os.makedirs(target_dir, exist_ok = True)

            # copy every pic of this split/class under the same file name
            for pic_name in os.listdir(source_dir):
                shutil.copyfile(source_dir + pic_name, target_dir + pic_name)

- fake images have a 256x256 resolution and have to be resized to 224x224

In [5]:
# Resize every synthetic pic to 224x224 and store it as PNG under
# <synth_input_base>/rescaled_x224/<class>_fakes/.
size = 224, 224
for c in classes:
    # read path
    source_path = synth_input + classes[c]
    pic_list = os.listdir(source_path)
    rescaled_path = synth_input_base + "rescaled_x224/" + classes[c]
    os.makedirs(rescaled_path, exist_ok = True)

    for p in pic_list:
        # splitext handles extensions of any length (".jpg", ".jpeg", ...);
        # slicing off the last 4 characters does not.
        stem = os.path.splitext(p)[0]
        # The context manager releases the source file handle once the
        # resized copy has been written.
        with Image.open(source_path + p) as im:
            # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
            # same filter under its current name.
            im_resized = im.resize(size, Image.LANCZOS)
            im_resized.save(rescaled_path + stem + ".png", "PNG")

- now add different amounts of fake data to the train set of those folders, as given by the associated values in the dictionary above

In [6]:
# Sanity check: list the per-class folders found under rescaled_x224/.
rescaled_root = synth_input_base + "rescaled_x224/"
fake_folders = os.listdir(rescaled_root)
fake_folders

['bathroom_fakes',
 'livingroom_fakes',
 'dinning_fakes',
 'bedroom_fakes',
 'kitchen_fakes']

In [7]:
# Add the configured number of synthetic pics per class to the train set of
# every folder in new_folders (validation sets keep real pics only).
#
# A seeded generator over a *sorted* listing makes the selection
# reproducible: re-running the cell copies the same files again instead of
# piling a different random subset on top of the previous one.
SYNTH_SEED = 42
synth_rng = np.random.default_rng(SYNTH_SEED)
rescaled_root = synth_input_base + "rescaled_x224/"

for n in new_folders:
    for c in classes:

        # locate fake pics; os.listdir order is arbitrary, so sort first
        fake_pics = sorted(os.listdir(rescaled_root + classes[c]))

        # shuffle and keep the first 0, 50, 250 or 480 pics; if fewer are
        # available, all of them are used and the count printed below shows it
        fake_shuffled = synth_rng.permutation(fake_pics)[:new_folders[n]]
        print(n, c, len(fake_shuffled), " fake pics added")

        # copy those pics into the train folder of this class
        for f in fake_shuffled:
            shutil.copyfile(rescaled_root + classes[c] + f, synth_output + n + "train_ds/" + c + f)

no_synth_train_val_ds/ Dinning/ 0  fake pics added
no_synth_train_val_ds/ Bedroom/ 0  fake pics added
no_synth_train_val_ds/ Livingroom/ 0  fake pics added
no_synth_train_val_ds/ Kitchen/ 0  fake pics added
no_synth_train_val_ds/ Bathroom/ 0  fake pics added
synth50_train_val_ds/ Dinning/ 50  fake pics added
synth50_train_val_ds/ Bedroom/ 50  fake pics added
synth50_train_val_ds/ Livingroom/ 50  fake pics added
synth50_train_val_ds/ Kitchen/ 50  fake pics added
synth50_train_val_ds/ Bathroom/ 50  fake pics added
synth250_train_val_ds/ Dinning/ 250  fake pics added
synth250_train_val_ds/ Bedroom/ 250  fake pics added
synth250_train_val_ds/ Livingroom/ 250  fake pics added
synth250_train_val_ds/ Kitchen/ 250  fake pics added
synth250_train_val_ds/ Bathroom/ 250  fake pics added
synth480_train_val_ds/ Dinning/ 480  fake pics added
synth480_train_val_ds/ Bedroom/ 480  fake pics added
synth480_train_val_ds/ Livingroom/ 480  fake pics added
synth480_train_val_ds/ Kitchen/ 480  fake pics adde