# Working with *image_dataset_from_directory()*.

Shown here => https://github.com/albertovpd/viu_tfm-deep_vision_classification/blob/3folders_08trainds/src/tfm_2022_02_05.ipynb , it looks like nns are not learning anything but resnet50.
Let's create the following:
5 folders, each one will have its train and validation set, and each one of them will have folders with the 5 classes.

I'll run the model in the 5 different folders hoping to get more or less the same learning curves in all of them. This is kind of a homemade cross validation 

# Splitting data

- let's create 5 folders with random percentajes of the input data
- that folders will contain 2 subfolders, train and test
- every train and test folder will  have a subfolder with each class

In [1]:
# Google Drive stuff
#from google.colab import drive
#drive.mount('/content/drive/', force_remount=True)

- libraries

In [1]:
# local path
from dotenv import load_dotenv
import os
import numpy as np
import shutil
import random

- paths

In [2]:
# for google drive
# base_folder = "/content/drive/My Drive/2-Estudios/viu-master_ai/tfm-deep_vision/"

# local
load_dotenv()
base_folder = os.environ.get("INPUT_PATH")

# pics located in
root_dir  = base_folder+"House_Room_Dataset-5_rooms/"
# my train/test/val folders will be created in
input_destination = base_folder+"dataset_1val_5traintest_folders/"

In [3]:
classes_dir = os.listdir(root_dir)
classes_dir

['Dinning', 'Bedroom', 'Livingroom', 'Kitchen', 'Bathroom']

### Validation folder.

- Creating the *validation set* with 150 pictures of every class. 
- The rest of pics go to the training and test set, in 5 shuffled folders.

In [5]:
for cls in classes_dir:
    # create subfolders with fraction
    
    os.makedirs(input_destination + 'val_ds/' + cls, exist_ok = True)    
    os.makedirs(input_destination + 'train_test_ds/' + cls, exist_ok = True)
    # for each class, let's counts its elements
    src = root_dir + cls
    allFileNames = os.listdir(src)

    # shuffle it and split into train/test/val
    np.random.seed()
    np.random.shuffle(allFileNames)
    val_ds, train_test_ds = np.split(np.array(allFileNames),[150])
    
    # save their initial path
    val_ds = [src+'/'+ name  for name in val_ds.tolist()]
    train_test_ds  = [src+'/' + name for name in train_test_ds.tolist()]
    print("\n *****************************",
          "\n", input_destination[-70:] ,
          "\n Total images: ",cls, len(allFileNames),
          '\n Validation: ', len(val_ds),
          '\n Train and testing: ', len(train_test_ds),
          '\n *****************************')
    
    # copy files from the initial path to this subfolders
    for name in val_ds:
        shutil.copy(name, input_destination +'val_ds/' + cls)
    for name in train_test_ds:
        shutil.copy(name, input_destination +'train_test_ds/' + cls)


 ***************************** 
 _tfm-deep_vision_classification/input/dataset_1val_5traintest_folders/ 
 Total images:  Dinning 1158 
 Validation:  150 
 Train and testing:  1008 
 *****************************

 ***************************** 
 _tfm-deep_vision_classification/input/dataset_1val_5traintest_folders/ 
 Total images:  Bedroom 1248 
 Validation:  150 
 Train and testing:  1098 
 *****************************

 ***************************** 
 _tfm-deep_vision_classification/input/dataset_1val_5traintest_folders/ 
 Total images:  Livingroom 1273 
 Validation:  150 
 Train and testing:  1123 
 *****************************

 ***************************** 
 _tfm-deep_vision_classification/input/dataset_1val_5traintest_folders/ 
 Total images:  Kitchen 965 
 Validation:  150 
 Train and testing:  815 
 *****************************

 ***************************** 
 _tfm-deep_vision_classification/input/dataset_1val_5traintest_folders/ 
 Total images:  Bathroom 606 
 Validation

### Creating the 5 folds for train and test.
- Each one will have 2 folders for training (80%) and testing (20%), maintaining datset proportions.
- Run this 5 times.

In [3]:
input_destination = input_destination+"train_test_ds/"
input_destination[-80:]

'-deep_vision_classification/input/dataset_1val_5traintest_folders/train_test_ds/'

In [6]:
# let's take a random fraction
np.random.seed()
#fraction = float(random.randrange(30, 80))/100
#fraction_name = "fold"+str(fraction).replace(".","")+"/"
#print(fraction, fraction_name)

In [None]:
train_ratio = 0.8

for cls in classes_dir:
    # create subfolders with fraction
    
    os.makedirs(input_destination + fraction_name +'train_ds/' + cls, exist_ok=True)
    os.makedirs(input_destination + fraction_name +'test_ds/' + cls, exist_ok=True)
    
    # for each class, let's counts its elements
    src = root_dir + cls
    allFileNames = os.listdir(src)

    # shuffle it and split into train/test/val
    np.random.seed()
    np.random.shuffle(allFileNames)
    sliced_list, discard = np.split(allFileNames, [int(len(allFileNames)*fraction)])
    train_FileNames, test_FileNames = np.split(np.array(sliced_list),[int(train_ratio * len(sliced_list))])
    
    # save their initial path
    train_FileNames = [src+'/'+ name  for name in train_FileNames.tolist()]
    test_FileNames  = [src+'/' + name for name in test_FileNames.tolist()]
    print("\n *****************************",
          "\n", input_destination + fraction_name,
          "\n Total images: ",cls, len(sliced_list),
          '\n Training: ', len(train_FileNames),
          '\n Testing: ', len(test_FileNames),
          '\n *****************************')
    
    # copy files from the initial path to the final folders
    for name in train_FileNames:
        shutil.copy(name, input_destination + fraction_name +'train_ds/' + cls)
    for name in test_FileNames:
        shutil.copy(name, input_destination + fraction_name +'test_ds/' + cls)