In [1]:
## Script to list the files in originals_reduced and photoshops_reduced, divide them into batches of the desired size
## (any remainder goes into the last batch),
## then copy each batch into its own folder

In [2]:
import os
import shutil
import numpy as np
import pandas as pd

from os import listdir
from os.path import isfile, join

## Batching the Originals

In [3]:
# Source directory for the originals; every file directly inside it is batched below
src_dir = '../Capstone_Photos/originals_reduced/'


In [4]:
# Names of all entries directly inside src_dir that are files (sub-directories are skipped).
# Note: listdir returns entries in arbitrary order, so the order of this list is OS-dependent.
all_files = list(filter(lambda name: isfile(join(src_dir, name)), listdir(src_dir)))

In [5]:
# Total number of files found in src_dir
len(all_files)

1194

In [6]:
# Maximum number of files per batch; file lists are sliced in steps of this size,
# so only the last batch can hold fewer files
batch_size = 2500

In [7]:
# Number of *full* batches only; files that do not fill a complete batch
# go into one additional, final batch
len(all_files) // batch_size

0

In [8]:
# Number of leftover files that end up in the final, partial batch
len(all_files) % batch_size

1194

In [9]:
# Empty table with one row per batch to come: the batch name and the list of files assigned to it
batch_columns = ['batch_list', 'filenames_as_list']
batch_df = pd.DataFrame(columns=batch_columns)

In [10]:
# One batch name per chunk of batch_size files. Ceiling division (-(-a // b)) counts a
# partial final chunk as a batch but, unlike (a // b) + 1, does not add an empty
# trailing batch when the file count is an exact multiple of batch_size.
n_batches = -(-len(all_files) // batch_size)
batch_df['batch_list'] = ['ori_batch_' + str(i + 1) for i in range(n_batches)]

In [11]:
# Preview the batch table: batch names are set, file lists are not filled in yet
batch_df.tail()

Unnamed: 0,batch_list,filenames_as_list
0,ori_batch_1,


In [12]:
# Fill each batch with its slice of all_files: row i receives the files at positions
# [i * batch_size, (i + 1) * batch_size); the last slice is simply shorter.
# The column is assigned in one step instead of cell-by-cell through chained indexing
# (batch_df[col][x] = ...), which pandas warns about and which no longer writes back
# to batch_df once copy-on-write is enabled.
batch_df['filenames_as_list'] = [
    all_files[index_start:index_start + batch_size]
    for index_start in range(0, len(batch_df) * batch_size, batch_size)
]
    



In [13]:
# Show the batch table with the file lists filled in
batch_df

Unnamed: 0,batch_list,filenames_as_list
0,ori_batch_1,"[red_a433i739.jpg, red_a41qtech.jpg, red_a2rg4..."


In [14]:
# Record how many files each batch holds
batch_df['number_of_files'] = [len(names) for names in batch_df['filenames_as_list']]

In [15]:
# Check the per-batch file counts
batch_df.tail()

Unnamed: 0,batch_list,filenames_as_list,number_of_files
0,ori_batch_1,"[red_a433i739.jpg, red_a41qtech.jpg, red_a2rg4...",1194


In [16]:
# Copy the files of every batch into <parent_dir>/<batch name>/ (the sources stay in place)

# Create the parent output folder if it does not exist
parent_dir = '../Capstone_Photos/originals_batched'

try:
    os.mkdir(parent_dir)
    print("Directory " , parent_dir ,  " Created ")
except FileExistsError:
    print("Directory " , parent_dir ,  " already exists")

# Walk batch names and their file lists together, so the copy loop depends only on
# the lists themselves and not on the separately derived number_of_files column.
for batch_name, batch_files in zip(batch_df['batch_list'], batch_df['filenames_as_list']):

    # Built from parent_dir rather than a second hard-coded copy of the path, so the
    # two cannot drift apart. The doubled separator is kept so printed paths are unchanged.
    dir_name = parent_dir + '//' + batch_name

    # Create the batch folder if it does not exist
    try:
        os.mkdir(dir_name)
        print("Directory " , dir_name ,  " Created ")
    except FileExistsError:
        print("Directory " , dir_name ,  " already exists")

    for filename in batch_files:
        shutil.copy(os.path.join(src_dir, filename), dir_name, follow_symlinks=True)


Directory  ../Capstone_Photos/originals_batched  Created 
Directory  ../Capstone_Photos/originals_batched//ori_batch_1  Created 


## Batching the Photoshops_Samples

In [17]:
# Source directory for the photoshopped images; replaces the originals' src_dir from here on
src_dir = '../Capstone_Photos/photoshops_reduced'


In [18]:
# Names of all entries directly inside src_dir that are files (sub-directories are skipped).
# Note: listdir returns entries in arbitrary order, so the order of this list is OS-dependent.
all_files = list(filter(lambda name: isfile(join(src_dir, name)), listdir(src_dir)))


In [19]:
# Total number of files found in src_dir
len(all_files)

1452

In [20]:
# Number of *full* batches only; files that do not fill a complete batch
# go into one additional, final batch
len(all_files) // batch_size

0

In [21]:
# Number of leftover files that end up in the final, partial batch
len(all_files) % batch_size

1452

In [22]:
# Start over with an empty table that keeps the column layout of the previous batch_df
batch_df = pd.DataFrame(columns=batch_df.columns.tolist())


In [23]:
# One batch name per chunk of batch_size files. Ceiling division (-(-a // b)) counts a
# partial final chunk as a batch but, unlike (a // b) + 1, does not add an empty
# trailing batch when the file count is an exact multiple of batch_size.
n_batches = -(-len(all_files) // batch_size)
batch_df['batch_list'] = ['ps_batch_' + str(i + 1) for i in range(n_batches)]

In [24]:
# Preview the batch table: batch names are set, file lists and counts are not filled in yet
batch_df.tail()

Unnamed: 0,batch_list,filenames_as_list,number_of_files
0,ps_batch_1,,


In [25]:
# Fill each batch with its slice of all_files: row i receives the files at positions
# [i * batch_size, (i + 1) * batch_size); the last slice is simply shorter.
# The column is assigned in one step instead of cell-by-cell through chained indexing
# (batch_df[col][x] = ...), which pandas warns about and which no longer writes back
# to batch_df once copy-on-write is enabled.
batch_df['filenames_as_list'] = [
    all_files[index_start:index_start + batch_size]
    for index_start in range(0, len(batch_df) * batch_size, batch_size)
]

In [26]:
# Show the batch table with the file lists filled in (counts are added in the next cell)
batch_df

Unnamed: 0,batch_list,filenames_as_list,number_of_files
0,ps_batch_1,"[red_dq4wpht_0.jpg, red_cmdluiq_0.jpg, red_cy0...",


In [27]:
# Record how many files each batch holds
batch_df['number_of_files'] = [len(names) for names in batch_df['filenames_as_list']]

In [28]:
# Check the per-batch file counts
batch_df.tail()

Unnamed: 0,batch_list,filenames_as_list,number_of_files
0,ps_batch_1,"[red_dq4wpht_0.jpg, red_cmdluiq_0.jpg, red_cy0...",1452


In [29]:
# Copy the files of every batch into <parent_dir>/<batch name>/ (the sources stay in place)

# Create the parent output folder if it does not exist
parent_dir = '../Capstone_Photos/photoshops_batched'

try:
    os.mkdir(parent_dir)
    print("Directory " , parent_dir ,  " Created ")
except FileExistsError:
    print("Directory " , parent_dir ,  " already exists")

# Walk batch names and their file lists together, so the copy loop depends only on
# the lists themselves and not on the separately derived number_of_files column.
for batch_name, batch_files in zip(batch_df['batch_list'], batch_df['filenames_as_list']):

    # Built from parent_dir rather than a second hard-coded copy of the path, so the
    # two cannot drift apart. The doubled separator is kept so printed paths are unchanged.
    dir_name = parent_dir + '//' + batch_name

    # Create the batch folder if it does not exist
    try:
        os.mkdir(dir_name)
        print("Directory " , dir_name ,  " Created ")
    except FileExistsError:
        print("Directory " , dir_name ,  " already exists")

    for filename in batch_files:
        shutil.copy(os.path.join(src_dir, filename), dir_name, follow_symlinks=True)


Directory  ../Capstone_Photos/photoshops_batched  Created 
Directory  ../Capstone_Photos/photoshops_batched//ps_batch_1  Created 
