In [1]:
# Code to reshuffle downloaded image files into labeled subdirectories
# to enable keras dataset creation using 
# keras image_dataset_from_directory function
#
# Current directory structure:
# 
# /img
#   Pose_no_XYZ_directory
#         zyx.jpg
#         ...
# 
#  /data directory contains two files
#    yoga_train.txt
#    yoga_test.txt
# 
#  each with structure of
#     Akarna_Dhanurasana/16.jpg,1,8,0
#     Akarna_Dhanurasana/95.jpg,1,8,0
# 
#  to enable keras image_dataset_from_directory function,
#  the following directory structure should be created:
# 
#     main_directory/
#         ...class_a/
#         ......a_image_1.jpg
#         ......a_image_2.jpg
#         ...class_b/
#         ......b_image_1.jpg
#         ......b_image_2.jpg
# 

# Code steps should be as follows
# Parameter No 1: TRAIN or TEST - read yoga_train.txt or yoga_test.txt
# Parameter No 2: classification tier 1: classes 0~5 (6 total classes)
#                 classification tier 2: classes 0~19 (20 total classes)
#                 classification tier 3: classes 0~81 (82 total classes)
# 
#  read line from yoga_train.txt
#  in data directory check/create subdirectory TRAIN / TEST
#  depending on the value of parameter 2, check/create
#  directory YOGA_CLASSES_6, or YOGA_CLASSES_20, or YOGA_CLASSES_82
#  INSIDE corresponding directories create directory structure as follows:
#  
#  read line from yoga_train.txt, say
#       Akarna_Dhanurasana/16.jpg,1,8,0
#  assuming parameter no 2 = 2 (20 classes)
#  check/create directory named "class_8" - label of the tier 2,
#  copy file img/Akarna_Dhanurasana/16.jpg as 
#  KERAS_data/TRAIN/YOGA_CLASSES_20/class_8/8_Akarna_Dhanurasana_16.jpg
#  
#  read next line from yoga_train.txt
# 
# The copy cell below fills all three classification levels in one pass,
# so it has to be run twice: once for the train data
# and once for the test data
# 
# yoga_train_ds = tf.keras.preprocessing.image_dataset_from_directory(
#     directory,
#     labels="inferred", ...
# 
#  where directory is 'KERAS_data/TRAIN/YOGA_CLASSES_20/'

In [2]:
# location variables
# debugging mode
DEBUG = True


def log(s):
    """
    Print ``s`` only while the module-level DEBUG flag is truthy.

    Idea taken from
    https://stackoverflow.com/questions/6579496/using-print-statements-only-to-debug
    """
    if not DEBUG:
        return
    print(s)

# https://pythonexamples.org/python-logging-debug/
#  
import logging

class MyFilter:
    """Handler filter that lets through records at or below a given level."""

    def __init__(self, level):
        # Highest numeric level (inclusive) that is still accepted.
        self.__level = level

    def filter(self, logRecord):
        """Return True when the record's level does not exceed the threshold."""
        threshold = self.__level
        return not (logRecord.levelno > threshold)

#create a logger
logger = logging.getLogger('mylogger')
logger.setLevel(logging.DEBUG)
# logging.getLogger returns the same object for the same name, so re-running
# this cell would otherwise attach one more FileHandler each time and every
# message would be written to the log file several times. Detach and close
# whatever is already attached before adding a fresh handler.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
handler = logging.FileHandler('image_dataset_from_directory.log')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
#set filter to log only DEBUG lines (records above DEBUG level are dropped by MyFilter)
handler.addFilter(MyFilter(logging.DEBUG))
logger.addHandler(handler)

#write a debug line to log file
# logger.debug('This is a DEBUG message')
# logger.info('This is a INFO message')
# logger.warning('This is a WARNING message')
# logger.error('This is an ERROR message')
# logger.critical('This is a CRITICAL message')

In [5]:
!pwd

/Users/alexey_imac/Documents/Data_Science_Immersive_Galvanize/ALEXEY_DATA/CAPSTONES/C2_PROJECT/notebooks


In [6]:
import os

# The notebook lives in <project>/notebooks, while the paths used further down
# ('data/', 'img/', 'KERAS_data/') are relative to the project root. Step up
# only while we are still inside 'notebooks', so that re-running this cell
# cannot climb one directory higher on every execution.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')

In [7]:
# Sanity check: the relative paths used later ('data/', 'img/', 'KERAS_data/')
# are resolved against this directory.
print(f'Current Directory = {os.getcwd()}')

Current Directory = /Users/alexey_imac/Documents/Data_Science_Immersive_Galvanize/ALEXEY_DATA/CAPSTONES/C2_PROJECT


In [36]:
# NOTE(review): Windows-style path that no other visible cell reads; looks like
# a leftover from experimenting with file copies — confirm before removing.
src = r'C:\Users\Administrator.SHAREPOINTSKY\Desktop\Work\name.txt'

In [110]:
def keras_dir_check_mkdir(train_test: str, classes_n: str, class_n: str):
    '''
    Make sure the class directory used by the keras
    image_dataset_from_directory function exists, creating it
    (together with any missing parent directories) when it does not.

    The directory is KERAS_data/<train_test>/<classes_n>/class_<class_n>,
    relative to the current working directory, e.g.
    KERAS_data/TRAIN/YOGA_CLASSES_20/class_8

    Parameters
    ----------
    train_test : str
        Split sub-directory, e.g. 'TRAIN' or 'TEST'.
    classes_n : str
        Classification-tier directory, e.g. 'YOGA_CLASSES_20'.
    class_n : str
        Class label appended to the 'class_' prefix, e.g. '8'.
    '''
    # Use the train_test argument, not the notebook-level TRAIN_TEST variable:
    # the function must honour what the caller passes and must not fail with a
    # NameError when that global has not been defined yet.
    dir_path_assembled = 'KERAS_data/' + train_test + '/' + classes_n + '/' + 'class_' + class_n
    if os.path.isdir(dir_path_assembled):
        # directory exists in !pwd
        logger.debug(f'Directory {dir_path_assembled} exists')
    else:
        # os.mkdir fails when KERAS_data/<split>/<tier> is missing; makedirs
        # creates the whole chain, and exist_ok tolerates a directory that
        # appeared between the check above and this call.
        os.makedirs(dir_path_assembled, exist_ok=True)
        logger.debug(f'Directory {dir_path_assembled} was CREATED!')


# REF: https://pythonguides.com/python-copy-file/
import shutil
import os


In [136]:
# Select which split to process. Edit these two values by hand and re-run the
# cell once per split:
#   'data/yoga_train.txt' with 'TRAIN', or 'data/yoga_test.txt' with 'TEST'.
file_name_2_process = 'data/yoga_test.txt'
TRAIN_TEST = 'TEST'

# line_count is the 1-based number of the line being processed; once the loop
# has finished it equals the number of lines read (0 for an empty file).
line_count = 0
skipped_files_counter = 0

if not os.path.isfile(file_name_2_process):
    # Without the list file there is nothing to iterate over. Report it and
    # stop here instead of looping over an unbound (or stale) file handle.
    log(f'File does not exists: {file_name_2_process}')
    print(f'File does not exists: {file_name_2_process}')
    logger.debug(f'File does not exists: {file_name_2_process}')
else:
    # The context manager closes the list file even when a copy fails midway.
    with open(file_name_2_process, 'r') as file:
        log(f'Opened file {file} for processing ===============')
        print(f'Opened file {file} for processing ===============')
        logger.debug(f'Opened file {file} for processing ===============')

        for line_count, line in enumerate(file, start=1):
            logger.debug(f'File {file_name_2_process}. Processing line {line_count}: \n {line}')
            log(f'File {file_name_2_process}. Processing line {line_count}: \n {line}')

            if not line.strip():
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no record and cannot be unpacked below.
                continue

            # Each record looks like "Akarna_Dhanurasana/16.jpg,1,8,0":
            # image path followed by the labels for the 6-, 20- and 82-class tiers.
            [directory_file_jpg, class_6, class_20, class_82] = line.split(',')
            class_82 = class_82.rstrip("\n")  # drop the line terminator from the last field

            # Image paths in the list are relative to img/ (the notebook is
            # expected to run from the project root).
            source = 'img/' + directory_file_jpg
            if not os.path.isfile(source):
                log(f'{directory_file_jpg} DOES NOT exist, SKIPPING ===============')
                # Report the missing image itself, not the name of the list file.
                logger.debug(f'Image file does not exist: {source}')
                skipped_files_counter += 1
                continue

            log(f'{directory_file_jpg} exists, will copy for keras ===============')
            logger.debug(f'{directory_file_jpg} exists, will copy for keras ===============')

            # Akarna_Dhanurasana/16.jpg => ['Akarna_Dhanurasana', '16.jpg']
            name_parts = directory_file_jpg.split('/')
            classes_values = list(zip(
                ['YOGA_CLASSES_6', 'YOGA_CLASSES_20', 'YOGA_CLASSES_82'],
                [class_6, class_20, class_82],
            ))

            # Copy the image once per classification tier.
            for cl, vl in classes_values:
                # check if destination directory exists, create it if it does not
                # KERAS_data/ prefix is hard coded !!!
                keras_dir_check_mkdir(TRAIN_TEST, cl, vl)

                dir_prefix = 'KERAS_data/' + TRAIN_TEST + '/' + cl + '/' + 'class_' + vl + '/'
                # Akarna_Dhanurasana/16.jpg => 0_Akarna_Dhanurasana_16.jpg
                new_name = vl + '_' + name_parts[0] + '_' + name_parts[1]
                destination = dir_prefix + new_name

                if os.path.isfile(destination):  # already copied on an earlier run, skip
                    logger.debug(f'File {destination} exist, skipping read/write!')
                    log(f'File {destination} exist, skipping read/write!')
                else:
                    log(f'source = {source}')
                    log(f'destination = {destination}')
                    log(f'new_name = {new_name}')

                    # perform copy / rename
                    shutil.copyfile(source, destination)

    log(f'Skipped {skipped_files_counter} image files.')
    print(f'Skipped {skipped_files_counter} image files. Read {line_count} lines')

    # The with-block above has already closed the handle at this point.
    log(f'Closed {file}  #####################')
    print(f'Closed {file}  #####################')
    logger.debug(f'Closed {file}  #####################')
# beepy.beep(sound=6) # 6 : 'success'

Skipped 2129 image files. Read 7456 lines
Closed <_io.TextIOWrapper name='data/yoga_test.txt' mode='r' encoding='UTF-8'>  #####################


In [125]:
# Manual cleanup cell: closes whatever handle is currently bound to `file`
# (the copy cell binds that name). Calling close() on an already closed file
# object is a no-op.
file.close()  # very important AFTER successful completion.

In [126]:
# Silence the print-based log() helper, which checks this flag on every call.
# Messages sent through `logger` are not controlled by this flag.
DEBUG = False

In [130]:
# !pwd
# z = set(classes_values)
# print(z)

set()


In [108]:
destination

'KERAS_data/TRAIN/YOGA_CLASSES_82/class_0/0_Akarna_Dhanurasana_229.jpg'

In [109]:
# Scratch cell: one-off copy that relies on `source` / `destination` still being
# bound from an earlier run of the copy loop.
# NOTE(review): appears unnecessary for the main workflow — confirm and remove.
shutil.copyfile(source, destination)
# os.rename(destination,name_new)

'KERAS_data/TRAIN/YOGA_CLASSES_82/class_0/0_Akarna_Dhanurasana_229.jpg'

In [67]:
tmp = !pwd

In [68]:
tmp

['/Users/alexey_imac/Documents/Data_Science_Immersive_Galvanize/ALEXEY_DATA/CAPSTONES/C2_PROJECT']

In [69]:
tmp[0]

'/Users/alexey_imac/Documents/Data_Science_Immersive_Galvanize/ALEXEY_DATA/CAPSTONES/C2_PROJECT'