# Image Utils

The `img_utils` module alows users to manage images datasets (folders and images files) directly on your disk.

### Type Of Folder Trees

#### tree = 1
```
[root_path] "folder_name"/..  
    [subfolder] class_0/..  
    [subfolder] class_1/..  
    [subfolder] errors/..
```

#### tree = 2
```
[root_path] "folder_name"/..  
    [subfolder] train_tr/..  
        [subfolder] class_0/..  
        [subfolder] class_1/..  
    [subfolder] train_val/..  
        [subfolder] class_0/..  
        [subfolder] class_1/..  
    [subfolder] test/test/..  
    [subfolder] errors/..  
```

### impor `img_utils`

In [21]:
from gapml.utils.img_tools import img_utils
import os

In [90]:
""" Image Utils
Copyright, 2018(c), Andrew Ferlitsch
Autor: David Molina @virtualdvid
"""

import os
import sys
import random
import shutil

class img_utils:
    """
    Image Utils:

    Type of Folder Tree

    ## tree = 1 ##
    [root_path] "folder_name"/..
        [subfolder] class_0/..
        [subfolder] class_1/..
        [subfolder] errors/..

    ## tree = 2 ##
    [root_path] "folder_name"/..
        [subfolder] train_tr/..
            [subfolder] class_0/..
            [subfolder] class_1/..
        [subfolder] train_val/..
            [subfolder] class_0/..
            [subfolder] class_1/..
        [subfolder] test/test/..
        [subfolder] errors/..
    """

    def __init__(self, root_path='./', tree=1, transf='1to2', rm=False):
        """Make directories"""
        self.labels = os.listdir(root_path)      # list of images labels
        self.root_path = root_path.split('/')[0] # root folder where labels are located
        self.tree = tree                         # folder structure to the end sample
        self.transf = transf                     # type of folder tree to tranform '1to2' or '2to1'
        self.rm = rm                             # warning! remove folder from directory
        
        if rm:
            answere_ok = False
            while answere_ok == False:
                try:
                    warning = input('Warning! this will delete your image dataset. Are you sure? [Yes/no]: ')
                    warning = warning[0].lower()
                    if warning == 'y' or warning == 'n':
                        answere_ok = True
                except:
                    continue
            if warning == 'y':
                shutil.rmtree(self.root_path)
                print('Your files were deleted')
        
    def _list_labels_org(self):
        """ List Labels Origin """
        # list of labels into root_path folder
        if self.transf == '1to2':
            self.labels_org = [f'{self.root_path}/{lb}' for lb in self.labels]
        elif self.transf == '2to1':
            self.root_path = self.root_path[:-3]
            train_tr = [f'{self.root_path}_t2/train_tr/{lb}' for lb in self.labels]
            train_val = [f'{self.root_path}_t2/train_val/{lb}' for lb in self.labels]
            self.labels_org = train_tr + train_val
    
    def _src_list(self):
        """ Source List """
        # list of labels for folders that will be renamed
        if self.tree == 1:
            self.src_list = [f'{self.root_path}/{lb}' for lb in self.labels]
        if self.tree == 2:
            train_tr = [f'{self.root_path}/train_tr/{lb}' for lb in self.labels]
            train_val = [f'{self.root_path}/train_val/{lb}' for lb in self.labels]
            self.src_list = train_tr + train_val
            
    def _makedirs(self):
        """ Make Directories """
        #creates folders structure
        if self.tree == 1:
            for lb in self.labels:
                os.makedirs(f'{self.root_path}{self.end}/{lb}', exist_ok=True)
        elif self.tree == 2:
            for lb in self.labels:
                os.makedirs(f'{self.root_path}{self.end2}/train_tr/{lb}', exist_ok=True)
                os.makedirs(f'{self.root_path}{self.end2}/train_val/{lb}', exist_ok=True)
            os.makedirs(f'{self.root_path}{self.end2}/test/test', exist_ok=True)
            os.makedirs(f'{self.root_path}{self.end2}/errors', exist_ok=True)
        elif self.tree == None:
            pass
        else:
            print('select between tree=1 or tree=2')

    def _copy_move(self, ppath, action, lb, img_list, index):
        """
        Copy or Move images
        :param ppath:     Required. Partial path
        :param action:    Required. Select between 'copy' or 'move'
        :param lb:        Required. Label name
        :param img_list:  Required. List of images per class
        :param index:     Required. Index image in the list
        """
        # verify type of tree to transform
        label = lb.split('/')[-1]
        if self.transf == '1to2':
            org_file = f'{lb}/{img_list[index]}'
            dst_file = f'{self.root_path}{ppath}/{label}/{img_list[index]}'
        elif self.transf == '2to1':
            org_file = f'{lb}/{img_list}'
            dst_file = f'{self.root_path}/{label}/{img_list}'
        
        # move or copy images into new tree structure
        if action == 'copy':
            shutil.copy(org_file, dst_file)
        elif action == 'move':
            shutil.move(org_file, dst_file)
        else:
            print('select copy or move')
            
    def img_container(self, action='copy', spl=5, shufle=False, img_split=0.2):
        """
        Images Container
        :param action:    Select between 'copy' or 'move'
        :param spl:       Select the number of pictures for label to create the sample
        :param shufle:    select ramdom images per label or the first images on the list
        :param img_split: percentage of split between train / val
        """
        
        # specifies the name for the root_path
        if action == 'copy':
            self.end  = '_spl'
            self.end2 = '_t2' + self.end
        elif action == 'move':
            self.end  = ''
            self.end2 = '_t2'
        else:
            print('select copy or move')
        
        # creates list of labels from root_path
        self._list_labels_org()
            
        # creates the directories
        self._makedirs()
        
        for lb in self.labels_org:
            # list of images per label
            img_list = os.listdir(lb)
            # total of images per class 
            len_img_list = len(img_list)
            
            # sets a sample number or total of images per class to move or copy 
            if action == 'copy':
                spl = spl
            elif action == 'move':
                spl = len_img_list
            else:
                print('select copy or move')
            
            if shufle:
                #get a random image sample
                list_index = random.sample(range(len_img_list), spl)
            else:
                #get the first images on folder
                list_index = list(range(spl))
            
            # move images from tree 2 to tree 1
            if self.transf == '2to1':
                for img in img_list:
                    ppath = None
                    action = 'move'
                    index = None
                    self._copy_move(ppath, action, lb, img, index)
                self.tree = None
            
            # copy or move selected images into the sample labels depending of the selected tree
            if self.tree == 1:
                for index in list_index:
                    self._copy_move('_spl', action, lb, img_list, index)
          
            elif self.tree == 2:
                img_tr = int(len(list_index) * (1 - img_split))
                count = 0
                for index in list_index:
                    if count <= img_tr:
                        # move or copy images in the train folder
                        self._copy_move(f'{self.end2}/train_tr', action, lb, img_list, index)
                    else:
                        # move or copy images in the validation folder
                        self._copy_move(f'{self.end2}/train_val', action, lb, img_list, index)
                    count += 1
            elif self.tree == None:
                pass
            else:
                print('select 1 or 2')
                
    def transform(self, shufle=False, img_split=0.2):
        """
        Transform
        :param shufle:    select ramdom images per label or the first images on the list
        :param img_split: percentage of split between train / val
        """
        # move the files between tree structures
        action = 'move'
        if self.transf == '1to2':
            self.tree = 2
            spl = None
            self.img_container(action, spl, shufle, img_split)
            shutil.rmtree(self.root_path)
        elif self.transf == '2to1':
            self.img_container(action)
            shutil.rmtree(f'{self.root_path}_t2')
        else:
            print('select 1to2 or 2to1')
            
    def img_rename(self, text=None):
        """
        Rename Images
        :param text:   give a text for your images name
        """
        
        # creates source list
        self._src_list()
                
        for lb in self.src_list:
            # list of images per label
            img_list = os.listdir(lb)
            # extract label name
            text_lb = lb.split('/')[-1]
            for i, img in enumerate(img_list):
                if os.path.isdir(f'{lb}/{img}'):
                    print('There is not images to rename')
                    break
                dtype = img.split('.')[-1]
                if text == True:
                    img_name = f'{text_lb}_{i}'
                elif text != None:
                    img_name = f'{text}_{i}'
                else:
                    img_name = f'{i}'
                os.rename(f'{lb}/{img}', f'{lb}/{img_name}.{dtype}')
                    
    def img_replace(self, old, new, img_id=False):
        """
        Rename Images
        :param old:    Required. The text you want to replace.
        :param new:    Required. The text you want to replace "old" with.
        :param img_id: True to enumerate by id name_id
        """
        # creates source list
        self._src_list()
        
        for lb in self.src_list:
            # list of images per label
            img_list = os.listdir(lb)
            for i, img in enumerate(img_list):
                if os.path.isdir(f'{lb}/{img}'):
                    print('There is not images to replace')
                    break
                if img_id:
                    new2 = f'{new}_{i}'
                    os.rename(f'{lb}/{img}', f'{lb}/{img.replace(old,new2)}')
                else:
                    os.rename(f'{lb}/{img}', f'{lb}/{img.replace(old,new)}')

In [30]:
# Proccess root_path = flower_photos
gap = img_utils(root_path='flower_photos')

In [31]:
# current working directory path.
%pwd

'/mnt/c/Users/david/github/gapml/Gap/train'

In [32]:
# current list directory contents
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*     [01;32m'session 2.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*     [01;32m'session 3.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos[0m/      [01;32m'session 4.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*   [34;42mflower_photos_spl[0m/  [01;32m'session 5.ipynb'[0m*
[01;32m'Session 6.ipynb'[0m*         [01;32m'session 0.ipynb'[0m*    [34;42mtut_files[0m/
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 1.ipynb'[0m*


In [33]:
# root_path labels directory contents
%ls flower_photos/

[0m[34;42mdaisy[0m/  [34;42mdandelion[0m/  [34;42mroses[0m/  [34;42msunflowers[0m/  [34;42mtulips[0m/


In [34]:
# transform directoies between tree 1 to tree 2
gap.transform(shufle=True, img_split=0.2)

In [35]:
# notice the new folder flower_photos_t2
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*     [01;32m'session 2.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*     [01;32m'session 3.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos_spl[0m/  [01;32m'session 4.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*   [34;42mflower_photos_t2[0m/   [01;32m'session 5.ipynb'[0m*
[01;32m'Session 6.ipynb'[0m*         [01;32m'session 0.ipynb'[0m*    [34;42mtut_files[0m/
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 1.ipynb'[0m*


In [36]:
# let's see the structure inside
%ls flower_photos_t2/

[0m[34;42merrors[0m/  [34;42mtest[0m/  [34;42mtrain_tr[0m/  [34;42mtrain_val[0m/


In [37]:
# Proccess root_path = flower_photos_t2/train_tr
gap = img_utils(root_path='flower_photos_t2/train_tr', transf='2to1')

In [38]:
# transform directories between tree 2 to tree 1
gap.transform()

In [39]:
# notice we got the previous folder flower_photos
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*     [01;32m'session 2.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*     [01;32m'session 3.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos[0m/      [01;32m'session 4.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*   [34;42mflower_photos_spl[0m/  [01;32m'session 5.ipynb'[0m*
[01;32m'Session 6.ipynb'[0m*         [01;32m'session 0.ipynb'[0m*    [34;42mtut_files[0m/
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 1.ipynb'[0m*


In [40]:
# let's see the structure inside
%ls flower_photos/

[0m[34;42mdaisy[0m/  [34;42mdandelion[0m/  [34;42mroses[0m/  [34;42msunflowers[0m/  [34;42mtulips[0m/


In [48]:
# Proccess root_path = flower_photos
gap = img_utils(root_path='flower_photos')

In [49]:
# Let's extract a sample of the data set
gap.img_container(action='copy', spl=15)

In [50]:
# notice the new folder flower_photos_spl
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*     [01;32m'session 2.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*     [01;32m'session 3.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos[0m/      [01;32m'session 4.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*   [34;42mflower_photos_spl[0m/  [01;32m'session 5.ipynb'[0m*
[01;32m'Session 6.ipynb'[0m*         [01;32m'session 0.ipynb'[0m*    [34;42mtut_files[0m/
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 1.ipynb'[0m*


In [51]:
# Let's see inside
%ls flower_photos_spl/

[0m[34;42mdaisy[0m/  [34;42mdandelion[0m/  [34;42mroses[0m/  [34;42msunflowers[0m/  [34;42mtulips[0m/


In [55]:
# Proccess root_path = flower_photos
gap = img_utils(root_path='flower_photos', tree=2)

In [56]:
# Let's extract a sample of the data set but with the tree 2 structure
gap.img_container(action='copy', spl=15, shufle=True, img_split=0.2)

In [57]:
# notice the new folder flower_photos_t2_spl
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*        [01;32m'session 1.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*        [01;32m'session 2.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos[0m/         [01;32m'session 3.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*   [34;42mflower_photos_spl[0m/     [01;32m'session 4.ipynb'[0m*
[01;32m'Session 6.ipynb'[0m*          [34;42mflower_photos_t2_spl[0m/  [01;32m'session 5.ipynb'[0m*
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 0.ipynb'[0m*       [34;42mtut_files[0m/


In [58]:
img_list_tr = os.listdir('flower_photos_t2_spl/train_tr/roses')
img_list_val = os.listdir('flower_photos_t2_spl/train_val/roses')
print('number of images in train_tr:', len(img_list_tr))
print('number of images in train_val:', len(img_list_val))
%ls flower_photos_t2_spl/train_tr

number of images in train_tr: 13
number of images in train_val: 2
[0m[34;42mdaisy[0m/  [34;42mdandelion[0m/  [34;42mroses[0m/  [34;42msunflowers[0m/  [34;42mtulips[0m/


In [59]:
# Proccess root_path = flower_photos_spl
gap = img_utils(root_path='flower_photos_spl')

In [60]:
# current images names
%ls flower_photos_spl/roses/

[0m[01;32m10090824183_d02c613f10_m.jpg[0m*  [01;32m12045735155_42547ce4e9_n.jpg[0m*
[01;32m102501987_3cdb8e5394_n.jpg[0m*    [01;32m12165480946_c4a3fe182d_n.jpg[0m*
[01;32m10503217854_e66a804309.jpg[0m*    [01;32m12202373204_34fb07205b.jpg[0m*
[01;32m10894627425_ec76bbc757_n.jpg[0m*  [01;32m12238827553_cf427bfd51_n.jpg[0m*
[01;32m110472418_87b6a3aa98_m.jpg[0m*    [01;32m12240165555_98625b1e88_n.jpg[0m*
[01;32m11102341464_508d558dfc_n.jpg[0m*  [01;32m12240303_80d87f77a3_n.jpg[0m*
[01;32m11233672494_d8bf0a3dbf_n.jpg[0m*  [01;32m12240577184_b0de0e53ea_n.jpg[0m*
[01;32m11694025703_9a906fedc1_n.jpg[0m*  [01;32m12243069253_e512464095_n.jpg[0m*
[01;32m118974357_0faa23cce9_n.jpg[0m*    [01;32m123128873_546b8b7355_n.jpg[0m*
[01;32m11944957684_2cc806276e.jpg[0m*


In [65]:
# rename images
# try the diferent options
gap.img_rename(text='test')# text=True text='test'

In [66]:
# new images names in the label roses
%ls flower_photos_spl/roses/

[0m[01;32mtest_0.jpg[0m*   [01;32mtest_12.jpg[0m*  [01;32mtest_16.jpg[0m*  [01;32mtest_3.jpg[0m*  [01;32mtest_7.jpg[0m*
[01;32mtest_1.jpg[0m*   [01;32mtest_13.jpg[0m*  [01;32mtest_17.jpg[0m*  [01;32mtest_4.jpg[0m*  [01;32mtest_8.jpg[0m*
[01;32mtest_10.jpg[0m*  [01;32mtest_14.jpg[0m*  [01;32mtest_18.jpg[0m*  [01;32mtest_5.jpg[0m*  [01;32mtest_9.jpg[0m*
[01;32mtest_11.jpg[0m*  [01;32mtest_15.jpg[0m*  [01;32mtest_2.jpg[0m*   [01;32mtest_6.jpg[0m*


In [67]:
# replace part of the name text 
gap.img_replace(old='st', new='sted', img_id=False)

In [68]:
# new images names in the label roses
%ls flower_photos_spl/roses/

[0m[01;32mtested_0.jpg[0m*   [01;32mtested_12.jpg[0m*  [01;32mtested_16.jpg[0m*  [01;32mtested_3.jpg[0m*  [01;32mtested_7.jpg[0m*
[01;32mtested_1.jpg[0m*   [01;32mtested_13.jpg[0m*  [01;32mtested_17.jpg[0m*  [01;32mtested_4.jpg[0m*  [01;32mtested_8.jpg[0m*
[01;32mtested_10.jpg[0m*  [01;32mtested_14.jpg[0m*  [01;32mtested_18.jpg[0m*  [01;32mtested_5.jpg[0m*  [01;32mtested_9.jpg[0m*
[01;32mtested_11.jpg[0m*  [01;32mtested_15.jpg[0m*  [01;32mtested_2.jpg[0m*   [01;32mtested_6.jpg[0m*


In [91]:
# Proccess root_path = flower_photos_t2_spl/train_tr
gap = img_utils(root_path='flower_photos_t2_spl/train_tr', tree=2)

In [97]:
# current images names
%ls flower_photos_t2_spl/train_tr/roses/

[0m[01;32mtested_0_0.jpg[0m*   [01;32mtested_1_1.jpg[0m*   [01;32mtested_5_2.jpg[0m*  [01;32mtested_9_6.jpg[0m*
[01;32mtested_10_7.jpg[0m*  [01;32mtested_2_10.jpg[0m*  [01;32mtested_6_3.jpg[0m*
[01;32mtested_11_8.jpg[0m*  [01;32mtested_3_11.jpg[0m*  [01;32mtested_7_4.jpg[0m*
[01;32mtested_12_9.jpg[0m*  [01;32mtested_4_12.jpg[0m*  [01;32mtested_8_5.jpg[0m*


In [93]:
# rename images
# try the diferent options
gap.img_rename(text='test')# text=True text='test'

In [94]:
# new images names in the label roses
%ls flower_photos_t2_spl/train_tr/roses/

[0m[01;32mtest_0.jpg[0m*   [01;32mtest_11.jpg[0m*  [01;32mtest_3.jpg[0m*  [01;32mtest_6.jpg[0m*  [01;32mtest_9.jpg[0m*
[01;32mtest_1.jpg[0m*   [01;32mtest_12.jpg[0m*  [01;32mtest_4.jpg[0m*  [01;32mtest_7.jpg[0m*
[01;32mtest_10.jpg[0m*  [01;32mtest_2.jpg[0m*   [01;32mtest_5.jpg[0m*  [01;32mtest_8.jpg[0m*


In [95]:
gap.img_replace(old='st', new='sted', img_id=True)

In [96]:
# new images names in the label roses
%ls flower_photos_t2_spl/train_tr/roses/

[0m[01;32mtested_0_0.jpg[0m*   [01;32mtested_1_1.jpg[0m*   [01;32mtested_5_2.jpg[0m*  [01;32mtested_9_6.jpg[0m*
[01;32mtested_10_7.jpg[0m*  [01;32mtested_2_10.jpg[0m*  [01;32mtested_6_3.jpg[0m*
[01;32mtested_11_8.jpg[0m*  [01;32mtested_3_11.jpg[0m*  [01;32mtested_7_4.jpg[0m*
[01;32mtested_12_9.jpg[0m*  [01;32mtested_4_12.jpg[0m*  [01;32mtested_8_5.jpg[0m*


### Delete samples

In [60]:
gap = img_utils(root_path='flower_photos_spl', rm=True)

Your files were deleted


In [55]:
gap = img_utils(root_path='flower_photos_t2_spl', rm=True)

Your files were deleted


In [56]:
# current list directory contents 
%ls

 [0m[01;32m10nc.pdf[0m*                  [01;32mboardgames.csv[0m*    [01;32m'session 3.ipynb'[0m*
 [01;32m4scan.pdf[0m*                 [01;32mcrash_2015.pdf[0m*    [01;32m'session 4.ipynb'[0m*
[01;32m'NLP Epipog - I.pptx'[0m*      [34;42mflower_photos[0m/     [01;32m'session 5.ipynb'[0m*
[01;32m'Roadmap - Gap 1.0.pptx'[0m*  [01;32m'session 0.ipynb'[0m*   [34;42mtut_files[0m/
[01;32m'Session 6.ipynb'[0m*         [01;32m'session 1.ipynb'[0m*
[01;32m'Train - Intro.pptx'[0m*      [01;32m'session 2.ipynb'[0m*
