In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Image Processing with Neural Network
## Session 32 : Data Preparation for Yolo
<img src='../../../images/prasami_color_tutorials_small.png' style = 'width:400px;' alt="By Pramod Sharma : pramod.sharma@prasami.com" align="left"/>

In [1]:
# Import some libraries
import os
import csv
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2

from sklearn.model_selection import train_test_split

In [3]:
# Some basic parameters
inpDir = '../output/GarbClass'
outDir = '../output'
dataDir = 'GarbYolo' # location of the data
imgDir = 'JPGImages' # location of the images
anotDir = 'Annotations' # location of the annotations

RANDOM_STATE = 24
np.random.seed(RANDOM_STATE) # Set Random Seed for reproducible  results
TEST_SIZE = 0.2
# parameters for Matplotlib
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (15, 12),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'
         }

CMAP = plt.cm.brg

LINE_THICK = 1

MARK_COLOR = (15, 82, 186)

plt.rcParams.update(params)

## Helper Function

In [4]:
def fn_read_image(imgPath):
    '''
    Args:
        imgPath : Path of image file to read
    returns:
        im: image in cv2 format
        rgbIm: image in RGB format
    
    '''
    
    # Read image file
    im = cv2.imread(imgPath, cv2.IMREAD_COLOR)
    
    
    if im is None:
        print('Could not open or find the image:', fileName)
        exit(0)
    else:
        return im

    
def fn_plot_one_img(im):
    
    '''
    Args:
        im : image to display and save
    
    '''
    # showing image
    plt.imshow(im)
    plt.axis('off')
    
        
def fn_plot_images(im_lst):
    '''
    Args:
        img_list: list of images
    '''
    nRows = 1
    nCols = len(im_lst)
    
    
    fig, axes = plt.subplots(nRows, nCols)
    
    for i in range(nCols):
        axes[i].imshow(im_lst[i]['img'], cmap = im_lst[i]['cmap'])
        axes[i].set_title(im_lst[i]['name'])

        axes[i].set_xticklabels([]);
        axes[i].set_yticklabels([]);

                
    plt.tight_layout()
    
    plt.show()
    
def fn_disp_image(im, window = 'Image'):
    while True:
        cv2.imshow(window, srcImg)


        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()

In [5]:
class_idx = {'cardboard':0, 'glass':1,  'metal':2,  'paper':3, 'plastic' : 4, 'trash':5 }
class_names = {0: 'cardboard', 1: 'glass', 2: 'metal', 3: 'paper', 4: 'plastic', 5: 'trash'}

In [6]:
image_types = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")


def fn_list_images(basePath, contains=None):
    # return the set of files that are valid
    return fn_list_files(basePath, validExts=image_types, contains=contains)


def fn_list_files(basePath, validExts=None, contains=None):
    
    # loop over the directory structure
    for (rootDir, dirNames, filenames) in os.walk(basePath):
        
        # loop over the filenames in the current directory
        for filename in filenames:
            
            # if the contains string is not none and the filename does not contain
            # the supplied string, then ignore the file
            if contains is not None and filename.find(contains) == -1:
                continue

            # determine the file extension of the current file
            ext = filename[filename.rfind("."):].lower()

            # check to see if the file is an image and should be processed
            if validExts is None or ext.endswith(validExts):
                
                # construct the path to the image and yield it
                imagePath = os.path.join(rootDir, filename)
                
                yield imagePath

In [7]:
imgPaths = list(fn_list_images(os.path.join(inpDir, imgDir)))

In [8]:
def fn_prep_data_df(row):
    
    imgName = row['ImgPath'].split(os.sep)[-1]
    
    row['ImgName'] = imgName
    
    txtName = f"{imgName.split('.')[0]}.txt"

    txtPath = os.path.join(inpDir, anotDir, txtName)
    
    row['TxtPath'] = txtPath

    ### Reading class from the txt file for stratified split in train and test
    with open(txtPath, 'r') as file:
        # Create a CSV reader object
        csv_reader = csv.reader(file)

        # Iterate through each row in the CSV file
        for line in csv_reader:
            # Each row is a list where elements are separated by commas
            row['Label']= np.int16(line[0])
    
    return row['ImgName'], row['TxtPath'], row['Label']

In [11]:
applied_df

Unnamed: 0,ImgPath


In [9]:
data_df = pd.DataFrame(imgPaths, columns = ['ImgPath'])
applied_df = data_df.apply(lambda row: fn_prep_data_df(row), axis='columns', result_type='expand')
data_df = pd.concat([data_df, applied_df], axis='columns')
data_df.columns = ['ImgPath', 'ImgName', 'TxtPath', 'Label']
data_df.head()

ValueError: Length mismatch: Expected axis has 2 elements, new values have 4 elements

In [None]:
y = data_df['Label']

X = data_df.drop('Label', axis = 1)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=TEST_SIZE, stratify=y)

X_train.shape, X_test.shape

((2021, 3), (506, 3))

In [None]:
shutil.move?

[0;31mSignature:[0m [0mshutil[0m[0;34m.[0m[0mmove[0m[0;34m([0m[0msrc[0m[0;34m,[0m [0mdst[0m[0;34m,[0m [0mcopy_function[0m[0;34m=[0m[0;34m<[0m[0mfunction[0m [0mcopy2[0m [0mat[0m [0;36m0x7f003c9a5510[0m[0;34m>[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Recursively move a file or directory to another location. This is
similar to the Unix "mv" command. Return the file or directory's
destination.

If the destination is a directory or a symlink to a directory, the source
is moved inside the directory. The destination path must not already
exist.

If the destination already exists but is not a directory, it may be
overwritten depending on os.rename() semantics.

If the destination is on our current filesystem, then rename() is used.
Otherwise, src is copied to the destination and then removed. Symlinks are
recreated under the new name if os.rename() fails because of cross
filesystem renames.

The optional `copy_function` argument is a call

In [None]:
def fn_move_files_test_train(row, mode):
    imgName = row['ImgName']
    src = row['ImgPath']
    dst = os.path.join(outDir, dataDir,'images', mode, imgName)
    if os.path.isfile(src):
        shutil.move(src, dst)
        #print ('Image source', src, 'Destination', dst)
    
    
    txtName = f'{os.path.splitext(imgName)[0]}.txt'
    src = row['TxtPath']
    dst = os.path.join(outDir, dataDir,'labels', mode, txtName)
    if os.path.isfile(src):
        shutil.move(src, dst)
        #print ('Label source', src, 'Destination', dst)
    

In [None]:
imgName = data_df.iloc[0]['ImgName']
src = data_df.iloc[0]['ImgPath']
mode = 'train'
dst = os.path.join(outDir, dataDir,'images', mode, imgName)
src, dst

('../output/GarbClass/JPGImages/plastic152.jpg',
 '../output/GarbYolo/images/train/plastic152.jpg')

In [None]:
mode = 'train'
X_train.apply(lambda row: fn_move_files_test_train(row, mode), axis = 1)

mode = 'validation'
X_test.apply(lambda row: fn_move_files_test_train(row, mode), axis = 1)


1502    None
483     None
1590    None
601     None
418     None
        ... 
994     None
2147    None
1745    None
1020    None
2026    None
Length: 506, dtype: object