# Preprocessing

## Loading Data

In [1]:
import os
from PIL import Image
import cv2

c:\Users\foxin\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll
c:\Users\foxin\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
c:\Users\foxin\AppData\Local\Programs\Python\Python310\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-gcc_10_3_0.dll


In [4]:
import numpy as np

In [9]:
import matplotlib.pyplot as plt

In [17]:
def getRawData(openImage = False):
    for className in os.listdir("raw data"):
        for file in os.listdir(os.path.join("raw data", className)):
            if openImage:
                yield (np.array(Image.open(os.path.join("raw data", className, file))), className, file, os.path.join("raw data", className, file))
            else:
                yield (className, file, os.path.join("raw data", className, file))


## Train Test Splitting

In [19]:
rawData = list(getRawData())

In [21]:
rawDataDict = {'bikes':[], 'cars':[], 'cycles':[], 'scooters':[], 'trucks':[]}

for x,y,z in rawData:
    rawDataDict[x].append(z)

In [26]:
from random import shuffle

In [34]:
partitionedData = {
    'train':{
        'bikes':[],
        'cars':[],
        'cycles':[],
        'scooters':[],
        'trucks':[],
    },
    'valid':{
        'bikes':[],
        'cars':[],
        'cycles':[],
        'scooters':[],
        'trucks':[],
    },
    'test':{
        'bikes':[],
        'cars':[],
        'cycles':[],
        'scooters':[],
        'trucks':[],
    },
}

for key in rawDataDict:
    shuffle(rawDataDict[key])
    total = len(rawDataDict[key])
    trainLen = round( total*0.75 )
    vaildLen = round( total*0.15 )
    
    train = rawDataDict[key][:trainLen]
    valid = rawDataDict[key][trainLen : trainLen+vaildLen]
    test = rawDataDict[key][trainLen+vaildLen:]

    partitionedData['train'][key] = train
    partitionedData['valid'][key] = valid
    partitionedData['test'][key] = test


In [36]:
import shutil

In [38]:
for partition in partitionedData:
    for className in partitionedData[partition]:
        for file in partitionedData[partition][className]:
            fileName = file.split('\\')[-1]
            destination = os.path.join("partitioned data", partition, className, fileName)
            shutil.copy(file, destination)

## Augmenting Data

In [39]:
import albumentations as A

In [43]:
import uuid

In [41]:
from typing import Union

In [40]:
transform = A.Compose([
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
    A.RandomRotate90(),
    A.Flip(),
    A.RGBShift(),
    A.HueSaturationValue(),
    A.CLAHE(),
    A.RandomGamma(),
    A.Blur(),
    A.ToGray(),
    A.ImageCompression(),
])
# listing transformations 

In [44]:
def augment(imageList:list[str], partition:str, className:str, thread:int):
    for i, image in enumerate(imageList):
        img = np.array(Image.open(image))
        for _ in range(15):
            try:
                augmentedImg = transform(image=img)['image']
                augmentedImg = cv2.resize(augmentedImg, (256, 256))

                cv2.imwrite(os.path.join('augmented data', partition, className, str(uuid.uuid1()) + '.jpg'), augmentedImg)
                # augmenting image 
                
            except Exception as e:
                print(f'[ERROR] Couldnt augment image {image} [EXCEPTION] {e}')
        cv2.imwrite(os.path.join('augmented data', partition, className, str(uuid.uuid1()) + '.jpg'), cv2.resize(img, (256, 256)))
        print(f"[THREAD {thread}] {round( (i/len(imageList))*100, 2)}% complete", end='\r')


In [77]:
def split(a, n):
    """
    splits a list into n parts
    """
    k, m = divmod(len(a), n)
    return (list(a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)))

In [87]:
batches = []

for partition in os.listdir("partitioned data"):
    for className in os.listdir(os.path.join("partitioned data", partition)):
        files = list(map(lambda x: os.path.join("partitioned data", partition, className, x), os.listdir(os.path.join("partitioned data", partition, className))))
        batchedFiles = list(map(lambda x: (partition, className, x), split(files, 10)))
        batches += batchedFiles

In [88]:
len(batches)

150

In [86]:
from threading import  Thread

In [90]:
# threads = []

# for i, (partition, className, batch )in enumerate(batches):
#     threads.append(Thread(target=augment, args=(batch, partition, className, i)))

# for thread in threads:
#     thread.start()

# commenting out to avoid running accidently 

[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\images19.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\bikes\ima

  warn(


[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white-xpro-min-1024x945.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\test\scooters\web-white

[ERROR] Couldnt augment image partitioned data\valid\bikes\96618092.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\valid\bikes\96618092.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\valid\bikes\96618092.jpg [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\scooters\new-blue-min-1024x773.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\scooters\new-blue-min-1024x773.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\valid\bikes\96618092.jpg [EXCEPTION] ToGray transformation expects 3-channel images.
[ERROR] Couldnt augment image partitioned data\train\scooters\new-blue-min-1024x773.png [EXCEPTION] RGBShift transformation expects 3-channel images.
[ERROR] Couldnt 