In [11]:
from imgaug import augmenters as iaa
import matplotlib.pyplot as plt
from tqdm import tqdm
import imgaug as ia
from cv2 import cv2
import numpy as np
import pandas as pd
import glob
import os

In [12]:
# Source folders that are handed to load_images_from_folder further down.
# NOTE(review): these are absolute paths on one specific machine; the notebook
# will not run elsewhere until they are adjusted.
DATASET_TRAIN_PATH = r"D:/Lucru/github-folder/itec-2022/dataset/train/samples/"
DATASET_TEST_PATH = r"D:/Lucru/github-folder/itec-2022/dataset/test/samples/"

In [13]:
def load_images_from_folder(folder, start = 0, end = 3195):
    """Load every readable image in ``folder`` as RGB and return them as one array.

    Files are visited in ascending order of the integer formed by all digits
    in their name, so "10.png" comes after "9.png". Names that contain no
    digit at all (for example "Thumbs.db") are visited last instead of making
    ``int('')`` raise ValueError.

    Args:
        folder: Directory whose entries are read with ``cv2.imread``.
        start: Not used by this function; kept so existing calls keep working.
        end: Not used by this function; kept so existing calls keep working.

    Returns:
        ``np.array`` built from the list of loaded images, each converted
        from OpenCV's BGR channel order to RGB.
    """
    def _numeric_order(name):
        digits = ''.join(filter(str.isdigit, name))
        # Tuples starting with False sort before those starting with True, so
        # digit-less names end up after all numbered ones. The sort is stable,
        # so ties keep the order returned by os.listdir.
        return (not digits, int(digits) if digits else 0)

    files = sorted(os.listdir(folder), key=_numeric_order)

    images = []
    for filename in tqdm(files):
        img = cv2.imread(os.path.join(folder, filename))
        # cv2.imread returns None for entries it cannot decode; those are
        # skipped, which shifts the position of every later image in the
        # returned array.
        if img is not None:
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return np.array(images)

In [14]:
def save_images_in_folder(folder, images, start_index = 0):
    """Write RGB images to ``folder`` as consecutively numbered PNG files.

    The image at position ``i`` is stored as ``<start_index + i>.png``.
    ``folder`` is created first if it does not exist, because ``cv2.imwrite``
    only returns False (without raising) when it cannot write the file.

    Args:
        folder: Destination directory.
        images: Iterable of RGB images; each one is converted to BGR before
            being handed to ``cv2.imwrite``.
        start_index: Number used for the first file name.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a file could not be written.
    """
    if folder:
        # os.makedirs('') would raise, so only create a directory when one is named.
        os.makedirs(folder, exist_ok=True)
    for offset, rgb in enumerate(images):
        target = os.path.join(folder, str(start_index + offset) + '.png')
        bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(target, bgr):
            raise OSError(f"cv2.imwrite could not write {target}")

In [15]:
def augment_images(folder, augmenter, images, start_index = 0, iterations = 10):
    """Run ``augmenter`` over ``images`` several times and save every result.

    Each pass augments the whole ``images`` collection and writes the results
    through ``save_images_in_folder``. Pass ``i`` uses the file numbers
    ``start_index + i*len(images)`` onwards, so passes never overwrite each
    other and the caller-supplied ``start_index`` is honoured (previously it
    was ignored and numbering always began at 0).

    Args:
        folder: Destination directory for the augmented images.
        augmenter: Callable invoked as ``augmenter(images=images)`` that
            returns the augmented images.
        images: Sized collection of input images.
        start_index: File number of the first image written by the first pass.
        iterations: Number of augmentation passes.
    """
    n = len(images)
    for i in tqdm(range(iterations)):
        augmented_images = augmenter(images=images)
        save_images_in_folder(
            folder=folder,
            images=augmented_images,
            start_index=start_index + i*n,
        )

In [16]:
# Seed imgaug so repeated runs of the notebook draw the same random values.
ia.seed(1)

# Alternatives that are currently disabled:
#   iaa.imgcorruptlike.ShotNoise(severity=2)
#   iaa.SaltAndPepper(0.1, per_channel=True)

# Each step is wrapped in Sometimes(0.5, ...), i.e. applied with probability 0.5.
elementwise_shift = iaa.Sometimes(
    0.5,
    iaa.AddElementwise((-40, 80), per_channel=True),
)
gaussian_noise = iaa.Sometimes(
    0.5,
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=True),
)

# random_order=True: apply the augmenters in random order.
seq = iaa.Sequential([elementwise_shift, gaussian_noise], random_order=True)

## Augmenting train dataset

In [17]:
# Read the original training samples into memory.
train_imgs = load_images_from_folder(folder=DATASET_TRAIN_PATH)

100%|████████████████████████████████████████████████████████████████████████████| 3204/3204 [00:00<00:00, 5037.55it/s]


In [18]:
# Destination for the augmented training images; three passes over train_imgs.
dir_aug = r'D:/Lucru/github-folder/itec-2022/dataset-aug/train/samples/'
augment_images(
    folder=dir_aug,
    augmenter=seq,
    images=train_imgs,
    start_index=0,
    iterations=3,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:21<00:00,  7.30s/it]


In [19]:
train_df = pd.read_csv(r"D:/Lucru/github-folder/itec-2022/dataset/train/labels.csv")

# Stack three copies of the label table (the augmentation call above uses
# iterations = 3) behind an empty frame and renumber the rows from 0.
label_frames = [pd.DataFrame()] + [train_df] * 3
train_df_ = pd.concat(label_frames, ignore_index=True)

In [20]:
# One "<row index>.png" file name per row of the stacked training labels.
image_ids = [f'{idx}.png' for idx in train_df_.index]

In [21]:
# Replace image_id with the new file names and discard the "Unnamed: 0" column.
train_df_ = (
    train_df_
    .assign(image_id=image_ids)
    .drop(columns=["Unnamed: 0"])
)

In [22]:
# Persist the labels for the augmented training set. The row index is written
# as well (to_csv default), which is why the file shows an "Unnamed: 0" column
# when it is read back later in this notebook.
train_df_.to_csv(r'D:/Lucru/github-folder/itec-2022/dataset-aug/train/labels.csv')

## Augmenting test dataset

In [23]:
# Read the original test samples into memory.
test_imgs = load_images_from_folder(folder=DATASET_TEST_PATH)

100%|██████████████████████████████████████████████████████████████████████████████| 801/801 [00:00<00:00, 4975.14it/s]


In [24]:
# Destination for the augmented test images; three passes over test_imgs.
dir_aug = r'D:/Lucru/github-folder/itec-2022/dataset-aug/test/samples/'
augment_images(
    folder=dir_aug,
    augmenter=seq,
    images=test_imgs,
    start_index=0,
    iterations=3,
)

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:05<00:00,  1.82s/it]


In [25]:
test_df = pd.read_csv(r"D:/Lucru/github-folder/itec-2022/dataset/test/labels.csv")

# Stack three copies of the label table (the augmentation call above uses
# iterations = 3) behind an empty frame and renumber the rows from 0.
label_frames = [pd.DataFrame()] + [test_df] * 3
test_df_ = pd.concat(label_frames, ignore_index=True)

In [26]:
# One "<row index>.png" file name per row of the stacked test labels.
image_ids = [f'{idx}.png' for idx in test_df_.index]

In [27]:
# Replace image_id with the new file names and discard the "Unnamed: 0" column.
test_df_ = (
    test_df_
    .assign(image_id=image_ids)
    .drop(columns=["Unnamed: 0"])
)

In [28]:
# Persist the labels for the augmented test set. The row index is written as
# well (to_csv default), so the file gains an unnamed leading index column.
test_df_.to_csv(r'D:/Lucru/github-folder/itec-2022/dataset-aug/test/labels.csv')

In [29]:
# Display the stacked test labels to eyeball the regenerated image_id column.
test_df_

Unnamed: 0,image_id,shape,color,area
0,0.png,2.0,0.0,0.136
1,1.png,2.0,1.0,0.181
2,2.png,2.0,2.0,0.133
3,3.png,1.0,0.0,0.105
4,4.png,1.0,1.0,0.230
...,...,...,...,...
2398,2398.png,1.0,1.0,0.065
2399,2399.png,1.0,2.0,0.077
2400,2400.png,0.0,0.0,0.139
2401,2401.png,0.0,1.0,0.176


In [43]:
# NOTE(review): this import sits mid-notebook rather than in the import cell
# at the top.
from sklearn.preprocessing import OneHotEncoder

# NOTE(review): `encoder` is not referenced by any of the cells that follow;
# the one-hot experiment below goes through pd.get_dummies instead.
encoder = OneHotEncoder(handle_unknown='ignore')

In [51]:
# Keep everything except the image_id, color and area columns.
shape = test_df_.drop(columns=["image_id", "color", "area"])

In [52]:
# One indicator column per distinct value found in the 'shape' column.
one_hot = pd.get_dummies(shape['shape'])

In [53]:
# Show the one-hot encoding of the first row as a sanity check.
one_hot.iloc[0]

0.0    0
1.0    0
2.0    1
Name: 0, dtype: uint8

In [2]:
# Read back the labels written for the augmented *train* set.
# NOTE(review): despite its name, `test` holds train labels here.
test = pd.read_csv(r"D:/Lucru/github-folder/itec-2022/dataset-aug/train/labels.csv")

In [3]:
# Display the frame as loaded; the saved row index shows up as "Unnamed: 0".
test

Unnamed: 0.1,Unnamed: 0,image_id,shape,color,area
0,0,0.png,2.0,0.0,0.157
1,1,1.png,2.0,1.0,0.289
2,2,2.png,2.0,2.0,0.214
3,3,3.png,1.0,0.0,0.098
4,4,4.png,1.0,1.0,0.086
...,...,...,...,...,...
9607,9607,9607.png,1.0,1.0,0.131
9608,9608,9608.png,1.0,2.0,0.076
9609,9609,9609.png,0.0,0.0,0.110
9610,9610,9610.png,0.0,1.0,0.114


In [4]:
# Discard the "Unnamed: 0" column that came from the index saved in the CSV.
test = test.drop(columns=["Unnamed: 0"])

In [8]:
# Preview the first rows after dropping the "Unnamed: 0" column.
test.head()

Unnamed: 0,image_id,shape,color,area
0,0.png,2.0,0.0,0.157
1,1.png,2.0,1.0,0.289
2,2.png,2.0,2.0,0.214
3,3.png,1.0,0.0,0.098
4,4.png,1.0,1.0,0.086


In [10]:
# Experiment: use the values of the first row as the column names and drop
# that row. The result is only displayed; `test` itself is not modified.
test.rename(columns=test.iloc[0]).drop(test.index[0])

Unnamed: 0,0.png,2.0,0.0,0.157
1,1.png,2.0,1.0,0.289
2,2.png,2.0,2.0,0.214
3,3.png,1.0,0.0,0.098
4,4.png,1.0,1.0,0.086
5,5.png,1.0,2.0,0.290
...,...,...,...,...
9607,9607.png,1.0,1.0,0.131
9608,9608.png,1.0,2.0,0.076
9609,9609.png,0.0,0.0,0.110
9610,9610.png,0.0,1.0,0.114


In [1]:
# Scratch check: a three-element (255, 255, 255) literal is a tuple.
r = (255,) * 3
type(r)

tuple