In [2]:
import numpy as np
import pandas as pd

from PIL import Image, ImageEnhance
import torchvision.transforms as transforms

import os
from tqdm import tqdm 

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# the folder from 256_ObjectCategories.tar file
train_dir = '/home/atik/Documents/Summer_1/data/256_ObjectCategories/'

# a folder where resized and split data will be stored
data_dir = '/home/atik/Documents/Summer_1/data/'

# Load constant train-val split

In [4]:
T = pd.read_csv('/home/atik/Documents/Summer_1/train_val_split/train_metadata_no_clutter.csv')
V = pd.read_csv('/home/atik/Documents/Summer_1/train_val_split/val_metadata_no_clutter.csv')
print(T.head())
print("*"*15)
print(V.head())

   Unnamed: 0           directory      img_name  height  width  channels  \
0           0  175.roulette-wheel  175_0038.jpg     167    248         3   
1           1  175.roulette-wheel  175_0010.jpg     251    500         3   
2           2  175.roulette-wheel  175_0022.jpg     175    188         3   
3           3  175.roulette-wheel  175_0066.jpg     384    512         3   
4           4  175.roulette-wheel  175_0011.jpg     253    364         3   

   byte_size bit_depth   category_name img_extension  category_number  
0     124248     uint8  roulette-wheel           jpg              175  
1     376500     uint8  roulette-wheel           jpg              175  
2      98700     uint8  roulette-wheel           jpg              175  
3     589824     uint8  roulette-wheel           jpg              175  
4     276276     uint8  roulette-wheel           jpg              175  
***************
   Unnamed: 0           directory      img_name  height  width  channels  \
0           0  175.

# Create directories for different categories

In [5]:
try:
    os.mkdir(data_dir + 'train')
    for i in range(1, 256 + 1):
        os.mkdir(data_dir + 'train/' + str(i))
except:
    pass

In [6]:
try:
    os.mkdir(data_dir + 'val')
    for i in range(1, 256 + 1):
        os.mkdir(data_dir + 'val/' + str(i))   
except:
    pass

# Resize val. images

In [7]:
val_transform = transforms.Compose([
    transforms.Resize(299, Image.LANCZOS),
    transforms.CenterCrop(299)
])

  transforms.Resize(299, Image.LANCZOS),


In [8]:
val_size = len(V)
val_size

5120

In [9]:
# resize RGB images
for i, row in tqdm(V.loc[V.channels == 3].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)
    
    # transform it
    image = val_transform(image)
    
    # save
    save_path = os.path.join(data_dir, 'val', str(row.category_number), row.img_name)
    image.save(save_path, 'jpeg')

5041it [00:18, 271.15it/s]


In [10]:
# resize grayscale images
for i, row in tqdm(V.loc[V.channels == 1].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)
    
    # transform it
    image = val_transform(image)
    
    # convert to RGB
    array = np.asarray(image, dtype='uint8')
    array = np.stack([array, array, array], axis=2)
    image = Image.fromarray(array)
    
    # save
    save_path = os.path.join(data_dir, 'val', str(row.category_number), row.img_name)
    image.save(save_path, 'jpeg')

79it [00:00, 378.39it/s]


# Resize train images

In [11]:
enhancers = {
    0: lambda image, f: ImageEnhance.Color(image).enhance(f),
    1: lambda image, f: ImageEnhance.Contrast(image).enhance(f),
    2: lambda image, f: ImageEnhance.Brightness(image).enhance(f),
    3: lambda image, f: ImageEnhance.Sharpness(image).enhance(f)
}

factors = {
    0: lambda: np.random.uniform(0.4, 1.6),
    1: lambda: np.random.uniform(0.8, 1.2),
    2: lambda: np.random.uniform(0.8, 1.2),
    3: lambda: np.random.uniform(0.4, 1.6)
}

# randomly enhance images in random order
def enhance(image):
    order = [0, 1, 2, 3]
    np.random.shuffle(order)
    for i in order:
        f = factors[i]()
        image = enhancers[i](image, f)
    return image

In [12]:
train_transform_rare = transforms.Compose([
    transforms.Resize(384, Image.LANCZOS),
    transforms.RandomCrop(299),
    transforms.RandomHorizontalFlip(),
    transforms.Lambda(enhance)
])

train_transform = transforms.Compose([
    transforms.Resize(384, Image.LANCZOS),
    transforms.RandomCrop(299),
    transforms.RandomHorizontalFlip(),
])

  transforms.Resize(384, Image.LANCZOS),
  transforms.Resize(384, Image.LANCZOS),


In [13]:
# number of images in each category
class_counts = dict(T.category_name.value_counts())
np.save('class_counts.npy', class_counts)

In [14]:
# sample with replacement 100 images from each category
T = T.groupby('category_name', group_keys=False).apply(lambda x: x.sample(n=100, replace=True))
T.reset_index(drop=True, inplace=True)

In [15]:
train_size = len(T)
train_size

25600

In [16]:
# resize RGB images
for i, row in tqdm(T.loc[T.channels == 3].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)
    
    # transform it
    if class_counts[row.category_name] < 100:
        image = train_transform_rare(image)
    else:
        image = train_transform(image)
    
    # save
    new_image_name = str(i) + '_' + row.img_name
    save_path = os.path.join(data_dir, 'train', str(row.category_number), new_image_name)
    image.save(save_path, 'jpeg')

25204it [02:54, 144.25it/s]


In [17]:
# resize grayscale images
for i, row in tqdm(T.loc[T.channels == 1].iterrows()):
    # get image
    file_path = os.path.join(train_dir, row.directory, row.img_name)
    image = Image.open(file_path)
    
    # transform it
    if class_counts[row.category_name] < 100:
        image = train_transform_rare(image)
    else:
        image = train_transform(image)
    
    # convert to RGB
    array = np.asarray(image, dtype='uint8')
    array = np.stack([array, array, array], axis=2)
    image = Image.fromarray(array)
    
    # save
    new_image_name = str(i) + '_' + row.img_name
    save_path = os.path.join(data_dir, 'train', str(row.category_number), new_image_name)
    image.save(save_path, 'jpeg')

396it [00:01, 239.87it/s]
