In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [52]:
!pip install -U git+https://github.com/albumentations-team/albumentations

In [2]:
import albumentations as A
import cv2
from torchvision import transforms,datasets
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2
import random
import copy
import matplotlib.pyplot as plt

In [23]:
import albumentations as A
import cv2

In [None]:
# Example augmentation pipeline: random 256x256 crop, horizontal flip half of
# the time, brightness/contrast jitter 20% of the time.
# NOTE: the pipeline is only defined in this cell; it is not applied to the image here.
transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])

image_path = "../input/pokemon-images-dataset/pokemon/pokemon/1.png"
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file, it returns None;
# fail here with a readable message instead of inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# OpenCV loads images as BGR; matplotlib expects RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.imshow(image)
plt.show()

In [7]:
def plot(imgs, with_orig=True, col_title=None, orig=None, **imshow_kwargs):
    """Show a grid of images, optionally prefixed by the original image.

    Parameters
    ----------
    imgs : list
        Either a flat list of images (drawn as a single row) or a list of
        lists (one inner list per row). Each image is passed through
        ``np.asarray`` before being drawn.
    with_orig : bool
        When true, the original image is drawn as the first column of every
        row and the first column of the first row is titled "Original image".
    col_title : sequence of str, optional
        Titles for the columns holding ``imgs`` (the original-image column is
        not counted). Titles are assigned left to right on the first row.
    orig : image, optional
        Image used for the "original" column. When omitted, the notebook-level
        global ``orig_img`` is used, which must then be defined before calling.
    **imshow_kwargs
        Forwarded unchanged to ``Axes.imshow``.
    """
    if not isinstance(imgs[0], list):
        # Make a 2d grid even if there's just 1 row
        imgs = [imgs]

    if with_orig and orig is None:
        # Backward-compatible fallback: rely on the global set by the notebook.
        orig = orig_img

    # Index of the first column that holds a transformed image.
    first_col = 1 if with_orig else 0

    num_rows = len(imgs)
    num_cols = len(imgs[0]) + first_col
    _, axs = plt.subplots(nrows=num_rows, ncols=num_cols, squeeze=False)
    for row_idx, row in enumerate(imgs):
        row = [orig] + row if with_orig else row
        for col_idx, img in enumerate(row):
            ax = axs[row_idx, col_idx]
            ax.imshow(np.asarray(img), **imshow_kwargs)
            ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

    if with_orig:
        axs[0, 0].set(title='Original image')
        axs[0, 0].title.set_size(8)
    if col_title is not None:
        # Start at `first_col` so that titles line up with their images whether
        # or not the original-image column is present.
        for ax, title in zip(axs[0, first_col:], col_title):
            ax.set(title=title)
            ax.title.set_size(8)

    plt.tight_layout()

In [74]:
from PIL import Image
import numpy as np

# Reference picture used by the augmentation demos: `orig_img` keeps the PIL
# object (drawn by `plot` as the "Original image"), `image` is its array form
# that is fed to the albumentations transforms.
orig_img = Image.open("../input/pokemon-images-dataset/pokemon/pokemon/1.png")
image = np.array(orig_img)

# Preview the picture without axis decorations, then report the array shape.
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis('off')
plt.show()
print(image.shape)

## Resize and Center Crop

In [19]:
# No earlier cell in this notebook assigns `transformed`, so compute the
# resize + center-crop result here instead of relying on leftover kernel state.
transformed = A.Compose([
    A.Resize(width=128, height=128),
    A.CenterCrop(width=32, height=32),
])(image=image)
plot([transformed['image']],col_title=["Resize & Center Crop"])

In [44]:
# Three ways of shrinking the picture: a plain resize, and a resize to
# 128x128 followed by either a centered or a randomly placed 32x32 crop.
transform_resize = A.Resize(height=64, width=64)
transform_cc = A.Compose([A.Resize(height=128, width=128), A.CenterCrop(height=32, width=32)])
transform_rc = A.Compose([A.Resize(height=128, width=128), A.RandomCrop(height=32, width=32)])

# Apply the pipelines in the same order as they are plotted.
transformed_res, transformed_cc, transformed_rc = (
    pipeline(image=image)
    for pipeline in (transform_resize, transform_cc, transform_rc)
)

plot(
    [transformed_res['image'], transformed_cc['image'], transformed_rc['image']],
    col_title=["Resize 64x64", "Resize & Center Crop", "Resize & Random Crop"],
)

In [84]:
# Grid dropout with the default grid, always applied (p=1.0).
transform_grid = A.GridDropout(p=1.0)
transformed_grid = transform_grid(image=image)

# Same transform with an explicit 3 x 4 grid of holes.
transform_grid2 = A.GridDropout(p=1.0, holes_number_x=3, holes_number_y=4)
# Fixed: this previously called the undefined name `transform_grid_rand`.
transformed_grid2 = transform_grid2(image=image)

plot([transformed_grid['image'],transformed_grid2['image']],col_title=["Grid dropout","Less blocks"])

In [72]:
image_path = "../input/pokemon-images-dataset/pokemon/pokemon/1.png"
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# p=1.0 so the dropout is always applied, like the other GridDropout demos.
transform_grid = A.GridDropout(random_offset=True, p=1.0)
transformed_grid = transform_grid(image=image)

# Fixed: plot the result computed in this cell (it used to show `transformed_res`
# from the resize demo) and label it as what it is.
plot([transformed_grid['image']],col_title=["Grid dropout (random offset)"])

## Gaussian Blur

In [91]:
# Three blur flavours, each forced to apply (p=1.0) so the effect is visible.
transform_blur = A.Blur(p=1.0)
transform_mblur = A.MedianBlur(p=1.0)
transform_gblur = A.GaussianBlur(p=1.0, sigma_limit=9)

# Applied in the order box blur -> gaussian blur -> median blur, which is also
# the left-to-right order of the figure.
transformed_blur, transformed_gblur, transformed_mblur = (
    blur_op(image=image)
    for blur_op in (transform_blur, transform_gblur, transform_mblur)
)

plot(
    [transformed_blur['image'], transformed_gblur['image'], transformed_mblur['image']],
    col_title=["blur", "gaussian blur", "median blur"],
)

## DownScale
Decreases image quality by downscaling and upscaling back.

In [None]:
# Downscale to a quarter of the size and scale back up.
scale_min = 0.25
scale_max = 0.25
interpolation = 0  # same value as cv2.INTER_NEAREST

# Fixed: `Downscale` lives in the albumentations namespace (`A.`); the bare name
# raised NameError. The `interpolation` variable defined above is now used.
transform = A.Downscale(scale_min=scale_min, scale_max=scale_max, interpolation=interpolation, p=1.0)

transformed = transform(image=image)
plt.imshow(transformed['image'])
plt.show()

## Gaussian Noise

Apply Gaussian noise to the input image.

In [102]:
# Gaussian noise, always applied (p=1.0) so the effect is visible.
transform = A.GaussNoise(p=1.0, var_limit=350.0)
transformed_gnoise = transform(image=image)

plot(
    [transformed_gnoise['image']],
    col_title=["Gaussian Noise"],
)

## Pytorch integration

In [31]:
from torchvision import transforms,datasets
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2
import os
import cv2
import random
import torchvision

# Collect every file in the dataset folder, in sorted order.
dataset_directory = "../input/pokemon-images-dataset/pokemon/pokemon"
pokemon_filepaths = sorted(
    os.path.join(dataset_directory, filename)
    for filename in os.listdir(dataset_directory)
)

# Keep only the files OpenCV can actually decode.
correct_images_filepaths = list(
    filter(lambda filepath: cv2.imread(filepath) is not None, pokemon_filepaths)
)

# Seeded shuffle followed by an 80/20 train/test split.
random.seed(42)
random.shuffle(correct_images_filepaths)
n = len(correct_images_filepaths)
n_train = int(n*0.8)
train_images_filepaths, test_images_filepaths = (
    correct_images_filepaths[:n_train],
    correct_images_filepaths[n_train:],
)
print(len(train_images_filepaths), len(test_images_filepaths))

In [32]:
class PokemonDataset(Dataset):
    """Dataset that reads images from disk with OpenCV and optionally transforms them.

    Parameters
    ----------
    images_filepaths : sequence of str
        Paths of the image files; item ``idx`` of the dataset is loaded from
        ``images_filepaths[idx]``.
    transform : callable, optional
        Called as ``transform(image=image)`` and expected to return a mapping
        whose ``"image"`` entry is the transformed image (the albumentations
        calling convention). When ``None`` the RGB array is returned as is.
    """

    def __init__(self, images_filepaths, transform=None):
        self.images_filepaths = images_filepaths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises
        ------
        OSError
            If OpenCV cannot read the file at ``images_filepaths[idx]``.
        """
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        # cv2.imread signals failure by returning None; report the offending
        # path instead of letting cvtColor fail with an opaque error.
        if image is None:
            raise OSError(f"cv2.imread could not read image: {image_filepath!r}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        return image

In [33]:
# Training pipeline: resize, random rotation, gaussian blur half of the time,
# then normalization with mean/std 0.5 per channel and conversion to a tensor.
train_transform = A.Compose([
    A.Resize(height=128, width=128),
    A.Rotate(),
    A.GaussianBlur(sigma_limit=9, p=0.5),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])

# Evaluation pipeline: same resize / normalize / tensor steps, no augmentation.
test_transform = A.Compose([
    A.Resize(height=128, width=128),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2(),
])

train_dataset = PokemonDataset(train_images_filepaths, transform=train_transform)
test_dataset = PokemonDataset(test_images_filepaths, transform=test_transform)

# Batches of 16; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [42]:
def show_img(img):
    """Display a channels-first image tensor that was normalized with mean 0.5 / std 0.5.

    The tensor is mapped back with ``img * 0.5 + 0.5``, clipped to ``[0, 1]``,
    moved to channels-last layout and drawn on a new 20x16 figure.
    """
    plt.figure(figsize=(20,16))
    restored = img.mul(0.5).add(0.5)
    pixels = np.clip(restored.numpy(), 0., 1.)
    # Axis 0 (channels) goes last, as expected by imshow.
    plt.imshow(pixels.transpose(1, 2, 0))
    plt.show()

In [40]:
import matplotlib.pyplot as plt
import numpy as np

In [43]:
# `show_img` is defined in the cell above; the identical second definition that
# used to sit here has been dropped.

# Pull one batch from the training loader and show it as an image grid.
data = iter(train_loader)
# Use the builtin next(): DataLoader iterators implement __next__, and the
# `.next()` method is not available on current PyTorch versions.
images = next(data)
show_img(torchvision.utils.make_grid(images))