In [None]:
import numpy as np
import os
import cv2
import random
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import seaborn as sns
import albumentations as A

from PIL import Image
from matplotlib import pyplot as plt
from sklearn.model_selection import StratifiedKFold
from albumentations.core.composition import Compose, OneOf
from tensorflow.keras.utils import img_to_array, array_to_img

In [None]:
# Load the image-filename / cultivar-label table for the training set.
train_meta_path = '../input/sorghum-id-fgvc-9/train_cultivar_mapping.csv'
train_meta = pd.read_csv(train_meta_path)
# Bare last expression so the notebook renders the frame.
train_meta

In [None]:
# Number of rows per cultivar. The Series method is used because the
# top-level `pd.value_counts` helper is deprecated in recent pandas releases.
train_meta['cultivar'].value_counts()

In [None]:
# Bar chart of how many rows each cultivar has in the raw metadata.
fig, ax = plt.subplots(figsize=[24, 6], dpi=200)
sns.countplot(x=train_meta['cultivar'], ax=ax)
# Tilt the class names so the tick labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=60)
plt.show()

In [None]:
# Directory with the source training images and one sample file used by the
# preview cells below.
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because later cells (and the `cropping` signature) refer to it by this name.
dir = '../input/sorghum-cultivar-identification-512512/train/'
filename = '2017-06-01__10-27-00-467.png'

In [None]:
central_crop_width = (0.35, 0.65, 0.75)
central_crop_height = (0.35, 0.9, 0.75)

# The sample image does not change between iterations, so read its size once
# (and close the file) instead of decoding the whole image on every pass.
with Image.open(os.path.join(dir, filename)) as sample_image:
    w, h = sample_image.size  # PIL reports (width, height)

# Print the pixel size (width, height) each pair of factors would produce.
for w_factor, h_factor in zip(central_crop_width, central_crop_height):
    print(int(w * w_factor), int(h * h_factor))

In [None]:
def resize_to_smaller_side(img,small_side_to=512, crop_window=(256, 256, 3), copies=3):
    """Rescale an image so its shorter side equals `small_side_to`, then take random crops.

    The longer side is scaled by the same factor, so the aspect ratio is
    preserved (a square input stays square). Previously only the shorter side
    was changed, which stretched the image and could leave the untouched side
    smaller than the crop window.

    Args:
        img: image array/tensor whose first two dimensions are height and width.
        small_side_to: target length, in pixels, of the shorter side.
        crop_window: size passed to `tf.image.random_crop` for every crop.
        copies: number of random crops to return.

    Returns:
        A list with `copies` random crops of the resized image.

    Raises:
        ValueError: if the image has a zero-length side.
    """
    h, w = img.shape[:2]
    short_side = min(h, w)
    if short_side <= 0:
        raise ValueError(f'cannot resize an image with shape {tuple(img.shape)}')

    # One scale factor for both axes keeps the aspect ratio intact; the shorter
    # side is set exactly to avoid rounding it to small_side_to +/- 1.
    scale = small_side_to / short_side
    new_h = small_side_to if h <= w else int(round(h * scale))
    new_w = small_side_to if w <= h else int(round(w * scale))
    resized = tf.image.resize(img, size=(new_h, new_w))

    return [tf.image.random_crop(resized, crop_window) for _ in range(copies)]


# Demo: load the sample image, force it to 512x512 and show three random crops.
sample_path = os.path.join(dir, filename)
img = tf.keras.utils.img_to_array(Image.open(sample_path))
img_square = tf.image.resize(img, size=(512, 512))
crops = resize_to_smaller_side(img_square)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 6], dpi=300)

# One subplot per crop.
for ax, crop in zip(axes, crops):
    ax.imshow(tf.keras.utils.array_to_img(crop))

plt.show()

In [None]:
def cropping(filename,
             dir,
             central_crop_width=(0.35, 0.65, 0.75),
             central_crop_height=(0.35, 0.9, 0.45)):
    """Cut centred crops out of one image file.

    Width and height factors are paired positionally; each pair yields one
    crop whose size is that fraction of the image and which is centred in it.

    Args:
        filename: name of the image file inside `dir`.
        dir: directory that contains the image.
        central_crop_width: fraction(s) of the image width to keep. A sequence
            gives one crop per element; a bare number gives a single crop
            (previously a bare number silently produced no crops at all).
        central_crop_height: fraction(s) of the image height, same convention.

    Returns:
        A list of cropped image tensors, one per (width, height) pair. Pairing
        uses `zip`, so the shorter of the two sequences decides the count.
    """
    def _as_factors(value):
        # Treat a scalar as a one-element sequence.
        return (value,) if np.ndim(value) == 0 else value

    # Decode inside the context manager so the file handle is released.
    with Image.open(os.path.join(dir, filename)) as image:
        arr = tf.keras.utils.img_to_array(image)

    # The image size is the same for every crop, so look it up once.
    h, w = arr.shape[:2]
    crops = []

    for w_factor, h_factor in zip(_as_factors(central_crop_width),
                                  _as_factors(central_crop_height)):
        # Half of the discarded margin on each axis centres the window.
        offset_h = int((h - h_factor * h) // 2)
        offset_w = int((w - w_factor * w) // 2)

        crops.append(
            tf.image.crop_to_bounding_box(arr, offset_h, offset_w, int(h * h_factor), int(w * w_factor))
        )

    return crops


In [None]:
# Demo: three centred crops of the sample image with custom size factors.
crops = cropping(
    filename,
    dir,
    central_crop_width=(0.25, 0.5, 0.75),
    central_crop_height=(0.25, 0.66, 0.45),
)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=[12, 10], dpi=300)
axes = axes.ravel()

# One subplot per crop.
for ax, crop in zip(axes, crops):
    ax.imshow(tf.keras.utils.array_to_img(crop))

plt.show()

In [None]:
# Output directory for the cropped / augmented images. `exist_ok=True` makes
# the cell safe to re-run without a separate check-then-create step.
os.makedirs('train', exist_ok=True)

In [None]:
c = 0
new_train_meta = []

# List the source directory once: calling os.listdir() for every metadata row
# (and again for the progress line) made this loop quadratic in the file count.
available_files = set(os.listdir(dir))
total_files = len(available_files)

# The loop variable is deliberately not called `filename`, so the sample
# `filename` defined earlier is still intact for the preview cells that follow.
for src_name, label in train_meta.values:
    # Skip metadata rows whose image is not present on disk.
    if src_name not in available_files:
        continue

    crops = cropping(src_name, dir)

    for i, crop in enumerate(crops):
        # 'cc-<crop index>-<source file>' keeps the link to the source image.
        dst_file = f'cc-{i}-{src_name}'
        tf.keras.utils.array_to_img(tf.image.resize(crop, (256, 256))).save(f'train/{dst_file}')

        new_train_meta.append([dst_file, label])

    c += 1
    print(f'{c}/{total_files}', end='\r')

In [None]:
# Visual sanity check: a 6x6 grid of randomly chosen files from 'train'.
fig, axes = plt.subplots(nrows=6, ncols=6, figsize=[18, 18], dpi=300)
axes = axes.ravel()

# List the directory once instead of once per tile.
train_files = os.listdir('train')

for i in range(36):
    axes[i].imshow(Image.open(f'train/{random.choice(train_files)}'))

plt.show()

In [None]:
# Turn the collected [file name, label] pairs into a metadata frame for the
# files written to 'train'.
new_train_meta_ = pd.DataFrame(new_train_meta, columns=['image', 'cultivar'])
# Bare last expression so the notebook renders the frame.
new_train_meta_

In [None]:
skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

# Every source photo was exported as several nested centre crops named
# 'cc-<i>-<source file>'. Splitting the crop rows directly would put crops of
# the same photo on both sides of the split and leak validation content into
# training, so the stratified split is done on source photos and then mapped
# back to the crop rows.
source_image = new_train_meta_['image'].str.replace(r'^cc-\d+-', '', regex=True)
sources = (
    new_train_meta_[['cultivar']]
    .assign(source=source_image)
    .drop_duplicates(subset='source')
    .reset_index(drop=True)
)

# As before, the last of the four folds is the one that is kept.
train_idx, valid_idx = list(skf.split(sources['source'], sources['cultivar']))[-1]
valid_sources = set(sources['source'].iloc[valid_idx])
in_valid = source_image.isin(valid_sources)

df_train = new_train_meta_[~in_valid]
df_valid = new_train_meta_[in_valid]

print(f"train size: {len(df_train)}")
print(f"valid size: {len(df_valid)}")

print(df_train.cultivar.value_counts())
print(df_valid.cultivar.value_counts())

In [None]:
# Persist the validation split (file name + label) without the index column.
df_valid.to_csv('valid_meta.csv', index=False)

In [None]:
def resize(image, size):
    """Resize `image` to `size` by delegating to `tf.image.resize`."""
    return tf.image.resize(images=image, size=size)


def blur(img, blur_limit):
    """Blur `img` with `cv2.blur` using a square `blur_limit` x `blur_limit` kernel."""
    kernel_size = [blur_limit, blur_limit]
    return cv2.blur(img, ksize=kernel_size)


def gaussian_blur(img, blur_limit=(3, 7), sigma_limit=0):
    """Gaussian-blur `img` with a randomly sized square kernel.

    Args:
        img: image array accepted by `cv2.GaussianBlur`.
        blur_limit: either an inclusive (min, max) range from which an odd
            kernel size is drawn at random, or a single int used as the kernel
            size. Previously the pair was forwarded as-is, which produced a
            fixed, anisotropic min x max kernel instead of a size range.
        sigma_limit: forwarded to `cv2.GaussianBlur` as `sigmaX`.

    Returns:
        The blurred image returned by `cv2.GaussianBlur`.

    Raises:
        ValueError: if the range contains no odd kernel size.
    """
    if isinstance(blur_limit, (list, tuple)):
        low, high = blur_limit
        # `| 1` bumps an even lower bound up to the next odd value, because
        # OpenCV requires an odd kernel size.
        odd_sizes = range(int(low) | 1, int(high) + 1, 2)
        if not odd_sizes:
            raise ValueError(f'no odd kernel size in blur_limit={blur_limit!r}')
        kernel = random.choice(odd_sizes)
    else:
        kernel = int(blur_limit)

    return cv2.GaussianBlur(img, ksize=(kernel, kernel), sigmaX=sigma_limit)


def motion_blur(img, blur_limit=7):
    """Blur `img` along the horizontal axis.

    Builds a `blur_limit` x `blur_limit` kernel that is zero everywhere except
    its middle row, which averages `blur_limit` neighbouring pixels, and
    applies it with `cv2.filter2D`.
    """
    middle_row = (blur_limit - 1) // 2
    kernel = np.zeros((blur_limit, blur_limit))
    kernel[middle_row, :] = 1.0
    # Normalise so the kernel weights sum to one.
    kernel /= blur_limit
    return cv2.filter2D(img, -1, kernel=kernel)


def gaussian_noise(img, mean=0.0, stddev=1.0):
    """Add element-wise Gaussian noise to an image.

    The noise tensor takes its shape from `img`. The previous version built a
    `tf.compat.v1.placeholder`, which is not usable under eager execution, and
    hard-coded a 512x512x3 noise shape that did not match other image sizes.

    Args:
        img: image array/tensor of any shape; it is cast to float32.
        mean: mean of the normal distribution the noise is drawn from.
        stddev: standard deviation of that distribution.

    Returns:
        A float32 tensor with the same shape as `img`.
    """
    # Cast first so integer images can be combined with float noise.
    img = tf.cast(img, tf.float32)
    noise = tf.random.normal(shape=tf.shape(img), mean=mean, stddev=stddev, dtype=tf.float32)
    return tf.add(img, noise)


def iso_noise(img, color_shift=0.05, intensity=0.5):
    """Add camera-sensor-style noise to an RGB image given on a 0-255 scale.

    The image is scaled by 1/255, converted to HLS, perturbed with Poisson
    noise on the luminance channel and normal noise on the hue channel, then
    converted back to RGB.

    Values are clipped to the valid range on the way in and before the final
    cast. Without this, inputs pushed outside 0-255 by earlier augmentations
    wrapped around when cast to uint8.

    Args:
        img: RGB image array with values on a 0-255 scale.
        color_shift: scales the standard deviation of the hue noise.
        intensity: scales both the hue noise and the luminance noise.

    Returns:
        A uint8 RGB image with the same height and width as `img`.
    """
    one_over_255 = float(1.0 / 255.0)
    image = np.multiply(img, one_over_255, dtype=np.float32)
    # Keep the float image inside [0, 1] before the colour-space conversion.
    np.clip(image, 0.0, 1.0, out=image)
    hls = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
    _, stddev = cv2.meanStdDev(hls)

    # Luminance noise strength follows the spread of the luminance channel.
    luminance_noise = np.random.poisson(stddev[1] * intensity * 255, hls.shape[:2])
    color_noise = np.random.normal(0, color_shift * 360 * intensity, hls.shape[:2])

    # `hue` and `luminance` are views, so the in-place updates modify `hls`.
    hue = hls[..., 0]
    hue += color_noise
    # Wrap hue back into the 0-360 degree range.
    hue[hue < 0] += 360
    hue[hue > 360] -= 360

    luminance = hls[..., 1]
    luminance += (luminance_noise / 255) * (1.0 - luminance)

    image = cv2.cvtColor(hls, cv2.COLOR_HLS2RGB) * 255
    # Saturate instead of letting the uint8 cast wrap around.
    return np.clip(image, 0, 255).astype(np.uint8)


def random_cut_out(images):
    """Apply `tfa.image.random_cutout` with a 32x32 mask filled with zeros.

    The caller in this notebook passes a 4-D batch of shape (1, height, width, 3).
    """
    mask_size = (32, 32)
    return tfa.image.random_cutout(images, mask_size, constant_values=0)

In [None]:
def aug_fn(image):
    """Run the `get_transform` pipeline on `image` and normalise the result.

    The augmented image is divided by 255, cast to float32 and resized to
    256x256.
    """
    dropped = get_transform(image=image)["image"]
    scaled = tf.cast(dropped / 255.0, tf.float32)
    return tf.image.resize(scaled, size=[256, 256])

# Albumentations pipeline used by `aug_fn`. Its single step is a CoarseDropout
# applied with p=0.2, configured for 8-16 holes of 8-16 px per side.
# Despite the name this is a Compose object, called as get_transform(image=...),
# not a function.
get_transform = Compose([A.CoarseDropout(max_holes=16, min_holes=8, max_height=16, max_width=16, min_height=8, min_width=8, p=0.2)])

def get_transforms_train(image):
    """Apply a random chain of augmentations to a single image.

    Steps, each applied with its own probability: random square crop resized
    to 256x256, left-right flip, 90-degree rotation, brightness/contrast or
    gamma change, hue shift, saturation change, one of three blurs, ISO noise,
    and finally either a random cutout or the CoarseDropout pipeline in
    `aug_fn`.

    Changes from the earlier version:
      * the rotation draws k from 1..3; k=4 was a full turn, i.e. a no-op;
      * the crop side is clamped to the image size so small inputs do not fail;
      * both exit paths return a float32 image divided by 255 (the cutout path
        used to return un-normalised 0-255 data).

    Args:
        image: numpy image array of shape (height, width, 3).

    Returns:
        A float32 numpy array of shape (256, 256, 3).
    """
    # Random square crop whose side is 33-100% of 256 px, never larger than
    # the image itself.
    height, width = image.shape[:2]
    crop_side = min(int(256*random.uniform(0.33, 1)), height, width)
    temp = tf.image.random_crop(image, size=(crop_side, crop_side, 3)).numpy()
    temp = resize(temp, size=(256, 256)).numpy()

    # random horizontal (left-right) flip
    temp = tf.image.random_flip_left_right(temp).numpy()

    if np.random.choice([True, False], p=[0.6, 0.4]):
        # rotate by 90, 180 or 270 degrees
        k = random.randint(1, 3)
        temp = tf.image.rot90(temp, k=k).numpy()

    if np.random.choice([True, False], p=[0.45, 0.55]):
        # NOTE(review): these TF ops act on whatever value scale `temp` is on
        # (0-255 when the input comes from img_to_array); confirm that the
        # brightness delta and gamma ranges are meant for that scale.
        if random.choice([True, False]):
            delta = random.uniform(-0.4, 0.4)
            cf = random.uniform(-1.5, 2.5)
            temp = tf.image.adjust_brightness(temp, delta=delta).numpy()
            temp = tf.image.adjust_contrast(temp, contrast_factor=cf).numpy()
        else:
            gamma = random.uniform(0.33, 2.2)
            temp = tf.image.adjust_gamma(temp, gamma=gamma).numpy()

    if np.random.choice([True, False], p=[0.25, 0.75]):
        delta = random.uniform(-0.2, 0.4)
        temp = tf.image.adjust_hue(temp, delta=delta).numpy()

    if np.random.choice([True, False], p=[0.2, 0.8]):
        sf = random.uniform(-0.2, 0.8)
        temp = tf.image.adjust_saturation(temp, saturation_factor=sf).numpy()

    if np.random.choice([True, False], p=[0.4, 0.6]):
        # Exactly one of the three blur variants is applied.
        one_of_blur = random.choice([1, 2, 3])

        if one_of_blur == 1:
            temp = blur(temp, blur_limit=7)
        elif one_of_blur == 2:
            temp = gaussian_blur(temp)
        elif one_of_blur == 3:
            temp = motion_blur(temp)

    if np.random.choice([True, False], p=[0.35, 0.65]):
        temp = iso_noise(temp)

    if np.random.choice([True, False], p=[0.3, 0.7]):
        # Cutout needs a batch dimension. This path replaces the CoarseDropout
        # in `aug_fn`, so the function returns from here.
        batch = temp.reshape([1, temp.shape[0], temp.shape[1], 3])
        temp = random_cut_out(batch).numpy()[0]

        # Normalise exactly as `aug_fn` does so both exits share one scale.
        return tf.cast(temp / 255.0, tf.float32).numpy()

    temp = aug_fn(temp).numpy()

    return temp

In [None]:
# Preview one randomly augmented version of the sample image.
preview_source = img_to_array(Image.open(os.path.join(dir,filename)))
preview_augmented = tf.convert_to_tensor(get_transforms_train(preview_source), dtype=tf.float32)/255.0
array_to_img(preview_augmented)

In [None]:
# Preview of a random 65% draw from the training rows. The augmentation cell
# draws its own sample, so these are not necessarily the rows it will use.
df_train.sample(n=int(len(df_train) * 0.65))

In [None]:
c = 0
new_train_meta = []

# Sample size is loop-invariant, so compute it once.
n_sampled = int(df_train.shape[0]*0.65)

# List 'train' once up front: calling os.listdir() per row was quadratic, and
# the directory keeps growing while this loop writes augmented files into it.
existing_files = set(os.listdir('train'))

# The loop variable is deliberately not called `filename`, so the sample
# `filename` defined earlier is not overwritten by this cell.
for src_name, label in df_train.sample(n=n_sampled).values:
    if src_name not in existing_files:
        continue

    # Coin flip: only about half of the sampled rows are augmented.
    if not random.choice([True, False]):
        continue

    image = tf.keras.utils.img_to_array(Image.open(os.path.join('train', src_name)))

    c += 1
    print(f'{c}/~{n_sampled*0.5}', end='\r')

    # Write ten independently augmented variants of each selected image.
    for i in range(10):
        process = get_transforms_train(image=image)

        dst_file = f'{i}-{src_name}'
        tf.keras.utils.array_to_img(process).save(f'train/{dst_file}')
        new_train_meta.append([dst_file, label])


train_df_1 = pd.DataFrame(new_train_meta, columns=['image', 'cultivar'])
train_df_1

In [None]:
# Append the augmented rows to the training metadata. De-duplicating on the
# file name keeps this cell idempotent: re-running it no longer adds a second
# copy of rows that are already present.
df_train = (
    pd.concat([df_train, train_df_1], ignore_index=True)
    .drop_duplicates(subset='image')
    .reset_index(drop=True)
)
df_train

In [None]:
# Visual check: a 6x6 grid of randomly picked augmented images.
temp = train_df_1['image'].sample(n=36).tolist()
fig, axes = plt.subplots(nrows=6, ncols=6, figsize=[18, 18], dpi=300)
axes = axes.ravel()

# One tile per sampled file name.
for ax, image_name in zip(axes, temp):
    ax.imshow(Image.open(f'train/{image_name}'))

plt.show()

In [None]:
# Persist the training metadata (file name + label) without the index column.
df_train.to_csv('train_meta.csv', index=False)

In [None]:
# Bar chart of rows per cultivar in the cropped-image metadata.
# NOTE(review): this plots `new_train_meta_` (all crops, before the split and
# augmentation), not the augmented `df_train` saved above - confirm which
# distribution is meant to be shown here.
fig, ax = plt.subplots(figsize=[24, 6], dpi=200)
sns.countplot(x=new_train_meta_['cultivar'], ax=ax)
# Tilt the class names so the tick labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=60)
plt.show()