In [42]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from scipy import ndimage
import Augmentor as Aug
import matplotlib.pyplot as plt
from random import shuffle

In [55]:
class DefaultConfigs(object):
    """Static settings shared by the notebook: data locations and hyper-parameters.

    All values are plain class attributes, so they can be read either from the
    class itself or from the module-level ``config`` instance created below.

    Two attribute names are historical misspellings (``img_weight`` and
    ``thresold``). They are kept so existing code keeps working, and the
    correctly spelled aliases ``img_width`` and ``threshold`` are provided
    next to them with the same values.
    """

    # Root directory that every input path below is derived from.
    # NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
    root_path = "/media/trinhnh1/3A08638408633DCF/kaggle/human-protein/input"
    train_data = root_path + "/train_all_new/"  # where is your train data
    test_data = root_path + "/test_jpg/"  # your test data
    sample_submission = root_path + "/sample_submission.csv"
    train_kaggle_csv = root_path + "/train.csv"
    train_external_csv = root_path + "/external_data/img/train.csv"

    # Output locations, relative to the current working directory.
    weights = "./checkpoints/"
    best_models = "./checkpoints/best_models/"
    submit = "./submit/"

    # Model / training settings. None of these are read in the cells visible
    # here; their exact use is presumably in the training code (TODO confirm).
    model_name = "seresnet50_bcelog"
    seed = 2050
    num_classes = 28
    img_weight = 512  # misspelled "width"; kept for backward compatibility
    img_width = img_weight  # correctly spelled alias of img_weight
    img_height = 512
    channels = 4
    lr = 0.0001
    weight_decay = 0.000000001
    batch_size = 18
    epochs = 30
    thresold = 0.2  # misspelled "threshold"; kept for backward compatibility
    threshold = thresold  # correctly spelled alias of thresold
    n_tta = 5
    n_fold = 5

    # Input image directories for upsampling.
    raw_img_path = root_path + "/train_jpg/"
    ext_img_path = root_path + "/external_data/img/train_gray/"

    # CSV files listing the images to upsample (read via their Id/Target columns).
    ups_raw_csv = root_path + "/raw_minority.csv"
    ups_ext_csv = root_path + "/ext_minority.csv"

    # Where the augmented images and their [Id, Target] CSV files are written.
    aug_raw_minority = root_path + "/aug_raw_minority/"
    aug_raw_csv = root_path + "/train_aug_raw.csv"
    aug_ext_minority = root_path + "/external_data/img/aug_ext_minority/"
    aug_ext_csv = root_path + "/train_ext_raw.csv"


# Shared instance used by the cells below.
config = DefaultConfigs()

In [54]:
# Upsample the Kaggle raw data: for every image listed in config.ups_raw_csv,
# write five augmented copies (vertical flip, horizontal flip, rotations by
# 15/30/45 degrees) of each of its four colour-channel files, then save the
# new [Id, Target] rows to config.aug_raw_csv.
df = pd.read_csv(config.ups_raw_csv)

colors = ['red', 'green', 'blue', 'yellow']
aug_labels = ['flipv', 'fliph', 'rot15', 'rot30', 'rot45']

# Array transform applied for each augmentation label.
# 'flipv' uses np.flipud (up/down); it previously used np.fliplr and therefore
# produced exactly the same image as 'fliph'.
# ndimage.rotate is called with its defaults, as before; per the SciPy docs the
# default reshape=True enlarges the output so the whole rotated image fits.
aug_transforms = {
    'flipv': np.flipud,
    'fliph': np.fliplr,
    'rot15': lambda pixels: ndimage.rotate(pixels, 15),
    'rot30': lambda pixels: ndimage.rotate(pixels, 30),
    'rot45': lambda pixels: ndimage.rotate(pixels, 45),
}

# Image.save does not create missing directories, so make sure the target exists.
os.makedirs(config.aug_raw_minority, exist_ok=True)

aug_raw_minority_csv = []

# Iterate over this cell's own frame (the original read ids from an undefined `df1`).
for img_id, img_target in zip(df['Id'], df['Target']):
    for aug in aug_labels:
        aug_img_id = aug + '_' + img_id
        transform = aug_transforms[aug]

        # 4 channels, one JPEG file per colour
        for color in colors:
            img_path = config.raw_img_path + img_id + '_' + color + '.jpg'
            aug_img_path = config.aug_raw_minority + aug_img_id + '_' + color + '.jpg'

            # Context manager closes the source file handle once the pixels are read.
            with Image.open(img_path) as img:
                aug_pixels = transform(np.asarray(img))
            Image.fromarray(aug_pixels).save(aug_img_path)

        # One CSV row per augmented image id (shared by its four channel files).
        aug_raw_minority_csv.append([aug_img_id, img_target])

# NOTE: the shuffle is unseeded, so row order differs between runs.
shuffle(aug_raw_minority_csv)

# Write upsampling data [Id, Target] to a CSV file.
aug_df = pd.DataFrame(data=aug_raw_minority_csv, columns=['Id', 'Target'])
print(aug_df.shape)
aug_df.to_csv(config.aug_raw_csv, index=False)

(4170, 2)


In [63]:
# Upsample the external data: for every image listed in config.ups_ext_csv,
# write five augmented copies (vertical flip, horizontal flip, rotations by
# 15/30/45 degrees) of each of its four colour-channel files, then save the
# new [Id, Target] rows to config.aug_ext_csv.
df = pd.read_csv(config.ups_ext_csv)

colors = ['red', 'green', 'blue', 'yellow']
aug_labels = ['flipv', 'fliph', 'rot15', 'rot30', 'rot45']

# Array transform applied for each augmentation label.
# 'flipv' uses np.flipud (up/down); it previously used np.fliplr and therefore
# produced exactly the same image as 'fliph'.
# ndimage.rotate is called with its defaults, as before; per the SciPy docs the
# default reshape=True enlarges the output so the whole rotated image fits.
aug_transforms = {
    'flipv': np.flipud,
    'fliph': np.fliplr,
    'rot15': lambda pixels: ndimage.rotate(pixels, 15),
    'rot30': lambda pixels: ndimage.rotate(pixels, 30),
    'rot45': lambda pixels: ndimage.rotate(pixels, 45),
}

# Image.save does not create missing directories, so make sure the target exists.
os.makedirs(config.aug_ext_minority, exist_ok=True)

aug_ext_minority_csv = []

for img_id, img_target in zip(df['Id'], df['Target']):
    for aug in aug_labels:
        aug_img_id = aug + '_' + img_id
        transform = aug_transforms[aug]

        # 4 channels, one JPEG file per colour
        for color in colors:
            img_path = config.ext_img_path + img_id + '_' + color + '.jpg'
            aug_img_path = config.aug_ext_minority + aug_img_id + '_' + color + '.jpg'

            # Context manager closes the source file handle once the pixels are read.
            with Image.open(img_path) as img:
                aug_pixels = transform(np.asarray(img))
            Image.fromarray(aug_pixels).save(aug_img_path)

        # One CSV row per augmented image id (shared by its four channel files).
        aug_ext_minority_csv.append([aug_img_id, img_target])

# NOTE: the shuffle is unseeded, so row order differs between runs.
shuffle(aug_ext_minority_csv)

# Write upsampling data [Id, Target] to a CSV file.
aug_df = pd.DataFrame(data=aug_ext_minority_csv, columns=['Id', 'Target'])
print(aug_df.shape)
aug_df.to_csv(config.aug_ext_csv, index=False)

(5560, 2)
