In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import *
from skimage import exposure
import sys
sys.path.append('../')
from utils.dehaze import dehaze
from utils.lee_filter import lee_filter
from datetime import datetime
from sklearn.model_selection import StratifiedKFold

%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os
import ast
import datetime as dt
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [16, 10]
plt.rcParams['font.size'] = 14
import cv2
import pandas as pd
import json
from tqdm import tqdm_notebook as tqdm

from imgaug import augmenters as iaa

# Base directory for all project paths; '../' is resolved relative to the current working directory.
root_dir = '../'
# <root_dir>/feats and its 'stage1' subdirectory.
feats_dir = os.path.join(root_dir,'feats')
stage1_feats_dir = os.path.join(feats_dir,'stage1')

%load_ext autoreload
%autoreload 2

In [2]:
### to change according to your machine
data_dir = os.path.join(root_dir, 'dataset')

# Locations of the four HDF5 archives inside data_dir.
path_test_a = os.path.join(data_dir, 'round1_test_a_20181109.h5')
path_test_b = os.path.join(data_dir, 'round1_test_b_20190104.h5')
path_validation = os.path.join(data_dir, 'validation.h5')
path_training = os.path.join(data_dir, 'training.h5')

# Open every archive read-only. The handles are left open here; the
# 'sen1' / 'sen2' / 'label' objects below are looked up from them.
fid_test_a = h5py.File(path_test_a, 'r')
fid_test_b = h5py.File(path_test_b, 'r')
fid_validation = h5py.File(path_validation, 'r')
fid_training = h5py.File(path_training, 'r')

# --- test a: sen1 / sen2 only ---
print("-" * 60, "test a part", sep="\n")
s1_test_a = fid_test_a['sen1']
print(s1_test_a.shape)
s2_test_a = fid_test_a['sen2']
print(s2_test_a.shape)

# --- test b: sen1 / sen2 only ---
print("-" * 60, "test b part", sep="\n")
s1_test_b = fid_test_b['sen1']
print(s1_test_b.shape)
s2_test_b = fid_test_b['sen2']
print(s2_test_b.shape)

# --- training: sen1 / sen2 / label ---
print("-" * 60, "training part", sep="\n")
s1_training = fid_training['sen1']
print(s1_training.shape)
s2_training = fid_training['sen2']
print(s2_training.shape)
label_training = fid_training['label']
print(label_training.shape)

# --- validation: sen1 / sen2 / label ---
print("-" * 60, "validation part", sep="\n")
s1_validation = fid_validation['sen1']
print(s1_validation.shape)
s2_validation = fid_validation['sen2']
print(s2_validation.shape)
label_validation = fid_validation['label']
print(label_validation.shape)

------------------------------------------------------------
test a part
(4838, 32, 32, 8)
(4838, 32, 32, 10)
------------------------------------------------------------
test b part
(4835, 32, 32, 8)
(4835, 32, 32, 10)
------------------------------------------------------------
training part
(352366, 32, 32, 8)
(352366, 32, 32, 10)
(352366, 17)
------------------------------------------------------------
validation part
(24119, 32, 32, 8)
(24119, 32, 32, 10)
(24119, 17)


In [3]:
per_class_num = 5000  # alternative value noted by the author: 3000

# Per-sample class index (argmax over the last label axis), stored as a
# single 'label' column whose row labels are converted to strings.
valid_label_id_df = pd.DataFrame(
    np.argmax(label_validation, axis=-1), columns=['label']
).rename(index=str)
train_label_id_df = pd.DataFrame(
    np.argmax(label_training, axis=-1), columns=['label']
).rename(index=str)

# Column-wise sums of the label matrices, i.e. one total per label column.
valid_label_val_cnt = np.sum(label_validation, axis=0)
train_label_val_cnt = np.sum(label_training, axis=0)

print(valid_label_val_cnt)

[ 256. 1254. 2353.  849.  757. 1906.  474. 3395. 1914.  860. 2287.  382.
 1202. 2747.  202.  672. 2609.]


In [4]:
# img transform

def imgs_transform(imgs):
    """Build the transformed copy of a stacked image array, one sample at a time.

    Per sample (first axis), indexing the last axis as channels:
      * channels 0-3  -> ``lee_filter`` applied to each channel separately
      * channels 4-7  -> copied unchanged
      * channels 8+   -> divided by 2.8
      * channels (10, 9, 8), in that order -> additionally passed through
        ``dehaze`` and ``exposure.rescale_intensity`` and written back

    Returns a new array created with ``np.zeros_like(imgs)``; ``imgs`` itself
    is not modified.
    NOTE(review): assumes a 4-D input with at least 11 channels — confirm.
    """
    out = np.zeros_like(imgs)
    rgb_idx = (10, 9, 8)
    for n in tqdm(range(imgs.shape[0])):
        out[n, :, :, 8:] = imgs[n, :, :, 8:] / 2.8

        # Mixing the scalar sample index with the tuple index makes numpy put
        # the selected-channel axis first, hence the transposes around dehaze.
        rgb_hwc = out[n, :, :, rgb_idx].transpose(1, 2, 0)
        enhanced = exposure.rescale_intensity(dehaze(rgb_hwc))
        out[n, :, :, rgb_idx] = enhanced.transpose(2, 0, 1)

        for c in range(4):
            out[n, :, :, c] = lee_filter(imgs[n, :, :, c])

        # Channels 4..7 are carried over as-is.
        out[n, :, :, 4:8] = imgs[n, :, :, 4:8]
    return out

# NOTE(review): `merged_imgs`, `s1_test` and `s2_test` are not defined in any cell visible
# in this notebook export (only the *_test_a / *_test_b / *_training / *_validation objects are);
# confirm they are created before this cell runs, otherwise a fresh-kernel run raises NameError.
merged_imgs_v0 = imgs_transform(merged_imgs)
# Join the two test arrays along the last axis, then apply the same transform as above.
test_imgs = np.concatenate([s1_test,s2_test],axis=-1)
test_imgs_v0 = imgs_transform(test_imgs)

In [None]:
"""
merged_training_imgs:[18c]
(a)train+valid
(b)s1+s2
(c)
    [0-3]:s1[0-3]:lee filter
    [4-7]:s1[4-7]:lee filter
    [9-10]:s2[0-2]:dehaze+rescale_intensity
    [11-17]:s2[3-9]:dehaze+rescale_intensity
"""
data_save_dir = os.path.join(root_dir,'dataset')
np.save(os.path.join(data_save_dir,'merged_training_imgs'),merged_imgs_v0)
np.save(os.path.join(data_save_dir,'merged_training_labels'),merged_labels)
np.save(os.path.join(data_save_dir,'test_imgs'),test_imgs_v0)

In [5]:
"""
merged_training_imgs_v1:[17c]
(a)train+valid
(b)s1+s2
(c)
    [0]:s1[0,1]:lee filter + square_sum_sqrt_merged + omn_exp
    [1]:s1[2,3]:lee filter + square_sum_sqrt_merged + omn_exp
    [2-3]:s1[4,5]:log10 + norm[0-1]
    [4-5]:s1[6,7]:sqrt + norm[0-1]
    [6]:s1[6,7]:square_sum_sqrt_log10_merged + norm[0-1]
    [7-9]:s2[0-2]:dehaze + rescale_intensity + norm[0-1]
    [10-16]:s2[3-9]:norm[0-1]
"""

def get_min_max(data):
    """Collect the extreme values of every channel of a 4-D array.

    Args:
        data: array indexed as ``data[:, :, :, channel]``.

    Returns:
        A list with one ``(min, max)`` tuple per index of the last axis,
        in channel order.
    """
    n_channels = data.shape[-1]
    return [
        (np.min(data[:, :, :, c]), np.max(data[:, :, :, c]))
        for c in range(n_channels)
    ]

def norm_01(data,min_max):
    """Linearly rescale ``data`` using a precomputed ``(min, max)`` pair.

    Args:
        data: array (or scalar) to rescale.
        min_max: ``(min_val, max_val)`` tuple; ``min_val`` maps to 0 and
            ``max_val`` maps to 1.

    Returns:
        ``(data - min_val) / (max_val - min_val)``. When the two bounds are
        equal (a constant channel) the span is treated as 1, so the result is
        ``data - min_val`` instead of NaN/inf from a division by zero.
    """
    min_val,max_val = min_max
    span = max_val - min_val
    if span == 0:
        # Constant channel: avoid 0/0 and x/0, which would silently fill the
        # output with NaN/inf.
        span = 1
    return (data - min_val)/span

def omn_exp(data):
    """Element-wise "one minus negative exponential": ``1 - exp(-data)``."""
    decay = np.exp(np.negative(data))
    return 1 - decay

def imgs_v1_transform(imgs):
    """Derive the 17-channel "v1" array from a stacked image array.

    Output channel <- input channel(s):
      * 0 <- omn_exp(sqrt(c0^2 + c1^2))
      * 1 <- omn_exp(sqrt(c2^2 + c3^2))
      * 2, 3 <- log10 of c4, c5
      * 4, 5 <- sqrt(|c|) multiplied by +1 where c > 0, else -1, for c6, c7
      * 6 <- log10(sqrt(c6^2 + c7^2))
      * 7.. <- c8.. copied unchanged

    Returns a new float array (``np.zeros`` default dtype) with the same
    leading dimensions as ``imgs`` and 17 channels on the last axis.
    NOTE(review): the final copy only fits when ``imgs`` has 18 channels — confirm.
    """
    def band(k):
        # Channel k across every sample and pixel.
        return imgs[:, :, :, k]

    def magnitude(a, b):
        return np.sqrt(band(a) ** 2 + band(b) ** 2)

    def signed_sqrt(k):
        # Non-positive inputs (including exact zeros) get the -1 factor.
        return np.sqrt(abs(band(k))) * np.where(band(k) > 0, 1, -1)

    v1 = np.zeros(tuple(imgs.shape[:-1]) + (17,))

    v1[:, :, :, 0] = omn_exp(magnitude(0, 1))
    v1[:, :, :, 1] = omn_exp(magnitude(2, 3))

    v1[:, :, :, 2] = np.log10(band(4))
    v1[:, :, :, 3] = np.log10(band(5))

    v1[:, :, :, 4] = signed_sqrt(6)
    v1[:, :, :, 5] = signed_sqrt(7)
    v1[:, :, :, 6] = np.log10(magnitude(6, 7))

    v1[:, :, :, 7:] = imgs[:, :, :, 8:]
    return v1

def norm_imgs(imgs,min_max,norm_ind):
    """Rescale selected channels of ``imgs`` in place with ``norm_01``.

    Args:
        imgs: array indexed as ``imgs[:, :, :, channel]``; modified in place.
        min_max: sequence where ``min_max[ch]`` is the ``(min, max)`` pair
            used for channel ``ch``.
        norm_ind: iterable of channel indices to rescale.

    Returns:
        None; the result is written back into ``imgs``.
    """
    for ch in norm_ind:
        bounds = min_max[ch]
        imgs[:, :, :, ch] = norm_01(imgs[:, :, :, ch], bounds)

# Output directory for the v1 arrays. np.save does not create missing
# directories, so make sure it exists before any expensive work is done.
data_save_dir = os.path.join(root_dir,'dataset','merged_data','v1')
os.makedirs(data_save_dir, exist_ok=True)

# Training data: transform, take per-channel (min, max), then rescale
# channels 2..6 in place with those bounds.
merged_imgs_v1 = imgs_v1_transform(merged_imgs_v0)
min_max = get_min_max(merged_imgs_v1)
print(min_max)
norm_imgs(merged_imgs_v1,min_max,[2,3,4,5,6])
np.save(os.path.join(data_save_dir,'merged_training_imgs_v1'),merged_imgs_v1)
# NOTE(review): `merged_labels` is not defined in any cell visible here — confirm its origin.
np.save(os.path.join(data_save_dir,'merged_training_labels_v1'),merged_labels)
print('process training imgs finished')

# Test data: same transform, rescaled with the bounds computed on the
# training array above (not recomputed on the test array).
test_imgs_v1 = imgs_v1_transform(test_imgs_v0)
norm_imgs(test_imgs_v1,min_max,[2,3,4,5,6])
np.save(os.path.join(data_save_dir,'test_imgs_v1'),test_imgs_v1)
print('process testing imgs finished')

'\nmerged_training_imgs_v1:[17c]\n(a)train+valid\n(b)s1+s2\n(c)\n    [0]:s1[0,1]:lee filter + square_sum_sqrt_merged + omn_exp\n    [1]:s1[2,3]:lee filter + square_sum_sqrt_merged + omn_exp\n    [2-3]:s1[4,5]:log10 + norm[0-1]\n    [4-5]:s1[6,7]:sqrt + norm[0-1]\n    [6]:s1[6,7]:square_sum_sqrt_log10_merged + norm[0-1]\n    [7-9]:s2[0-2]:dehaze + rescale_intensity + norm[0-1]\n    [10-16]:s2[3-9]:norm[0-1]\n'