In [1]:
import numpy as np
import sys
import os
import pickle
import gzip

In [2]:
def concat(data):
    """Join a sequence of arrays end-to-end along their first axis."""
    joined = np.concatenate(data, axis = 0)
    return joined

def add_ch(img):
    """
    Append a trailing singleton channel axis.

    (sample #, height, width) -> (sample #, height, width, 1)
    """
    channel_axis = -1  # new axis goes last
    return np.expand_dims(img, channel_axis)

def Reshape4torch(img):
    """
    Move the channel axis of a 4-D batch from last to second position.

    (sample #, height, width, channel) -> (sample #, channel, height, width)
    """
    channel_first = (0, 3, 1, 2)
    return np.transpose(img, channel_first)
    
def GenerateLabel(data, cls):
    """
    Build a 1-D label vector with one entry per sample of `data`.

    The vector has length `data.shape[0]` and every entry equals `cls`
    (obtained by scaling an array of ones, so a numeric `cls` yields floats).
    """
    n_samples = data.shape[0]
    template = np.ones([n_samples])
    return cls * template


def GBdataLoad(data_dir, ch = [0, 1], data_type = '3_train_data'):
    """
    Load the GOOD and BAD samples of one split as channel-first arrays with labels.

    Reads the gzip-compressed pickle ``SemblexData_config.pickle`` located in
    `data_dir`, collects ``record[data_type][:, :, :, ch]`` for every record
    stored under the 'GOOD' and 'BAD' keys, concatenates each class along the
    first axis and transposes it with `Reshape4torch`.

    Parameters
    ----------
    data_dir : str or os.PathLike
        Directory that contains ``SemblexData_config.pickle``.
    ch : list of int or int
        Index applied to the last axis of every record's array. If the
        selection leaves a non-4-D array (e.g. a single int), a trailing
        channel axis is added back with `add_ch`. The argument is never mutated.
    data_type : str
        Key of the array inside every record.

    Returns
    -------
    GB_Xs : list of np.ndarray
        ``[GOOD, BAD]`` arrays after `Reshape4torch`.
    GB_Ys : list of np.ndarray
        Matching label vectors from `GenerateLabel`: 0 for GOOD, 1 for BAD.

    Raises
    ------
    ValueError
        If the file holds no record for 'GOOD' or for 'BAD'.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # dataset files from a trusted source.
    with gzip.open(os.path.join(data_dir, 'SemblexData_config.pickle'), 'rb') as f:
        data = pickle.load(f)

    # Insertion order fixes the label assignment below: GOOD -> 0, BAD -> 1.
    collected = {'GOOD': [], 'BAD': []}
    for cls, data_bunch in data.items():
        if cls not in collected:
            continue  # keys other than GOOD/BAD are ignored
        for data_i in data_bunch:
            collected[cls].append(data_i[data_type][:, :, :, ch])

    GB_Xs, GB_Ys = [], []
    for label, (cls, chunks) in enumerate(collected.items()):
        if not chunks:
            # np.concatenate would fail with an unhelpful message on an empty list.
            raise ValueError("no '%s' records with data_type %r found in %s"
                             % (cls, data_type, data_dir))
        X = concat(chunks)
        if X.ndim != 4:
            X = add_ch(X)
        X = Reshape4torch(X)
        GB_Xs.append(X)
        GB_Ys.append(GenerateLabel(X, label))

    return GB_Xs, GB_Ys
        
def B6dataLoad(data_dir, ch = [0, 1], data_type = '3_train_data'):
    """
    Load the BAD samples of one split, separated into six defect classes.

    Reads the gzip-compressed pickle ``SemblexData_config.pickle`` located in
    `data_dir`. Every record under the 'BAD' key is assigned to a defect class
    when that class name is contained in ``record['1_file']``; the checks are
    independent, so a record matching several names is added to each of them.
    Per class, ``record[data_type][:, :, :, ch]`` is concatenated along the
    first axis and transposed with `Reshape4torch`.

    Parameters
    ----------
    data_dir : str or os.PathLike
        Directory that contains ``SemblexData_config.pickle``.
    ch : list of int or int
        Index applied to the last axis of every record's array. If the
        selection leaves a non-4-D array (e.g. a single int), a trailing
        channel axis is added back with `add_ch`. The argument is never mutated.
    data_type : str
        Key of the array inside every record.

    Returns
    -------
    B6_Xs : list of np.ndarray
        Six arrays ordered OIL, PUNCH, SCRAPPED, DIE_CHIP, DIE_INTERNAL, PIN.
    B6_Ys : list of np.ndarray
        Matching label vectors from `GenerateLabel`, valued 0..5 in that order.

    Raises
    ------
    ValueError
        If any of the six classes has no matching record.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # dataset files from a trusted source.
    with gzip.open(os.path.join(data_dir, 'SemblexData_config.pickle'), 'rb') as f:
        data = pickle.load(f)

    # Position in this tuple is the integer label of the class.
    BAD_cls = ('OIL', 'PUNCH', 'SCRAPPED', 'DIE_CHIP', 'DIE_INTERNAL', 'PIN')

    collected = [[] for _ in BAD_cls]
    for cls, data_bunch in data.items():
        if cls != 'BAD':
            continue
        for data_i in data_bunch:
            for bucket, name in zip(collected, BAD_cls):
                if name in data_i['1_file']:
                    bucket.append(data_i[data_type][:, :, :, ch])

    B6_Xs, B6_Ys = [], []
    for label, (name, chunks) in enumerate(zip(BAD_cls, collected)):
        if not chunks:
            # np.concatenate would fail with an unhelpful message on an empty list.
            raise ValueError("no 'BAD' records matching '%s' with data_type %r found in %s"
                             % (name, data_type, data_dir))
        X = concat(chunks)
        if X.ndim != 4:
            X = add_ch(X)
        X = Reshape4torch(X)
        B6_Xs.append(X)
        B6_Ys.append(GenerateLabel(X, label))

    return B6_Xs, B6_Ys

In [3]:
# Dataset directory of the '06_no_overlap' configuration.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/06_no_overlap/SemblexData/wt_4040'

# Load the '3_train_data' arrays twice: grouped as GOOD/BAD and as the six BAD classes.
GB_Xs, GB_Ys = GBdataLoad(data_dir, ch = [0, 1], data_type = '3_train_data')
B6_Xs, B6_Ys = B6dataLoad(data_dir, ch = [0, 1], data_type = '3_train_data')

In [4]:
def ReduceSegments(x, y, rate, rng = None):
    """
    Draw a random subset of paired samples without replacement.

    Parameters
    ----------
    x, y : np.ndarray
        Arrays indexed along their first axis with the same random indices,
        so row i of the returned `x` still belongs to entry i of the returned `y`.
    rate : float
        Fraction of `len(x)` to keep; the kept count is ``int(rate * len(x))``
        (truncated toward zero).
    rng : None, int or np.random.Generator, optional
        Source of randomness. ``None`` (default) uses NumPy's global
        ``np.random.choice``, exactly as before this parameter existed. An int
        seed or a Generator makes the draw reproducible.

    Returns
    -------
    (x_subset, y_subset) : tuple of np.ndarray
    """
    n_total = len(x)
    n_keep = int(rate * n_total)
    if rng is None:
        idx = np.random.choice(n_total, n_keep, replace = False)
    else:
        # default_rng returns an existing Generator unchanged and seeds a new one from an int.
        idx = np.random.default_rng(rng).choice(n_total, n_keep, replace = False)
    return x[idx], y[idx]

In [5]:
# For each sampling rate, print the rate, the number of segments kept per
# GOOD/BAD class, and the total kept.
for sr in [0.2, 0.4, 0.6, 0.8, 1]:
    LEN = [len(ReduceSegments(GB_X, GB_Y, rate = sr)[0])
           for GB_X, GB_Y in zip(GB_Xs, GB_Ys)]
    print(sr, LEN, np.sum(LEN))

0.2 [390, 610] 1000
0.4 [780, 1220] 2000
0.6 [1171, 1830] 3001
0.8 [1561, 2440] 4001
1 [1952, 3050] 5002


In [6]:
# For each sampling rate, print the rate, the number of segments kept per
# BAD sub-class, and the total kept.
for sr in [0.2, 0.4, 0.6, 0.8, 1]:
    LEN = [len(ReduceSegments(B6_X, B6_Y, rate = sr)[0])
           for B6_X, B6_Y in zip(B6_Xs, B6_Ys)]
    print(sr, LEN, np.sum(LEN))

0.2 [37, 115, 65, 173, 105, 112] 607
0.4 [74, 231, 131, 346, 211, 225] 1218
0.6 [111, 347, 197, 519, 316, 338] 1828
0.8 [148, 463, 263, 692, 422, 451] 2439
1 [185, 579, 329, 865, 528, 564] 3050


In [7]:
# Rebind the GOOD/BAD and six-class variables to the '4_test_data' arrays of the
# directory currently held in data_dir (the previously loaded arrays are replaced).
GB_Xs, GB_Ys = GBdataLoad(data_dir, ch = [0, 1], data_type = '4_test_data')
B6_Xs, B6_Ys = B6dataLoad(data_dir, ch = [0, 1], data_type = '4_test_data')

In [8]:
# Sample count per GOOD/BAD class of the currently loaded split, then the total.
LEN = [len(GB_X) for GB_X, GB_Y in zip(GB_Xs, GB_Ys)]
print(LEN, np.sum(LEN))

[490, 768] 1258


In [9]:
# Sample count per BAD sub-class of the currently loaded split, then the total.
LEN = [len(B6_X) for B6_X, B6_Y in zip(B6_Xs, B6_Ys)]
print(LEN, np.sum(LEN))

[47, 146, 83, 218, 132, 142] 768


In [11]:
# Per-class sample counts for the '05_10p_overlap' dataset directory.
data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/05_10p_overlap/SemblexData/wt_4040'

# Train arrays first, then test arrays; the names end up bound to the test arrays.
for split in ('3_train_data', '4_test_data'):
    GB_Xs, GB_Ys = GBdataLoad(data_dir, ch = [0, 1], data_type = split)
    B6_Xs, B6_Ys = B6dataLoad(data_dir, ch = [0, 1], data_type = split)

    # One printed line per grouping: counts per class, then their total.
    for Xs, Ys in ((GB_Xs, GB_Ys), (B6_Xs, B6_Ys)):
        LEN = [len(X) for X, _ in zip(Xs, Ys)]
        print(LEN, np.sum(LEN))

[2169, 3386] 5555
[206, 644, 365, 960, 586, 625] 3386
[544, 854] 1398
[52, 162, 92, 242, 148, 158] 854
