In [1]:
import numpy as np
import sys
import os
import pickle
import gzip

In [2]:
def concat(data):
    """Join a sequence of arrays end-to-end along their first axis."""
    joined = np.concatenate(data, axis=0)
    return joined

def add_ch(img):
    """
    Append a trailing axis of size 1 to `img`:
    (sample #, height, width) -> (sample #, height, width, 1)
    """
    trailing_axis = -1
    with_channel = np.expand_dims(img, trailing_axis)
    return with_channel

def Reshape4torch(img):
    """
    Move the last (channel) axis directly behind the sample axis:
    (sample #, height, width, channel) -> (sample #, channel, height, width)
    """
    channels_first_order = (0, 3, 1, 2)
    return np.transpose(img, axes=channels_first_order)
    
def GenerateLabel(data, cls):
    """
    Build a 1-D float array with one entry per sample in `data`
    (i.e. `data.shape[0]` entries), every entry equal to `cls`.
    """
    n_samples = data.shape[0]
    return np.ones((n_samples,)) * cls


def GBdataLoad(data_dir, ch = [0, 1], data_type = '3_train_data'):
    """
    Load the GOOD and BAD samples stored under `data_dir`.

    Reads the gzip-compressed pickle `<data_dir>/SemblexData_config.pickle`,
    which is expected to be a dict mapping a class name ('GOOD', 'BAD') to a
    list of records; each record is indexed with `data_type` to obtain an
    array that is then sliced with `[:, :, :, ch]`. Other class names in the
    file are ignored.

    Parameters
    ----------
    data_dir : str or path-like
        Directory that contains 'SemblexData_config.pickle'.
    ch : list of int or int
        Index applied to the last axis of every record's array. With an
        integer the channel axis is dropped by the indexing and re-added
        afterwards as a size-1 axis. The default list is never mutated.
    data_type : str
        Record key of the array to load (e.g. '3_train_data', '4_test_data').

    Returns
    -------
    GB_Xs : list of np.ndarray
        `[G_X, B_X]`, each laid out as (sample #, channel, height, width).
    GB_Ys : list of np.ndarray
        One float label array per entry of `GB_Xs` (as built by
        `GenerateLabel`): 0 for GOOD, 1 for BAD.

    Raises
    ------
    ValueError
        If the file holds no GOOD or no BAD record (previously this surfaced
        as numpy's generic "need at least one array to concatenate").
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising -- only point this at files from a trusted source.
    with gzip.open(os.path.join(data_dir, 'SemblexData_config.pickle'), 'rb') as f:
        data = pickle.load(f)

    # Position in this tuple is the numeric label of the class.
    class_names = ('GOOD', 'BAD')
    samples = {name: [] for name in class_names}
    for cls, data_bunch in data.items():
        if cls not in samples:
            continue
        samples[cls].extend(data_i[data_type][:, :, :, ch] for data_i in data_bunch)

    missing = [name for name in class_names if not samples[name]]
    if missing:
        raise ValueError(
            "no {} samples with data_type={!r} found under {!r}".format(
                '/'.join(missing), data_type, data_dir))

    GB_Xs = []
    for name in class_names:
        X = concat(samples[name])
        if X.ndim != 4:
            # An integer `ch` removed the channel axis; put it back.
            X = add_ch(X)
        GB_Xs.append(Reshape4torch(X))

    GB_Ys = [GenerateLabel(GB_X, label) for label, GB_X in enumerate(GB_Xs)]

    return GB_Xs, GB_Ys
        
def B6dataLoad(data_dir, ch = [0, 1], data_type = '3_train_data'):
    """
    Load the BAD samples stored under `data_dir`, split into six sub-classes.

    Reads the gzip-compressed pickle `<data_dir>/SemblexData_config.pickle`,
    which is expected to be a dict mapping a class name to a list of records.
    Only the records under 'BAD' are used. A record is assigned to every
    sub-class whose name occurs in `record['1_file']` (an `in` test --
    presumably a substring match on a file name; confirm against the data),
    so a record matching several names is added to each of them.

    Sub-classes and their labels:
    0 'OIL', 1 'PUNCH', 2 'SCRAPPED', 3 'DIE_CHIP', 4 'DIE_INTERNAL', 5 'PIN'.

    Parameters
    ----------
    data_dir : str or path-like
        Directory that contains 'SemblexData_config.pickle'.
    ch : list of int or int
        Index applied to the last axis of every record's array. With an
        integer the channel axis is dropped by the indexing and re-added
        afterwards as a size-1 axis. The default list is never mutated.
    data_type : str
        Record key of the array to load (e.g. '3_train_data', '4_test_data').

    Returns
    -------
    B6_Xs : list of np.ndarray
        Six arrays, in label order, each laid out as
        (sample #, channel, height, width).
    B6_Ys : list of np.ndarray
        One float label array per entry of `B6_Xs` (as built by
        `GenerateLabel`), filled with the sub-class label 0..5.

    Raises
    ------
    ValueError
        If any sub-class has no record (previously this surfaced as numpy's
        generic "need at least one array to concatenate").
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising -- only point this at files from a trusted source.
    with gzip.open(os.path.join(data_dir, 'SemblexData_config.pickle'), 'rb') as f:
        data = pickle.load(f)

    # Position in this tuple is the numeric label of the sub-class.
    BAD_cls = ('OIL', 'PUNCH', 'SCRAPPED', 'DIE_CHIP', 'DIE_INTERNAL', 'PIN')

    per_class = [[] for _ in BAD_cls]
    for cls, data_bunch in data.items():
        if cls != 'BAD':
            continue
        for data_i in data_bunch:
            # Deliberately no `break`: every matching sub-class gets the record.
            for bucket, defect in zip(per_class, BAD_cls):
                if defect in data_i['1_file']:
                    bucket.append(data_i[data_type][:, :, :, ch])

    missing = [defect for defect, bucket in zip(BAD_cls, per_class) if not bucket]
    if missing:
        raise ValueError(
            "no BAD samples with data_type={!r} found under {!r} for: {}".format(
                data_type, data_dir, ', '.join(missing)))

    B6_Xs = []
    for bucket in per_class:
        X = concat(bucket)
        if X.ndim != 4:
            # An integer `ch` removed the channel axis; put it back.
            X = add_ch(X)
        B6_Xs.append(Reshape4torch(X))

    B6_Ys = [GenerateLabel(B6_X, label) for label, B6_X in enumerate(B6_Xs)]

    return B6_Xs, B6_Ys

In [7]:
# Sample counts per class (GOOD, BAD) of the '3_train_data' arrays,
# one printed row per entry of the `length` list.
for length in (25, 50, 75, 100):
    seg_len = '{:03d}s'.format(length)  # zero-padded directory tag, e.g. 25 -> '025s'
    data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/08_shorten_length/SemblexData/' + seg_len

    GB_Xs, GB_Ys = GBdataLoad(data_dir, ch=[0, 1], data_type='3_train_data')

    # Number of samples held by each class array.
    LEN = [len(GB_X) for GB_X in GB_Xs]
    tag = seg_len[0] + '.' + seg_len[1:]  # '025s' -> '0.25s'
    print(tag, LEN, np.sum(LEN))

0.25s [7826, 12242] 20068
0.50s [3907, 6110] 10017
0.75s [2604, 4070] 6674
1.00s [1952, 3050] 5002


In [9]:
# Sample counts per class (GOOD, BAD) of the '4_test_data' arrays,
# one printed row per entry of the `length` list.
for length in (25, 50, 75, 100):
    seg_len = '{:03d}s'.format(length)  # zero-padded directory tag, e.g. 25 -> '025s'
    data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/08_shorten_length/SemblexData/' + seg_len

    GB_Xs, GB_Ys = GBdataLoad(data_dir, ch=[0, 1], data_type='4_test_data')

    # Number of samples held by each class array.
    LEN = [len(GB_X) for GB_X in GB_Xs]
    tag = seg_len[0] + '.' + seg_len[1:]  # '025s' -> '0.25s'
    print(tag, LEN, np.sum(LEN))

0.25s [1959, 3066] 5025
0.50s [978, 1532] 2510
0.75s [654, 1024] 1678
1.00s [490, 768] 1258


In [8]:
# Sample counts per BAD sub-class of the '3_train_data' arrays,
# one printed row per entry of the `length` list.
for length in (25, 50, 75, 100):
    seg_len = '{:03d}s'.format(length)  # zero-padded directory tag, e.g. 25 -> '025s'
    data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/08_shorten_length/SemblexData/' + seg_len

    B6_Xs, B6_Ys = B6dataLoad(data_dir, ch=[0, 1], data_type='3_train_data')

    # Number of samples held by each sub-class array.
    LEN = [len(B6_X) for B6_X in B6_Xs]
    tag = seg_len[0] + '.' + seg_len[1:]  # '025s' -> '0.25s'
    print(tag, LEN, np.sum(LEN))

0.25s [744, 2326, 1321, 3470, 2119, 2262] 12242
0.50s [372, 1161, 659, 1733, 1056, 1129] 6110
0.75s [248, 773, 438, 1154, 705, 752] 4070
1.00s [185, 579, 329, 865, 528, 564] 3050


In [11]:
# Sample counts per BAD sub-class of the '4_test_data' arrays,
# one printed row per entry of the `length` list.
for length in (25, 50, 75, 100):
    seg_len = '{:03d}s'.format(length)  # zero-padded directory tag, e.g. 25 -> '025s'
    data_dir = '/mnt/disk1/yunseob/Pytorch/SSM/1_Semblex/08_shorten_length/SemblexData/' + seg_len

    B6_Xs, B6_Ys = B6dataLoad(data_dir, ch=[0, 1], data_type='4_test_data')

    # Number of samples held by each sub-class array.
    LEN = [len(B6_X) for B6_X in B6_Xs]
    tag = seg_len[0] + '.' + seg_len[1:]  # '025s' -> '0.25s'
    print(tag, LEN, np.sum(LEN))

0.25s [187, 582, 331, 869, 530, 567] 3066
0.50s [93, 291, 165, 434, 266, 283] 1532
0.75s [62, 195, 111, 290, 177, 189] 1024
1.00s [47, 146, 83, 218, 132, 142] 768
