In [76]:
import os, sys

import pathlib
import pandas as pd
import numpy as np
from easydict import EasyDict as edict
from tqdm import tqdm, trange
import matplotlib.pyplot as plt
import time

# Build the CSV path relative to the current working directory.
dir_path = pathlib.Path().absolute()
file = '../data/imputed_data.csv'
data_path = os.path.join(dir_path, file)

# Lift the pandas display limits so frames are rendered in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Read the CSV, drop the 'Unnamed: 0' column and renumber the rows from 0.
df = pd.read_csv(data_path, index_col=False)
df = df.drop(columns=['Unnamed: 0']).reset_index(drop=True)

# Sanity check: the loaded frame must not contain any missing value.
assert df.isnull().sum().sum() == 0

# Remove the columns that are not needed downstream.
df = df.drop(columns=['SPI_X1','SPI_Y1','SPI_X2','SPI_Y2','SPI_L1','SPI_W1','SPI_L2','SPI_W2'])
df.head(10)

Unnamed: 0,CRD,PartType,Orient.,X,Y,SPI_X_AVG,SPI_Y_AVG,SPI_L,SPI_W,PRE_X,PRE_Y,PRE_A,PRE_L,PRE_W,POST_X,POST_Y,POST_A,POST_L,POST_W,Job,Chip_L,Chip_W,SPI_VOLUME_MEAN
0,B1,R0402,90.0,31.26,100.11,-52.0,61.9,61.9,52.0,-19.887293,-1.840963,-1.877,-1.840963,19.887293,-7.440417,28.108231,1.613,28.108231,7.440417,CENTER,400.0,200.0,78.71525
1,B2,R0402,90.0,31.83,100.11,-47.0,63.7,63.7,47.0,-26.766272,6.898049,-1.449,6.898049,26.766272,5.487133,22.242701,0.0,22.242701,-5.487133,CENTER,400.0,200.0,77.3319
2,B3,R0402,90.0,32.4,100.11,-41.6,72.35,72.35,41.6,-32.058273,8.125244,-3.278,8.125244,32.058273,-19.644203,12.971593,1.552,12.971593,19.644203,CENTER,400.0,200.0,77.1738
3,B4,R0402,90.0,32.97,100.11,-50.6,63.1,63.1,50.6,-23.437572,-6.195457,-3.337,-6.195457,23.437572,-22.517658,26.396389,0.0,26.396389,22.517658,CENTER,400.0,200.0,77.96495
4,B5,R0402,90.0,33.54,100.11,-50.0,69.45,69.45,50.0,-5.187286,7.085744,-3.408,7.085744,5.187286,-21.413681,29.193306,3.611,29.193306,21.413681,CENTER,400.0,200.0,75.377
5,B6,R0402,90.0,31.26,99.2,-49.1,59.75,59.75,49.1,-31.05998,1.531298,3.611,1.531298,31.05998,-5.925862,17.682121,0.0,17.682121,5.925862,CENTER,400.0,200.0,77.6623
6,B7,R0402,90.0,31.83,99.2,-44.95,60.1,60.1,44.95,-20.728939,10.258562,-3.247,10.258562,20.728939,-17.024027,33.61591,3.455,33.61591,17.024027,CENTER,400.0,200.0,77.56575
7,B8,R0402,90.0,32.4,99.2,-39.05,70.0,70.0,39.05,-27.195978,11.357373,-2.031,11.357373,27.195978,-1.401807,29.627155,3.337,29.627155,1.401807,CENTER,400.0,200.0,79.1009
8,B9,R0402,90.0,32.97,99.2,-46.55,62.45,62.45,46.55,-19.82259,1.057152,-3.047,1.057152,19.82259,-20.989681,15.195607,3.31,15.195607,20.989681,CENTER,400.0,200.0,81.96445
9,B10,R0402,90.0,33.54,99.2,-53.25,71.15,71.15,53.25,-26.842043,3.604794,-3.022,3.604794,26.842043,-17.169552,31.046926,-1.66,31.046926,17.169552,CENTER,400.0,200.0,79.3668


In [1]:
# switch 90 data to 0 data
def switchOrient(x90, y90):
    """Convert a coordinate pair from the 90-orientation frame to the 0-orientation frame.

    The pair ``(x90, y90)`` is mapped to ``(-y90, x90)``.

    Args:
        x90: x component in the 90-orientation frame (anything ``float()`` accepts).
        y90: y component in the 90-orientation frame (must support unary minus).

    Returns:
        Tuple ``(x0, y0)`` of Python floats.
    """
    rotated_y = float(x90)
    rotated_x = float(-y90)
    return (rotated_x, rotated_y)

In [78]:
# config
# NOTE(review): ABSOLUTE_ANGLE is not applied to the angle values in the visible
# processing loop - confirm where it is meant to take effect.
ABSOLUTE_ANGLE = True
# When True, the offset distance is divided by the chip length and expressed in percent.
BY_CHIP_PERCENTAGE = True
# 'pre' or 'post': which angle column is recorded (also used in the result column name).
PRE_OR_POST_ANGLE = 'pre'
DISTANCE = 'spi' # [DISTANCE] - POST 
# Only referenced by a commented-out early-exit check in the processing loop.
STOP_AFTER = 10 # rows

# Group on the scalar key (not a one-element list): with a list of length 1, pandas >= 2.0
# yields 1-tuples such as ('R0402',) when iterating, which would turn f'{chiptype}' into
# "('R0402',)" and break the later lookups by plain chip name.
dfgroups = df.groupby('PartType')

# df_chips_condition :: holds dataframes by condition (2 total)
#   (0: condition [SPI <= CHIP]
#    1: condition [SPI > CHIP])
df_chips_condition = edict()


In [None]:
# Group the rows by chip type and derive, for every row, the SPI-vs-post offset distance,
# the selected angle and the pre/post offset norms.
start = time.time()  # start of the timed section reported by the final print

# Result containers filled below. Unlike df_chips_condition, none of the visible earlier
# cells creates them, so they are created here to keep the cell runnable on a fresh kernel.
df_chips = edict()
df_chips_cond_quad = edict()
df_chips_condition2 = edict()

# Column layout shared by every result frame.
result_columns = ['dist_SPI', f'{PRE_OR_POST_ANGLE}_angle', 'dist_pre', 'dist_post']

for chiptype, group in dfgroups:
    # initialize the (empty) result frames for this chip type
    df_chips[f'{chiptype}'] = pd.DataFrame(columns=result_columns)
    df_chips_condition[f'{chiptype}'] = [pd.DataFrame(columns=result_columns) for _ in range(2)]
    df_chips_cond_quad[f'{chiptype}'] = [pd.DataFrame(columns=result_columns) for _ in range(8)]
    df_chips_condition2[f'{chiptype}'] = [pd.DataFrame(columns=result_columns) for _ in range(6)]

    # chip length of this group (taken from the group's first row)
    chiplength = group['Chip_L'].values[0]

    # walk through the group one row at a time
    t = tqdm(group.iterrows(), total=len(group))
    for idx, row in t:
        t.set_description(f'Row: {idx}')

        spi_x_avg, spi_y_avg = row[['SPI_X_AVG','SPI_Y_AVG']]
        pad_center_x, pad_center_y = row[['X','Y']] * 1000 # mm --> um change of units
        spi_volume_mean = row['SPI_VOLUME_MEAN']
        pre_x, pre_y = row[['PRE_X','PRE_Y']]
        post_x, post_y = row[['POST_X','POST_Y']]
        pre_angle, post_angle = row[['PRE_A','POST_A']]
        orientation = row['Orient.']
        job = row['Job']

        # rotate 90 to 0
        if orientation == 90:
            spi_x_avg, spi_y_avg = switchOrient(spi_x_avg, spi_y_avg)
            pad_center_x, pad_center_y = switchOrient(pad_center_x, pad_center_y)
            pre_x, pre_y = switchOrient(pre_x, pre_y)
            # Rotate the POST offsets themselves; the previous code rotated the (already
            # rotated) PRE offsets here, overwriting the POST values with PRE data.
            post_x, post_y = switchOrient(post_x, post_y)

        # compute distance between two offset vectors from the origin (bottom-left of the chip)
        # vector offset distance difference from SPI
        xval, yval = 0, 0
        if DISTANCE == 'spi':
            xval = spi_x_avg - post_x
            yval = spi_y_avg - post_y
        diff_distance = np.linalg.norm((xval, yval))

        # convert to a percentage of the chip length
        if BY_CHIP_PERCENTAGE:
            diff_distance = diff_distance / chiplength * 100

        # set angle to pre or post angle
        angle = 0
        if PRE_OR_POST_ANGLE == 'pre':
            angle = pre_angle
        elif PRE_OR_POST_ANGLE == 'post':
            angle = post_angle

        # norms of the PRE and POST offset vectors
        dist_pre = np.linalg.norm((pre_x, pre_y))
        dist_post = np.linalg.norm((post_x, post_y))

        # value to add -> List (same order as result_columns)
        values_to_add = [diff_distance, angle, dist_pre, dist_post]

        # append distance and angle by condition and quadrant for SPI and Post (Chip)
        # NOTE(review): values_to_add is not stored anywhere yet; the two steps below are
        # placeholders whose criteria are not defined in this cell.
        # set case by criteria
        # (ex)
        # if spi_norm <= post_norm:
        #     case = 7

        # append to last index row
        # (ex)
        # dataframe[f'{chiptype}'][case].loc[len(dataframe[f'{chiptype}'][case]), :] = values_to_add

        # if idx > STOP_AFTER:
        #     break

    tcondition = trange(2)
    for condition in tcondition:
        tcondition.set_description('sorting condition dataframes by distance SPI:')

        # sort by first column (offset distance difference)
        # (ex)
        # tmp = dataframe[f'{chiptype}'][condition].to_numpy()
        # dataframe[f'{chiptype}'][condition] = tmp[tmp[:,0].argsort()]

print(f'It took {time.time() - start} seconds.')

In [34]:
# save files as pickle formats
# Prefer the pickle5 backport when it is installed; otherwise use the standard-library
# pickle instead of installing a package as a side effect of running this cell.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

# open(..., 'wb') does not create missing directories, so make sure the target exists.
os.makedirs('pickle', exist_ok=True)

for pickle_name, pickle_obj in (
    ('df_chips', df_chips),
    ('df_chips_condition', df_chips_condition),
    ('df_chips_condition2', df_chips_condition2),
    ('df_chips_cond_quad', df_chips_cond_quad),
):
    with open(f'pickle/{pickle_name}.p', 'wb') as fp:
        pickle.dump(pickle_obj, fp, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# load pickle
# Prefer the pickle5 backport when it is installed, otherwise fall back to the standard library.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

# NOTE: unpickling can execute arbitrary code - only load files written by this notebook.
with open('pickle/df_chips.p', 'rb') as fp:
    df_chips = pickle.load(fp)
with open('pickle/df_chips_condition.p', 'rb') as fp:
    df_chips_condition = pickle.load(fp)
with open('pickle/df_chips_condition2.p', 'rb') as fp:
    df_chips_condition2 = pickle.load(fp)
with open('pickle/df_chips_cond_quad.p', 'rb') as fp:
    df_chips_cond_quad = pickle.load(fp)

# reshape 2d array into DataFrame
# The per-condition entries (2 per chip) live in df_chips_condition; the previous code
# referred to an undefined name `dataframe` here.
chips = ['R0402','R0603','R1005']
condition_columns = ['dist_SPI', f'{PRE_OR_POST_ANGLE}_angle', 'dist_pre', 'dist_post']
for chip in chips:
    for condition in range(2):
        df_chips_condition[chip][condition] = pd.DataFrame(
            df_chips_condition[chip][condition], dtype=float, columns=condition_columns
        )

In [13]:
# make image folder for figures
img_path = './img'
# Resolve the folder against dir_path (the working directory captured when the notebook
# started) rather than the current directory, so re-running this cell after the chdir
# below does not create and enter a nested ./img/img.
img_dir = os.path.join(dir_path, img_path)
os.makedirs(img_dir, exist_ok=True)
os.chdir(img_dir)

In [None]:
# split into distance groups (15) and compute mean and variance
chips = ['R0402', 'R0603', 'R1005']
cases = range(8)
conditions = range(2)
num_groups = 15  # number of equal-width distance bins

# stat_data: nested dictionary used for visualization (dict -> dict -> DataFrame).
# stat_data[chip][str(case)] holds one row per non-empty distance bin.
stat_data = edict()
for chip in chips:
    stat_data[chip] = edict()
    for case in cases:
        # set data to plot (column 0: distance, column 1: angle)
        data = df_chips_cond_quad[chip][case].astype(float)
        distances = data.iloc[:, 0]

        bin_records = []
        if len(data) > 0:  # an empty frame has no min/max to build bins from
            data_min, data_max = distances.min(), distances.max()
            data_range = data_max - data_min
            # num_groups + 1 edges spanning [min, max]; the last edge is pinned to the exact
            # maximum so floating-point rounding cannot push the largest value out of range.
            bin_edges = [data_min + i * data_range / num_groups for i in range(num_groups)]
            bin_edges.append(data_max)

            # walk over the bins and collect one summary row per non-empty bin
            for i in range(num_groups):
                lower_edge, upper_edge = bin_edges[i], bin_edges[i + 1]
                # half-open bins [lower, upper); the last bin also includes its upper edge
                if i == num_groups - 1:
                    in_bin = (distances >= lower_edge) & (distances <= upper_edge)
                else:
                    in_bin = (distances >= lower_edge) & (distances < upper_edge)
                # boolean mask keeps row selection independent of the index labels
                data_group_df = data[in_bin]

                if len(data_group_df) == 0:
                    print('[INFO] at i:',i+1, '\t (',lower_edge,':',upper_edge,'):', 'None')
                    continue

                angle_mean = np.mean(data_group_df.iloc[:, 1])
                angle_stddev = np.std(data_group_df.iloc[:, 1])
                # midpoint between the smallest and largest distance found in the bin
                dist_mid = np.mean([data_group_df.iloc[:,0].max(), data_group_df.iloc[:,0].min()])
                # NaN inputs are recorded as an all-zero row
                if np.isnan(angle_mean) or np.isnan(angle_stddev) or np.isnan(dist_mid):
                    angle_mean, angle_stddev, dist_mid = 0, 0, 0
                bin_records.append([dist_mid, angle_mean, angle_stddev])

        # build the frame once instead of growing it row by row
        stat_data[chip][str(case)] = pd.DataFrame(
            bin_records, columns=['dist_mean', 'angle_mean', 'angle_stddev']
        )


In [None]:
# visualize stat_data
colordict = ['r','b']
# two panels per row; add_subplot needs an integer row count (len(cases)/2 is a float)
n_rows = (len(cases) + 1) // 2

for chip in chips:
    fig = plt.figure(figsize=(15,20))
    for i, case in enumerate(cases):
        ax = fig.add_subplot(n_rows, 2, i+1)
        # cast to float: frames that were filled row by row can carry object dtype
        stats = stat_data[chip][str(case)].astype(float)

        has_rows = not stats.empty
        if has_rows:
            stats.plot(ax=ax, kind='scatter',x='dist_mean', y='angle_mean', color=colordict[0], label=f'{chip}_mean')
            stats.plot(ax=ax, kind='scatter',x='dist_mean', y='angle_stddev', color=colordict[1], label=f'{chip}_stddev')
            # shaded band: mean +/- one standard deviation
            lower = stats['angle_mean'] - stats['angle_stddev']
            upper = stats['angle_mean'] + stats['angle_stddev']
            ax.fill_between(stats['dist_mean'], lower, upper, facecolor='green', alpha=0.2)

        # labels are set after plotting so they override the ones pandas derives from column names
        ax.set_title(f'{chip}: {PRE_OR_POST_ANGLE} Angle vs. SPI-Post-AOI offset distance (case:{case+1})')
        # the x unit follows BY_CHIP_PERCENTAGE, the flag that scales the distance to percent
        if BY_CHIP_PERCENTAGE:
            ax.set_xlabel('SPI-Post-AOI offset distance in percentage of chip length (%)')
        else:
            ax.set_xlabel('SPI-Post-AOI offset distance (\u03BCm)')
        ax.set_ylabel(f'{PRE_OR_POST_ANGLE} angle (degree)')
        # ax.set_xlim([np.min(pad_plot_data.iloc[:,0]), np.max(pad_plot_data.iloc[:,0])])
        # ax.set_ylim([0, 3.5])
        if has_rows:
            ax.legend()
        ax.grid()
    fig.tight_layout()
    fig.savefig(f'{chip}_cases.png')
    # release the figure; clearing it alone keeps it registered with pyplot
    plt.close(fig)

