# Data Preprocessing (Merging / Vt Dataset Making)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

In [2]:
# Grid of measured cases. Each (width, length, temperature) combination names
# one CSV file, e.g. W10L5T0.csv. Units are not stated in this notebook.
WIDTHS = [10, 20, 35, 50, 100, 1000]
LENGTHS = [5, 8, 10, 12]
TEMPS = [0, 25, 80]

## 1) Merge Train Data
The W=35, L=8, T=25 case is excluded here so that it can serve as the held-out test dataset for the presentation.\
For the submission, include this case in the training dataset.

In [3]:
# Read every per-case CSV except the held-out one, then concatenate once.
# Concatenating inside the loop would re-copy the accumulated frame on every
# iteration, so the frames are collected in a list first.
case_frames = []
for width in WIDTHS:
    for length in LENGTHS:
        for temperature in TEMPS:
            if width == 35 and length == 8 and temperature == 25:  # only for presentation sample test dataset
                continue
            case_frames.append(
                pd.read_csv(f'./train_data/W{width}L{length}T{temperature}.csv')
            )

# Each file keeps its own row index, exactly as with pairwise concatenation.
# total_df stays None when no file was read.
total_df = pd.concat(case_frames) if case_frames else None

In [4]:
# Write the merged training frame to disk; index=False omits the row index column.
total_df.to_csv('./train_data/merged_train.csv', index=False)

## 2) Vt Dataset Making
The W=35, L=8, T=25 case is excluded here so that it can serve as the held-out test dataset for the presentation.\
For the submission, include this case in the training dataset.

In [5]:
def get_vt_point_list(width, length, temperature, train=True):
    """Pick one threshold point per Vds from a single measurement CSV.

    Reads ``W{width}L{length}T{temperature}.csv`` from ``./train_data/`` when
    ``train`` is true, otherwise from ``./test_data/``. The file must provide
    ``Vds``, ``Vgs`` and ``Ids`` columns. For every distinct ``Vds`` the
    threshold is taken to be the ``Vgs`` whose ``Ids`` is closest, on a
    natural-log scale, to the reference current ``1e-8 * width / length``.

    Rows whose ``Ids`` is zero, negative or NaN are ignored because their
    logarithm is undefined, and (Vds, Vgs) combinations that are absent from
    the file are skipped instead of raising.

    Args:
        width: Width value used in the file name and the reference current.
        length: Length value used in the file name and the reference current.
        temperature: Temperature value used in the file name.
        train: Selects the directory to read from (train vs. test data).

    Returns:
        A list with one ``[Vds, Vt, Ids, width, length, temperature]`` entry
        per distinct ``Vds``, in order of first appearance in the file. ``Vt``
        and ``Ids`` are the placeholder ``-100`` when no row for that ``Vds``
        has a positive ``Ids``.
    """
    data_dir = './train_data/' if train else './test_data/'
    df = pd.read_csv(f'{data_dir}W{width}L{length}T{temperature}.csv')

    target_log_ids = math.log(1e-8 * width / length)
    # Candidates are visited in file order of first appearance, so a tie in
    # distance is resolved in favour of the earliest Vgs.
    vgs_order = df['Vgs'].unique()

    v_threshold_point_list = []
    for vds in df['Vds'].unique():
        sweep = df[df['Vds'] == vds]

        # Keep the first Ids recorded for each Vgs within this Vds sweep.
        ids_by_vgs = {}
        for vgs, ids in zip(sweep['Vgs'].to_numpy(), sweep['Ids'].to_numpy()):
            ids_by_vgs.setdefault(vgs, ids)

        min_dist = math.inf
        v_threshold = -100
        ids_threshold = -100
        for vgs in vgs_order:
            point_ids = ids_by_vgs.get(vgs)
            # "not > 0" also rejects NaN, which would otherwise poison the log.
            if point_ids is None or not point_ids > 0:
                continue
            current_dist = abs(math.log(point_ids) - target_log_ids)
            if current_dist < min_dist:
                min_dist = current_dist
                v_threshold = vgs
                ids_threshold = point_ids
        v_threshold_point_list.append(
            [vds, v_threshold, ids_threshold, width, length, temperature]
        )
    return v_threshold_point_list

In [6]:
# Same value grid as the constants cell near the top of the notebook,
# re-declared here (presumably so this section can be run on its own).
WIDTHS = [10, 20, 35, 50, 100, 1000]
LENGTHS = [5, 8, 10, 12]
TEMPS = [0, 25, 80]

In [7]:
# Collect threshold points for every (W, L, T) case. The held-out case is read
# from the test directory and kept in its own list.
total_v_threshold_point_list = []
test_v_threshold_point_list = []
for width in WIDTHS:
    for length in LENGTHS:
        for temperature in TEMPS:
            # only for presentation sample test dataset
            if (width, length, temperature) == (35, 8, 25):
                test_v_threshold_point_list.extend(
                    get_vt_point_list(width, length, temperature, False)
                )
            else:
                total_v_threshold_point_list.extend(
                    get_vt_point_list(width, length, temperature, True)
                )
# Only the last bare expression of a cell is echoed, so the test-list length
# on the next line is evaluated but not shown.
len(test_v_threshold_point_list)
len(total_v_threshold_point_list)

426

In [8]:
# Tabulate the training threshold points; the column order mirrors the
# [vds, v_threshold, ids_threshold, width, length, temperature] rows built by
# get_vt_point_list.
vt_dataframe = pd.DataFrame(total_v_threshold_point_list, columns=['Vds', 'Vt', 'Ids', 'W', 'L', 'T'])
vt_dataframe.head()  # preview the first rows

Unnamed: 0,Vds,Vt,Ids,W,L,T
0,0.1,1.0,2.5559e-08,10,5,0
1,1.0,0.6,2.0249e-08,10,5,0
2,5.0,0.6,2.2716e-08,10,5,0
3,10.0,0.6,2.9482e-08,10,5,0
4,20.0,0.4,2.4382e-08,10,5,0


In [9]:
# Save the training threshold table; index=False omits the row index column.
vt_dataframe.to_csv('./vt_data/vt_train.csv', index=False)

In [10]:
# Tabulate the threshold points of the held-out case with the same column
# layout as the training table.
vt_test_dataframe = pd.DataFrame(test_v_threshold_point_list, columns=['Vds', 'Vt', 'Ids', 'W', 'L', 'T'])
vt_test_dataframe.head()  # preview the first rows

Unnamed: 0,Vds,Vt,Ids,W,L,T
0,0.1,1.0,4.056e-08,35,8,25
1,1.0,0.8,7.9329e-08,35,8,25
2,5.0,0.8,8.5315e-08,35,8,25
3,10.0,0.6,2.5124e-08,35,8,25
4,20.0,0.6,5.4691e-08,35,8,25


In [11]:
# Save the held-out threshold table; index=False omits the row index column.
vt_test_dataframe.to_csv('./vt_data/vt_test.csv', index=False)