### Now that the data retrieval is cleared up and we don't have any failed values or discrepancies, this notebook parses the .xlsx file into .csv files that can be used in our modeling notebooks!

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

### To deal with .xlsx extension, you will need to install the necessary package with the following command:

*pip install openpyxl*

In [None]:
# Input workbook and the worksheet inside it that gets parsed below
file_name = "data/fixed_10gate.xlsx"
sheet_name = "10_Gates_1_run_FAILS_fixed"

In [None]:
def read_sheet(file_name, sheet_name):
    """Load one worksheet of an Excel workbook into a DataFrame.

    Parameters
    ----------
    file_name
        Path of the workbook to open.
    sheet_name
        Worksheet to read; forwarded to ``pd.read_excel`` as ``sheet_name``.

    Returns
    -------
    Whatever ``pd.read_excel`` returns for that sheet, read with the
    ``openpyxl`` engine.
    """
    sheet = pd.read_excel(file_name, sheet_name=sheet_name, engine='openpyxl')
    return sheet

In [None]:
data = read_sheet(file_name, sheet_name)

In [None]:
data.head(5)

Unnamed: 0,base,values
0,./g_buffer/100ps/0p1nm/ncfet_g_buffer_0.3,
1,T_RISE,3.131324e-12
2,T_FALL,3.17279e-12
3,T_DELAY,9.921646e-12
4,./g_buffer/100ps/0p1nm/ncfet_g_buffer_0.4,


In [None]:
# Rows whose `base` text is longer than 10 characters are the parameter paths;
# the shorter entries are the measurement labels.
param_strs = data.loc[data['base'].map(len) > 10, 'base'].values
param_strs[:2]

array(['./g_buffer/100ps/0p1nm/ncfet_g_buffer_0.3',
       './g_buffer/100ps/0p1nm/ncfet_g_buffer_0.4'], dtype=object)

In [None]:
# The labels are matched with their trailing space, exactly as stored in the sheet
trise_vals = data.loc[data['base'] == 'T_RISE ', 'values'].values
tfall_vals = data.loc[data['base'] == 'T_FALL ', 'values'].values
tdelay_vals = data.loc[data['base'] == 'T_DELAY ', 'values'].values
# Sanity check: one value of each kind per parameter row
len({len(trise_vals), len(tfall_vals), len(tdelay_vals), len(param_strs)}) == 1

True

In [None]:
#trise_vals

In [None]:
#param_strs[0].split(sep='/')

After splitting the path on `/` (and some further parsing), the pieces are: [1] gate name, [2] clock cycle, [3] thickness, [4] voltage.

In [None]:
# Second path component is e.g. 'g_buffer'; dropping its first two characters leaves the gate name
gate_types = pd.Series([path.split('/')[1][2:] for path in param_strs])

In [None]:
# Components 2 and 3 look like '100ps' and '0p1nm'; strip the two-character unit suffix
clock_cycle = pd.Series([path.split('/')[2][:-2] for path in param_strs])
thickness = pd.Series([path.split('/')[3][:-2] for path in param_strs])
clock_cycle[0], thickness[0]

('100', '0p1')

In [None]:
#how to remove the pesky 'p' value in the thickness
#thickness[0].replace('p', '.')

In [None]:
# The path encodes the decimal point as 'p' (e.g. '0p1'); turn it back into '.'
thickness = thickness.str.replace('p', '.', regex=False)
thickness[:2]

0    0.1
1    0.1
dtype: object

In [None]:
# Voltage is whatever follows the last '_' in the final path component
voltage = pd.Series([path.rpartition('/')[2].rpartition('_')[2] for path in param_strs])

In [None]:
# Put the parsed parameters next to their measurements, one row per configuration
sample_clean_df = pd.DataFrame(dict(
    gate_types=gate_types,
    voltage=voltage,
    thickness=thickness,
    clock_cycle=clock_cycle,
    t_rise=trise_vals,
    t_fall=tfall_vals,
    t_delay=tdelay_vals,
))
sample_clean_df.tail(2)

Unnamed: 0,gate_types,voltage,thickness,clock_cycle,t_rise,t_fall,t_delay
11338,nor4,0.8,9,900,9.282352e-12,6.474237e-12,1.736532e-11
11339,nor4,0.9,9,900,9.847337e-12,8.520112e-12,1.743491e-11


In [None]:
def get_clean_df(df):
    """Reshape the raw simulation sheet into one row per simulated configuration.

    The sheet interleaves a parameter row, holding a path such as
    ``./g_buffer/100ps/0p1nm/ncfet_g_buffer_0.3``, with ``T_RISE``, ``T_FALL``
    and ``T_DELAY`` measurement rows. Measurements are paired with parameter
    rows purely by position, so the sheet must list them in the same order.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet with a ``base`` column (paths and measurement labels) and a
        ``values`` column (the measurements).

    Returns
    -------
    pandas.DataFrame
        Columns ``type``, ``voltage``, ``thickness`` and ``clock_cycle``
        (strings parsed from the path) followed by ``t_rise``, ``t_fall`` and
        ``t_delay`` (copied unchanged from ``values``), on a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the number of measurements of any kind differs from the number of
        parameter rows.
    """
    # Labels may or may not be padded (the sheet stores 'T_RISE '), so compare
    # on stripped text instead of relying on the exact padding.
    labels = df['base'].astype(str).str.strip()

    # Parameter rows are the ones that hold a path; the labels contain no '/'.
    is_path = labels.str.contains('/', regex=False)
    path_parts = [path.split('/') for path in labels[is_path]]

    raw_values = df['values'].to_numpy()

    def measurements(label):
        return raw_values[(labels == label).to_numpy()]

    timings = {
        't_rise': measurements('T_RISE'),
        't_fall': measurements('T_FALL'),
        't_delay': measurements('T_DELAY'),
    }
    for column, column_values in timings.items():
        if len(column_values) != len(path_parts):
            raise ValueError(
                f'{column}: found {len(column_values)} values for '
                f'{len(path_parts)} parameter rows'
            )

    return pd.DataFrame({
        # 'g_buffer' -> 'buffer'
        'type': [parts[1][2:] for parts in path_parts],
        # 'ncfet_g_buffer_0.3' -> '0.3'
        'voltage': [parts[-1].split('_')[-1] for parts in path_parts],
        # '0p1nm' -> '0p1' -> '0.1'
        'thickness': [parts[3][:-2].replace('p', '.') for parts in path_parts],
        # '100ps' -> '100'
        'clock_cycle': [parts[2][:-2] for parts in path_parts],
        **timings,
    })

In [None]:
# get_clean_df names the gate column "type"; expose it as "gate_type" here
run_df = get_clean_df(data).rename({"type": "gate_type"}, axis="columns")
run_df.shape

(11340, 7)

In [None]:
# Write the cleaned table to CSV; index=False keeps the row index out of the file
run_df.to_csv('data/ten_gate.csv', index=False)

In [None]:
run_df.head(1)

Unnamed: 0,type,voltage,thickness,clock_cycle,t_rise,t_fall,t_delay
0,buffer,0.3,0.1,100,3.131324e-12,3.17279e-12,9.921646e-12


In [None]:
run_df.tail().t_delay.values

array([1.495985e-11, 1.638343e-11, 1.708075e-11, 1.736532e-11,
       1.743491e-11])

### FAILED

Let's make sense of the number of FAILED values in the runs.

In [None]:
def get_failed(df, row_type='t_delay'):
    """Count the entries of one timing column that are marked as FAILED.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the timing columns.
    row_type : str
        Name of the column to inspect (default ``'t_delay'``).

    Returns
    -------
    tuple
        ``(count, summary)``: ``count`` is the number of failed entries as a
        plain ``int``, and ``summary`` is a ``'<column>: <percent>%'`` string.
    """
    # Match the marker regardless of padding: the sheet stores ' FAILED', but a
    # cleaned export may not. Non-string cells (real measurements) never match.
    is_failed = df[row_type].map(lambda cell: isinstance(cell, str) and cell.strip() == 'FAILED')
    failed = int(is_failed.sum())
    # An empty table has no failures; avoid the 0/0 that would print 'nan%'
    percent = failed / len(df) * 100 if len(df) else 0.0
    return failed, f'{row_type}: {percent: .2f}%'
def get_full_fail(df):
    """Run ``get_failed`` on every timing column of ``df``.

    Returns a list with one ``get_failed`` result per column, in the order
    ``t_delay``, ``t_rise``, ``t_fall``.
    """
    summaries = []
    for column in ('t_delay', 't_rise', 't_fall'):
        summaries.append(get_failed(df, column))
    return summaries

In [None]:
#get_failed(run1_df, 't_fall'), get_failed(run1_df, 't_rise'), get_failed(run1_df, 't_delay')

In [None]:
run_df[run_df.t_fall == ' FAILED'].head(2)

Unnamed: 0,type,voltage,thickness,clock_cycle,t_rise,t_fall,t_delay


In [None]:
get_full_fail(run_df)

[(0, 't_delay:  0.00%'), (0, 't_rise:  0.00%'), (0, 't_fall:  0.00%')]

In [None]:
get_full_fail(run_df)

[(0, 't_delay:  0.00%'), (0, 't_rise:  0.00%'), (0, 't_fall:  0.00%')]

In [None]:
get_full_fail(run_df)

[(0, 't_delay:  0.00%'), (0, 't_rise:  0.00%'), (0, 't_fall:  0.00%')]

In [None]:
get_full_fail(run_df)

[(0, 't_delay:  0.00%'), (0, 't_rise:  0.00%'), (0, 't_fall:  0.00%')]

In [None]:
get_full_fail(run_df)

[(0, 't_delay:  0.00%'), (0, 't_rise:  0.00%'), (0, 't_fall:  0.00%')]

In [None]:
# Number of cleaned rows, i.e. simulated configurations
run_len = len(run_df)
run_len

11340

Just to check that we are getting 11k samples as expected!