### Initial work exploring whether we can train generalizable models --> i.e. given training data on `and` gates and `nor` gates, can we make accurate predictions on `Nand` gates?

#### will stick with the simple models for now while fleshing it out. Can introduce other types of model/algo later on

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet, LassoCV, MultiTaskLassoCV, MultiTaskElasticNetCV
from sklearn import linear_model
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import xgboost as xgb

First, we'll extract the data for a single run. This should feature 10 different gates

In [None]:
df = pd.read_csv('data/ten_gate.csv')
df.head(2)

Unnamed: 0,gate_type,voltage,thickness,clock_cycle,t_rise,t_fall,t_delay
0,buffer,0.3,0.1,100,3.131324e-12,3.17279e-12,9.921646e-12
1,buffer,0.4,0.1,100,8.473565e-13,1.456597e-12,7.269524e-12


In [None]:
df.voltage.unique()

array([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [None]:
df.thickness.unique()

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 2. , 3. , 4. ,
       5. , 6. , 7. , 8. , 9. ])

In [None]:
df.clock_cycle.unique()

array([100, 200, 300, 400, 500, 600, 700, 800, 900])

In [None]:
unique_gates = list(df.gate_type.unique())
np.array(unique_gates)

array(['buffer', 'inv_x1', 'inv_x2', 'inv_x4', 'nand2', 'nand3', 'nand4',
       'nor2', 'nor3', 'nor4'], dtype='<U6')

We'll need to drop some failed values in our nand4 gate in order to proceed, you can see the heatmap at the bottom of notebook #3 in order to confirm the need for this

In [None]:
#these are the bad t_fall values in nand4 that need to be dropped
nand4_drop_idxs = ((df['gate_type'] == 'nand4') & (df['t_fall'] == ' FAILED')).values
nand4_drop_idxs.sum()

0

In [None]:
df = df.loc[~nand4_drop_idxs].reset_index(drop=True)
df.shape

(11340, 7)

Lets flesh out a few functions that will make our lives easier in order to index into our data appropriately and drop whatever items we might need to be removed

In [None]:
def get_gate(gate_type='buffer'): return df[df.gate_type == gate_type]

In [None]:
def get_gate_xy(gate_type='buffer'):
    #y_drop = 't_fall' if gate_type == 'buffer' else 't_rise'
    df = get_gate(gate_type)
    x_cols = ['clock_cycle', 'thickness', 'voltage']
    y_cols = ['t_delay', 't_rise', 't_fall']
    #y_cols.remove(y_drop)
    x = np.vstack(df[x_cols].values).astype('float')
    y = np.vstack(np.vstack(df[y_cols].values)).astype('float')
    return x, y

In [None]:
x1, y1 = get_gate_xy('nor3')
x2, y2 = get_gate_xy('nand2')
x3, y3 = get_gate_xy('buffer')
x1.shape, y1.shape, x2.shape, y2.shape, x3.shape, x3.shape

((1134, 3), (1134, 3), (1134, 3), (1134, 3), (1134, 3), (1134, 3))

In [None]:
gate_dict = dict.fromkeys(unique_gates)
gate_dict

{'buffer': None,
 'inv_x1': None,
 'inv_x2': None,
 'inv_x4': None,
 'nand2': None,
 'nand3': None,
 'nand4': None,
 'nor2': None,
 'nor3': None,
 'nor4': None}

In [None]:
for gate in unique_gates: 
    gate_dict[gate] = get_gate_xy(gate_type=gate)

In [None]:
x, y = gate_dict['buffer']
x_tr, x_val, y_tr, y_val = train_test_split(x,y)
x_tr.shape, x_val.shape, y_tr.shape, y_val.shape

((850, 3), (284, 3), (850, 3), (284, 3))

In [None]:
def get_mult_gate_df(gate_names=['buffer']):
    gates = [gate_dict[gate] for gate in gate_names]
    xs = np.vstack([gate[0] for gate in gates])
    ys = np.vstack([gate[1] for gate in gates])
    #xs = np.append(*xs, axis=0)
    #ys = np.append(*ys, axis=0)
    return xs, ys

In [None]:
xs_tr, ys_tr = get_mult_gate_df(['nor2', 'nand2'])
x_val, y_val = get_mult_gate_df(['buffer'])
xs_tr.shape, ys_tr.shape, x_val.shape, y_val.shape

((2268, 3), (2268, 3), (1134, 3), (1134, 3))

In [None]:
#lets introduce partial functions to clean up our code a little
from functools import partial

In [None]:
def normalize(x): return x / x.min() #used to normalize our Y values

def poly_x(x, num_poly=2): return PolynomialFeatures(num_poly).fit_transform(x)

def do_simple_model(x_tr, y_tr, x_val, y_val, model_name='ridge', num_poly=2, alpha=1.0, norm_y=True, print_vals=True,
                   get_model=False):
    #x_tr, x_val, y_tr, y_val = train_test_split(x,y)
    
    #lets convert our independent variable into a different polynomial combination
    if num_poly: x_tr, x_val = poly_x(x_tr, num_poly), poly_x(x_val, num_poly)
    
    alps = np.logspace(-5,-1)
    
    if model_name   == 'ridge': model = Ridge(alpha=alpha, random_state=0)
    elif model_name == 'ridge-cv': model = RidgeCV()#alphas=alps)
    #elif model_name == 'lasso': model = linear_model.Lasso(alpha=alpha,random_state=0)
    #elif model_name == 'elastic': model = ElasticNet(random_state=0)
    elif model_name == 'lasso': model = MultiTaskLassoCV(random_state=0, alphas=alps, max_iter=10000)
    elif model_name == 'elastic': model = MultiTaskElasticNetCV(random_state=0, alphas=alps, max_iter=10000,
                                                                  l1_ratio=np.arange(0.1, 1, 0.1))
    elif model_name == 'random_forest': model = RandomForestRegressor(max_depth=100, random_state=0)
    elif model_name == 'xgboost': model = xgb.XGBRegressor(eval_metric='rmse', random_state=0)
    
    #normally want to normalize y values for regression problem
    if norm_y: y_tr, y_val = normalize(y_tr), normalize(y_val)
    
    #now lets fit our model to our training data
    model.fit(x_tr, y_tr)
    
    #we'll predict our R2 score on only the validation data
    r2score = model.score(x_val, y_val)
    if print_vals: print(f'for a {model_name} model, the validation R2 score is: {r2score:2f}')
    if get_model: return model
    return r2score

In [None]:
get_mult_gate_df([gates[-1]])

(array([[1.e+02, 1.e-01, 3.e-01],
        [1.e+02, 1.e-01, 4.e-01],
        [1.e+02, 1.e-01, 5.e-01],
        ...,
        [9.e+02, 9.e+00, 7.e-01],
        [9.e+02, 9.e+00, 8.e-01],
        [9.e+02, 9.e+00, 9.e-01]]),
 array([[1.632233e-11, 1.977015e-12, 1.227493e-11],
        [1.053187e-11, 1.041346e-12, 6.330658e-12],
        [9.340901e-12, 8.290432e-13, 5.132249e-12],
        ...,
        [4.995822e-11, 8.932680e-12, 1.255144e-11],
        [5.445763e-11, 9.260474e-12, 1.234946e-11],
        [5.852438e-11, 9.273399e-12, 1.190102e-11]]))

In [None]:
def run_models(gate_types=['buffer','nand2', 'nor3', 'nor4', 'nor2'], random_state=0, print_vals=True):
    print(f'-----Training with {gate_types[:-1]} gate(s)-----')
    print(f'-----Validating with {gate_types[-1]} gate(s)-----')
    x_tr, y_tr = get_mult_gate_df(gate_types[:-1])
    x_val, y_val = get_mult_gate_df([gate_types[-1]])
    scores = []
    do_model = partial(do_simple_model, x_tr, y_tr,x_val, y_val, print_vals=print_vals)
    #do_model(model_name='lasso')
    #do_model(model_name='elastic')
    scores.append(do_model(model_name='ridge', norm_y=False))
    #scores.append(do_model(model_name='ridge-cv', norm_y=True))
    scores.append(do_model(model_name='lasso'))
    scores.append(do_model(model_name='elastic'))
    scores.append(do_model(model_name='random_forest', num_poly=False))
    scores.append(do_model(model_name='xgboost', num_poly=False))
    return scores
    print('\n')

In [None]:
#test run on just the buffer gate
_ = run_models(print_vals=True)

-----Training with ['buffer', 'nand2', 'nor3', 'nor4'] gate(s)-----
-----Validating with nor2 gate(s)-----
for a ridge model, the validation R2 score is: 0.560218
for a lasso model, the validation R2 score is: -6.008699
for a elastic model, the validation R2 score is: -5.997165
for a random_forest model, the validation R2 score is: -6.000715
for a xgboost model, the validation R2 score is: -6.001771


In [None]:
_ = run_models(print_vals=True, gate_types=['nor2', 'nor3', 'nor4'])

-----Training with ['nor2', 'nor3'] gate(s)-----
-----Validating with nor4 gate(s)-----
for a ridge model, the validation R2 score is: 0.510508
for a lasso model, the validation R2 score is: -0.094901
for a elastic model, the validation R2 score is: -0.089154
for a random_forest model, the validation R2 score is: -0.057613
for a xgboost model, the validation R2 score is: -0.056576


In [None]:
_ = run_models(print_vals=True, gate_types=['nor2', 'nor3', 'buffer', 'nor4'])

-----Training with ['nor2', 'nor3', 'buffer'] gate(s)-----
-----Validating with nor4 gate(s)-----
for a ridge model, the validation R2 score is: 0.176506
for a lasso model, the validation R2 score is: -13.249755
for a elastic model, the validation R2 score is: -13.232831
for a random_forest model, the validation R2 score is: -13.329106
for a xgboost model, the validation R2 score is: -13.302315


In [None]:
_ = run_models(print_vals=True, gate_types=['nor2', 'nor3', 'nor4', 'buffer'])

-----Training with ['nor2', 'nor3', 'nor4'] gate(s)-----
-----Validating with buffer gate(s)-----
for a ridge model, the validation R2 score is: -7.824065
for a lasso model, the validation R2 score is: -1.191767
for a elastic model, the validation R2 score is: -1.188271
for a random_forest model, the validation R2 score is: -1.145132
for a xgboost model, the validation R2 score is: -1.144619


In [None]:
_ = run_models(print_vals=True, gate_types=['nand2', 'nand3', 'nand4'])

-----Training with ['nand2', 'nand3'] gate(s)-----
-----Validating with nand4 gate(s)-----
for a ridge model, the validation R2 score is: 0.313560
for a lasso model, the validation R2 score is: 0.210744
for a elastic model, the validation R2 score is: 0.210168
for a random_forest model, the validation R2 score is: 0.242085
for a xgboost model, the validation R2 score is: 0.241073


In [None]:
#list(gate_dict.keys())

In [None]:
_ = run_models(print_vals=True, gate_types=list(gate_dict.keys()))

-----Training with ['buffer', 'inv_x1', 'inv_x2', 'inv_x4', 'nand2', 'nand3', 'nand4', 'nor2', 'nor3'] gate(s)-----
-----Validating with nor4 gate(s)-----
for a ridge model, the validation R2 score is: -2.759713
for a lasso model, the validation R2 score is: -38.186342
for a elastic model, the validation R2 score is: -38.178127
for a random_forest model, the validation R2 score is: -38.328649
for a xgboost model, the validation R2 score is: -38.329943
