In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import xgboost as xgb

In [None]:
# Read the gate-timing results and preview the first two rows.
csv_path = 'data/run1.csv'
df = pd.read_csv(csv_path)
df.head(2)

Unnamed: 0,gate_type,voltage,thickness,clock_cycle,t_rise,t_fall,t_delay
0,buffer,0.3,0.1,100,3.131324e-12,3.17279e-12,9.921646e-12
1,buffer,0.4,0.1,100,8.473565e-13,1.456597e-12,7.269524e-12


In [None]:
# Boolean mask of the nand4 rows whose t_fall holds the ' FAILED' marker;
# these rows are dropped in the next cell.
is_nand4 = df['gate_type'].eq('nand4')
fall_failed = df['t_fall'].eq(' FAILED')
nand4_drop_idxs = (is_nand4 & fall_failed).to_numpy()
nand4_drop_idxs.sum()

36

In [None]:
# Drop the failed nand4 rows. The mask is rebuilt from the current frame so
# this cell can be re-run safely: a mask computed before the first drop no
# longer matches df's length afterwards.
failed_nand4 = (df['gate_type'] == 'nand4') & (df['t_fall'] == ' FAILED')
df = df.loc[~failed_nand4].reset_index(drop=True)
df.shape

(11304, 7)

In [None]:
def get_gate(gate_type='buffer'):
    """Return the rows of the notebook-level `df` whose gate_type equals `gate_type`."""
    matches = df['gate_type'] == gate_type
    return df[matches]

In [None]:
# Distinct gate types, in order of first appearance in df.
unique_gates = df['gate_type'].unique().tolist()
unique_gates

['buffer',
 'inv_x1',
 'inv_x2',
 'inv_x4',
 'nand2',
 'nand3',
 'nand4',
 'nor2',
 'nor3',
 'nor4']

In [None]:
def get_gate_xy(gate_type='buffer'):
    """Build float feature and target arrays for one gate type.

    Features are the columns clock_cycle, thickness, voltage (in that order).
    Targets are t_delay plus one edge time: t_rise for 'buffer' (t_fall is
    dropped), t_fall for every other gate (t_rise is dropped).

    Args:
        gate_type: value of the gate_type column to select.

    Returns:
        (x, y): float arrays with one row per selected sample; x has 3
        columns and y has 2. A gate type with no rows yields empty arrays
        of shape (0, 3) and (0, 2).
    """
    y_drop = 't_fall' if gate_type == 'buffer' else 't_rise'
    gate_rows = get_gate(gate_type)
    x_cols = ['clock_cycle', 'thickness', 'voltage']
    y_cols = [col for col in ('t_delay', 't_rise', 't_fall') if col != y_drop]
    # The frame slice is already 2-D, so no stacking is needed; astype(float)
    # also converts timing values that were read from the CSV as strings.
    x = gate_rows[x_cols].to_numpy().astype(float)
    y = gate_rows[y_cols].to_numpy().astype(float)
    return x, y

In [None]:
# Sanity check on one gate: x should have 3 feature columns and y 2 target columns.
# Note: this binds the notebook-level x and y to the nor3 data until a later cell rebinds them.
x, y = get_gate_xy('nor3')
x.shape, y.shape

((1134, 3), (1134, 2))

In [None]:
# One placeholder entry per gate type; the next cell fills in the (x, y) arrays.
gate_dict = {gate_name: None for gate_name in unique_gates}
gate_dict

{'buffer': None,
 'inv_x1': None,
 'inv_x2': None,
 'inv_x4': None,
 'nand2': None,
 'nand3': None,
 'nand4': None,
 'nor2': None,
 'nor3': None,
 'nor4': None}

In [None]:
# Cache the (x, y) arrays of every gate type under its name.
for gate in unique_gates:
    xy_pair = get_gate_xy(gate_type=gate)
    gate_dict[gate] = xy_pair

In [None]:
def ridge_reg(x_train, y_train, x_valid, y_valid):
    """Fit a Ridge(alpha=1) model and print its train/validation scores and MSE.

    Args:
        x_train, y_train: data the model is fitted on.
        x_valid, y_valid: held-out data used for the reported test score and MSE.

    Returns:
        The model's predictions for `x_valid` (same convention as do_rf / do_xgb).
    """
    print('Doing Ridge Regression.....')
    ridge = Ridge(alpha = 1)
    ridge.fit(x_train, y_train)
    print('training score = ', ridge.score(x_train, y_train))
    y_pred_ridge = ridge.predict(x_valid)
    mse_ridge = mean_squared_error(y_valid, y_pred_ridge)
    print('test score =', ridge.score(x_valid, y_valid))
    # The MSE used to be computed and then thrown away; report it like do_rf does.
    print(f'the mse is {mse_ridge}')
    return y_pred_ridge

def do_rf(x_train, y_train, x_valid, y_valid):
    """Fit a random forest and print its train/validation R^2 and validation MSE.

    Args:
        x_train, y_train: data the forest is fitted on.
        x_valid, y_valid: held-out data used for the reported metrics.

    Returns:
        The forest's predictions for `x_valid`.
    """
    # Leading newline separates this section from the previous model's output
    # (the escape had been lost, which printed a stray 'n').
    print('\ndoing RF.....')
    RF = RandomForestRegressor(max_depth=100, random_state=0)
    RF.fit(x_train, y_train)
    print('trainig score = ', RF.score(x_train, y_train))
    y_pred = RF.predict(x_valid)
    mse = mean_squared_error(y_valid, y_pred)
    r2 = r2_score(y_valid, y_pred) # coefficient of determination
    print('test score =', RF.score(x_valid, y_valid))
    print(f'the mse is {mse}')
    print(f"the r2 score is: {r2}")
    return y_pred


def do_xgb(x_train, y_train, x_valid, y_valid):
    """Fit an XGBoost regressor and print its train/validation scores, MSE and R^2.

    Args:
        x_train, y_train: data the regressor is fitted on.
        x_valid, y_valid: held-out data used for the reported metrics.

    Returns:
        The regressor's predictions for `x_valid`.
    """
    # Leading newline separates this section from the previous model's output
    # (the escape had been lost, which printed a stray 'n').
    print('\nboosting the trees.....')
    regressor=xgb.XGBRegressor(eval_metric='rmse')
    regressor.fit(x_train, y_train)
    # .score() is the coefficient of determination regardless of eval_metric
    # (which only affects training-time evaluation), so it must not be squared.
    train_score = regressor.score(x_train, y_train)
    y_pred = regressor.predict(x_valid)
    mse = mean_squared_error(y_valid, y_pred)
    r2 = r2_score(y_valid, y_pred) # coefficient of determination
    valid_score = regressor.score(x_valid, y_valid)

    print(f'the training score is {train_score:3f}')
    print(f"the validation score is {valid_score:3f}")
    print(f'the mse is {mse:3f}')
    print(f"the r2 score is: {r2:3f}")
    return y_pred



def train_models(gate_type='buffer', random_state=0, normalize_y=True):
    """Train and report ridge, random-forest and XGBoost models for one gate type.

    Args:
        gate_type: key into `gate_dict` selecting the (x, y) arrays.
        random_state: seed forwarded to train_test_split.
        normalize_y: when true, the targets given to the tree models are
            divided by the smallest training target. Ridge always sees the
            raw targets.

    Returns:
        None; all results are printed by the per-model helpers.
    """
    print(f'-----Analyzing {gate_type} gates-----\n')
    x, y = gate_dict[gate_type]
    x_train, x_valid, y_train, y_valid = train_test_split(
        x, y, random_state=random_state, test_size=0.1)
    ridge_reg(x_train, y_train, x_valid, y_valid)
    if normalize_y:
        # Use ONE scale, taken from the training split, for both splits.
        # Dividing each split by its own minimum puts train and validation
        # targets on different scales whenever the minima differ.
        scale = y_train.min()
        y_train = y_train / scale
        y_valid = y_valid / scale
    do_rf(x_train, y_train, x_valid, y_valid)
    do_xgb(x_train, y_train, x_valid, y_valid)
    print('\n\n')

In [None]:
# Hold-out split of the buffer data used by the exploratory cells below.
# A fixed seed keeps their scores reproducible across kernel restarts.
x, y = gate_dict['buffer']
x_tr, x_val, y_tr, y_val = train_test_split(x, y, random_state=0)
x_tr.shape, x_val.shape, y_tr.shape, y_val.shape

((850, 3), (284, 3), (850, 2), (284, 2))

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet, LassoCV, MultiTaskLassoCV, MultiTaskElasticNetCV
from sklearn import linear_model

In [None]:
# Shape of the degree-2 polynomial expansion of the full feature array x.
PolynomialFeatures(2).fit_transform(x).shape

(1134, 10)

In [None]:
# ElasticNet with default settings on the raw features (no polynomial expansion).
regr = ElasticNet().fit(x_tr, y_tr)
val_score = regr.score(x_val, y_val)
print(val_score)

-0.0036199686533131548


In [None]:
#with polynomial features.....
# Fit the expansion on the training split only and reuse the same fitted
# transformer for validation instead of fitting a second one on x_val.
poly = PolynomialFeatures(2)
poly_x = poly.fit_transform(x_tr)
regr = ElasticNet()
regr.fit(poly_x, y_tr);
poly_x_val = poly.transform(x_val)
print(regr.score(poly_x_val, y_val))

-0.0036199686533131548


In [None]:
# Lasso (alpha=0.1) on the raw features, no polynomial expansion.
clf = linear_model.Lasso(alpha=0.1).fit(x_tr, y_tr)
val_score = clf.score(x_val, y_val)
print(val_score)

-0.0036199686533131548


In [None]:
#lasso with polynomial features
# Fit the degree-4 expansion on the training split only and reuse the same
# fitted transformer for validation instead of fitting a second one on x_val.
poly = PolynomialFeatures(4)
poly_x_tr = poly.fit_transform(x_tr)
clf = linear_model.Lasso(alpha=0.1)
clf.fit(poly_x_tr, y_tr)
poly_x_val = poly.transform(x_val)
print(clf.score(poly_x_val, y_val))

0.46008202167999784


In [None]:
# Ridge (alpha=1.0) on the raw features, no polynomial expansion.
clf = Ridge(alpha=1.0).fit(x_tr, y_tr)
clf.score(x_val, y_val)

0.6831539257356407

In [None]:
#Ridge with polynomial features
# This cell previously repeated the plain-Ridge fit; apply the degree-2
# expansion (fitted on the training split) so it matches its title.
poly = PolynomialFeatures(2)
clf = Ridge(alpha=1.0)
clf.fit(poly.fit_transform(x_tr), y_tr);
clf.score(poly.transform(x_val), y_val)

In [None]:
# Random forest on normalized targets, without polynomial features.
# NOTE(review): do_simple_model is only defined in a later cell, so this call
# fails on a fresh top-to-bottom run.
do_simple_model(x,y, 'random_forest', num_poly=None, norm_y=True)

for a random_forest model, the validation score is: 1.0


In [None]:
# XGBoost on normalized targets, without polynomial features.
# NOTE(review): do_simple_model is only defined in a later cell, so this call
# fails on a fresh top-to-bottom run.
do_simple_model(x,y, 'xgboost', num_poly=None, norm_y=True)

for a xgboost model, the validation score is: 0.9999995886556486


In [None]:
def normalize(x):
    """Divide `x` by its own minimum value and return the result."""
    smallest = x.min()
    return x / smallest

def do_simple_model(x, y, model_name='ridge', num_poly=2, alpha=1.0, norm_y=False,
                    random_state=None):
    """Fit one model on a random train/validation split and print its validation score.

    Args:
        x: feature array.
        y: target array aligned with `x`.
        model_name: one of 'ridge', 'lasso', 'elastic', 'lasso-cv',
            'elastic-cv', 'random_forest', 'xgboost'.
        num_poly: degree passed to PolynomialFeatures; a falsy value
            (None or 0) skips the expansion.
        alpha: regularisation strength for 'ridge', 'lasso' and 'elastic'.
        norm_y: when true, divide both target splits by the smallest
            training target.
        random_state: forwarded to train_test_split; the default None keeps
            the previous unseeded behaviour.

    Raises:
        ValueError: if `model_name` is not one of the supported names.
    """
    # Factories, so only the requested estimator is ever constructed.
    builders = {
        'ridge': lambda: Ridge(alpha=alpha),
        'lasso': lambda: linear_model.Lasso(alpha=alpha),
        'elastic': lambda: ElasticNet(alpha=alpha),
        'lasso-cv': lambda: MultiTaskLassoCV(cv=5, random_state=0),
        'elastic-cv': lambda: MultiTaskElasticNetCV(cv=5, random_state=0),
        'random_forest': lambda: RandomForestRegressor(max_depth=100, random_state=0),
        'xgboost': lambda: xgb.XGBRegressor(eval_metric='rmse'),
    }
    if model_name not in builders:
        # Previously an unknown name fell through and failed later on an
        # unbound local; fail early with the list of valid names instead.
        raise ValueError(
            f'unknown model_name {model_name!r}; expected one of {sorted(builders)}'
        )

    x_tr, x_val, y_tr, y_val = train_test_split(x, y, random_state=random_state)
    if num_poly:
        # Fit the expansion on the training split and reuse it for validation.
        poly = PolynomialFeatures(num_poly)
        x_tr = poly.fit_transform(x_tr)
        x_val = poly.transform(x_val)
    if norm_y:
        # One scale for both splits; separate per-split minima would put the
        # train and validation targets on different scales.
        scale = y_tr.min()
        y_tr, y_val = y_tr / scale, y_val / scale

    model = builders[model_name]()
    model.fit(x_tr, y_tr)
    r2score = model.score(x_val, y_val)
    print(f'for a {model_name} model, the validation score is: {r2score}')

In [None]:
def run_models(gate_type='buffer', random_state=0, normalize_y=True):
    """Print validation scores of several models for one gate type.

    Args:
        gate_type: key into `gate_dict` selecting the (x, y) arrays.
        random_state: seed for NumPy's global random generator, set once
            before the models are run (None reseeds unpredictably).
        normalize_y: forwarded as `norm_y` to the random-forest and XGBoost
            runs; the linear models always use the raw targets.
    """
    print(f'-----Analyzing {gate_type} gates-----')
    x, y = gate_dict[gate_type]
    # do_simple_model calls train_test_split without a seed, and an unseeded
    # split draws from NumPy's global generator. Seeding that generator here
    # is what makes `random_state` (previously ignored) take effect.
    np.random.seed(random_state)
    do_simple_model(x, y, model_name='ridge')
    #do_simple_model(x, y, model_name='lasso')
    #do_simple_model(x, y, model_name='elastic')
    do_simple_model(x, y, model_name='lasso-cv')
    do_simple_model(x, y, model_name='elastic-cv')
    # `normalize_y` used to be ignored in favour of a hard-coded True.
    do_simple_model(x, y, model_name='random_forest', norm_y=normalize_y, num_poly=0)
    do_simple_model(x, y, model_name='xgboost', norm_y=normalize_y)
    print('\n')

In [None]:
# Compare the models on the default gate type ('buffer').
run_models()

-----Analyzing buffer gates-----
for a ridge model, the validation score is: 0.7977589152008588
for a lasso-cv model, the validation score is: 0.5943788405574078
for a elastic-cv model, the validation score is: 0.6307733504389321
for a random_forest model, the validation score is: 1.0
for a xgboost model, the validation score is: 0.9999999870819181




In [None]:
# Run the model comparison for every gate type stored in gate_dict.
for gate_type in gate_dict:
    run_models(gate_type, random_state=123)

-----Analyzing buffer gates-----
for a ridge model, the validation score is: 0.7995587181468095


-----Analyzing inv_x1 gates-----
for a ridge model, the validation score is: 0.8730475028787507


-----Analyzing inv_x2 gates-----
for a ridge model, the validation score is: 0.8643724159480674


-----Analyzing inv_x4 gates-----
for a ridge model, the validation score is: 0.8545830770987946


-----Analyzing nand2 gates-----
for a ridge model, the validation score is: 0.873350514053701


-----Analyzing nand3 gates-----
for a ridge model, the validation score is: 0.9258983518621478


-----Analyzing nand4 gates-----
for a ridge model, the validation score is: 0.8612734166190821


-----Analyzing nor2 gates-----
for a ridge model, the validation score is: 0.831350407683912


-----Analyzing nor3 gates-----
for a ridge model, the validation score is: 0.8343835830053519


-----Analyzing nor4 gates-----
for a ridge model, the validation score is: 0.8248249782199548




In [None]:
# Ridge regression (the default model) without polynomial features.
do_simple_model(x,y, num_poly=None)

0.682792060680128

In [None]:
# Ridge regression (the default model) with degree-2 polynomial features.
do_simple_model(x,y, num_poly=2)

0.8114806177882217

In [None]:
# Lasso without polynomial features.
# The model name is passed positionally: the do_simple_model defined above
# names this parameter `model_name`, so the `model=` keyword is rejected there.
do_simple_model(x, y, 'lasso', num_poly=None, alpha=0.1)

-0.005667047663951097

In [None]:
# Lasso with degree-4 polynomial features.
# The model name is passed positionally: the do_simple_model defined above
# names this parameter `model_name`, so the `model=` keyword is rejected there.
do_simple_model(x, y, 'lasso', alpha=0.1, num_poly=4)

0.43311576254961603

In [None]:
# LassoCV with degree-2 polynomial features.
# 'lassocv' is not a name do_simple_model recognises; the key is 'lasso-cv'.
# alpha is dropped because the CV estimator is built without it. The model
# name is passed positionally so the call does not depend on whether the
# parameter is called `model` or `model_name`.
do_simple_model(x, y, 'lasso-cv', num_poly=2)

0.599422765667379

In [None]:
# ElasticNet without polynomial features.
# The model name is passed positionally: the do_simple_model defined above
# names this parameter `model_name`, so the `model=` keyword is rejected there.
do_simple_model(x, y, 'elastic', num_poly=None)

-0.0049294150336509635

In [None]:
# ElasticNet with degree-2 polynomial features.
# The model name is passed positionally: the do_simple_model defined above
# names this parameter `model_name`, so the `model=` keyword is rejected there.
do_simple_model(x, y, 'elastic', num_poly=2)

-0.005392408948376115

In [None]:
# ElasticNetCV with degree-2 polynomial features.
# The model name is passed positionally: the do_simple_model defined above
# names this parameter `model_name`, so the `model=` keyword is rejected there.
do_simple_model(x, y, 'elastic-cv', num_poly=2)

0.6408125065163138

It looks like Lasso, Ridge and ElasticNet all perform about the same. None of them approaches the accuracy of the Random Forest or XGBoost models (as seen below). A parameter search is needed to find the optimal values.

In [None]:
def do_simple_model(x, y, model='ridge', num_poly=2, alpha=1.0):
    """Fit one linear model on a random train/validation split and return its validation score.

    NOTE(review): this redefines the `do_simple_model` from an earlier cell
    with a different signature (`model` instead of `model_name`, no `norm_y`,
    no tree models) and returns the score instead of printing it. Once this
    cell has run, callers written for the earlier version fail.

    Args:
        x: feature array.
        y: target array aligned with `x`.
        model: one of 'ridge', 'lasso', 'elastic', 'lasso-cv', 'elastic-cv'.
        num_poly: degree passed to PolynomialFeatures; a falsy value
            (None or 0) skips the expansion.
        alpha: regularisation strength for 'ridge', 'lasso' and 'elastic'.

    Returns:
        The fitted estimator's `score` on the validation split.

    Raises:
        ValueError: if `model` is not one of the supported names.
    """
    # Factories, so only the requested estimator is ever constructed.
    builders = {
        'ridge': lambda: Ridge(alpha=alpha),
        'lasso': lambda: linear_model.Lasso(alpha=alpha),
        'elastic': lambda: ElasticNet(alpha=alpha),
        'lasso-cv': lambda: MultiTaskLassoCV(cv=5, random_state=0),
        'elastic-cv': lambda: MultiTaskElasticNetCV(cv=5, random_state=0),
    }
    if model not in builders:
        # Previously an unknown name left `model` as a plain string and the
        # call failed later with an AttributeError on `.fit`.
        raise ValueError(f'unknown model {model!r}; expected one of {sorted(builders)}')

    x_tr, x_val, y_tr, y_val = train_test_split(x,y)
    if num_poly:
        # Fit the expansion on the training split and reuse it for validation.
        poly = PolynomialFeatures(num_poly)
        x_tr = poly.fit_transform(x_tr)
        x_val = poly.transform(x_val)
    estimator = builders[model]()
    estimator.fit(x_tr, y_tr)
    return estimator.score(x_val, y_val)

In [None]:
# Train and report all three model families for every gate type in gate_dict.
for gate_type in gate_dict:
    train_models(gate_type, random_state=123)

-----Analyzing buffer gates-----

Doing Ridge Regression.....
training score =  0.6868883807464006
test score = 0.7054164133458096


doing RF.....
trainig score =  1.0
test score = 1.0
the mse is 1.5906988750755812e-27
the r2 score is: 1.0


boosting the trees.....
the training score is 0.999999
the validation score is 1.000000
the mse is 0.000005
the r2 score is: 1.000000



-----Analyzing inv_x1 gates-----

Doing Ridge Regression.....
training score =  0.7802399691292567
test score = 0.7843436443699999


doing RF.....
trainig score =  1.0
test score = 1.0
the mse is 1.565594803982376e-28
the r2 score is: 1.0


boosting the trees.....
the training score is 1.000000
the validation score is 1.000000
the mse is 0.000005
the r2 score is: 1.000000



-----Analyzing inv_x2 gates-----

Doing Ridge Regression.....
training score =  0.7799249311934082
test score = 0.7837168811482245


doing RF.....
trainig score =  1.0
test score = 1.0
the mse is 1.7518788573822228e-28
the r2 score is: 1.0


b

All of the R² scores for the random forest and gradient-boosted tree models are 1, and their training and validation scores are all perfect......