In [74]:
%run functions.py

# ES

In [75]:
# load data
benchmarks = ('adpcm_encoder', 'average', 'fir')
asic_45, fpga_v4, fpga_v5 = load_data_all(benchmarks, load_fpga_v5=False)

In [76]:
# combine attr and latency
for i in benchmarks:
    asic_45[i] = combine_attr_and_latency(asic_45[i])
    fpga_v4[i] = combine_attr_and_latency(fpga_v4[i])

In [77]:
# merge features and labels, remove items with mismatched latency
data_v4 = {}
for i in benchmarks:
    data_v4[i] = concatenate_filter_data(asic_45[i], fpga_v4[i])

In [78]:
# select features based on correlations
features = select_features(data_v4, benchmarks)
features.append('Slices')

In [79]:
# data for machine learning
training_set_v4 = {}
for i in benchmarks:
    training_set_v4[i] = data_v4[i][features]

In [80]:
# data for training and testing
from sklearn.preprocessing import StandardScaler
X_v4 = {}
y_v4 = {}
X_v4_scaled = {}
scaler = StandardScaler()
for i in benchmarks:
    X_v4[i] = training_set_v4[i][features[:-1]]
    X_v4_scaled[i] = scaler.fit_transform(X_v4[i])
    y_v4[i] = training_set_v4[i][features[-1]]

In [87]:
# feature selection: recursive feature elimination
# https://machinelearningmastery.com/feature-selection-in-python-with-scikit-learn/
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
rfe = RFE(model, 4)
rfe = rfe.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])

In [88]:
rfe.support_

array([False, False,  True, False, False, False,  True, False, False,
        True, False,  True, False], dtype=bool)

In [89]:
rfe.ranking_

array([10,  4,  1,  9,  7,  8,  1,  2,  5,  1,  3,  1,  6])

In [90]:
np.array(features[:-1])[rfe.support_]

array(['pin_pair', 'FU', 'AREA', 'net'], 
      dtype='<U8')

In [85]:
model.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [86]:
# feature selection: feature importance
model.feature_importances_

array([ 0.  ,  0.  ,  0.21,  0.  ,  0.  ,  0.  ,  0.36,  0.01,  0.  ,
        0.19,  0.01,  0.23,  0.  ])