# GA

In [1]:
import pandas as pd
import numpy as np

In [11]:
def load_data_single(file_dir):
    asic_45 = pd.read_csv(file_dir + 'asic-45.csv', thousands=',')
    fpga_v4 = pd.read_csv(file_dir + 'fpga-v4.csv', thousands=',')
    fpga_v5 = pd.read_csv(file_dir + 'fpga-v5.csv', thousands=',')
    return asic_45, fpga_v4, fpga_v5

In [14]:
benchmarks = ('average', 'decimation', 'fir', 'interpolation',
              'qsort', 'snow3g', 'sobel')
def load_data_all(benchmarks):
    asic_45, fpga_v4, fpga_v5 = {}, {}, {}
    for i in benchmarks:
        file_dir = 'data/GA/{}/'.format(i)
        asic_45[i], fpga_v4[i], fpga_v5[i] = load_data_single(file_dir)
    return asic_45, fpga_v4, fpga_v5

In [15]:
asic_45, fpga_v4, fpga_v5 = load_data_all(benchmarks)

In [23]:
asic_45['fir'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 589 entries, 0 to 588
Data columns (total 28 columns):
AREA              589 non-null int64
state             589 non-null int64
FU                589 non-null int64
REG               589 non-null int64
MUX               589 non-null int64
DEC               589 non-null int64
pin_pair          589 non-null int64
net               589 non-null int64
max               589 non-null int64
min               589 non-null int64
ave               589 non-null float64
MISC              589 non-null int64
MEM               589 non-null object
CP_delay          589 non-null object
sim               589 non-null object
Pmax              589 non-null object
Pmin              589 non-null object
Pave              589 non-null object
Latency           589 non-null int64
BlockMemoryBit    589 non-null object
DSP               589 non-null object
FU.1              589 non-null int64
ATTR              589 non-null object
ARRAY_1           589 non-null ob

In [26]:
asic_45['decimation']['Attr'][1]

'ATTR=Cyber;ARRAY_1=array=RAM;ARRAY_2=array=RAM;ARRAY_3=array=REG;ARRAY_4=array=RAM;ARRAY_5=array=RAM;ARRAY_6=array=REG;ARRAY_7=array=REG;ARRAY_8=array=RAM;ARRAY_9=array=REG;ARRAY_10=array=RAM;LOOP_1=unroll_times=all;LOOP_2=folding=1;LOOP_3=unroll_times=0;LOOP_4=folding=1;LOOP_5=unroll_times=0;LOOP_6=folding=1;LOOP_7=unroll_times=all;LOOP_8=unroll_times=0;LOOP_9=folding=0;LOOP_10=folding=1;LOOP_11=folding=0;LOOP_12=folding=0;LOOP_13=folding=0;LOOP_14=unroll_times=all;LOOP_15=folding=1;-50'

# ES

In [28]:
def load_data_single(file_dir):
    asic_45 = pd.read_csv(file_dir + 'asic-45.csv', thousands=',')
    fpga_v4 = pd.read_csv(file_dir + 'fpga-v4.csv', thousands=',')
#     fpga_v5 = pd.read_csv(file_dir + 'fpga-v5.csv', thousands=',')
    return asic_45, fpga_v4

In [29]:
benchmarks = ('adpcm_encoder', 'average', 'fir')
def load_data_all(benchmarks):
    asic_45, fpga_v4= {}, {}
    for i in benchmarks:
        file_dir = 'data/ES/{}/'.format(i)
        asic_45[i], fpga_v4[i] = load_data_single(file_dir)
    return asic_45, fpga_v4

asic_45, fpga_v4 = load_data_all(benchmarks)

In [41]:
# combine Attr and Latency
def combine_attr_and_latency(df):
    for i in benchmarks:
        df[i]['Attr_with_L'] = df[i]['Attr'] + ';' + df[i]['Latency'].astype(str)
    return df

asic_45 = combine_attr_and_latency(asic_45)
fpga_v4 = combine_attr_and_latency(fpga_v4)

In [86]:
# remove items with mismatched latency
# fpga_v4_latency_match = {}
data_v4 = {}
for i in benchmarks:
    fpga_v4_latency_match = asic_45[i]['Attr_with_L'] == fpga_v4[i]['Attr_with_L']
    data_v4[i] = pd.concat([asic_45[i], fpga_v4[i][['AREA']].rename(columns={'AREA': 'Slices'})], axis=1)
    data_v4[i] = data_v4[i][fpga_v4_latency_match]

In [92]:
for i in benchmarks:
    correlation_with_slices = data_v4[i].corr()['Slices'].sort_values(ascending=False)
    print(f'[{i}]')
    print(correlation_with_slices[correlation_with_slices > 0])
    print()

[adpcm_encoder]
Slices      1.000000
AREA        0.983713
pin_pair    0.963431
net         0.950855
FU          0.948724
MUX         0.470998
FU.1        0.302408
Name: Slices, dtype: float64

[average]
Slices      1.000000
MUX         0.942432
pin_pair    0.930450
AREA        0.865904
net         0.853217
REG         0.475001
FU          0.458792
Latency     0.029155
Name: Slices, dtype: float64

[fir]
Slices      1.000000
MUX         0.968012
pin_pair    0.678254
REG         0.566631
net         0.555997
AREA        0.532947
MISC        0.445326
state       0.192941
ave         0.183031
min         0.183031
max         0.183031
FU          0.170666
DEC         0.165440
Latency     0.145600
FU.1        0.006865
Name: Slices, dtype: float64



In [93]:
# data for machine learning
training_set_v4 = {}
attributes = ['Slices', 'AREA', 'MUX', 'pin_pair', 'net', 'FU', 'REG', 'MISC', 'state', 'DEC']
for i in benchmarks:
    training_set_v4[i] = data_v4[i][attributes]

In [94]:
# data for training and testing
from sklearn.preprocessing import StandardScaler
X_v4 = {}
y_v4 = {}
X_v4_scaled = {}
scaler = StandardScaler()
for i in benchmarks:
    X_v4[i] = training_set_v4[i][attributes[1:]]
    X_v4_scaled[i] = scaler.fit_transform(X_v4[i])
    y_v4[i] = training_set_v4[i][attributes[0]]

In [95]:
X_v4_scaled['adpcm_encoder']

array([[-1.0142756 ,  0.05718519, -0.9868265 , ..., -0.36152052,
        -0.17034365,  0.        ],
       [-0.86375118, -0.01673104, -0.88346255, ..., -0.65550424,
        -0.5020655 ,  0.        ],
       [-0.86375118, -0.01673104, -0.88346255, ..., -0.65550424,
        -0.5020655 ,  0.        ],
       ..., 
       [-0.90829412,  0.27893389, -0.90433098, ..., -0.0675368 ,
        -0.17034365,  0.        ],
       [-0.79232887, -0.0352101 , -0.87433261, ..., -0.14103273,
        -0.17034365,  0.        ],
       [-0.79232887, -0.0352101 , -0.87433261, ..., -0.14103273,
        -0.17034365,  0.        ]])

In [96]:
# feature selection: recursive feature elimination
# https://machinelearningmastery.com/feature-selection-in-python-with-scikit-learn/
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
rfe = RFE(model, 3)
rfe = rfe.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])

In [97]:
rfe.support_

array([ True, False,  True,  True, False, False, False, False, False], dtype=bool)

In [98]:
rfe.ranking_

array([1, 4, 1, 1, 2, 6, 3, 5, 7])

In [99]:
model.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [100]:
# feature selection: feature importance
model.feature_importances_

array([ 0.44722617,  0.00482289,  0.13520439,  0.11272506,  0.289089  ,
        0.00050145,  0.00902325,  0.00140778,  0.        ])