In [2]:
%run ml_header.py

In [3]:
%run get_data.py
# Names of the benchmark designs to load; later cells iterate over this same
# tuple to build the train/test splits.
benchmarks = ('adpcm_encoder', 'aes', 'ann', 'average', 'decimation', 'fft_fixed', 'fir',
              'idct', 'interpolation', 'kasumi', 'qsort', 'snow3g', 'sobel')  # 13 benchmarks in total
# GetData is presumably provided by the get_data.py script run at the top of
# this cell -- confirm. Later cells read per-benchmark tables from gd.data_v4.
gd = GetData('data/ASIC-2-FPGA', benchmarks)
gd.main()

In [15]:
%run methods.py

In [5]:
%run machine_learning.py

In [6]:
import itertools

In [None]:
# Preview every way of choosing 12 of the benchmarks (i.e. each choice leaves
# exactly one benchmark out when `benchmarks` holds 13 names).
list(itertools.combinations(benchmarks, 12))

In [7]:
# Column the estimator is trained to predict.
label = 'Slices'

# Every column kept from a benchmark table before modelling.
features = [
    'AREA', 'state', 'FU', 'REG', 'MUX', 'DEC', 'pin_pair',
    'net', 'max', 'min', 'ave', 'MISC', 'MEM', 'sim', 'Pmax',
    'Pmin', 'Pave', 'Latency', 'BlockMemoryBit', 'DSP', 'Slices',
]

# Columns that stay in the table but are excluded from the model inputs.
invalid_features = ['Slices', 'Latency']

# Model inputs: `features` minus the excluded columns, original order preserved.
valid_features = list(filter(lambda name: name not in invalid_features, features))

In [8]:
def execute(df, estimator, kind):
    """Fit ``estimator`` on a benchmark table, or score its predictions on one.

    In both modes the table is reduced to the ``features`` columns, passed
    through ``ML.fix_missing_data``, split into ``X`` / ``y`` using
    ``valid_features`` and ``label``, and ``X`` is passed through
    ``ML.feature_scaling``.

    Parameters
    ----------
    df : table indexable by a list of column names
        Must contain every column listed in ``features``.
    estimator : object with ``fit(X, y)`` and ``predict(X)``
        Fitted in place when ``kind == 'training'``.
    kind : {'training', 'testing'}
        Selects which of the two modes to run.

    Returns
    -------
    For ``'training'``: the same ``estimator`` object, after ``fit``.
    For ``'testing'``: whatever ``DirectMapping.main`` returns for the table
    whose ``'AREA'`` column has been replaced by the predictions.

    Raises
    ------
    ValueError
        If ``kind`` is neither ``'training'`` nor ``'testing'``. Previously
        such a call ran all preprocessing and then silently returned ``None``.
    """
    # Reject unknown modes up front, before any preprocessing work is done.
    if kind not in ('training', 'testing'):
        raise ValueError(
            "kind must be 'training' or 'testing', got {!r}".format(kind))

    # remove attribute columns (work on a copy so the caller's table is untouched)
    data = df[features].copy()
    # fix missing data
    data = ML.fix_missing_data(data)
    # X, y
    X, y = ML.separate_feature_label(data, valid_features=valid_features, label=label)
    # feature scaling
    # NOTE(review): the scaler only ever sees this call's X, so training and
    # testing tables appear to be scaled independently -- confirm this is intended.
    X = ML.feature_scaling(X)

    if kind == 'training':
        estimator.fit(X, y)
        return estimator

    # kind == 'testing'
    y_pred = estimator.predict(X)
    # NOTE(review): predictions of `label` are written into the 'AREA' column
    # before the table is handed to DirectMapping -- presumably so the mapping
    # is evaluated with predicted values in place of AREA; confirm.
    data['AREA'] = y_pred
    return DirectMapping.main(data)

In [9]:
# Single train/test run: fit the first estimator on the 'adpcm_encoder' table,
# then evaluate it on the 'average' table.
estimator = execute(gd.data_v4['adpcm_encoder'], estimators[0], kind='training')
scores = execute(gd.data_v4['average'], estimator, kind='testing')
# Display the metrics returned by the testing pass.
scores

{'adrs_ave': 0.0,
 'adrs_ave_rms': 0.0,
 'adrs_max': 0.0,
 'adrs_max_rms': 0.0,
 'cardinality': 3,
 'dominance': 1.0,
 'hypervolume': 0.43148731064332085}

In [10]:
# Inspect the coefficients of the most recently fitted estimator
# (presumably one per entry of valid_features, in that order -- confirm).
estimator.coef_

array([  123.3893,   -19.6479,   120.7541,    18.8534,    -5.8628,
          -0.    ,  1292.9906, -1136.0835,   -19.6479,   -19.6479,
         -19.6479,    83.4866,     0.    ,     0.    ,     0.    ,
           0.    ,     0.    ,     0.    ,     0.    ])

In [16]:
%%time
# Train on each single benchmark and score the fitted estimator on every other one.
# Result layout: scores_1[<training benchmark>][<held-out benchmark>] -> metrics.
scores_1 = {}
for benchmarks_train in itertools.combinations(benchmarks, 1):
    # Stack the training benchmark tables into one frame with a fresh index.
    data_train = pd.concat(
        [gd.data_v4[name] for name in benchmarks_train],
        axis=0,
        ignore_index=True,
    )
    estimator = execute(data_train, estimators[0], kind='training')
    key = ';'.join(benchmarks_train)
    # Evaluate on each benchmark that was not part of the training set.
    scores_1[key] = {
        name: execute(gd.data_v4[name], estimator, kind='testing')
        for name in benchmarks
        if name not in benchmarks_train
    }

Wall time: 20.6 s


In [17]:
scores_1

{'adpcm_encoder': {'aes': {'adrs_ave': 0.91120011378505994,
   'adrs_ave_rms': 0.87078676873416361,
   'adrs_max': 2.3922580645161289,
   'adrs_max_rms': 2.2585654010746232,
   'cardinality': 3,
   'dominance': 0.4,
   'hypervolume': 0.31647048221712692},
  'ann': {'adrs_ave': 0.038912742469853462,
   'adrs_ave_rms': 0.17391771356364275,
   'adrs_max': 0.098073555166374782,
   'adrs_max_rms': 0.74437464757074134,
   'cardinality': 7,
   'dominance': 0.3333333333333333,
   'hypervolume': 0.68937674375390501},
  'average': {'adrs_ave': 0.0,
   'adrs_ave_rms': 0.0,
   'adrs_max': 0.0,
   'adrs_max_rms': 0.0,
   'cardinality': 3,
   'dominance': 1.0,
   'hypervolume': 0.43148731064332085},
  'decimation': {'adrs_ave': 0.01003851791253366,
   'adrs_ave_rms': 0.016181873625394656,
   'adrs_max': 0.055118110236220472,
   'adrs_max_rms': 0.057173852376087728,
   'cardinality': 7,
   'dominance': 0.7142857142857143,
   'hypervolume': 0.98913252089785209},
  'fft_fixed': {'adrs_ave': 0.117629523

In [18]:
%%time
# Train on every pair of benchmarks and score the fitted estimator on the rest.
# Result layout: scores_2['<a>;<b>'][<held-out benchmark>] -> metrics.
scores_2 = {}
for benchmarks_train in itertools.combinations(benchmarks, 2):
    # Stack the training benchmark tables into one frame with a fresh index.
    data_train = pd.concat(
        [gd.data_v4[name] for name in benchmarks_train],
        axis=0,
        ignore_index=True,
    )
    estimator = execute(data_train, estimators[0], kind='training')
    key = ';'.join(benchmarks_train)
    # Evaluate on each benchmark that was not part of the training set.
    scores_2[key] = {
        name: execute(gd.data_v4[name], estimator, kind='testing')
        for name in benchmarks
        if name not in benchmarks_train
    }

Wall time: 1min 54s


In [19]:
scores_2.keys()

dict_keys(['adpcm_encoder;aes', 'adpcm_encoder;ann', 'adpcm_encoder;average', 'adpcm_encoder;decimation', 'adpcm_encoder;fft_fixed', 'adpcm_encoder;fir', 'adpcm_encoder;idct', 'adpcm_encoder;interpolation', 'adpcm_encoder;kasumi', 'adpcm_encoder;qsort', 'adpcm_encoder;snow3g', 'adpcm_encoder;sobel', 'aes;ann', 'aes;average', 'aes;decimation', 'aes;fft_fixed', 'aes;fir', 'aes;idct', 'aes;interpolation', 'aes;kasumi', 'aes;qsort', 'aes;snow3g', 'aes;sobel', 'ann;average', 'ann;decimation', 'ann;fft_fixed', 'ann;fir', 'ann;idct', 'ann;interpolation', 'ann;kasumi', 'ann;qsort', 'ann;snow3g', 'ann;sobel', 'average;decimation', 'average;fft_fixed', 'average;fir', 'average;idct', 'average;interpolation', 'average;kasumi', 'average;qsort', 'average;snow3g', 'average;sobel', 'decimation;fft_fixed', 'decimation;fir', 'decimation;idct', 'decimation;interpolation', 'decimation;kasumi', 'decimation;qsort', 'decimation;snow3g', 'decimation;sobel', 'fft_fixed;fir', 'fft_fixed;idct', 'fft_fixed;interp

In [20]:
scores_2['adpcm_encoder;aes']

{'ann': {'adrs_ave': 0.014193121644619354,
  'adrs_ave_rms': 0.072782421655973761,
  'adrs_max': 0.060034305317324184,
  'adrs_max_rms': 0.54334825022020627,
  'cardinality': 10,
  'dominance': 0.5555555555555556,
  'hypervolume': 0.76776671464384405},
 'average': {'adrs_ave': 0.0,
  'adrs_ave_rms': 0.0,
  'adrs_max': 0.0,
  'adrs_max_rms': 0.0,
  'cardinality': 1,
  'dominance': 1.0,
  'hypervolume': 1.0},
 'decimation': {'adrs_ave': 0.010786461068853781,
  'adrs_ave_rms': 0.017426566165660348,
  'adrs_max': 0.055118110236220472,
  'adrs_max_rms': 0.057173852376087728,
  'cardinality': 5,
  'dominance': 0.5714285714285714,
  'hypervolume': 0.99788639751106534},
 'fft_fixed': {'adrs_ave': 0.092572712522141165,
  'adrs_ave_rms': 0.043509585396115553,
  'adrs_max': 0.16666666666666666,
  'adrs_max_rms': 0.099705243467196558,
  'cardinality': 6,
  'dominance': 0.0,
  'hypervolume': 0.78191700106956286},
 'fir': {'adrs_ave': 0.0,
  'adrs_ave_rms': 0.0,
  'adrs_max': 0.0,
  'adrs_max_rms': 

In [21]:
%%time
# Full sweep: for every training-set size from 1 up to len(benchmarks) - 1,
# train on every combination of that size and score on the held-out benchmarks.
# Result layout: scores_v4[str(size)]['<a>;<b>;...'][<held-out benchmark>] -> metrics.
scores_v4 = {}
for benchmarks_cnt in range(1, len(benchmarks)):
    scores = {}
    for benchmarks_train in itertools.combinations(benchmarks, benchmarks_cnt):
        # Stack the training benchmark tables into one frame with a fresh index.
        data_train = pd.concat(
            [gd.data_v4[name] for name in benchmarks_train],
            axis=0,
            ignore_index=True,
        )
        estimator = execute(data_train, estimators[0], kind='training')
        key = ';'.join(benchmarks_train)
        # Evaluate on each benchmark that was not part of the training set.
        scores[key] = {
            name: execute(gd.data_v4[name], estimator, kind='testing')
            for name in benchmarks
            if name not in benchmarks_train
        }
    scores_v4[str(benchmarks_cnt)] = scores

Wall time: 2h 3min 44s
