In [14]:
# Execute functions.py inside the notebook namespace. The helpers called in
# later cells (GetData, combine_attr_and_latency, measure_rmse, ...) are not
# defined in this notebook, so they presumably come from that script — TODO confirm.
%run functions.py

# ES

In [15]:
# load data
# Benchmark names; later cells use them as dictionary keys when looping per benchmark.
benchmarks = ('adpcm_encoder', 'average', 'fir')
# Previous loader, currently disabled.
# NOTE(review): later cells still read asic_45 / fpga_v4, which only this
# disabled call would define in the visible notebook — confirm where those
# names are expected to come from now.
# asic_45, fpga_v4, fpga_v5 = load_data_all(benchmarks, load_fpga_v5=False)
# GetData is not defined in this notebook (presumably provided by functions.py).
gd = GetData('data/ES', benchmarks, load_fpga_v5=False)

In [16]:
# Call the GetData object's main() entry point.
# NOTE(review): its effects are not visible from this notebook — presumably it
# loads/prepares the benchmark data; confirm in functions.py.
gd.main()

In [None]:
# combine attr and latency
# For every benchmark, replace the ASIC entry and then the FPGA entry with the
# output of combine_attr_and_latency (same call order as a flat per-benchmark loop).
# NOTE(review): the dictionaries are overwritten in place, so re-running this
# cell feeds already-combined data back into the helper.
for bench in benchmarks:
    for table in (asic_45, fpga_v4):
        table[bench] = combine_attr_and_latency(table[bench])

In [None]:
# merge features and labels, remove items with mismatched latency
# One merged entry per benchmark, keyed by benchmark name.
data_v4 = {
    bench: concatenate_filter_data(asic_45[bench], fpga_v4[bench])
    for bench in benchmarks
}

In [None]:
# feature selection: recursive feature elimination
# https://machinelearningmastery.com/feature-selection-in-python-with-scikit-learn/
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
# Fixed seed so the set of selected features is reproducible across runs.
model = RandomForestRegressor(random_state=42)
# n_features_to_select must be passed by keyword: scikit-learn deprecated the
# positional form in 0.23 and rejects it from 1.0 onwards.
rfe = RFE(model, n_features_to_select=4)
# NOTE(review): X_v4_scaled / y_v4 are not defined in the visible cells —
# confirm they exist before this cell runs.
rfe = rfe.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])

In [None]:
# Boolean mask over the input features: True marks a feature kept by RFE.
rfe.support_

In [None]:
# Rank of every input feature; the features RFE kept have rank 1.
rfe.ranking_

In [None]:
# feature selection: feature importance
# RFE fits clones of the estimator it is given, so `model` itself is still
# unfitted at this point and reading feature_importances_ directly would raise
# NotFittedError. Fit it on the full feature set first so the importances
# cover every input feature.
model.fit(X_v4_scaled['adpcm_encoder'], y_v4['adpcm_encoder'])
model.feature_importances_

## Hands-On ML Chapter 2

In [None]:
# Key into data_v4 selecting the benchmark whose data is used for training below.
benchmark_training = 'adpcm_encoder'

In [None]:
# Data of the benchmark chosen for training.
data_train = data_v4[benchmark_training]
# Histogram of the 'Latency' column using 50 bins.
data_train['Latency'].hist(bins=50)
plt.show()

In [None]:
# Print each benchmark name followed by the direct_mapping result for its data.
for bench in benchmarks:
    print(bench)
    mapping_result = direct_mapping(data_v4[bench])
    print(mapping_result)

In [None]:
# One trade-off figure per benchmark, preceded by the benchmark name.
for bench in benchmarks:
    print(bench)
    bench_data = data_v4[bench]
    visulize_trade_off(bench_data)
    # Show inside the loop so every benchmark gets its own figure.
    plt.show()

In [None]:
# split data
# stratify_split_data is defined outside this notebook. The 'Latency' column is
# passed as the second argument — presumably the stratification key (TODO confirm).
strat_train_set, strat_test_set = stratify_split_data(data_train, data_train['Latency'])

In [None]:
# Pairwise correlations between the columns of the training split.
corr_matrix = strat_train_set.corr()
# Correlation of every column with 'Slices', largest value first.
corr_matrix['Slices'].sort_values(ascending=False)

In [None]:
# Training inputs: every column of the training split except 'Latency'.
# Note that the 'Slices' target column is still part of X_train at this point;
# later cells subset it with X_train[features] before fitting.
X_train = strat_train_set.drop(columns=['Latency'])
# Training target: an independent copy of the 'Slices' column.
y_train = strat_train_set['Slices'].copy()

In [None]:
# select features
# select_features is defined outside this notebook.
# NOTE(review): presumably it picks columns of X_train by their relation to
# 'Slices' with 0 as the threshold — confirm in functions.py.
features = select_features(X_train, 'Slices', 0)
# Display the selected feature names.
features

In [None]:
# Display X_train for inspection.
X_train

In [None]:
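# deal with missing values (currently disabled)
# NOTE: sklearn.preprocessing.Imputer was removed in scikit-learn 0.22; if this
# cell is re-enabled, use sklearn.impute.SimpleImputer(strategy='median') instead.
# from sklearn.preprocessing import Imputer
# imputer = Imputer(strategy='median')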

# feature_num = list(corr_matrix.keys())
# feature_num.remove('Slices')
# feature_num.remove('DEC')
# X_train_num = X_train[feature_num]
# imputer.fit(X_train_num)

# X_train_num_transformed = imputer.transform(X_train_num)

# X_train_num_tr = pd.DataFrame(X_train_num_transformed, columns=X_train_num.columns)

In [None]:
# feature scaling
from sklearn.preprocessing import StandardScaler
# Learn the per-feature mean and standard deviation from the selected training columns...
scaler = StandardScaler().fit(X_train[features])
# ...then standardise those same columns with the fitted scaler.
X_train_num_tr_scaled = scaler.transform(X_train[features])

In [None]:
# Or use pipeline for the above preprocessing
# from sklearn.pipeline import Pipeline
# num_pipeline = Pipeline([
#     ('imputer', Imputer(strategy='median')),
#     ('std_scaler', StandardScaler()),
# ])
# X_train_num_tr_scaled = num_pipeline.fit_transform(X_train_num)

In [None]:
# Independent copy of the scaled training features; this is what the models below are fitted on.
X_train_prepared = X_train_num_tr_scaled.copy()

In [None]:
# select and train a model
from sklearn.linear_model import LinearRegression
# fit() returns the estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X_train_prepared, y_train)
# Error measured on the very data the model was fitted on.
measure_rmse(lin_reg, X_train_prepared, y_train)

In [None]:
from sklearn.tree import DecisionTreeRegressor
# fit() returns the estimator itself, so construction and fitting can be chained.
tree_reg = DecisionTreeRegressor().fit(X_train_prepared, y_train)
# Error measured on the very data the tree was fitted on.
measure_rmse(tree_reg, X_train_prepared, y_train)

In [None]:
# measure_cv is defined outside this notebook — presumably it cross-validates
# the given estimator on the training data (TODO confirm). Here: linear regression.
scores_cv = measure_cv(lin_reg, X_train_prepared, y_train)

In [None]:
# Same measure_cv call for the decision tree; note that scores_cv is rebound,
# so the value from the linear-regression cell is no longer reachable by name.
scores_cv = measure_cv(tree_reg, X_train_prepared, y_train)

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Fixed seed: a random forest is stochastic, so without it the scores below
# (and every later result that uses forest_reg) change from run to run.
forest_reg = RandomForestRegressor(random_state=42)
# measure_cv is defined outside this notebook; scores_cv is rebound here as in
# the previous cells.
scores_cv = measure_cv(forest_reg, X_train_prepared, y_train)

In [None]:
# Fit the forest on the full prepared training set.
forest_reg.fit(X_train_prepared, y_train)
# Error measured on the very data the forest was fitted on.
measure_rmse(forest_reg, X_train_prepared, y_train)
# forest_reg.feature_importances_

In [None]:
# fine tune the model
from sklearn.model_selection import GridSearchCV
# Two sub-grids: the first leaves bootstrap at its default, the second
# explicitly disables bootstrapping.
param_grid = [
    dict(n_estimators=[3, 10, 30], max_features=[2, 4, 6]),
    dict(bootstrap=[False], n_estimators=[3, 10], max_features=[2, 3, 4]),
]
# 5-fold search scored by negative MSE (higher is better).
grid_search = GridSearchCV(
    estimator=forest_reg,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train_prepared, y_train)

In [None]:
# Parameter combination that achieved the best cross-validated score in the grid search.
grid_search.best_params_

In [None]:
# Error of the grid search's best estimator, measured on the training data itself.
measure_rmse(grid_search.best_estimator_, X_train_prepared, y_train)

In [None]:
# train_one_benchmark is defined outside this notebook; it receives the full
# data of the training benchmark (not only the training split).
# Note: this rebinds `model`, which earlier held the estimator passed to RFE.
model = train_one_benchmark(data_train)

In [None]:
# testing
# Held-out inputs restricted to the selected feature columns.
X_test = strat_test_set[features].copy()
# Held-out target: the 'Slices' column.
y_test = strat_test_set['Slices'].copy()

In [None]:
# Scale the test features with the statistics learned from the training split.
# Calling fit_transform here would re-fit `scaler` on the test data, so the
# test set would be standardised differently from the training data (and the
# training statistics held by `scaler` would be overwritten).
X_test_prepared = scaler.transform(X_test)

In [None]:
# Error of the train_one_benchmark model on the scaled held-out split.
measure_rmse(model, X_test_prepared, y_test)

In [None]:
# Rebind X_test / y_test to the complete 'adpcm_encoder' data set rather than
# the held-out split.
# NOTE(review): this presumably includes the rows the models were trained on,
# since the training split was derived from the same benchmark — confirm this
# is intended.
X_test = data_v4['adpcm_encoder'][features].copy()
y_test = data_v4['adpcm_encoder']['Slices'].copy()

In [None]:
# Reuse the scaler fitted on the training split. Calling fit_transform here
# would re-fit it on the evaluation data, so forest_reg would be scored on
# inputs standardised differently from the ones it was trained on.
X_test_prepared = scaler.transform(X_test)

In [None]:
# Print the median of the evaluation target, then the measure_rmse result for
# forest_reg on the same evaluation data.
print(y_test.median())
print(measure_rmse(forest_reg, X_test_prepared, y_test))