In [1]:
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import numpy as np
import numbers

In [2]:
# Indices of the two fitting modes; used to address the per-mode result
# slots (e.g. coefs[OLS]).
NNLS, OLS = 0, 1

In [3]:
def get_mode_name(mode):
    '''Return name of the current mode.

    Parameters:
        mode: mode index, compared against the module-level OLS constant.

    Returns:
        'OLS' when mode equals OLS, otherwise 'NNLS'.
    '''
    # Compare the argument itself; the previous code tested an unrelated
    # name `i`, which is not defined in this function.
    return 'OLS' if mode == OLS else 'NNLS'

def get_file_label(mode):
    '''Return filesystem-friendly name of the regressand field.

    Parameters:
        mode: value tested with `'Energy' in mode` -- presumably the
            regressand column name; verify against callers.

    Returns:
        'Energy' if mode contains 'Energy', otherwise 'Cycles'.
    '''
    if 'Energy' in mode:
        return 'Energy'
    return 'Cycles'

def display_scores(scores):
    '''Print the scores, then their mean and standard deviation.

    Parameters:
        scores: object exposing .mean() and .std() (e.g. a NumPy array).
    '''
    print("Scores:", scores)
    # Each statistic is looked up and evaluated right before it is printed.
    for label, stat_name in (("Mean:", "mean"), ("Standard deviation:", "std")):
        print(label, getattr(scores, stat_name)())

In [4]:
# Load the input data set from CSV into a DataFrame; the regressand and
# regressor columns are selected from it below.
df = pd.read_csv("stats_and_modeling/COMBINED/48/1/0/data-HW-and-TL.csv")

In [5]:
# Columns of df used as regressors, in the order of the fitted coefficients.
param_list = [
    'Executed insns (no MULS)',
    'MULS insns',
    'Taken branches',
    'RAM data reads',
    'RAM writes',
    'Flash data reads',
    'Flash insn reads',
    'BL insns',
    'PUSH/POP PC/LR',
]

In [6]:
# Name of the df column used as the regressand (y) of the fits below.
target_column = 'HW Cycles (IT adjusted)'

In [7]:
# Multiplicative factor applied to the regressand before fitting.
adjust = 1.0

In [8]:
# Number of decimals the bounded least-squares coefficients are rounded to
# via np.round; None disables the rounding.
# NOTE(review): this name shadows the built-in round() for the rest of the
# notebook -- consider renaming it together with its uses.
round = 5

In [9]:
# Pull the regressand column out of the frame as an array and show it.
y = df[target_column].values
print(y)

[8.73163200e+07 1.07736480e+08 5.18880000e+05 4.00933000e+08
 4.85280000e+07 3.06888000e+07 1.96536000e+07 1.10808000e+07
 4.73133600e+08 1.10771520e+08 1.26065760e+08 9.85972800e+07
 4.35984048e+09 7.06290000e+09 7.85467200e+07 4.79568000e+06
 3.00807840e+08 4.88630000e+07 3.05136000e+06 2.33052370e+10
 1.21296000e+07 1.71552000e+06 2.04345024e+09 1.25347008e+09
 3.46752000e+06 1.62528000e+06 3.28766400e+07 9.60000000e+05
 4.51644000e+08 1.91541600e+08 1.68254208e+09 9.10933920e+08
 4.88007552e+09 9.38337600e+07 2.73515000e+10 3.21152160e+08
 1.21051680e+08 2.43754000e+07 2.34907200e+07 5.86128000e+06
 5.53805000e+07 7.66747200e+07 5.76350400e+07 1.16400000e+06
 1.14739200e+07 3.31665600e+07 5.44980624e+09 2.33492000e+08
 4.98379536e+09 2.41238400e+07 1.53757440e+08 1.17700800e+07
 7.48320480e+09 8.34768000e+06 8.59372800e+07 9.59155200e+07
 7.29811200e+07 3.14754240e+08 2.91341280e+08 3.39794880e+08
 2.50866240e+08 1.40255040e+08 6.03558720e+08 2.43208320e+08
 1.26459005e+10 3.152730

In [10]:
# Adjust the regressand.
# Adjust the regressand: scale every target value by the `adjust` factor.
# NOTE: re-running this cell on its own applies the factor a second time.
y = y * adjust

In [11]:
# Textual dict literal mapping parameter names to fixed coefficient values;
# it is parsed into a dict below. "{}" means that no parameter is fixed.
fixed = "{}"

In [12]:
import ast

# Parse the `fixed` string into a dict of parameter name -> fixed value.
# ast.literal_eval accepts only Python literals, so unlike eval() it cannot
# run arbitrary code embedded in the configuration string.
param_value_dict = ast.literal_eval(fixed)
# View of the names that have an entry in the dict.
fixed_params = param_value_dict.keys()
# Collects the parameters that stay free-running in the fit.
unconstrained_params = []

In [13]:
# When some coefficients are fixed, remove their contribution from y and keep
# only the remaining parameters as regressors.
if param_value_dict:
    for param in param_list:
        has_fixed_value = (
            param in fixed_params
            and isinstance(param_value_dict[param], numbers.Number)
        )
        if not has_fixed_value:
            # No numeric fixed value: the parameter is fitted freely.
            unconstrained_params.append(param)
            continue
        print('')
        print("Ratio of residual/original")
        # Subtract the contribution of the param from the Y vector
        residual = y - (df.loc[:, param].values * param_value_dict[param])
        print(residual / y)
        print('')
        y = residual
    # Reset param list to the free-running parameters only.
    param_list = unconstrained_params

In [14]:
# Regressor matrix: one column per entry of param_list; then show the
# regressand that will be fitted.
x = df[param_list].values
print(y)

[8.73163200e+07 1.07736480e+08 5.18880000e+05 4.00933000e+08
 4.85280000e+07 3.06888000e+07 1.96536000e+07 1.10808000e+07
 4.73133600e+08 1.10771520e+08 1.26065760e+08 9.85972800e+07
 4.35984048e+09 7.06290000e+09 7.85467200e+07 4.79568000e+06
 3.00807840e+08 4.88630000e+07 3.05136000e+06 2.33052370e+10
 1.21296000e+07 1.71552000e+06 2.04345024e+09 1.25347008e+09
 3.46752000e+06 1.62528000e+06 3.28766400e+07 9.60000000e+05
 4.51644000e+08 1.91541600e+08 1.68254208e+09 9.10933920e+08
 4.88007552e+09 9.38337600e+07 2.73515000e+10 3.21152160e+08
 1.21051680e+08 2.43754000e+07 2.34907200e+07 5.86128000e+06
 5.53805000e+07 7.66747200e+07 5.76350400e+07 1.16400000e+06
 1.14739200e+07 3.31665600e+07 5.44980624e+09 2.33492000e+08
 4.98379536e+09 2.41238400e+07 1.53757440e+08 1.17700800e+07
 7.48320480e+09 8.34768000e+06 8.59372800e+07 9.59155200e+07
 7.29811200e+07 3.14754240e+08 2.91341280e+08 3.39794880e+08
 2.50866240e+08 1.40255040e+08 6.03558720e+08 2.43208320e+08
 1.26459005e+10 3.152730

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import KFold
from sklearn.base import clone

In [16]:
# Manual 10-fold cross-validation of an intercept-free linear regression,
# reporting the coefficients, RMSE and R2 of every fold.
regressor = LinearRegression(fit_intercept=False)
# RepeatedKFold randomizes the folds; a fixed seed keeps the splits and the
# scores printed below reproducible across kernel restarts.
rkf = RepeatedKFold(n_splits=10, n_repeats=1, random_state=42)
scrs = []  # RMSE of every fold, in fold order
count = 0
for count, (train_index, test_index) in enumerate(rkf.split(x), start=1):
    # Fit a fresh, unfitted copy so no state carries over between folds.
    clone_regressor = clone(regressor)
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clone_regressor.fit(X_train, y_train)
    y_pred = clone_regressor.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    scrs.append(rmse)
    print("")
    print("-------Model using Fold No " + str(count) + "-------")
    print("Coef:", clone_regressor.coef_)
    print("Score:", rmse)
    print("R2 Score:", r2_score(y_test, y_pred))


-------Model using Fold No 1-------
Coef: [0.98943264 1.0008391  2.00312182 0.99839604 1.00529889 0.99146199
 0.01248459 0.93976833 1.03683859]
Score: 123020.2889789201
R2 Score: 0.9999999676834664

-------Model using Fold No 2-------
Coef: [0.98209243 0.9877265  1.99525145 1.00231353 0.99466371 0.9945129
 0.02773495 0.95153317 1.03578357]
Score: 1087436.891583627
R2 Score: 0.9999989757601249

-------Model using Fold No 3-------
Coef: [0.98946299 1.00086398 2.00301513 0.99843435 1.00506962 0.99154006
 0.01245443 0.94030776 1.03669666]
Score: 88244.50173156077
R2 Score: 0.9999999974723458

-------Model using Fold No 4-------
Coef: [9.96159151e-01 1.00459595e+00 1.99943062e+00 9.98618128e-01
 1.00110670e+00 9.92287034e-01 1.08676389e-03 9.79199457e-01
 1.00088538e+00]
Score: 1257744.5986741087
R2 Score: 0.9999998756110965

-------Model using Fold No 5-------
Coef: [0.99476263 1.0053987  2.00429497 1.00100932 0.99490931 0.99138967
 0.00214238 0.9846323  1.05032929]
Score: 1592688.7009587

In [17]:
# Convert the per-fold RMSE list to an array (display_scores calls .mean()
# and .std() on it) and print the summary.
scrs_array = np.asarray(scrs)
display_scores(scrs_array)

Scores: [ 123020.28897892 1087436.89158363   88244.50173156 1257744.59867411
 1592688.70095877  588837.36879584  772783.36460553  416488.10212736
  456848.03662706  192923.01671128]
Mean: 657701.4870794055
Standard deviation: 485965.1965950298


In [22]:
from sklearn.model_selection import cross_val_score
# Evaluate score by cross validation
regressor2 = LinearRegression(fit_intercept=False)
scores = cross_val_score(regressor2, x, y, scoring="neg_mean_squared_error", cv=10)
# The scorer returns the negated MSE per fold, so -scores is the MSE.
rmse_scores = np.sqrt(-scores)
# np.sqrt does not raise on invalid input, it yields NaN; check the result
# explicitly instead of relying on a bare `except:` that never fires (and
# that would have left rmse_scores undefined for the call below).
if not np.all(np.isfinite(rmse_scores)):
    print("### np.sqrt(-scores) failed, scores = " + str(scores))
display_scores(rmse_scores)

Scores: [ 586006.57272413 1570941.83569659 2498356.61550217 1271106.334445
  174371.71746824  103857.87057609  165611.47428437  119682.43460323
   52809.12027529   30558.75104579]
Mean: 657330.272662089
Standard deviation: 802159.1993910861


In [19]:
# One result slot per fitting mode in each list (slots are addressed with
# the mode constants, e.g. coefs[OLS]); every list is a separate object.
coefs, predicted, outliers = ([None, None] for _ in range(3))

In [20]:
# Fit the intercept-free linear regression on the complete data set and keep
# its coefficients and in-sample predictions in the OLS result slots.
print("")
print("-------Final Model using all data  -------")
regressor3 = LinearRegression(fit_intercept=False).fit(x, y)
pred = regressor3.predict(x)
print(param_list)
# Widen the print width so all coefficients fit on one line.
with np.printoptions(linewidth=200):
    print(regressor3.coef_)
coefs[OLS], predicted[OLS] = regressor3.coef_, pred


-------Final Model using all data  -------
['Executed insns (no MULS)', 'MULS insns', 'Taken branches', 'RAM data reads', 'RAM writes', 'Flash data reads', 'Flash insn reads', 'BL insns', 'PUSH/POP PC/LR']
[0.98963003 1.001042   2.00309931 0.99842728 1.00505224 0.99153485 0.01213653 0.94068954 1.03602931]


In [21]:
# Use constrained non-negative coefficients

print("Coefficients constrained to non-negative values, least-squares method")
from scipy.optimize import lsq_linear

# Bounds applied to every coefficient: [0, +inf).
lb = 0
# np.inf is the supported spelling; the np.Inf alias was removed in NumPy 2.0.
ub = np.inf
res = lsq_linear(x, y, bounds=(lb, ub))

# Round the coefficients if requested to.
print(param_list)
if round is not None:
    # `round` here is the notebook-level number of decimals, not the built-in.
    res.x = np.round(res.x, round)

# Widen the print width so all coefficients fit on one line.
with np.printoptions(linewidth=200):
    print(res.x)

Coefficients constrained to non-negative values, least-squares method
['Executed insns (no MULS)', 'MULS insns', 'Taken branches', 'RAM data reads', 'RAM writes', 'Flash data reads', 'Flash insn reads', 'BL insns', 'PUSH/POP PC/LR']
[0.98963 1.00104 2.0031  0.99843 1.00505 0.99153 0.01214 0.94069 1.03603]
