In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LassoCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from config.convolutional_features import CONV_FEATURES, TOTAL_CONV_OPS_PER_INPUT, TOTAL_CONV_OPS_PER_BATCH
from pipeline.trainer import Trainer

In [None]:
# Options handed to `Trainer` when it is constructed.
data_config = dict(
    data_dir='training_data',
    cross_validation=10,  # presumably the number of CV folds -- confirm in Trainer
    test_models=["lenet"],  # presumably the models held out for testing -- confirm in Trainer
)


# Per-target settings for the convolutional-layer regressors. Each entry holds
# the values (`is_log`, `degree`, `special_terms_list`) intended for
# `trainer.get_model` when building the model of that target.
CONVOLUTION_PIPELINE = dict(
    power=dict(
        is_log=True,
        degree=3,
        special_terms_list=[TOTAL_CONV_OPS_PER_INPUT, TOTAL_CONV_OPS_PER_BATCH],
    ),
    runtime=dict(
        is_log=False,
        degree=3,
        special_terms_list=[TOTAL_CONV_OPS_PER_INPUT, TOTAL_CONV_OPS_PER_BATCH],
    ),
)


In [None]:
# Build the trainer and load the dataset from every file matching the
# convolutional-layer CSV pattern.
trainer = Trainer(data_config=data_config, features=CONV_FEATURES)

dataset = trainer.get_dataset(pattern='**/convolutional.csv')

# Model for the power target. `is_log` is taken from the config's "is_log"
# entry (not from "degree", which would pass the integer 3 instead of a flag).
power_config = CONVOLUTION_PIPELINE['power']
pipeline = trainer.get_model(
    features_mapping=trainer.dataset_builder.features_mapping,
    polynomial_degree=power_config['degree'],
    is_log=power_config['is_log'],
    special_terms_list=power_config['special_terms_list'],
)

print(pipeline)

In [None]:
%%time
train_features, power_train = dataset.train.input_features.values, dataset.train.power.values
pipeline.fit(train_features, power_train)

In [None]:
# Cross-validation error along the regularisation path of the most recently
# fitted `pipeline` (the power model when the cells are run in order).
ymin, ymax = 0, 100

# Last pipeline step; assumed to be the fitted LassoCV estimator, since the
# attributes read below (alphas_, mse_path_, alpha_) are LassoCV's -- confirm
# against Trainer.get_model.
lasso = pipeline[-1]

# Explicit figure/axes so the curves never land on a stale "current" figure.
fig, ax = plt.subplots()

# One dotted curve per fold, on a logarithmic alpha axis.
ax.semilogx(lasso.alphas_, lasso.mse_path_, linestyle=":")
# Fold-averaged error (mean over the last axis of mse_path_).
ax.plot(
    lasso.alphas_,
    lasso.mse_path_.mean(axis=-1),
    color="black",
    label="Average across the folds",
    linewidth=2,
)
# Alpha selected by cross-validation.
ax.axvline(lasso.alpha_, linestyle="--", color="black", label="alpha: CV estimate")

ax.set_ylim(ymin, ymax)
ax.set_xlabel(r"$\alpha$")
ax.set_ylabel("Mean square error")
ax.legend()
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title("Mean square error on each fold: coordinate descent");

In [None]:
# Model for the runtime target. `is_log` is taken from the config's "is_log"
# entry (False for runtime); reading it from "degree" would pass the truthy
# integer 3 and contradict the configuration.
runtime_config = CONVOLUTION_PIPELINE['runtime']
pipeline = trainer.get_model(
    features_mapping=trainer.dataset_builder.features_mapping,
    polynomial_degree=runtime_config['degree'],
    is_log=runtime_config['is_log'],
    special_terms_list=runtime_config['special_terms_list'],
)

print(pipeline)

In [None]:
%%time
train_features, runtime_train = dataset.train.input_features.values, dataset.train.runtime.values
pipeline.fit(train_features, runtime_train)

In [None]:
# Cross-validation error along the regularisation path of the most recently
# fitted `pipeline` (the runtime model when the cells are run in order).
ymin, ymax = 0, 100

# Last pipeline step; assumed to be the fitted LassoCV estimator, since the
# attributes read below (alphas_, mse_path_, alpha_) are LassoCV's -- confirm
# against Trainer.get_model.
lasso = pipeline[-1]

# Explicit figure/axes so the curves never land on a stale "current" figure.
fig, ax = plt.subplots()

# One dotted curve per fold, on a logarithmic alpha axis.
ax.semilogx(lasso.alphas_, lasso.mse_path_, linestyle=":")
# Fold-averaged error (mean over the last axis of mse_path_).
ax.plot(
    lasso.alphas_,
    lasso.mse_path_.mean(axis=-1),
    color="black",
    label="Average across the folds",
    linewidth=2,
)
# Alpha selected by cross-validation.
ax.axvline(lasso.alpha_, linestyle="--", color="black", label="alpha: CV estimate")

ax.set_ylim(ymin, ymax)
ax.set_xlabel(r"$\alpha$")
ax.set_ylabel("Mean square error")
ax.legend()
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title("Mean square error on each fold: coordinate descent");