In [1]:
#import tuxml
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')

#definitions of sampling and the target non-functional property
samples_config = ['distBased', 'divDistBased','henard', 'random', 'solverBased', 'twise']
size_methods = ["Performance"]


In [5]:
#learning a model by using linear regression
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet
from sklearn import metrics
from sklearn import linear_model

def run_regressorML(reg, test_size, size_target, x_train, x_test, y_train, y_test):
    assert(size_target in size_methods)
    reg.fit(x_train, y_train)
    y_pred = reg.predict(x_test)
    dfErrors = pd.DataFrame({'Actual':y_test, 'Predicted':y_pred, "error":(y_pred - y_test).abs(), "% error":((y_pred - y_test)/y_test).abs()*100})
    return dfErrors["% error"].describe()

In [3]:
def ft_importances(coef_, col):
    importanceSeries = pd.Series(coef_, index=col.values)
    return importanceSeries[importanceSeries != 0].abs().sort_values(ascending = False)

In [4]:
df_result  = pd.DataFrame(columns = ['algorithm_name', 'ft_dummyfication', 'origin_ft_selection', 'ft_selection', 'hyperparameters', 'size_target', 'test_size', 'coef_order', 'accuracy'])
df_result

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy


In [43]:
%run feature_importance.ipynb

def run_Algorithms(df, ft_importance_enable, system=None): 
    
    for config_name in samples_config:
        path_name = config_name+"_t3"
        if(system==None):
            path = r"sampledConfigurations_"+path_name+".csv"
        else:
            path = r+system+"/sampledConfigurations_"+path_name+".csv"

        df = pd.read_csv(path, sep=';')
                
        for samplePerc in [0.3,0.4,0.5,0.6]:
                
                #Run fetures engines: #yes and encoding
                df = run_features_engine_yes(df)
                df = run_feature_encoding(df)
                
                #number of features after features engine execution
                num_features = round(samplePerc*len(df.columns))
                
                path_name = config_name+"_t3_features_engine"
                
                #Run feature selection with and without hyperparams (after features engine execution)
                run_featureSelection(df, None, path_name)
                run_featureSelection(df, hyperparams_RF, path_name)
                

                if(system==None):
                    df_importance = pd.read_csv("feature_importance_"+path_name+".csv")
                else:
                    df_importance = pd.read_csv(system+"/feature_importance_"+path_name+".csv")

                df_importance = df[df_importance[:num_features]["Unnamed: 0"].values]
                df_importance.head()

                print("Sample features number: "+ str(num_features))
#                 display(df_importance)
                df_importance.head()

                target = "Performance"
                paramsLasso = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01],
                               'max_iter': [1, 5, 10, 100, 1000],}
                paramsRidge = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01]}
                paramsENet = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01],
                              'l1_ratio': [0.0, 0.5, 1.0, 0.1],
                              'max_iter': [1, 5, 10, 100, 1000],}

                for ft_dummyfication in [False, True]:
                    if ft_dummyfication:
                        #Creating dummy variables in pandas
                        df = pd.get_dummies(df, columns=list(df.drop(columns=size_methods).columns.values))
    #                     df_importance = pd.get_dummies(df_importance, columns=list(df_importance.drop(columns=size_methods).columns.values))
                        df_importance = pd.get_dummies(df_importance, columns=list(df_importance.columns.values))

                    if ft_importance_enable:
                        ft_selection =  {'None': df.drop(columns=size_methods).columns,
                                    'RandomForest': df_importance.columns[:num_features],}

                    else:
                        ft_selection =  {'None': df.drop(columns=size_methods).columns,
                                         'RandomForest': df.columns[:num_features],}

                    for origin_ft_selection in ft_selection:
                        for size in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
                            # size = number of configurations
                            x_train, x_test, y_train, y_test = train_test_split(df[ft_selection[origin_ft_selection]], df[target], test_size=size, random_state=42)  
                            x_train, y_train = np.array(x_train.values), np.array(y_train.values)

#                             print(x_train)
#                             print(y_train)

                            if len(x_train) > 3:

                                model = {'LinearRegression': LinearRegression(),
                                         'Lasso': GridSearchCV(linear_model.Lasso(), param_grid=paramsLasso, cv=3).fit(x_train, y_train).best_estimator_,
                                         'Ridge': GridSearchCV(linear_model.Ridge(), param_grid=paramsRidge, cv=3).fit(x_train, y_train).best_estimator_,
                                         'ElasticNet': GridSearchCV(ElasticNet(), param_grid=paramsENet, scoring='r2', cv=3).fit(x_train, y_train).best_estimator_,
                                         'PolynomialRegression': LinearRegression()}
                            else:
                                if len(x_train) >= 2:
                                    model = {'LinearRegression': LinearRegression(),
                                             'Lasso': GridSearchCV(linear_model.Lasso(), param_grid=paramsLasso, cv=2).fit(x_train, y_train).best_estimator_,
                                             'Ridge': GridSearchCV(linear_model.Ridge(), param_grid=paramsRidge, cv=2).fit(x_train, y_train).best_estimator_,
                                             'ElasticNet': GridSearchCV(ElasticNet(), param_grid=paramsENet, scoring='r2', cv=2).fit(x_train, y_train).best_estimator_,
                                             'PolynomialRegression': LinearRegression()}
                            for key in model:
                                if (key == 'PolynomialRegression') and (origin_ft_selection == 'None'):
                                    print("It doesn't scale")
                                else:
                                    if (key == 'PolynomialRegression'):
                                        #for 2 options we have: 𝑓(𝑥₁, 𝑥₂) = 𝑏₀ + 𝑏₁𝑥₁ + 𝑏₂𝑥₂ + 𝑏₃𝑥₁² + 𝑏₄𝑥₁𝑥₂ + 𝑏₅𝑥₂²
                                        x_train = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x_train)
                                        x_test = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x_test)

                                    res_model = run_regressorML(model[key], test_size=size, size_target=target, x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

                                    if (key == 'PolynomialRegression'):
                                        model[key].coef_ = model[key].coef_[:num_features]

    #                                 print(model[key].coef_)
                                    coef_order = ft_importances(model[key].coef_, col=ft_selection[origin_ft_selection])
                                    df_result.loc[len(df_result)] = [key, ft_dummyfication, origin_ft_selection, ft_selection[origin_ft_selection], model[key], target, size, coef_order, res_model]
                display(df_result)
                if ft_importance_enable:
                    df_result.to_csv("results_with_feature_importance_"+path_name+"_"+str(num_features)+".csv", header=True)
                else:
                    df_result.to_csv("results_"+path_name+"_"+str(num_features)+".csv", header=True)
                display(df_result)
    return df_result

In [None]:
df_result_fi = run_Algorithms(df, True, None)
df_result_nfi = run_Algorithms(df, False, None)

Unnamed: 0,root,encryption,compression,compressionBzip2,compressionGzip,compressionLzo,compressionZpaq,compressionLrzip,level,level1,...,level4,level5,level6,level7,level8,level9,unlimitedWindowSize,disableCompressibilityTesting,Performance,#yes
0,1,0,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,208900,5
1,1,0,1,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,210450,5
2,1,0,1,0,0,0,0,1,1,0,...,0,0,0,0,0,0,0,0,283750,5
3,1,0,1,0,0,0,0,1,1,0,...,1,0,0,0,0,0,0,0,339780,5
4,1,0,1,0,0,0,0,1,1,0,...,0,1,0,0,0,0,0,0,340590,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,1,1,1,0,0,0,1,0,1,0,...,0,0,0,0,0,1,1,1,5765570,8
246,1,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,1,55520,6
247,1,1,0,0,0,0,0,0,1,0,...,0,0,1,0,0,0,1,1,57860,6
248,1,1,0,0,0,0,0,0,1,0,...,0,0,0,1,0,0,1,1,58700,6


Columns eliminated: 


['root', 'level']

                               importance
compressionZpaq                  0.593922
level8                           0.177819
level9                           0.141324
level3                           0.023443
compressionLrzip                 0.022256
level2                           0.011488
level1                           0.011391
level7                           0.007478
compressionBzip2                 0.003773
level6                           0.002243
#yes                             0.001905
level5                           0.001648
compressionGzip                  0.000675
level4                           0.000320
encryption                       0.000217
disableCompressibilityTesting    0.000086
unlimitedWindowSize              0.000009
compression                      0.000002
compressionLzo                   0.000002
                               importance
compressionZpaq                  0.577812
level8                           0.193701
level9                           0

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy
0,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.1,compression 2.058711e+19 ...,count 25.000000 mean 295.603633 std ...
1,Lasso,False,,"Index(['encryption', 'compression', 'compressi...","Lasso(alpha=25, max_iter=5)",Performance,0.1,compressionZpaq 1.763871e+06 ...,count 25.000000 mean 286.640185 std ...
2,Ridge,False,,"Index(['encryption', 'compression', 'compressi...",Ridge(alpha=2),Performance,0.1,compressionZpaq 1.819683e+06 ...,count 25.000000 mean 249.435121 std ...
3,ElasticNet,False,,"Index(['encryption', 'compression', 'compressi...","ElasticNet(alpha=0.02, l1_ratio=0.1, max_iter=5)",Performance,0.1,compressionZpaq 1.689284e+06 ...,count 25.000000 mean 226.158701 std ...
4,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.2,compression 4.035688e+19 ...,count 50.000000 mean 269.164575 std ...
...,...,...,...,...,...,...,...,...,...
489,LinearRegression,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...",LinearRegression(),Performance,0.9,level9_0 6.189306e+04 level9_1 ...,count 225.000000 mean 139.970211 std ...
490,Lasso,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...","Lasso(alpha=25, max_iter=1)",Performance,0.9,level9_0 1.215406e+05 level8_0 8.692855e...,count 225.000000 mean 139.296586 std ...
491,Ridge,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...",Ridge(alpha=25),Performance,0.9,level8_1 11566.664634 level8_0 11566.664...,count 225.000000 mean 149.823539 std ...
492,ElasticNet,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...","ElasticNet(alpha=25, l1_ratio=0.0, max_iter=5)",Performance,0.9,level8_0 625.928283 level8_1 625.928283 ...,count 225.000000 mean 152.679354 std ...


Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy
0,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.1,compression 2.058711e+19 ...,count 25.000000 mean 295.603633 std ...
1,Lasso,False,,"Index(['encryption', 'compression', 'compressi...","Lasso(alpha=25, max_iter=5)",Performance,0.1,compressionZpaq 1.763871e+06 ...,count 25.000000 mean 286.640185 std ...
2,Ridge,False,,"Index(['encryption', 'compression', 'compressi...",Ridge(alpha=2),Performance,0.1,compressionZpaq 1.819683e+06 ...,count 25.000000 mean 249.435121 std ...
3,ElasticNet,False,,"Index(['encryption', 'compression', 'compressi...","ElasticNet(alpha=0.02, l1_ratio=0.1, max_iter=5)",Performance,0.1,compressionZpaq 1.689284e+06 ...,count 25.000000 mean 226.158701 std ...
4,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.2,compression 4.035688e+19 ...,count 50.000000 mean 269.164575 std ...
...,...,...,...,...,...,...,...,...,...
489,LinearRegression,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...",LinearRegression(),Performance,0.9,level9_0 6.189306e+04 level9_1 ...,count 225.000000 mean 139.970211 std ...
490,Lasso,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...","Lasso(alpha=25, max_iter=1)",Performance,0.9,level9_0 1.215406e+05 level8_0 8.692855e...,count 225.000000 mean 139.296586 std ...
491,Ridge,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...",Ridge(alpha=25),Performance,0.9,level8_1 11566.664634 level8_0 11566.664...,count 225.000000 mean 149.823539 std ...
492,ElasticNet,True,RandomForest,"Index(['compressionZpaq_0', 'compressionZpaq_1...","ElasticNet(alpha=25, l1_ratio=0.0, max_iter=5)",Performance,0.9,level8_0 625.928283 level8_1 625.928283 ...,count 225.000000 mean 152.679354 std ...


Unnamed: 0,Performance,encryption_0,encryption_1,compression_0,compression_1,compressionBzip2_0,compressionBzip2_1,compressionGzip_0,compressionGzip_1,compressionLzo_0,...,unlimitedWindowSize_1,disableCompressibilityTesting_0,disableCompressibilityTesting_1,#yes_3,#yes_4,#yes_5,#yes_6,#yes_7,#yes_8,#yes
0,208900,1,0,0,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,0,19
1,210450,1,0,0,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,0,19
2,283750,1,0,0,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,0,19
3,339780,1,0,0,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,0,19
4,340590,1,0,0,1,1,0,1,0,1,...,0,1,0,0,0,1,0,0,0,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,5765570,0,1,0,1,1,0,1,0,1,...,1,0,1,0,0,0,0,0,1,19
246,55520,0,1,1,0,1,0,1,0,1,...,1,0,1,0,0,0,1,0,0,19
247,57860,0,1,1,0,1,0,1,0,1,...,1,0,1,0,0,0,1,0,0,19
248,58700,0,1,1,0,1,0,1,0,1,...,1,0,1,0,0,0,1,0,0,19


Columns eliminated: 


[]

                                   importance
compressionZpaq_1                3.519565e-01
compressionZpaq_0                2.453624e-01
level8_0                         1.067562e-01
level9_0                         8.112260e-02
level8_1                         6.944547e-02
level9_1                         5.520911e-02
level3_0                         1.320370e-02
level3_1                         1.223351e-02
compressionLrzip_0               1.180564e-02
compressionLrzip_1               9.309198e-03
level2_0                         7.335257e-03
level1_1                         6.033126e-03
level2_1                         6.000176e-03
level1_0                         5.445376e-03
level7_0                         3.502513e-03
level7_1                         3.239845e-03
compressionBzip2_1               1.910402e-03
compressionBzip2_0               1.866163e-03
#yes_8                           1.838542e-03
#yes_5                           1.406856e-03
level6_0                         8

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy
0,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.1,compression 2.058711e+19 ...,count 25.000000 mean 295.603633 std ...
1,Lasso,False,,"Index(['encryption', 'compression', 'compressi...","Lasso(alpha=25, max_iter=5)",Performance,0.1,compressionZpaq 1.763871e+06 ...,count 25.000000 mean 286.640185 std ...
2,Ridge,False,,"Index(['encryption', 'compression', 'compressi...",Ridge(alpha=2),Performance,0.1,compressionZpaq 1.819683e+06 ...,count 25.000000 mean 249.435121 std ...
3,ElasticNet,False,,"Index(['encryption', 'compression', 'compressi...","ElasticNet(alpha=0.02, l1_ratio=0.1, max_iter=5)",Performance,0.1,compressionZpaq 1.689284e+06 ...,count 25.000000 mean 226.158701 std ...
4,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.2,compression 4.035688e+19 ...,count 50.000000 mean 269.164575 std ...
...,...,...,...,...,...,...,...,...,...
651,LinearRegression,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...",LinearRegression(),Performance,0.9,compressionLrzip_0_1 1.523343e+05 compressi...,count 225.000000 mean 72.716377 std ...
652,Lasso,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...","Lasso(alpha=25, max_iter=1)",Performance,0.9,compressionLrzip_0_0 2.834679e+05 level9_0_...,count 225.000000 mean 82.876252 std ...
653,Ridge,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...",Ridge(alpha=25),Performance,0.9,compressionLrzip_0_0 41091.052954 compressi...,count 225.000000 mean 126.484859 std ...
654,ElasticNet,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...","ElasticNet(alpha=2, max_iter=10)",Performance,0.9,compressionLrzip_0_0 41090.261606 compressi...,count 225.000000 mean 126.485576 std ...


Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy
0,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.1,compression 2.058711e+19 ...,count 25.000000 mean 295.603633 std ...
1,Lasso,False,,"Index(['encryption', 'compression', 'compressi...","Lasso(alpha=25, max_iter=5)",Performance,0.1,compressionZpaq 1.763871e+06 ...,count 25.000000 mean 286.640185 std ...
2,Ridge,False,,"Index(['encryption', 'compression', 'compressi...",Ridge(alpha=2),Performance,0.1,compressionZpaq 1.819683e+06 ...,count 25.000000 mean 249.435121 std ...
3,ElasticNet,False,,"Index(['encryption', 'compression', 'compressi...","ElasticNet(alpha=0.02, l1_ratio=0.1, max_iter=5)",Performance,0.1,compressionZpaq 1.689284e+06 ...,count 25.000000 mean 226.158701 std ...
4,LinearRegression,False,,"Index(['encryption', 'compression', 'compressi...",LinearRegression(),Performance,0.2,compression 4.035688e+19 ...,count 50.000000 mean 269.164575 std ...
...,...,...,...,...,...,...,...,...,...
651,LinearRegression,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...",LinearRegression(),Performance,0.9,compressionLrzip_0_1 1.523343e+05 compressi...,count 225.000000 mean 72.716377 std ...
652,Lasso,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...","Lasso(alpha=25, max_iter=1)",Performance,0.9,compressionLrzip_0_0 2.834679e+05 level9_0_...,count 225.000000 mean 82.876252 std ...
653,Ridge,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...",Ridge(alpha=25),Performance,0.9,compressionLrzip_0_0 41091.052954 compressi...,count 225.000000 mean 126.484859 std ...
654,ElasticNet,True,RandomForest,"Index(['compressionZpaq_1_0', 'compressionZpaq...","ElasticNet(alpha=2, max_iter=10)",Performance,0.9,compressionLrzip_0_0 41090.261606 compressi...,count 225.000000 mean 126.485576 std ...


Unnamed: 0,Performance,encryption_0_0,encryption_0_1,encryption_1_0,encryption_1_1,compression_0_0,compression_0_1,compression_1_0,compression_1_1,compressionBzip2_0_0,...,#yes_5_0,#yes_5_1,#yes_6_0,#yes_6_1,#yes_7_0,#yes_7_1,#yes_8_0,#yes_8_1,#yes_19,#yes
0,208900,0,1,1,0,1,0,0,1,0,...,0,1,1,0,1,0,1,0,1,43
1,210450,0,1,1,0,1,0,0,1,0,...,0,1,1,0,1,0,1,0,1,43
2,283750,0,1,1,0,1,0,0,1,0,...,0,1,1,0,1,0,1,0,1,43
3,339780,0,1,1,0,1,0,0,1,0,...,0,1,1,0,1,0,1,0,1,43
4,340590,0,1,1,0,1,0,0,1,0,...,0,1,1,0,1,0,1,0,1,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,5765570,1,0,0,1,1,0,0,1,0,...,1,0,1,0,1,0,0,1,1,43
246,55520,1,0,0,1,0,1,1,0,0,...,1,0,0,1,1,0,1,0,1,43
247,57860,1,0,0,1,0,1,1,0,0,...,1,0,0,1,1,0,1,0,1,43
248,58700,1,0,0,1,0,1,1,0,0,...,1,0,0,1,1,0,1,0,1,43


Columns eliminated: 


['#yes_19']

                       importance
compressionZpaq_0_0  1.891650e-01
compressionZpaq_1_0  1.414372e-01
compressionZpaq_0_1  1.389634e-01
compressionZpaq_1_1  1.188897e-01
level8_1_1           6.564784e-02
...                           ...
#yes_4_1             4.300442e-08
#yes_4_0             3.230541e-08
#yes_3_1             9.599642e-09
#yes_3_0             7.319716e-09
#yes                 0.000000e+00

[85 rows x 1 columns]
                       importance
compressionZpaq_0_0  2.419540e-01
compressionZpaq_1_1  1.304202e-01
compressionZpaq_1_0  1.209840e-01
compressionZpaq_0_1  1.181375e-01
level9_1_1           6.354133e-02
...                           ...
#yes_3_0             5.513493e-09
#yes_3_1             1.603696e-09
compressionGzip_1_1  0.000000e+00
compressionLzo_0_0   0.000000e+00
#yes                 0.000000e+00

[85 rows x 1 columns]
Sample features number: 43
It doesn't scale
It doesn't scale
It doesn't scale
It doesn't scale
It doesn't scale
It doesn't scale


In [None]:
df_result['accuracy'][0]

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

rs = []
for i in range(len(df_result_fi['accuracy'])):
    rs.append(df_result_fi['accuracy'][i]['mean'])

# rs
# display (df_result['algorithm_name'], df_result['accuracy'], rs)

plt.scatter(df_result_fi['algorithm_name'], rs, color ="blue", marker = "s")
plt.plot(df_result_fi['algorithm_name'], rs, color ="blue")


In [None]:
# Boxplot
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


data = [model['LinearRegression'].coef_, model['Lasso'].coef_, model['Ridge'].coef_, model['ElasticNet'].coef_, model['PolynomialRegression'].coef_]
fig1, ax1 = plt.subplots()
ax1.set_title('Coeficient of feature importance for all algorithms')
ax1.boxplot(data)
plt.show()