In [1]:
#import tuxml
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')

#definitions
# systems = ['7z','BerkeleyDBC','Dune','Hipacc','Irzip','LLVM','Polly','x264']
# systems = ['LLVM','x264','BerkeleyDBC','Irzip','Polly','7z','Hipacc','Dune']
systems = ['LLVM']

# samples_config = ['random']
samples_config = ['distBased', 'divDistBased','henard', 'random', 'solverBased', 'twise']
size_methods = ["Performance"]


In [2]:
#learning a model by using linear regression
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.linear_model import ElasticNet

def run_regressorML(reg, test_size, size_target, x_train, x_test, y_train, y_test):
    assert(size_target in size_methods)
    reg.fit(x_train, y_train)       
    y_pred = reg.predict(x_test)
    #y_pred = reg.intercept_ + np.sum(reg.coef_ * x_test.values, axis=1)
    dfErrors = pd.DataFrame({'Actual':y_test, 'Predicted':y_pred, "error":(y_pred - y_test).abs(), "% error":((y_pred - y_test)/y_test).abs()*100})
    return dfErrors["% error"].describe()

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
import pandas as pd

def ft_importances(coef_, col):
    importanceSeries = pd.Series(coef_, index=col.values)
    return importanceSeries[importanceSeries != 0].abs().sort_values(ascending = False)

def ft_importances_RF(X, y):
    model = RandomForestRegressor(random_state=42)
    model.fit(X, y)

    feature_importances = model.feature_importances_
    features = pd.DataFrame({'feature': X.columns, 'importance': feature_importances})
    features.sort_values('importance', ascending=False, inplace=True)
    
    return features

def ft_importances_DT(X, y):
    model = tree.DecisionTreeRegressor(random_state=42)
    model.fit(X, y)

    feature_importances = model.feature_importances_
    features = pd.DataFrame({'feature': X.columns, 'importance': feature_importances})
    features.sort_values('importance', ascending=False, inplace=True)
    
    return features

def ft_importances_GB(X, y):
    model = GradientBoostingRegressor(random_state=42)
    model.fit(X, y)

    feature_importances = model.feature_importances_
    features = pd.DataFrame({'feature': X.columns, 'importance': feature_importances})
    features.sort_values('importance', ascending=False, inplace=True)
    
    return features


In [4]:
df_result  = pd.DataFrame(columns = ['algorithm_name', 'ft_dummyfication', 'origin_ft_selection', 'ft_selection', 'hyperparameters', 'size_target', 'test_size', 'coef_order', 'accuracy', 'num_features'])
df_result

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features


In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
from sklearn import linear_model
from sklearn import metrics
from sklearn import ensemble
from IPython.core.debugger import set_trace

%run feature_importance-all_systems.ipynb


def run_Algorithms(df, ft_importance_enable, system=None):
    
    samples_config = ['distBased', 'divDistBased','henard', 'random', 'solverBased', 'twise']
    size_methods = ["Performance"]
    
    for config_name in samples_config:
        
        df_result = pd.DataFrame(columns = ['algorithm_name', 'ft_dummyfication', 'origin_ft_selection', 'ft_selection', 'hyperparameters', 'size_target', 'test_size', 'coef_order', 'accuracy', 'num_features'])
        
        print(config_name)
        path_name = config_name+"_t3"
        if(system==None):
            path = r"sampledConfigurations_"+path_name+".csv"
        else:
            path = system+"/sampledConfigurations_"+path_name+".csv"
            
        if not ft_importance_enable:
            samplePerc_lst = [1.0]
        else:
            samplePerc_lst = [0.3,0.4,0.5,0.6]
            
        for samplePerc in samplePerc_lst:
            print(samplePerc)
            df = pd.read_csv(path, sep=';')
            
            if ft_importance_enable:
                #Run fetures engines: #yes and encoding
                df = run_feature_encoding(run_features_engine_yes(df))

                path_name = config_name+"_t3_features_engine"
            else:
                df = run_features_engine_yes(df)

            
            #display(df)
            #number of features after features engine execution
            num_features = round(samplePerc*len(df.columns))

            start_time_sample = pd.Timestamp.now()
            
            
            #Run feature selection
            if ft_importance_enable:
                run_featureSelection(df, system, None, path_name,'LinearRegression')
                df_importance = pd.read_csv(system+f"/feature_importance_{path_name}_RL.csv")

                print("Sample features number: "+ str(num_features))
                df_importance = df[df_importance[:num_features]["Unnamed: 0"].values]
                df_importance.head()


            target = "Performance"
            paramsLasso = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01],
                           'max_iter': [1, 5, 10, 100, 1000],}
            paramsRidge = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01]}
            paramsENet = {'alpha': [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01],
                          'l1_ratio': [0.0, 0.5, 1.0, 0.1],
                          'max_iter': [1, 5, 10, 100, 1000],}
            paramsRF =  {"max_depth":18,"n_estimators":50,"verbose":2}

            #Modifiquei os dicion√°rios para quando a feature importance est√° ativada. Acho que n√£o fazia muito sentido 
            #manter o uso do random forest quando feature importance for False, da mesma forma que n√£o √© vi√°vel manter
            #o None quando est√° ativado
            
            if ft_importance_enable:
                ft_selection =  {'DecisionTree': df_importance.columns[:num_features]}
            else:
                ft_selection =  {'None': df.drop(columns=size_methods).columns}

            for origin_ft_selection in ft_selection:
                print(origin_ft_selection)
                if ft_importance_enable:
                    print('IN FT_LOOP DF IMPORTANCE\n')
                    display(df_importance)

                for size in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
                    config_start_time = pd.Timestamp.now()

                    # size = number of configurations
                    x_train, x_test, y_train, y_test = train_test_split(df[ft_selection[origin_ft_selection]], df[target], train_size=size, random_state=42)
                    x_train, y_train = np.array(x_train.values), np.array(y_train.values)


                    if len(x_train) > 3:

                        model = {'LinearRegression': LinearRegression(),
                                 'Lasso': GridSearchCV(linear_model.Lasso(), param_grid=paramsLasso, cv=3).fit(x_train, y_train).best_estimator_,
                                 'Ridge': GridSearchCV(linear_model.Ridge(), param_grid=paramsRidge, cv=3).fit(x_train, y_train).best_estimator_,
                                 'ElasticNet': GridSearchCV(ElasticNet(), param_grid=paramsENet, scoring='r2', cv=3).fit(x_train, y_train).best_estimator_,
                                 'PolynomialRegression': LinearRegression(),
                                 'RandomForest': ensemble.RandomForestRegressor(),
                                 'GradientBoostingTree': ensemble.GradientBoostingRegressor(),
                                 'DecisionTree': tree.DecisionTreeRegressor()}
                    else:
                        if len(x_train) >= 2:
                            model = {'LinearRegression': LinearRegression(),
                                     'Lasso': GridSearchCV(linear_model.Lasso(), param_grid=paramsLasso, cv=2).fit(x_train, y_train).best_estimator_,
                                     'Ridge': GridSearchCV(linear_model.Ridge(), param_grid=paramsRidge, cv=2).fit(x_train, y_train).best_estimator_,
                                     'ElasticNet': GridSearchCV(ElasticNet(), param_grid=paramsENet, scoring='r2', cv=2).fit(x_train, y_train).best_estimator_,
                                     'PolynomialRegression': LinearRegression(),
                                     'RandomForest': ensemble.RandomForestRegressor(),
                                     'GradientBoostingTree': ensemble.GradientBoostingRegressor(),
                                     'DecisionTree': tree.DecisionTreeRegressor()}

                    for key in model:
                        print(key)
                        if (key == 'PolynomialRegression') and (origin_ft_selection == 'None'):
                            print("It doesn't scale")
                        else:
                            if (key == 'PolynomialRegression'):
                                #for 2 options we have: ùëì(ùë•‚ÇÅ, ùë•‚ÇÇ) = ùëè‚ÇÄ + ùëè‚ÇÅùë•‚ÇÅ + ùëè‚ÇÇùë•‚ÇÇ + ùëè‚ÇÉùë•‚ÇÅ¬≤ + ùëè‚ÇÑùë•‚ÇÅùë•‚ÇÇ + ùëè‚ÇÖùë•‚ÇÇ¬≤
                                x_train = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x_train)
                                x_test = PolynomialFeatures(degree=2, include_bias=False).fit_transform(x_test)

                            res_model = run_regressorML(model[key], test_size=size, size_target=target, x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test)

                            if (key == 'PolynomialRegression'):
                                model[key].coef_ = model[key].coef_[:num_features]
                                
                            if (key == 'DecisionTree' or key == 'RandomForest' or key == 'GradientBoostingTree'):
                                coef_order = ft_importances(res_model[:num_features], col=ft_selection[origin_ft_selection])
                            else:
                                coef_order = ft_importances(model[key].coef_, col=ft_selection[origin_ft_selection])

#                             df_result.loc[len(df_result)] = [key, ft_dummyfication, origin_ft_selection, ft_selection[origin_ft_selection], model[key], target, size, coef_order, res_model]
                            df_result.loc[len(df_result)] = [key, False, origin_ft_selection, ft_selection[origin_ft_selection], model[key], target, size, coef_order, res_model, str(num_features)]
                            display(df_result)
                            
                        config_end_time = pd.Timestamp.now()
                        exec_time = [config_name,samplePerc,num_features,ft_importance_enable,origin_ft_selection, key, size,False,config_start_time,config_end_time]
    #             exec_time = [config_name,samplePerc,num_features,ft_importance_enable,origin_ft_selection, key, size,ft_dummyfication,config_start_time,config_end_time]
                        df_time.loc[df_time.size] = exec_time

            sample_end_time = pd.Timestamp.now()
            exec_time = [config_name,samplePerc,num_features,ft_importance_enable,origin_ft_selection, key, None,None,start_time_sample,sample_end_time]
            df_time.loc[df_time.size] = exec_time
#             display(df_result)

            if ft_importance_enable:
                df_result.to_csv("results/"+system+"/"+system+"-results_with_feature_importance_"+path_name+"_"+str(num_features)+"_v3_RL.csv", header=True)
            else:
                df_result.to_csv("results/"+system+"/"+system+"-results_"+path_name+"_full_v3_RL.csv", header=True)

        display(df_result)
                                                                                                                                  
    if ft_importance_enable:
        time_file_name = "results/"+system+"/"+system+"-simulation_time_information-with_feature_importance_v3_RL.csv"
    else:
        time_file_name = "results/"+system+"/"+system+"-simulation_time_information_v3_RL.csv"
    
    df_time.to_csv(time_file_name)
        
    return df_result

In [None]:
df_time = pd.DataFrame(columns=['SampleAlgorithm','%FeatureSelection','#Features','FeatureImportance','OriginalFS','Algorithm','%Configurations','Dummy','StartTime', 'EndTime'])

for system in systems:
    print(system)
    
    # print("\n\n-----------------------------------------------------------------------------")
    print("\n--------------- System "+ system+ " training WITHOUT features importance ")
    print("\n--------------- Start time:: " + str(pd.Timestamp.now()))
    print("\n-----------------------------------------------------------------------------")
    df_result_nfi = run_Algorithms(None, False, system)
    print("\n\n-----------------------------------------------------------------------------")
    print("\n--------------- System "+ system+ " training ")
    print("\n--------------- End time:: " + str(pd.Timestamp.now()))
    print("\n-----------------------------------------------------------------------------")
    
    print("\n\n-----------------------------------------------------------------------------")
    print("\n--------------- System "+ system+ " training WITH features importance ")
    print("\n--------------- Start time:: " + str(pd.Timestamp.now()))
    print("\n-----------------------------------------------------------------------------")
    df_result_fi = run_Algorithms(None, True, system)
    print("\n\n-----------------------------------------------------------------------------")
    print("\n--------------- System "+ system+ " training ")
    print("\n--------------- End time:: " + str(pd.Timestamp.now()))
    print("\n-----------------------------------------------------------------------------")

    

LLVM

--------------- System LLVM training WITHOUT features importance 

--------------- Start time:: 2024-02-09 16:50:42.232737

-----------------------------------------------------------------------------
distBased
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,sccp 2.074039e+01 inline ...,count 158.000000 mean 5.028969 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=0.2, max_iter=100)",Performance,0.1,sccp 15.429462 inline ...,count 158.000000 mean 4.595675 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.3),Performance,0.1,sccp 15.916759 inline ...,count 158.000000 mean 4.524758 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.05, l1_ratio=0.1, max_iter=10)",Performance,0.1,licm 11.536016 sccp ...,count 158.000000 mean 4.368042 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 158.000000 mean 4.121166 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(),Performance,0.9,licm 10.199090 gvn ...,count 18.000000 mean 2.937313 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.1, l1_ratio=1.0, max_iter=100)",Performance,0.9,licm 13.439089 gvn ...,count 18.000000 mean 2.953826 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 3.442993 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 3.111504 std ...,14


divDistBased
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingT

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,instcombine 1.594077e+01 gvn ...,count 158.000000 mean 3.958103 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=0.2, max_iter=10)",Performance,0.1,instcombine 18.965952 gvn ...,count 158.000000 mean 3.885533 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.1),Performance,0.1,instcombine 15.384956 gvn ...,count 158.000000 mean 3.919000 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.02, l1_ratio=0.1, max_iter=10)",Performance,0.1,instcombine 18.175406 gvn ...,count 158.000000 mean 3.959691 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 158.000000 mean 4.538527 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(),Performance,0.9,licm 13.535844 gvn ...,count 18.000000 mean 2.530435 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.3, l1_ratio=1.0, max_iter=5)",Performance,0.9,licm 18.470565 gvn ...,count 18.000000 mean 2.680206 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 1.907190 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 2.261609 std ...,14


henard
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
De

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,gvn 1.797019e+01 instcombine ...,count 158.000000 mean 1.949258 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=0.01, max_iter=10)",Performance,0.1,gvn 22.397539 instcombine ...,count 158.000000 mean 1.906405 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.05),Performance,0.1,gvn 17.752278 instcombine ...,count 158.000000 mean 1.913491 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",ElasticNet(alpha=0.01),Performance,0.1,gvn 17.928110 instcombine ...,count 158.000000 mean 1.890358 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 158.000000 mean 2.287677 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.3),Performance,0.9,gvn 18.876308 ipsccp ...,count 18.000000 mean 0.990442 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.05, l1_ratio=1.0, max_iter=...",Performance,0.9,gvn 22.505239 instcombine ...,count 18.000000 mean 1.049683 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 1.002955 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 0.967932 std ...,14


random
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
De

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,inline 2.123773e+01 iv_users ...,count 158.000000 mean 4.984741 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=2, max_iter=5)",Performance,0.1,inline 8.893818 #yes 4.856885 iv_u...,count 158.000000 mean 4.636160 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=4),Performance,0.1,inline 7.497593 #yes ...,count 158.000000 mean 4.141341 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=2, l1_ratio=1.0, max_iter=5)",Performance,0.1,inline 8.893818 #yes 4.856885 iv_u...,count 158.000000 mean 4.636160 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 158.000000 mean 4.514760 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.8),Performance,0.9,gvn 9.782980 licm ...,count 18.000000 mean 2.721093 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.2, l1_ratio=1.0, max_iter=10)",Performance,0.9,gvn 13.201003 licm ...,count 18.000000 mean 2.822583 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 1.820685 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 2.148818 std ...,14


solverBased
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTr

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,ipsccp 1.963108e+01 licm ...,count 158.000000 mean 3.763552 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=0.3, max_iter=5)",Performance,0.1,inline 25.997052 licm ...,count 158.000000 mean 3.262582 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=2),Performance,0.1,inline 11.681712 iv_users ...,count 158.000000 mean 3.116029 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.3, l1_ratio=1.0, max_iter=5)",Performance,0.1,inline 25.997052 licm ...,count 158.000000 mean 3.262582 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 158.000000 mean 4.516955 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.3),Performance,0.9,licm 13.233724 inline ...,count 18.000000 mean 2.360423 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.2, l1_ratio=1.0, max_iter=5)",Performance,0.9,licm 17.118936 inline ...,count 18.000000 mean 2.339768 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 1.842857 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 18.000000 mean 1.839568 std ...,14


twise
1.0
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
It doesn't scale
RandomForest
GradientBoostingTree
Dec

Unnamed: 0,algorithm_name,ft_dummyfication,origin_ft_selection,ft_selection,hyperparameters,size_target,test_size,coef_order,accuracy,num_features
0,LinearRegression,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",LinearRegression(),Performance,0.1,gvn 2.062010e+01 instcombine ...,count 149.000000 mean 2.361506 std ...,14
1,Lasso,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","Lasso(alpha=0.8, max_iter=1)",Performance,0.1,gvn 21.645667 instcombine ...,count 149.000000 mean 3.092050 std ...,14
2,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(),Performance,0.1,gvn 15.958548 ipsccp ...,count 149.000000 mean 2.696523 std ...,14
3,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.8, l1_ratio=1.0, max_iter=1)",Performance,0.1,gvn 21.645667 instcombine ...,count 149.000000 mean 3.092050 std ...,14
4,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.1,root NaN time_passes NaN ...,count 149.000000 mean 3.249003 std ...,14
...,...,...,...,...,...,...,...,...,...,...
58,Ridge,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",Ridge(alpha=0.5),Performance,0.9,gvn 16.295644 licm ...,count 17.000000 mean 1.649282 std ...,14
59,ElasticNet,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","ElasticNet(alpha=0.01, l1_ratio=0.1, max_iter=5)",Performance,0.9,gvn 17.878785 licm ...,count 17.000000 mean 1.614323 std ...,14
60,RandomForest,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...","(DecisionTreeRegressor(max_features=1.0, rando...",Performance,0.9,root NaN time_passes NaN ...,count 17.000000 mean 0.958406 std ...,14
61,GradientBoostingTree,False,,"Index(['root', 'time_passes', 'gvn', 'instcomb...",([DecisionTreeRegressor(criterion='friedman_ms...,Performance,0.9,root NaN time_passes NaN ...,count 17.000000 mean 1.299567 std ...,14




-----------------------------------------------------------------------------

--------------- System LLVM training 

--------------- End time:: 2024-02-09 16:52:03.865636

-----------------------------------------------------------------------------


-----------------------------------------------------------------------------

--------------- System LLVM training WITH features importance 

--------------- Start time:: 2024-02-09 16:52:03.865706

-----------------------------------------------------------------------------
distBased
0.3
Deleted columns: 


['root', 'time_passes']

Sample features number: 4
IN FT_LOOP DF IMPORTANCE

LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
RandomForest
GradientBoostingTree
DecisionTree
LinearRegression
Lasso
Ridge
ElasticNet
PolynomialRegression
RandomForest
GradientBoostingTree
DecisionTree


In [None]:
df_result['accuracy'][0]

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

rs = []
for i in range(len(df_result_fi['accuracy'])):
    rs.append(df_result_fi['accuracy'][i]['mean'])

# rs
# display (df_result['algorithm_name'], df_result['accuracy'], rs)

plt.scatter(df_result_fi['algorithm_name'], rs, color ="blue", marker = "s")
plt.plot(df_result_fi['algorithm_name'], rs, color ="blue")


In [None]:
# Boxplot
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


data = [model['LinearRegression'].coef_, model['Lasso'].coef_, model['Ridge'].coef_, model['ElasticNet'].coef_, model['PolynomialRegression'].coef_]
fig1, ax1 = plt.subplots()
ax1.set_title('Coeficient of feature importance for all algorithms')
ax1.boxplot(data)
plt.show()