In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor, plot_tree

In [4]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [5]:
import optuna

In [6]:
%matplotlib inline

#### get data

In [7]:
fldr = "./Temizlenmis data/"
def read_file(nn, fuel, ppb):
    fn = fldr+f"foms{nn}_{fuel}_{ppb}ppb.xlsx"
    return pd.read_excel(fn)


In [8]:
sensor = [11, 17, 35]
fuel = ["Benzin", "motorin"]
ppb = [0,300]

dfs = {}

for s in sensor:
    for f in fuel:
        for p in ppb:
            df = read_file(s,f, p)
            df["label"] = p/3
            dfs[(s,f,p)]=df

#### get eval

In [12]:
fldr = "./Temizlenmis data/"
def read_file_eval(nn, fuel):
    fn = fldr+f"foms{nn}_{fuel}_testdata.xlsx"
    return pd.read_excel(fn)

In [13]:
df_evals = {}
for s in sensor:
    for f in fuel:
        df = read_file_eval(s,f)
        df_evals[(s,f)]=df
        # print(df.columns)
        # print()

In [14]:
from sklearn.model_selection import KFold

In [12]:
best_params = {}
for s in sensor:
    for f in fuel:
        print(s,f)
        df1 = dfs[(s,f,0)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df2 = dfs[(s,f,300)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df0 = pd.concat([df1,df2], ignore_index=True)
        
        X_train = df0[['CH1', 'CH2', 'CH3', 'CH4']]
        y_train = df0['label']

        kf = KFold(n_splits=3, random_state=2**10, shuffle=True)
        ksplits = list(kf.split(X_train))

        def objective(trial):

            # Invoke suggest methods of a Trial object to generate hyperparameters.
            regressor_name = trial.suggest_categorical('regressor', ['ElasticNet'])

            #alpha = trial.suggest_float('alpha', 1e-3, 1e2, log=True)
            alpha = trial.suggest_float('alpha', 0.5, 2, log=False)
            #l1_ratio = trial.suggest_float('l1_ratio', 1e-3, 1e0, log=True)

            acc =[]
            for itrain, itest in ksplits:
                xtrain, ytrain = X_train.iloc[itrain], y_train.iloc[itrain]
                xtest, ytest = X_train.iloc[itest], y_train.iloc[itest]

                model1 = Pipeline(steps=[
                    ('scaler', StandardScaler()),
                    ('preprocessor', PolynomialFeatures(degree=2, include_bias=False)),
                    #('estimator', ElasticNet(alpha=alpha, l1_ratio=l1_ratio))
                    ('estimator', Lasso(alpha=alpha,max_iter=10000, fit_intercept=True))
                ])
                model1.fit(xtrain, ytrain)
                y_pred = model1.predict(xtest)
                acc.append((np.abs(y_pred-ytest)<=100.).mean() )

            return np.mean(acc)

        study = optuna.create_study(direction='maximize')  # Create a new study
        study.optimize(objective, n_trials=10)  # Invoke optimization of the objective function.
    
        best_params[(s,f)] = study.best_params

[I 2023-06-07 07:20:48,725] A new study created in memory with name: no-name-bc2374c8-df62-491b-b6ae-e8c2f0e6f00d
[I 2023-06-07 07:20:48,787] Trial 0 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.759957793948489}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:48,823] Trial 1 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.6676890601969279}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:48,858] Trial 2 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.2501849841106814}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:48,897] Trial 3 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.2238471161526594}. Best is trial 0 with value: 1.0.


11 Benzin


[I 2023-06-07 07:20:48,947] Trial 4 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.819360607131362}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:48,982] Trial 5 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.013871498423835}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,016] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.6361117692043667}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,054] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.0634150804409335}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,091] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.9818378620369206}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,124] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.3927136861280456}. Best is trial 0 with value:

11 motorin


[I 2023-06-07 07:20:49,334] Trial 5 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.7780826820491298}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,366] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.817606701253588}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,409] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.6313694316311693}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,437] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.591572942746247}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,471] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.455478585627511}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,476] A new study created in memory with name: no-name-8d7f2d69-5e95-4474-bba6-90480090854d
[I 2023-06-07 07:20:49,518] Trial 0 finished with

17 Benzin


[I 2023-06-07 07:20:49,709] Trial 5 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.6936057634136985}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,746] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.6762574849440358}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,776] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.7118256834886592}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,816] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.2835470801612647}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,853] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.9144821819936719}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:49,858] A new study created in memory with name: no-name-69e29f66-c320-461e-8aae-296d5c7e710c
[I 2023-06-07 07:20:49,889] Trial 0 finished w

17 motorin


[I 2023-06-07 07:20:50,062] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.2084814146215235}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,099] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.5452544356488096}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,132] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.6100028104851736}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,167] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.8537403917704647}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,173] A new study created in memory with name: no-name-bfb552b0-c5a8-4a78-af2e-c4a39ab879a2
[I 2023-06-07 07:20:50,204] Trial 0 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.6027022775312525}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,236] Trial 1 finished w

35 Benzin


[I 2023-06-07 07:20:50,386] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.8655278701340916}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,414] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.4807127774002204}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,448] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.7540430967578362}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,485] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.836610248697859}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,491] A new study created in memory with name: no-name-7e79d9f0-8cb4-4b8d-8018-74180e91fd2d
[I 2023-06-07 07:20:50,527] Trial 0 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.9779629722946237}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,567] Trial 1 finished wi

35 motorin


[I 2023-06-07 07:20:50,729] Trial 5 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.6443913244105789}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,783] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.826176894876852}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,816] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.688097690736226}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,847] Trial 8 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.8149996890001003}. Best is trial 0 with value: 1.0.
[I 2023-06-07 07:20:50,889] Trial 9 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.5381693917265773}. Best is trial 0 with value: 1.0.


In [13]:
models_poly = {}
for s in sensor:
    for f in fuel:
        df1 = dfs[(s,f,0)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df2 = dfs[(s,f,300)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df0 = pd.concat([df1,df2], ignore_index=True)
        
        X_train = df0[['CH1', 'CH2', 'CH3', 'CH4']]
        y_train = df0['label']
        
        bp = best_params[s,f]
        model1 = Pipeline(steps=[
            ('scaler', StandardScaler()),
            ('preprocessor', PolynomialFeatures(degree=2, include_bias=False)),
            #('estimator', ElasticNet(alpha=bp['alpha'], l1_ratio=bp['l1_ratio']))
            #('estimator', Lasso(alpha=bp['alpha'],max_iter=10000, fit_intercept=True))
            ('estimator', Lasso(alpha=1,max_iter=10000, fit_intercept=True))
        ])
        
        model1.fit(X_train, y_train)
        models_poly[(s,f)] = model1

In [14]:
rmse = {}
accuracy10 = {}
for s in sensor:
    for f in fuel:
        df0 = df_evals[(s,f)].copy()
        df0['pred'] = models_poly[(s,f)].predict(df0[['CH1', 'CH2', 'CH3', 'CH4']])
        df0['accuracy@10'] = np.abs(100 - df0['pred'])<10
        df0['error'] = (100 - df0['pred'])**2
        #
        accuracy10[(s,f)] = df0['accuracy@10'].mean()
        rmse[(s,f)] = df0['error'].mean()

In [15]:
print('\t \t accuracy \t rmse')
for s in sensor:
    for f in fuel:
            print("{}\t{}\t{:0.3f}\t\t{:0.3f}".format(s,f, accuracy10[(s,f)], rmse[(s,f) ]))
print('-------------')
print('Average: \t{:0.3f}\t\t{:0.3f}'.format(
        np.mean(list(accuracy10.values())),
        np.mean(list(rmse.values())),
))

	 	 accuracy 	 rmse
11	Benzin	0.756		118.119
11	motorin	0.962		57.701
17	Benzin	0.972		64.612
17	motorin	0.954		101.401
35	Benzin	0.555		323.966
35	motorin	0.953		133.062
-------------
Average: 	0.859		133.143


In [16]:
best_params

{(11, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 0.759957793948489},
 (11, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 1.2289274205082639},
 (17, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 1.162196639771191},
 (17, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 1.7759517653035872},
 (35, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 1.6027022775312525},
 (35, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 0.9779629722946237}}

#### get params

In [17]:
params = {}

for s in sensor:
    for f in fuel:
        pipe = models_poly[(s,f)]
        params2={}
        params2['scale'] = pipe.steps[0][1].scale_
        params2['mean'] = pipe.steps[0][1].mean_
        params2['poly'] = pipe.steps[2][1].coef_
        params2['intercept'] = pipe.steps[2][1].intercept_
        
        params[(s,f)] = params2

In [24]:
rmse = {}
accuracy10 = {}
for s in sensor:
    for f in fuel:
        df0 = df_evals[(s,f)].copy()
        
        x = df0[['CH1', 'CH2', 'CH3', 'CH4']].copy()
        
        params0 = params[(s,f)]
        # scale
        x['CH1'] = (x['CH1'] - params0['mean'][0])/params0['scale'][0]
        x['CH2'] = (x['CH2'] - params0['mean'][1])/params0['scale'][1]
        x['CH3'] = (x['CH3'] - params0['mean'][2])/params0['scale'][2]
        x['CH4'] = (x['CH4'] - params0['mean'][3])/params0['scale'][3]
        
        # polynomial
        pp=params0['poly']
        pred=[]
        for i,r in x.iterrows():
            result = params0['intercept']

            result += pp[0]*r['CH1']
            result += pp[1]*r['CH2']
            result += pp[2]*r['CH3']
            result += pp[3]*r['CH4']

            result += pp[4]*r['CH1']*r['CH1']
            result += pp[5]*r['CH1']*r['CH2']
            result += pp[6]*r['CH1']*r['CH3']
            result += pp[7]*r['CH1']*r['CH4']

            result += pp[8]*r['CH2']*r['CH2']
            result += pp[9]*r['CH2']*r['CH3']
            result += pp[10]*r['CH2']*r['CH4']

            result += pp[11]*r['CH3']*r['CH3']
            result += pp[12]*r['CH3']*r['CH4']

            result += pp[13]*r['CH4']*r['CH4']
            pred.append(result)        
        
        df0['pred'] = pred
        df0['accuracy@10'] = np.abs(100 - df0['pred'])<10
        df0['error'] = (100 - df0['pred'])**2
        # #
        accuracy10[(s,f)] = df0['accuracy@10'].mean()
        rmse[(s,f)] = df0['error'].mean()

In [25]:
print('\t \t accuracy \t rmse')
for s in sensor:
    for f in fuel:
            print("{}\t{}\t{:0.3f}\t\t{:0.3f}".format(s,f, accuracy10[(s,f)], rmse[(s,f) ]))
print('-------------')
print('Average: \t{:0.3f}\t\t{:0.3f}'.format(
        np.mean(list(accuracy10.values())),
        np.mean(list(rmse.values())),
))

	 	 accuracy 	 rmse
11	Benzin	0.756		118.119
11	motorin	0.962		57.701
17	Benzin	0.972		64.612
17	motorin	0.954		101.401
35	Benzin	0.555		323.966
35	motorin	0.953		133.062
-------------
Average: 	0.859		133.143
