In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

In [3]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor, plot_tree

In [4]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

In [5]:
import optuna


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
%matplotlib inline

#### get data

In [13]:
fldr = "./motorinTrainData/"
def read_file(nn, fuel, ppb):
    fn = fldr+f"foms{nn}_{fuel}_{ppb}ppb.xlsx"
    return pd.read_excel(fn)


In [14]:
read

NameError: name 'fn' is not defined

In [7]:
sensor = [11, 17, 35,]
fuel = ["motorin"]
ppb = [0,300]

dfs = {}

for s in sensor:
    for f in fuel:
        for p in ppb:
            df = read_file(s,f, p)
            df["label"] = p/3
            dfs[(s,f,p)]=df

In [8]:
dfs

{(11,
  'motorin',
  0):                                          Background     CH1    CH2     CH3  \
 0                       0 ppb 303 VAS Mersin 030721   84935  41111   49694   
 1                       0 ppb 303 VAS Mersin 030721   85458  41356   50004   
 2                       0 ppb 303 VAS Mersin 030721   85699  41462   50064   
 3                     Marmara 905 BG 030821 Motorin  282739  79640  142468   
 4                     Marmara 905 BG 030821 Motorin  285348  80219  143706   
 5                     Marmara 905 BG 030821 Motorin  287267  80670  144530   
 6                      0 ppb Körfez BG T-309 300721  338272  87504  163090   
 7                      0 ppb Körfez BG T-309 300721  340234  87998  164038   
 8                      0 ppb Körfez BG T-309 300721  340554  88099  164197   
 9                          0 ppb S44 Körfez Motorin   90197  50847   58378   
 10                         0 ppb S44 Körfez Motorin   91121  51304   58871   
 11                         

#### get eval

In [23]:
fldr = "./yeni_test_data/"
def read_file_eval(nn, fuel):
    fn = fldr+f"foms{nn}_{fuel}.xlsx"
    return pd.read_excel(fn)
fuel_for_test = ["benzin", "ecomotorin","bioethbenzin","ultramotorin"]
sensor_for_test = [5,8,9,10,12,13,14,15,16,17,18,33,39,40,41,42,44,46,47,48,49]

In [24]:
df_evals = {}
for s in sensor_for_test:
    for f in fuel_for_test:
        df = read_file_eval(s,f)
        df_evals[(s,f)]=df
        # print(df.columns)
        # print()

FileNotFoundError: [Errno 2] No such file or directory: './yeni_test_data/foms5_ultramotorin.xlsx'

In [9]:
from sklearn.model_selection import KFold

In [10]:
best_params = {}
for s in sensor:
    for f in fuel:
        print(s,f)
        df1 = dfs[(s,f,0)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df2 = dfs[(s,f,300)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df0 = pd.concat([df1,df2], ignore_index=True)
        
        X_train = df0[['CH1', 'CH2', 'CH3', 'CH4']]
        y_train = df0['label']

        kf = KFold(n_splits=3, random_state=2**10, shuffle=True)
        ksplits = list(kf.split(X_train))

        def objective(trial):

            # Invoke suggest methods of a Trial object to generate hyperparameters.
            regressor_name = trial.suggest_categorical('regressor', ['ElasticNet'])

            #alpha = trial.suggest_float('alpha', 1e-3, 1e2, log=True)
            alpha = trial.suggest_float('alpha', 0.5, 2, log=False)
            #l1_ratio = trial.suggest_float('l1_ratio', 1e-3, 1e0, log=True)

            acc =[]
            for itrain, itest in ksplits:
                xtrain, ytrain = X_train.iloc[itrain], y_train.iloc[itrain]
                xtest, ytest = X_train.iloc[itest], y_train.iloc[itest]

                model1 = Pipeline(steps=[
                    ('scaler', StandardScaler()),
                    ('preprocessor', PolynomialFeatures(degree=2, include_bias=False)),
                    #('estimator', ElasticNet(alpha=alpha, l1_ratio=l1_ratio))
                    ('estimator', Lasso(alpha=alpha,max_iter=10000, fit_intercept=True))
                ])
                model1.fit(xtrain, ytrain)
                y_pred = model1.predict(xtest)
                acc.append((np.abs(y_pred-ytest)<=100.).mean() )

            return np.mean(acc)

        study = optuna.create_study(direction='maximize')  # Create a new study
        study.optimize(objective, n_trials=10)  # Invoke optimization of the objective function.
    
        best_params[(s,f)] = study.best_params

[I 2023-08-23 16:04:45,837] A new study created in memory with name: no-name-3bbf105f-79c0-4b80-bef1-c4080d27d243
[I 2023-08-23 16:04:45,854] Trial 0 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.8795031756849123}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:45,864] Trial 1 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.6438363924478085}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:45,874] Trial 2 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.1456228007377156}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:45,883] Trial 3 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.8611536015361327}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:45,900] Trial 4 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.665057547604643}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:45,907] Trial 5 finished wi

11 motorin
17 motorin
35 motorin


[I 2023-08-23 16:04:46,048] Trial 2 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.4784624984491674}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:46,062] Trial 3 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.1882703855277166}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:46,073] Trial 4 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.7676001553832057}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:46,088] Trial 5 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.0582146896304256}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:46,104] Trial 6 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 0.7884865153214115}. Best is trial 0 with value: 1.0.
[I 2023-08-23 16:04:46,113] Trial 7 finished with value: 1.0 and parameters: {'regressor': 'ElasticNet', 'alpha': 1.9797042420720663}. Best is trial 0 with valu

In [11]:
models_poly = {}
for s in sensor:
    for f in fuel:
        df1 = dfs[(s,f,0)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df2 = dfs[(s,f,300)][['CH1', 'CH2', 'CH3', 'CH4', 'label']]
        df0 = pd.concat([df1,df2], ignore_index=True)
        
        X_train = df0[['CH1', 'CH2', 'CH3', 'CH4']]
        y_train = df0['label']
        
        bp = best_params[s,f]
        model1 = Pipeline(steps=[
            ('scaler', StandardScaler()),
            ('preprocessor', PolynomialFeatures(degree=2, include_bias=False)),
            #('estimator', ElasticNet(alpha=bp['alpha'], l1_ratio=bp['l1_ratio']))
            #('estimator', Lasso(alpha=bp['alpha'],max_iter=10000, fit_intercept=True))
            ('estimator', Lasso(alpha=1,max_iter=10000, fit_intercept=True))
        ])
        
        model1.fit(X_train, y_train)
        models_poly[(s,f)] = model1

In [28]:
!conda install pickle

Collecting package metadata (current_repodata.json): done
Solving environment: unsuccessful initial attempt using frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): done
Solving environment: unsuccessful initial attempt using frozen solve. Retrying with flexible solve.

PackagesNotFoundError: The following packages are not available from current channels:

  - pickle

Current channels:

  - https://repo.anaconda.com/pkgs/main/osx-arm64
  - https://repo.anaconda.com/pkgs/main/noarch
  - https://repo.anaconda.com/pkgs/r/osx-arm64
  - https://repo.anaconda.com/pkgs/r/noarch

To search for alternate channels that may provide the conda package you're
looking for, navigate to

    https://anaconda.org

and use the search bar at the top of the page.




In [12]:
import pickle

# Modeli kaydetme
#model = model1
model = models_poly[(s,f)]  # Eğitilmiş model nesnesi
with open('modelOnlyMotorin.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)



In [17]:
rmse = {}
accuracy10 = {}
for s in sensor:
    for f in fuel:
        df0 = df_evals[(s,f)].copy()
        df0['pred'] = models_poly[(s,f)].predict(df0[['CH1', 'CH2', 'CH3', 'CH4']])
        df0['accuracy@10'] = np.abs(100 - df0['pred'])<10
        df0['error'] = (100 - df0['pred'])**2
        #
        accuracy10[(s,f)] = df0['accuracy@10'].mean()
        rmse[(s,f)] = df0['error'].mean()

In [18]:
print('\t \t accuracy \t rmse')
for s in sensor:
    for f in fuel:
            print("{}\t{}\t{:0.3f}\t\t{:0.3f}".format(s,f, accuracy10[(s,f)], rmse[(s,f) ]))
print('-------------')
print('Average: \t{:0.3f}\t\t{:0.3f}'.format(
        np.mean(list(accuracy10.values())),
        np.mean(list(rmse.values())),
))

	 	 accuracy 	 rmse
11	Benzin	0.756		118.119
11	motorin	0.962		57.701
17	Benzin	0.972		64.612
17	motorin	0.954		101.401
35	Benzin	0.555		323.966
35	motorin	0.953		133.062
-------------
Average: 	0.859		133.143


In [19]:
best_params

{(11, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 1.2789715432696211},
 (11, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 1.708025030347188},
 (17, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 0.5924766116040947},
 (17, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 0.6556546944803776},
 (35, 'Benzin'): {'regressor': 'ElasticNet', 'alpha': 1.8444334425070936},
 (35, 'motorin'): {'regressor': 'ElasticNet', 'alpha': 0.9805023647681786}}

#### get params

In [20]:
params = {}

for s in sensor:
    for f in fuel:
        pipe = models_poly[(s,f)]
        params2={}
        params2['scale'] = pipe.steps[0][1].scale_
        params2['mean'] = pipe.steps[0][1].mean_
        params2['poly'] = pipe.steps[2][1].coef_
        params2['intercept'] = pipe.steps[2][1].intercept_
        
        params[(s,f)] = params2

In [21]:
rmse = {}
accuracy10 = {}
for s in sensor:
    for f in fuel:
        df0 = df_evals[(s,f)].copy()
        
        x = df0[['CH1', 'CH2', 'CH3', 'CH4']].copy()
        
        params0 = params[(s,f)]
        # scale
        x['CH1'] = (x['CH1'] - params0['mean'][0])/params0['scale'][0]
        x['CH2'] = (x['CH2'] - params0['mean'][1])/params0['scale'][1]
        x['CH3'] = (x['CH3'] - params0['mean'][2])/params0['scale'][2]
        x['CH4'] = (x['CH4'] - params0['mean'][3])/params0['scale'][3]
        
        # polynomial
        pp=params0['poly']
        pred=[]
        for i,r in x.iterrows():
            result = params0['intercept']

            result += pp[0]*r['CH1']
            result += pp[1]*r['CH2']
            result += pp[2]*r['CH3']
            result += pp[3]*r['CH4']

            result += pp[4]*r['CH1']*r['CH1']
            result += pp[5]*r['CH1']*r['CH2']
            result += pp[6]*r['CH1']*r['CH3']
            result += pp[7]*r['CH1']*r['CH4']

            result += pp[8]*r['CH2']*r['CH2']
            result += pp[9]*r['CH2']*r['CH3']
            result += pp[10]*r['CH2']*r['CH4']

            result += pp[11]*r['CH3']*r['CH3']
            result += pp[12]*r['CH3']*r['CH4']

            result += pp[13]*r['CH4']*r['CH4']
            pred.append(result)        
        
        df0['pred'] = pred
        df0['accuracy@10'] = np.abs(100 - df0['pred'])<10
        df0['error'] = (100 - df0['pred'])**2
        # #
        accuracy10[(s,f)] = df0['accuracy@10'].mean()
        rmse[(s,f)] = df0['error'].mean()

In [22]:
print('\t \t accuracy \t rmse')
for s in sensor:
    for f in fuel:
            print("{}\t{}\t{:0.3f}\t\t{:0.3f}".format(s,f, accuracy10[(s,f)], rmse[(s,f) ]))
print('-------------')
print('Average: \t{:0.3f}\t\t{:0.3f}'.format(
        np.mean(list(accuracy10.values())),
        np.mean(list(rmse.values())),
))

	 	 accuracy 	 rmse
11	Benzin	0.756		118.119
11	motorin	0.962		57.701
17	Benzin	0.972		64.612
17	motorin	0.954		101.401
35	Benzin	0.555		323.966
35	motorin	0.953		133.062
-------------
Average: 	0.859		133.143
