<a href="https://colab.research.google.com/github/arthursl12/POC1/blob/main/POC1_Turbofan_FD002_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.svm import LinearSVR

from sklearn.pipeline import Pipeline
from sklearn.compose import TransformedTargetRegressor
from sklearn.exceptions import ConvergenceWarning

from sklearn.feature_selection import SelectFromModel
from sklearn.tree import DecisionTreeRegressor

from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [4]:
sns.set_palette('colorblind')

# Data Preparation

In [5]:
# Dataset Download 
os.system('git clone https://github.com/arthursl12/dataset_2')
os.system('mv /content/dataset_2/CMaps /content/CMaps')
os.system('mv /content/dataset_2/data_processing /content/data_processing')
os.system('rm -rf dataset_2')

0

In [6]:
from data_processing.processing import DatasetProcessing
from data_processing.training import HyperparameterSearch, reclipper_scorer
from data_processing.eval import Evaluation

In [7]:
proc = DatasetProcessing()

## Data Integration

The data are provided as a zip-compressed text file with 26 columns of numbers, separated by spaces. Each row is a snapshot of data taken during a single operational cycle, each column is a different variable. The columns correspond to:  

1) unit number   
2) time, in cycles  
3) operational setting 1  
4) operational setting 2  
5) operational setting 3    
6) sensor measurement 1    
7) sensor measurement 2  
...  
26) sensor measurement 20


There are 6 conditions (or combinations) which the 3 operational settings can take.  
Condition 1: Altitude = 0, Mach Number = 0, TRA = 100  
Condition 2: Altitude = 10, Mach Number = 0.25, TRA = 100  
Condition 3: Altitude = 20, Mach Number = 0.7 TRA = 100  
Condition 4: Altitude = 25, Mach Number = 0.62, TRA = 60  
Condition 5: Altitude = 35 Mach Number = 0.84, TRA = 100  
Condition 6: Altitude = 42, Mach Number = 0.84, TRA = 100  
  
There is slight variation in all these conditions so you may get numbers like 24.453 instead of 25 exactly.

FD001: Condition 1 only  
FD002: Mix of all the conditions  
FD003: Condition 1 only  
FD004: Mix of all conditions  


In [8]:
index_cols, settings_cols, sensors_cols, cols = proc.column_names()
train, test, y_test = proc.read_dataset(2)
train

Unnamed: 0,unit_number,time,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,1,1,34.9983,0.8400,100.0,449.44,555.32,1358.61,1137.23,5.48,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.00,14.73,8.8071
1,1,2,41.9982,0.8408,100.0,445.00,549.90,1353.22,1125.78,3.91,...,130.42,2387.66,8072.30,9.3774,0.02,330,2212,100.00,10.41,6.2665
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,1,4,42.0077,0.8416,100.0,445.00,549.51,1354.03,1126.38,3.91,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.00,10.59,6.4701
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,164.31,2028.00,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53754,260,312,20.0037,0.7000,100.0,491.19,608.79,1495.60,1269.51,9.35,...,314.05,2389.02,8169.64,9.3035,0.03,369,2324,100.00,24.36,14.5189
53755,260,313,10.0022,0.2510,100.0,489.05,605.81,1514.32,1324.12,10.52,...,371.22,2388.42,8245.36,8.7586,0.03,374,2319,100.00,28.10,16.9454
53756,260,314,25.0041,0.6200,60.0,462.54,537.48,1276.24,1057.92,7.05,...,163.74,2030.33,7971.25,11.0657,0.02,310,1915,84.93,14.19,8.5503
53757,260,315,25.0033,0.6220,60.0,462.54,537.84,1272.95,1066.30,7.05,...,164.37,2030.35,7972.47,11.0537,0.02,311,1915,84.93,14.05,8.3729


## Preprocessing

### Test Set Transformation 
Test set has samples for all cycles, but has annotations only for last one

In [9]:
test.shape, y_test.shape

((33991, 26), (259, 1))

In [10]:
test_last = proc.transform_test_keep_setting(test)
test_last.head()

Unnamed: 0,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,s_5,s_6,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,10.0076,0.2501,100.0,489.05,605.42,1515.0,1325.07,10.52,15.5,393.58,...,370.87,2388.32,8167.06,8.7456,0.03,371,2319,100.0,28.3,17.0934
1,0.0018,0.0,100.0,518.67,642.67,1591.67,1418.17,14.62,21.61,553.36,...,521.1,2388.12,8138.12,8.4248,0.03,393,2388,100.0,38.82,23.3463
2,35.0015,0.8412,100.0,449.44,555.86,1370.62,1135.59,5.48,8.0,194.58,...,183.11,2388.07,8071.23,9.3094,0.02,332,2223,100.0,14.75,8.9589
3,20.0032,0.7,100.0,491.19,607.99,1487.94,1257.49,9.35,13.66,334.39,...,314.88,2388.12,8062.39,9.2349,0.02,365,2324,100.0,24.22,14.6814
4,42.0055,0.84,100.0,445.0,550.81,1358.95,1140.34,3.91,5.72,138.42,...,130.82,2389.06,8140.94,9.3964,0.02,333,2212,100.0,10.34,6.3601


In [11]:
X_test = test_last

### Remaining Useful Life (RUL)

In [12]:
train = proc.add_remaining_useful_life_linear(train)
train[index_cols+['RUL']].head()

Unnamed: 0,unit_number,time,RUL
0,1,1,148
1,1,2,147
2,1,3,146
3,1,4,145
4,1,5,144


## Attributes and target separation

In [13]:
X_train, y_train = proc.X_y_train_divide_with_settings(train)

In [14]:
y_train.head()

Unnamed: 0,RUL
0,148
1,147
2,146
3,145
4,144


In [15]:
X_train.head()

Unnamed: 0,op_1,op_2,op_3,s_0,s_1,s_2,s_3,s_4,s_5,s_6,...,s_11,s_12,s_13,s_14,s_15,s_16,s_17,s_18,s_19,s_20
0,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,8.0,194.64,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071
1,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,5.71,138.51,...,130.42,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665
2,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,9.02,175.71,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,5.71,138.46,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701
4,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,9.03,175.05,...,164.31,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286


## Training and Evaluation functions

In [16]:
eval = Evaluation()

In [17]:
search = HyperparameterSearch()

# LinearRegression

## Linear RUL

In [18]:
model = LinearRegression()

In [19]:
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)

R2=0.576,RMSE=-45.030
(CV) R2=0.570,RMSE=-45.237


In [20]:
eval.show_result(y_test, model.predict(X_test))

R2=0.602,RMSE=-33.943


## Non-linear RUL

In [21]:
y_train.clip(upper=189).head()

Unnamed: 0,RUL
0,148
1,147
2,146
3,145
4,144


In [22]:
model = Pipeline([
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearRegression(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':50})))
    ])

In [23]:
GRID_SEARCH = True
if (GRID_SEARCH):
    param_distributions = {
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(80,150,1)
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions)
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 71
n_resources: 500
Fitting 5 folds for each of 71 candidates, totalling 355 fits




----------
iter: 1
n_candidates: 24
n_resources: 1500
Fitting 5 folds for each of 24 candidates, totalling 120 fits
----------
iter: 2
n_candidates: 8
n_resources: 4500
Fitting 5 folds for each of 8 candidates, totalling 40 fits
----------
iter: 3
n_candidates: 3
n_resources: 13500
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 104}}
Pipeline(steps=[('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=LinearRegression(),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f9b8c052c20>,
                                                                            kw_args={'a_max': 104,
                                                                                     'a_min': 0})))])


Best Model:

```
{'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 104}}
```



In [24]:
model = Pipeline([
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearRegression(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':104})))
    ])
model

Pipeline(steps=[('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=LinearRegression(),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f9b8c052c20>,
                                                                            kw_args={'a_max': 104,
                                                                                     'a_min': 0})))])

In [25]:
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)

R2=0.740,RMSE=-17.241
(CV) R2=0.737,RMSE=-17.341


In [26]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.767,RMSE=-17.697


Just adding a non-linear improved results, as in the first set

## PolyFeatures + Linear RUL

In [27]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('lin_reg'  ,   LinearRegression())
])
model

Pipeline(steps=[('poly_ft', PolynomialFeatures()),
                ('lin_reg', LinearRegression())])

In [28]:
GRID_SEARCH = True
if (GRID_SEARCH):
    param_distributions = {
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False]
    }
    model = search.run_HR_GS(model, X_train, y_train, 
                            param_distributions, scorer='r2')
    print(model)

n_iterations: 3
n_required_iterations: 3
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 12
n_resources: 500
Fitting 5 folds for each of 12 candidates, totalling 60 fits




----------
iter: 1
n_candidates: 4
n_resources: 1500
Fitting 5 folds for each of 4 candidates, totalling 20 fits
----------
iter: 2
n_candidates: 2
n_resources: 4500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'poly_ft__interaction_only': False, 'poly_ft__include_bias': False, 'poly_ft__degree': 1}
Pipeline(steps=[('poly_ft', PolynomialFeatures(degree=1, include_bias=False)),
                ('lin_reg', LinearRegression())])


Best Model:
```
PolynomialFeatures(degree=1, include_bias=False)
```


In [29]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures(degree=1, include_bias=False)),
    ('lin_reg'  ,   LinearRegression())
])
model

Pipeline(steps=[('poly_ft', PolynomialFeatures(degree=1, include_bias=False)),
                ('lin_reg', LinearRegression())])

In [30]:
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)

R2=0.576,RMSE=-45.030
(CV) R2=0.570,RMSE=-45.237


In [31]:
eval.show_result(y_test, model.predict(X_test))

R2=0.602,RMSE=-33.943


No clear benefit from polynomial features in linear RUL for simple linear regression. Actually, the degree stayed in 1, i.e., without those features.

## PolyFeatures + Non-Linear RUL

In [32]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearRegression(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':96})))
])

In [33]:
GRID_SEARCH = True
if (GRID_SEARCH):
    param_distributions = {
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False],
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(70,150,1)
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions)
    print(model)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 107
n_resources: 500
Fitting 5 folds for each of 107 candidates, totalling 535 fits
----------
iter: 1
n_candidates: 36
n_resources: 1500
Fitting 5 folds for each of 36 candidates, totalling 180 fits
----------
iter: 2
n_candidates: 12
n_resources: 4500
Fitting 5 folds for each of 12 candidates, totalling 60 fits
----------
iter: 3
n_candidates: 4
n_resources: 13500
Fitting 5 folds for each of 4 candidates, totalling 20 fits
----------
iter: 4
n_candidates: 2
n_resources: 40500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 96}, 'poly_ft__interaction_only': True, 'poly_ft__include_bias': True, 'poly_ft__degree': 1}
Pipeline(steps=[('poly_ft',
                 PolynomialFeatures(degree=1, interaction_only=True)),
              

Best Model:
```
{'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 96}, 
PolynomialFeatures(degree=1, interaction_only=True)),
```


In [34]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures(degree=1, interaction_only=True)),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearRegression(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':96})))
])
model

Pipeline(steps=[('poly_ft',
                 PolynomialFeatures(degree=1, interaction_only=True)),
                ('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=LinearRegression(),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f9b8c052c20>,
                                                                            kw_args={'a_max': 96,
                                                                                     'a_min': 0})))])

In [35]:
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)

R2=0.742,RMSE=-15.587
(CV) R2=0.739,RMSE=-15.680


In [36]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.766,RMSE=-16.445


There was a slight increase in performance when comparing to the scenario without the polynomial features.

# SGD Regressor

The class SGDRegressor implements a plain stochastic gradient descent learning routine which supports different loss functions and penalties to fit linear regression models. **SGDRegressor is well suited for regression problems with a large number of training samples (> 10.000)**, for other problems we recommend Ridge, Lasso, or ElasticNet.

## Linear RUL

In [37]:
model = Pipeline([
    ('scaler'   ,   StandardScaler()),
    ('lin_reg'  ,   SGDRegressor())
])

In [38]:
# We need specific shape 1D arrays for this model
y_train = np.array(y_train).ravel()

In [39]:
# ~4min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "lin_reg__loss": ['squared_error', 'huber', 'epsilon_insensitive'],
        "lin_reg__penalty": ['l2', 'l1', 'elasticnet'],
        "lin_reg__epsilon": [1.1,1.35,1.5,2,2.5,3,3.5,4,4.5,5.5,6],
        "lin_reg__alpha":[1e-2,1e-3,1e-4,1e-5,1e-6,1e-7],
        "lin_reg__max_iter": [1000,1500,2000,2500,3000],
        "lin_reg__tol": [1e-3,1e-4,1e-5,1e-6,1e-7],
        "lin_reg__learning_rate":['invscaling','constant','optimal','adaptive'],
        "lin_reg__eta0": [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6],
        "lin_reg__power_t": [1.25,1,0.75,0.5,0.25,0.1],
        "lin_reg__early_stopping": [True,False],
        "lin_reg__validation_fraction": [0.1,0.2,0.3,0.4,0.5,0.6],
        "lin_reg__n_iter_no_change": [1,2,5,8,10,15,20],
        "lin_reg__average": [False, 1,2,5,10,20]
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions, 
                            scorer='r2', ignore_warnings=True)
    print(model)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 107
n_resources: 500
Fitting 5 folds for each of 107 candidates, totalling 535 fits
----------
iter: 1
n_candidates: 36
n_resources: 1500
Fitting 5 folds for each of 36 candidates, totalling 180 fits
----------
iter: 2
n_candidates: 12
n_resources: 4500
Fitting 5 folds for each of 12 candidates, totalling 60 fits
----------
iter: 3
n_candidates: 4
n_resources: 13500
Fitting 5 folds for each of 4 candidates, totalling 20 fits
----------
iter: 4
n_candidates: 2
n_resources: 40500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'lin_reg__validation_fraction': 0.5, 'lin_reg__tol': 0.001, 'lin_reg__power_t': 0.5, 'lin_reg__penalty': 'elasticnet', 'lin_reg__n_iter_no_change': 20, 'lin_reg__max_iter': 3000, 'lin_reg__loss': 'epsilon_insensitive', 'lin_reg__learning_rate': 'optimal', 'lin_

Best Model:
```
SGDRegressor(alpha=1e-06, average=2, early_stopping=True,
                epsilon=3.5, eta0=1e-06, learning_rate='optimal',
                loss='epsilon_insensitive', max_iter=3000,
                n_iter_no_change=20, penalty='elasticnet',
                power_t=0.5, validation_fraction=0.5))
```

In [40]:
model = Pipeline([
    ('scaler'   ,   StandardScaler()),
    ('lin_reg'  ,   SGDRegressor(alpha=1e-06, average=2, early_stopping=True,
                              epsilon=3.5, eta0=1e-06, learning_rate='optimal',
                              loss='epsilon_insensitive', max_iter=3000,
                              n_iter_no_change=20, penalty='elasticnet',
                              power_t=0.5, validation_fraction=0.5))
])
model

Pipeline(steps=[('scaler', StandardScaler()),
                ('lin_reg',
                 SGDRegressor(alpha=1e-06, average=2, early_stopping=True,
                              epsilon=3.5, eta0=1e-06, learning_rate='optimal',
                              loss='epsilon_insensitive', max_iter=3000,
                              n_iter_no_change=20, penalty='elasticnet',
                              power_t=0.5, validation_fraction=0.5))])

In [41]:
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)

R2=0.566,RMSE=-45.563
(CV) R2=0.563,RMSE=-45.722


In [42]:
X_test = test_last
eval.show_result(y_test, model.predict(X_test))

R2=0.632,RMSE=-32.617


Poor results without non-lineariy of RUL. Comparable to regular LinearRegressor.

## Non-linear RUL

In [43]:
model = Pipeline([
    ('scaler'   ,   StandardScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = SGDRegressor(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':50})))
    ])

In [44]:
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(80,150,1),
        "trf_reg__regressor__loss": ['squared_error', 'huber', 'epsilon_insensitive'],
        "trf_reg__regressor__penalty": ['l2', 'l1', 'elasticnet'],
        "trf_reg__regressor__epsilon": [1.1,1.35,1.5,2,2.5,3,3.5,4,4.5,5.5,6],
        "trf_reg__regressor__alpha":[1e-2,1e-3,1e-4,1e-5,1e-6,1e-7],
        "trf_reg__regressor__max_iter": [1000,1500,2000,2500,3000],
        "trf_reg__regressor__tol": [1e-3,1e-4,1e-5,1e-6,1e-7],
        "trf_reg__regressor__learning_rate":['invscaling','constant','optimal','adaptive'],
        "trf_reg__regressor__eta0": [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6],
        "trf_reg__regressor__power_t": [1.25,1,0.75,0.5,0.25,0.1],
        "trf_reg__regressor__early_stopping": [True,False],
        "trf_reg__regressor__validation_fraction": [0.1,0.2,0.3,0.4,0.5,0.6],
        "trf_reg__regressor__n_iter_no_change": [1,2,5,8,10,15,20],
        "trf_reg__regressor__average": [False, 1,2,5,10,20]
    }
    model = search.run_HR_GS(model, X_train, y_train, 
                            param_distributions, ignore_warnings=True)
    print(model)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 107
n_resources: 500
Fitting 5 folds for each of 107 candidates, totalling 535 fits
----------
iter: 1
n_candidates: 36
n_resources: 1500
Fitting 5 folds for each of 36 candidates, totalling 180 fits
----------
iter: 2
n_candidates: 12
n_resources: 4500
Fitting 5 folds for each of 12 candidates, totalling 60 fits
----------
iter: 3
n_candidates: 4
n_resources: 13500
Fitting 5 folds for each of 4 candidates, totalling 20 fits
----------
iter: 4
n_candidates: 2
n_resources: 40500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 107}, 'trf_reg__regressor__validation_fraction': 0.6, 'trf_reg__regressor__tol': 0.001, 'trf_reg__regressor__power_t': 0.1, 'trf_reg__regressor__penalty': 'l1', 'trf_reg__regressor__n_iter_no_change': 10, '

Best Model:
```
kw_args={'a_max': 107,'a_min': 0}))

SGDRegressor(epsilon=6,
            eta0=0.0001,
            learning_rate='constant',
            n_iter_no_change=10,
            penalty='l1',
            power_t=0.1,
            validation_fraction=0.6),
```

In [45]:
model = Pipeline([
    ('scaler'   ,   StandardScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor=SGDRegressor(epsilon=6,
                                eta0=0.0001,
                                learning_rate='constant',
                                n_iter_no_change=10,
                                penalty='l1',
                                power_t=0.1,
                                validation_fraction=0.6),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':107})))
    ])
model

Pipeline(steps=[('scaler', StandardScaler()),
                ('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=SGDRegressor(epsilon=6,
                                                                   eta0=0.0001,
                                                                   learning_rate='constant',
                                                                   n_iter_no_change=10,
                                                                   penalty='l1',
                                                                   power_t=0.1,
                                                                   validation_fraction=0.6),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f9b8c052c20>,
                                                                            kw_args={'a_max': 107,
                                                     

In [46]:
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)

R2=0.727,RMSE=-18.257
(CV) R2=0.724,RMSE=-18.342


In [47]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.764,RMSE=-18.286


Achieved training results from non-linear RUL Linear Regressor, but with a little better test result. However the LinearRegressor had a clipping of 121 instead of 104. Same model as FD003.

## PolyFeatures + Linear RUL

In [48]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('scaler'   ,   StandardScaler()),
    ('lin_reg'  ,   SGDRegressor())
])

In [49]:
# ~30min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False],
        "lin_reg__loss": ['squared_error', 'huber', 'epsilon_insensitive'],
        "lin_reg__penalty": ['l2', 'l1', 'elasticnet'],
        "lin_reg__epsilon": [1.1,1.35,1.5,2,2.5,3,3.5,4,4.5,5.5,6],
        "lin_reg__alpha":[1e-2,1e-3,1e-4,1e-5,1e-6,1e-7],
        "lin_reg__max_iter": [1000,1500,2000,2500,3000],
        "lin_reg__tol": [1e-3,1e-4,1e-5,1e-6,1e-7],
        "lin_reg__learning_rate":['invscaling','constant','optimal','adaptive'],
        "lin_reg__eta0": [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6],
        "lin_reg__power_t": [1.25,1,0.75,0.5,0.25,0.1],
        "lin_reg__early_stopping": [True,False],
        "lin_reg__validation_fraction": [0.1,0.2,0.3,0.4,0.5,0.6],
        "lin_reg__n_iter_no_change": [1,2,5,8,10,15,20],
        "lin_reg__average": [False, 1,2,5,10,20]
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions,
                            ignore_warnings=True, scorer='r2')
    print(model)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 500
max_resources_: 53759
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 107
n_resources: 500
Fitting 5 folds for each of 107 candidates, totalling 535 fits
----------
iter: 1
n_candidates: 36
n_resources: 1500
Fitting 5 folds for each of 36 candidates, totalling 180 fits
----------
iter: 2
n_candidates: 12
n_resources: 4500
Fitting 5 folds for each of 12 candidates, totalling 60 fits
----------
iter: 3
n_candidates: 4
n_resources: 13500
Fitting 5 folds for each of 4 candidates, totalling 20 fits
----------
iter: 4
n_candidates: 2
n_resources: 40500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'poly_ft__interaction_only': False, 'poly_ft__include_bias': False, 'poly_ft__degree': 3, 'lin_reg__validation_fraction': 0.1, 'lin_reg__tol': 0.0001, 'lin_reg__power_t': 0.25, 'lin_reg__penalty': 'elasticnet', 'lin_reg__n_iter_no_change': 10, 'lin_reg__max_ite

Best Model:
```
Pipeline(steps=[('poly_ft', PolynomialFeatures(degree=3, include_bias=False)),
                ('scaler', StandardScaler()),
                ('lin_reg',
                 SGDRegressor(alpha=1e-06, average=1, epsilon=6, eta0=0.0001,
                              learning_rate='adaptive', max_iter=2000,
                              n_iter_no_change=10, penalty='elasticnet',
                              tol=0.0001)
```


In [50]:
model = Pipeline([
    ('poly_ft', PolynomialFeatures(degree=3, include_bias=False)),
    ('scaler', StandardScaler()),
    ('lin_reg',  SGDRegressor(alpha=1e-06, average=1, epsilon=6, eta0=0.0001,
                              learning_rate='adaptive', max_iter=2000,
                              n_iter_no_change=10, penalty='elasticnet',
                              tol=0.0001))
])
model

Pipeline(steps=[('poly_ft', PolynomialFeatures(degree=3, include_bias=False)),
                ('scaler', StandardScaler()),
                ('lin_reg',
                 SGDRegressor(alpha=1e-06, average=1, epsilon=6, eta0=0.0001,
                              learning_rate='adaptive', max_iter=2000,
                              n_iter_no_change=10, penalty='elasticnet',
                              tol=0.0001))])

In [51]:
# 20min
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)

R2=0.575,RMSE=-45.097
(CV) R2=0.569,RMSE=-45.328


In [52]:
eval.show_result(y_test, model.predict(X_test))

R2=0.612,RMSE=-33.519


No clear benefit from polynomial features in linear RUL as well. It just performs a little better than the LinearRegression model. It's the exactly same model as in FD001, the same as FD003.

## PolyFeatures + Non-Linear RUL

In [None]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('scaler'   ,   StandardScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = SGDRegressor(),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':96})))
])

In [None]:
# ~6min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False],
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(70,150,1),
        "trf_reg__regressor__loss": ['squared_error', 'huber', 'epsilon_insensitive'],
        "trf_reg__regressor__penalty": ['l2', 'l1', 'elasticnet'],
        "trf_reg__regressor__epsilon": [1.1,1.35,1.5,2,2.5,3,3.5,4,4.5,5.5,6],
        "trf_reg__regressor__alpha":[1e-2,1e-3,1e-4,1e-5,1e-6,1e-7],
        "trf_reg__regressor__max_iter": [1000,1500,2000,2500,3000,4000,4500],
        "trf_reg__regressor__tol": [1e-3,1e-4,1e-5,1e-6,1e-7],
        "trf_reg__regressor__learning_rate":['invscaling','constant','optimal','adaptive'],
        "trf_reg__regressor__eta0": [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6],
        "trf_reg__regressor__power_t": [1.25,1,0.75,0.5,0.25,0.1],
        "trf_reg__regressor__early_stopping": [True,False],
        "trf_reg__regressor__validation_fraction": [0.1,0.2,0.3,0.4,0.5,0.6],
        "trf_reg__regressor__n_iter_no_change": [1,2,5,8,10,15,20],
        "trf_reg__regressor__average": [False, 1,2,5,10,20]
    }
    model = search.run_HR_GS(model, X_train, y_train, 
                             param_distributions, ignore_warnings=True)
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 500
max_resources_: 24720
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 49
n_resources: 500
Fitting 5 folds for each of 49 candidates, totalling 245 fits
----------
iter: 1
n_candidates: 17
n_resources: 1500
Fitting 5 folds for each of 17 candidates, totalling 85 fits
----------
iter: 2
n_candidates: 6
n_resources: 4500
Fitting 5 folds for each of 6 candidates, totalling 30 fits
----------
iter: 3
n_candidates: 2
n_resources: 13500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 115}, 'trf_reg__regressor__validation_fraction': 0.5, 'trf_reg__regressor__tol': 1e-06, 'trf_reg__regressor__power_t': 0.25, 'trf_reg__regressor__penalty': 'elasticnet', 'trf_reg__regressor__n_iter_no_change': 15, 'trf_reg__regressor__max_iter': 2500, 'trf_reg__regressor__loss': 'squared_error', 'trf_reg__regressor__learnin

Best Model:
```
{'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 115}, 
PolynomialFeatures(include_bias=False, interaction_only=True)),
SGDRegressor(alpha=1e-05,
            average=20,
            early_stopping=True,
            epsilon=1.5,
            max_iter=2500,
            n_iter_no_change=15,
            penalty='elasticnet',
            tol=1e-06,
            validation_fraction=0.5),
```


In [None]:
model = Pipeline([
    ('poly_ft',PolynomialFeatures(include_bias=False, interaction_only=True)),
    ('scaler', StandardScaler()),
    ('trf_reg',
        TransformedTargetRegressor(
            check_inverse=False,
            regressor=SGDRegressor(alpha=1e-05,
                                    average=20,
                                    early_stopping=True,
                                    epsilon=1.5,
                                    max_iter=2500,
                                    n_iter_no_change=15,
                                    penalty='elasticnet',
                                    tol=1e-06,
                                    validation_fraction=0.5),
            transformer=FunctionTransformer(np.clip,
                                            kw_args={'a_max': 115,
                                                     'a_min': 0})))
])
model

Pipeline(steps=[('poly_ft',
                 PolynomialFeatures(include_bias=False, interaction_only=True)),
                ('scaler', StandardScaler()),
                ('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=SGDRegressor(alpha=1e-05,
                                                                   average=20,
                                                                   early_stopping=True,
                                                                   epsilon=1.5,
                                                                   max_iter=2500,
                                                                   n_iter_no_change=15,
                                                                   penalty='elasticnet',
                                                                   tol=1e-06,
                                                                   validation_fraction=0

In [None]:
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)

R2=0.779,RMSE=-17.284
(CV) R2=0.763,RMSE=-17.677


In [None]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.733,RMSE=-19.049


With 2-degree terms, we increased the test performance. The training is similar to the previous cases, a little smaller, probably due to the random search.

# SVR

We'll use only the linear kernel, because the algorithm is quadratic on the number of samples. Otherwise, the training times get worser.

## Linear RUL

In [None]:
model = Pipeline([
    ('scaler'   ,   MinMaxScaler()),
    ('svm_reg'  ,   LinearSVR(random_state=42))
])

In [None]:
# We need specific shape 1D arrays for this model
y_train = np.array(y_train).ravel()

In [None]:
GRID_SEARCH = True
if (GRID_SEARCH):
    param_distributions = {
        "scaler": [StandardScaler(), MinMaxScaler()],
        "svm_reg__loss": ["epsilon_insensitive","squared_epsilon_insensitive"],
        "svm_reg__fit_intercept": [True, False],
        "svm_reg__max_iter": [1000,3000,5000],
        "svm_reg__epsilon": [1,1e-1,1e-2,1e-3],
        "svm_reg__tol": [1e-3,1e-4,1e-5,1e-6,1e-7,1e-8],
        "svm_reg__C":[1,2,5,10]
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions, 
                            scorer='r2', ignore_warnings=True)
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 500
max_resources_: 24720
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 49
n_resources: 500
Fitting 5 folds for each of 49 candidates, totalling 245 fits
----------
iter: 1
n_candidates: 17
n_resources: 1500
Fitting 5 folds for each of 17 candidates, totalling 85 fits
----------
iter: 2
n_candidates: 6
n_resources: 4500
Fitting 5 folds for each of 6 candidates, totalling 30 fits
----------
iter: 3
n_candidates: 2
n_resources: 13500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'svm_reg__tol': 0.001, 'svm_reg__max_iter': 5000, 'svm_reg__loss': 'squared_epsilon_insensitive', 'svm_reg__fit_intercept': True, 'svm_reg__epsilon': 0.001, 'svm_reg__C': 1, 'scaler': MinMaxScaler()}
Pipeline(steps=[('scaler', MinMaxScaler()),
                ('svm_reg',
                 LinearSVR(C=1, epsilon=0.001,
                           loss='squared_epsilon_insensitive'

Best Model:
```
('scaler', MinMaxScaler()),
LinearSVR(C=1, epsilon=0.001,
        loss='squared_epsilon_insensitive', max_iter=5000,
        random_state=42, tol=0.001))
```

In [None]:
model = Pipeline([
    ('scaler'   ,   MinMaxScaler()),
    ('svm_reg'  ,   LinearSVR(C=1, epsilon=0.001,
        loss='squared_epsilon_insensitive', max_iter=5000,
        random_state=42, tol=0.001))
])
model

Pipeline(steps=[('scaler', MinMaxScaler()),
                ('svm_reg',
                 LinearSVR(C=1, epsilon=0.001,
                           loss='squared_epsilon_insensitive', max_iter=5000,
                           random_state=42, tol=0.001))])

In [None]:
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)

R2=0.587,RMSE=-63.495
(CV) R2=0.543,RMSE=-65.172


In [None]:
eval.show_result(y_test, model.predict(X_test))

R2=-0.899,RMSE=-57.039


Poor results without non-lineariy of RUL. Comparable to regular LinearRegressor.

## Non-linear RUL

In [None]:
model = Pipeline([
    ('scaler'   ,   MinMaxScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearSVR(random_state=42),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':50})))
    ])

In [None]:
# ~4min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "scaler": [StandardScaler(), MinMaxScaler()],
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(80,150,1),
        "trf_reg__regressor__loss": ["epsilon_insensitive","squared_epsilon_insensitive"],
        "trf_reg__regressor__fit_intercept": [True, False],
        "trf_reg__regressor__max_iter": [1000,3000,5000],
        "trf_reg__regressor__epsilon": [1,1e-1,1e-2,1e-3],
        "trf_reg__regressor__tol": [1e-3,1e-4,1e-5,1e-6,1e-7,1e-8],
        "trf_reg__regressor__C":[1,2,5,10]
    }
    model = search.run_HR_GS(model, X_train, y_train, 
                            param_distributions, ignore_warnings=True)
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 500
max_resources_: 24720
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 49
n_resources: 500
Fitting 5 folds for each of 49 candidates, totalling 245 fits
----------
iter: 1
n_candidates: 17
n_resources: 1500
Fitting 5 folds for each of 17 candidates, totalling 85 fits
----------
iter: 2
n_candidates: 6
n_resources: 4500
Fitting 5 folds for each of 6 candidates, totalling 30 fits
----------
iter: 3
n_candidates: 2
n_resources: 13500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 111}, 'trf_reg__regressor__tol': 1e-05, 'trf_reg__regressor__max_iter': 5000, 'trf_reg__regressor__loss': 'squared_epsilon_insensitive', 'trf_reg__regressor__fit_intercept': True, 'trf_reg__regressor__epsilon': 0.001, 'trf_reg__regressor__C': 2, 'scaler': MinMaxScaler()}
Pipeline(steps=[('scaler', MinMaxScaler()),
         

Best Model:
```
kw_args={'a_max': 111,'a_min': 0}))
('scaler', MinMaxScaler())
LinearSVR(C=2,
            epsilon=0.001,
            loss='squared_epsilon_insensitive',
            max_iter=5000,
            random_state=42,
            tol=1e-05),
```

In [None]:
model = Pipeline([
    ('scaler'   ,   MinMaxScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor=LinearSVR(C=2,
                            epsilon=0.001,
                            loss='squared_epsilon_insensitive',
                            max_iter=5000,
                            random_state=42,
                            tol=1e-05),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':111})))
    ])
model

Pipeline(steps=[('scaler', MinMaxScaler()),
                ('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=LinearSVR(C=2,
                                                                epsilon=0.001,
                                                                loss='squared_epsilon_insensitive',
                                                                max_iter=5000,
                                                                random_state=42,
                                                                tol=1e-05),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f4790d58c20>,
                                                                            kw_args={'a_max': 111,
                                                                                     'a_min': 0})))])

In [None]:
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)

R2=0.779,RMSE=-16.518
(CV) R2=0.765,RMSE=-16.854


In [None]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.744,RMSE=-18.062


Good linear results

## PolyFeatures + Linear RUL

In [None]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('selection', SelectFromModel(DecisionTreeRegressor())),
    ('scaler'   ,   MinMaxScaler()),
    ('svm_reg'  ,   LinearSVR(random_state=42))
])

In [None]:
# ~20min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "scaler": [StandardScaler(), MinMaxScaler()],
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False],
        "svm_reg__loss": ["epsilon_insensitive","squared_epsilon_insensitive"],
        "svm_reg__fit_intercept": [True, False],
        "svm_reg__max_iter": [1000,3000,5000],
        "svm_reg__epsilon": [1,1e-1,1e-2,1e-3],
        "svm_reg__tol": [1e-3,1e-4,1e-5,1e-6,1e-7,1e-8],
        "svm_reg__C":[1,2,5,10]
    }
    model = search.run_HR_GS(model, X_train, y_train, param_distributions,
                            ignore_warnings=True, scorer='r2')
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 500
max_resources_: 24720
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 49
n_resources: 500
Fitting 5 folds for each of 49 candidates, totalling 245 fits
----------
iter: 1
n_candidates: 17
n_resources: 1500
Fitting 5 folds for each of 17 candidates, totalling 85 fits
----------
iter: 2
n_candidates: 6
n_resources: 4500
Fitting 5 folds for each of 6 candidates, totalling 30 fits
----------
iter: 3
n_candidates: 2
n_resources: 13500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'svm_reg__tol': 0.001, 'svm_reg__max_iter': 5000, 'svm_reg__loss': 'squared_epsilon_insensitive', 'svm_reg__fit_intercept': True, 'svm_reg__epsilon': 0.01, 'svm_reg__C': 10, 'scaler': StandardScaler(), 'poly_ft__interaction_only': True, 'poly_ft__include_bias': False, 'poly_ft__degree': 3}
Pipeline(steps=[('poly_ft',
                 PolynomialFeatures(degree=3, include_bias=Fa

Best Model:
```
 PolynomialFeatures(degree=3, include_bias=False,
                                    interaction_only=True)),
('selection',
                 SelectFromModel(estimator=DecisionTreeRegressor())),
('scaler', StandardScaler()),
LinearSVR(C=10, epsilon=0.01,
            loss='squared_epsilon_insensitive', max_iter=5000,
            random_state=42, tol=0.001))
```


In [None]:
model = Pipeline([
    ('poly_ft', PolynomialFeatures(degree=3, include_bias=False,
                                    interaction_only=True)),
    ('selection', SelectFromModel(DecisionTreeRegressor())),
    ('scaler', StandardScaler()),
    ('svm_reg', LinearSVR(C=10, epsilon=0.01,
            loss='squared_epsilon_insensitive', max_iter=5000,
            random_state=42, tol=0.001))
])
model

Pipeline(steps=[('poly_ft',
                 PolynomialFeatures(degree=3, include_bias=False,
                                    interaction_only=True)),
                ('selection',
                 SelectFromModel(estimator=DecisionTreeRegressor())),
                ('scaler', StandardScaler()),
                ('svm_reg',
                 LinearSVR(C=10, epsilon=0.01,
                           loss='squared_epsilon_insensitive', max_iter=5000,
                           random_state=42, tol=0.001))])

In [None]:
# ~20min
model.fit(X_train, y_train)
eval.show_result(y_train, model.predict(X_train))
eval.show_result_cv(y_train, X_train, model)



R2=0.630,RMSE=-60.135




(CV) R2=0.571,RMSE=-62.946




In [None]:
eval.show_result(y_test, model.predict(X_test))

R2=-0.685,RMSE=-53.734


Minor benefit from polynomial features in linear RUL.  However, most of the time, it didn't converge, even with 5k iterations.

## PolyFeatures + Non-Linear RUL

In [None]:
model = Pipeline([
    ('poly_ft'  ,   PolynomialFeatures()),
    ('selection', SelectFromModel(DecisionTreeRegressor())),
    ('scaler'   ,   StandardScaler()),
    ('trf_reg' ,TransformedTargetRegressor(
        check_inverse=False,
        regressor   = LinearSVR(random_state=42),
        transformer = FunctionTransformer(np.clip, 
                                          kw_args={'a_min':0,'a_max':96})))
])

In [None]:
# ~15min
GRID_SEARCH = False
if (GRID_SEARCH):
    param_distributions = {
        "scaler": [StandardScaler(), MinMaxScaler()],
        "trf_reg__transformer__kw_args": search.generate_clip_dicts(80,150,1),
        "trf_reg__regressor__loss": ["epsilon_insensitive","squared_epsilon_insensitive"],
        "trf_reg__regressor__fit_intercept": [True, False],
        "trf_reg__regressor__max_iter": [1000,3000,5000],
        "trf_reg__regressor__epsilon": [1,1e-1,1e-2,1e-3],
        "trf_reg__regressor__tol": [1e-3,1e-4,1e-5,1e-6,1e-7,1e-8],
        "trf_reg__regressor__C":[1,2,5,10],
        "poly_ft__degree": [1,2,3],
        "poly_ft__interaction_only": [False, True],
        "poly_ft__include_bias": [True, False],
    }
    model = search.run_HR_GS(model, X_train, y_train, 
                             param_distributions, ignore_warnings=True)
    print(model)

n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 500
max_resources_: 24720
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 49
n_resources: 500
Fitting 5 folds for each of 49 candidates, totalling 245 fits
----------
iter: 1
n_candidates: 17
n_resources: 1500
Fitting 5 folds for each of 17 candidates, totalling 85 fits
----------
iter: 2
n_candidates: 6
n_resources: 4500
Fitting 5 folds for each of 6 candidates, totalling 30 fits
----------
iter: 3
n_candidates: 2
n_resources: 13500
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Best params:  {'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 93}, 'trf_reg__regressor__tol': 0.001, 'trf_reg__regressor__max_iter': 3000, 'trf_reg__regressor__loss': 'epsilon_insensitive', 'trf_reg__regressor__fit_intercept': True, 'trf_reg__regressor__epsilon': 1, 'trf_reg__regressor__C': 10, 'scaler': StandardScaler(), 'poly_ft__interaction_only': False, 'poly_ft__include_bias': Tr

Best Model:
```
{'trf_reg__transformer__kw_args': {'a_min': 0, 'a_max': 93}, 
PolynomialFeatures(degree=3)
('scaler', StandardScaler()),
LinearSVR(C=10, epsilon=1,
        max_iter=3000,
        random_state=42,
        tol=0.001),
```


In [None]:
model = Pipeline([
    ('poly_ft',PolynomialFeatures(degree=3)),
    ('selection',
                 SelectFromModel(estimator=DecisionTreeRegressor())),
    ('scaler', StandardScaler()),
    ('trf_reg',
        TransformedTargetRegressor(
            check_inverse=False,
            regressor=LinearSVR(C=10, epsilon=1,
                                max_iter=3000,
                                random_state=42,
                                tol=0.001),
            transformer=FunctionTransformer(np.clip,
                                            kw_args={'a_max': 93,
                                                     'a_min': 0})))
])
model

Pipeline(steps=[('poly_ft', PolynomialFeatures(degree=3)),
                ('selection',
                 SelectFromModel(estimator=DecisionTreeRegressor())),
                ('scaler', StandardScaler()),
                ('trf_reg',
                 TransformedTargetRegressor(check_inverse=False,
                                            regressor=LinearSVR(C=10, epsilon=1,
                                                                max_iter=3000,
                                                                random_state=42,
                                                                tol=0.001),
                                            transformer=FunctionTransformer(func=<function clip at 0x7f4790d58c20>,
                                                                            kw_args={'a_max': 93,
                                                                                     'a_min': 0})))])

In [None]:
# ~15min
model.fit(X_train, y_train)
reclipped_y =  model['trf_reg'].transformer.transform(y_train)
eval.show_result(reclipped_y, model.predict(X_train))
eval.show_result_cv(reclipped_y, X_train, model)



R2=0.823,RMSE=-11.806
(CV) R2=0.810,RMSE=-12.131


In [None]:
reclipped_y =  model['trf_reg'].transformer.transform(y_test)
eval.show_result(reclipped_y, model.predict(X_test))

R2=0.764,RMSE=-14.513


There was a case of failing to converge. But we achieved the best results in testing using linear kernel models. It is also the same model from FD001. However, the test performance was a little lower, mostly because it is a little harder.