In [99]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn import svm
# from xgboost import XGBRegressor
# from lightgbm import LGBMRegressor

from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.feature_selection import RFECV, RFE
# from sklearn.linear_model import Ridge, Lasso
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from scipy.stats import randint

from scipy.stats import expon, reciprocal
import warnings
warnings.filterwarnings('ignore')

# Model Tuning: Preprocess to Hyperparameter Tuning
Linear Regression & SVM Regressor

<br>

## Linear Regression

<br>

### Result
- Best Cross Validation (StratifiedKFold, n_splits=5) MSE
    - 0.420
- Test MSE
    - 0.471

<br>

### Setting
- Local Outlier Factor
    - `n_neighbors` = 23
    - applied to stratified train set (StandardScaler)  
- Pipeline
```python
{'dimension_reduce': PCA(copy=True, iterated_power='auto', n_components=32, random_state=2021, svd_solver='auto', tol=0.0, whiten=False),
 'feature_selection': RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), n_features_to_select=65, step=1, verbose=0),
 'poly': PolynomialFeatures(degree=2, include_bias=False, interaction_only=True,
                   order='C'), 'poly__degree': 2, 'poly__include_bias': False,
 'regressor': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 'scale': MinMaxScaler(copy=True, feature_range=(0, 1))}
```

<br>

### Model Performance in Detail
- accuracy (반올림한 예측값과 실제 타깃이 같은 비율)
    - 0.603
- mse_by_target = 각 실제 타깃별로 계산한 MSE

target|accuracy|mse_by_target
-|-|-
3|0.000000|1.737492
4|0.000000|2.368812
5|0.758929|0.317865
6|0.611650|0.286689
7|0.312500|0.770513
8|0.000000|2.146317

<br>

## SVM Regressor

<br>

### Result
- Best Cross Validation (StratifiedKFold, n_splits=5) MSE
    - 0.402
- Test MSE
    - 0.495

<br>

### Setting
- Local Outlier Factor
    - `n_neighbors` = 22
    - applied to NON-stratified train set (StandardScaler)  
- Pipeline

```python
{'scale': StandardScaler(copy=True, with_mean=True, with_std=True), 
 'regressor': SVR(C=100, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001, kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)}
```

<br>

### Model Performance in Detail
- accuracy (반올림한 예측값과 실제 타깃이 같은 비율)
    - 0.59
- mse_by_target = 각 실제 타깃별로 계산한 MSE


target|accuracy|mse_by_target
-|-|-
3|0.000000|4.496266
4|0.000000|1.753694
5|0.763158|0.193075
6|0.621359|0.269660
7|0.148148|0.934924
8|0.000000|2.211116


### Load Data

In [100]:
# load data
red = pd.read_csv('./data/winequality-red.csv', sep=';')
red = red.drop_duplicates(keep='last', ignore_index=True)
red.head(3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
1,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
2,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6


In [101]:
RANDOM_SEED = 2021

# Tuning LinearRegression

In [102]:
# base pipeline
pipe_base = Pipeline([
                ('scale', MinMaxScaler()),
                ('regressor', LinearRegression())
                ])

# base parameter
param_base = [              
              {'regressor': [LinearRegression()],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()],
              }
             ]

In [103]:
def random_grid_search_with_pipeline(Xtr, Xte, ytr, yte, pipe, params, iter_num, seed,
                                     is_not_mute=1, stratify=True):
    if stratify:
        r_grid = RandomizedSearchCV(pipe, params,
                             scoring = 'neg_mean_squared_error',
                             cv=StratifiedKFold(n_splits=5,
                                                  shuffle=True,
                                                  random_state=seed),
                             verbose=is_not_mute, n_jobs=-1, n_iter=iter_num, random_state=seed)
    else:
        r_grid = RandomizedSearchCV(pipe, params,
                             scoring = 'neg_mean_squared_error',
                             cv=KFold(n_splits=5,
                                      shuffle=True,
                                      random_state=seed),
                             verbose=is_not_mute, n_jobs=-1, n_iter=iter_num, random_state=seed)
    
    r_grid.fit(Xtr, ytr)
    if is_not_mute:
        print(r_grid.best_params_)
        print('Best CV MSE', -1 * r_grid.best_score_)
        print('Test MSE', np.mean(np.square(r_grid.predict(Xte) - yte)))
    return r_grid

In [104]:
# LOF tuning
def LOF_tuning(df, param_dict, pipe, pipe_param_dict, stratify = True, seed=2021):
    
    def split_data(df, seed, stratify=True):
        X = df.drop('quality',axis=1)
        y = df['quality']
        if stratify:
            return train_test_split(X, y,
                                    test_size=0.2,
                                    stratify=y,
                                    shuffle=True,
                                    random_state=seed)
        else:
            return train_test_split(X, y,
                                    test_size=0.2,
                                    shuffle=True,
                                    random_state=seed)
    
    Xtrain, Xtest, ytrain, ytest = split_data(df, seed=seed, stratify=stratify)
    
    # normalize data
    X_scaler = param_dict['scaler']
    X_train_std = X_scaler.fit_transform(Xtrain)
    X_test_std = X_scaler.transform(Xtest)
    
    best_cv_score = 100
    best_n_neighbors = 0
    best_grid = ''
    best_dataset = []
    
    # outlier removal using Local Outlier Factor
    n_neighbors_list = param_dict['n_neighbors']
    for i, nn in enumerate(n_neighbors_list):
        lof = LocalOutlierFactor(n_neighbors=nn)
        y_pred_std = lof.fit_predict(X_train_std)
        outlier_idx = np.where(y_pred_std==-1)[0]
        
        X_train_lof, X_test_lof, y_train_lof, y_test_lof = split_data(df.drop(outlier_idx),
                                                                     seed=seed,
                                                                     stratify=stratify)        
        rand_grid = random_grid_search_with_pipeline(X_train_lof,
                                                     X_test_lof,
                                                     y_train_lof,
                                                     y_test_lof,
                                                     pipe,
                                                     pipe_param_dict,
                                                     iter_num=1000,
                                                     seed=seed,
                                                     is_not_mute=0,
                                                     stratify=stratify)
    
        if (-1 * rand_grid.best_score_) < best_cv_score:
            best_cv_score = -1 * rand_grid.best_score_
            best_grid = rand_grid
            best_n_neighbors = nn
            best_dataset = X_train_lof, X_test_lof, y_train_lof, y_test_lof
            print(f'update: {best_cv_score}')
        
        if i % 10 == 0:
            print('/', end=' ')
        
    return best_dataset, best_n_neighbors

### LOF Tuning

In [82]:
lof_param_mms = {'scaler':MinMaxScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_mms, best_neighbor_param_mms = LOF_tuning(red, lof_param_mms,
                                                        pipe_base, param_base,
                                                        seed=RANDOM_SEED)

update: 0.44235386764106455
/ update: 0.4397925084980251
update: 0.43973623431059394
/ update: 0.4395969227147248
update: 0.43823114966631493
/ update: 0.43736501161903985
update: 0.4372322596052342
update: 0.43531433888396764


In [105]:
lof_param_ss = {'scaler':StandardScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_ss, best_neighbor_param_ss = LOF_tuning(red, lof_param_ss,
                                                      pipe_base, param_base,
                                                      seed=RANDOM_SEED)

update: 0.4537990348545894
/ update: 0.44976770994854903
update: 0.44823105692754084
update: 0.44534316785385136
update: 0.44316195848503276
update: 0.44019491939965744
/ update: 0.43230276932920597
/ 

In [84]:
lof_param_rs = {'scaler':RobustScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_rs, best_neighbor_param_rs = LOF_tuning(red, lof_param_rs,
                                                      pipe_base, param_base,
                                                      seed=RANDOM_SEED)

update: 0.44349581805409233
/ update: 0.4432501610854895
/ update: 0.43966768788862376
/ update: 0.43667782347747497
update: 0.43632664159487444


In [106]:
# best n_neighbors = 23
print(best_neighbor_param_ss)
X_train_lof, X_test_lof, y_train_lof, y_test_lof = best_lof_data_ss

23


### Model Tuning

In [107]:
pipe_no_prep = Pipeline([
                ('scale', MinMaxScaler()),
                ('regressor', LinearRegression())
                ])

param_no_prep = [              
              {'regressor': [LinearRegression()],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()]
              }
             ]

r_grd_no_prep = random_grid_search_with_pipeline(X_train_lof, X_test_lof, y_train_lof, y_test_lof,
                                pipe_no_prep, param_no_prep, iter_num=1000, seed=RANDOM_SEED)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
{'scale': MinMaxScaler(copy=True, feature_range=(0, 1)), 'regressor': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)}
Best CV MSE 0.43230276932920597
Test MSE 0.4777033284130501


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    0.2s finished


In [108]:
pipe_rfe = Pipeline([
                ('scale', MinMaxScaler()),
                ('poly', PolynomialFeatures()),
                ('feature_selection', RFE(LinearRegression())),
                ('dimension_reduce', PCA()),
                ('regressor', LinearRegression())
                ])

param_grid_rfe = [              
              {'regressor': [LinearRegression()],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()],
               'poly':[PolynomialFeatures()],
               'poly__degree':[2],
               'poly__interaction_only':[True, False],
               'poly__include_bias':[True, False],
              'feature_selection' : [RFE(LinearRegression())],
              'feature_selection__n_features_to_select' : randint(low=10, high=70),
              'dimension_reduce' : [PCA(random_state=RANDOM_SEED)],
              'dimension_reduce__n_components' : randint(low=10, high=50)
              }
             ]

r_grd = random_grid_search_with_pipeline(X_train_lof, X_test_lof, y_train_lof, y_test_lof,
                                pipe_rfe, param_grid_rfe, iter_num=1000, seed=RANDOM_SEED)

Fitting 5 folds for each of 1000 candidates, totalling 5000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.0s
[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:   12.8s
[Parallel(n_jobs=-1)]: Done 876 tasks      | elapsed:   25.5s
[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:   43.3s
[Parallel(n_jobs=-1)]: Done 2476 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 3576 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 4876 tasks      | elapsed:  2.3min


{'dimension_reduce': PCA(copy=True, iterated_power='auto', n_components=32, random_state=2021,
    svd_solver='auto', tol=0.0, whiten=False), 'dimension_reduce__n_components': 32, 'feature_selection': RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                               normalize=False),
    n_features_to_select=65, step=1, verbose=0), 'feature_selection__n_features_to_select': 65, 'poly': PolynomialFeatures(degree=2, include_bias=False, interaction_only=True,
                   order='C'), 'poly__degree': 2, 'poly__include_bias': False, 'poly__interaction_only': True, 'regressor': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False), 'scale': MinMaxScaler(copy=True, feature_range=(0, 1))}
Best CV MSE 0.41978718778660085
Test MSE 0.4709478820711314


[Parallel(n_jobs=-1)]: Done 5000 out of 5000 | elapsed:  2.3min finished


### Result

In [109]:
def get_best_grid_model(grid_param_dict, Xtr, Xte, ytr, yte, y_scaler=None, no_prep=False):
    
    def pipeline_application(pipeline, dataX, datay=None, test=False):
        result = dataX
        if not test:
            for pipe in pipeline:
                try:
                    result = pipe.fit_transform(result)
                except:
                    result = pipe.fit_transform(result, datay)
            return result, pipeline
        else:
            for pipe in pipeline:
                result = pipe.transform(result)
            return result
        
    def make_2d_array(data):
        return np.array(data).reshape(-1,1)

    scaler = grid_param_dict['scale']
    model = grid_param_dict['regressor']
    
    if y_scaler:
        ys = y_scaler
        ytr = ys.fit_transform(make_2d_array(ytr))
    
    if no_prep:
        pipeline = [scaler]
    else:
        poly = grid_param_dict['poly']
        feature_selector = grid_param_dict['feature_selection']
        dim_reduce = grid_param_dict['dimension_reduce']
        pipeline = [scaler, poly, feature_selector, dim_reduce]

    Xtr_preprocess, pipeline = pipeline_application(pipeline, Xtr, datay=ytr)
    Xte_preprocess = pipeline_application(pipeline, Xte, test=True)
    
    model.fit(Xtr_preprocess, ytr)
    if y_scaler:
        pred = model.predict(Xte_preprocess)
        pred = ys.inverse_transform(model.predict(Xte_preprocess)).flatten()
    else:
        pred = model.predict(Xte_preprocess)
    
    return model, pred, np.mean(np.square(pred - yte))

In [110]:
best_lin_reg, pred, test_mse = get_best_grid_model(r_grd.best_params_,
                                               X_train_lof, X_test_lof, y_train_lof, y_test_lof)

In [111]:
test_mse

0.4709478820711314

### Result Analysis

In [114]:
def result_analysis(y_hat, y):
    # 반올림한 예측값을 사용하여 accuracy 계산
    acc = np.sum(np.round(y_hat) == np.array(y)) / len(y)
    
    # 반올림한 예측값과 실제 타깃이 같지 않은 인덱스
    pred_false_index = pd.Series(np.round(y_hat) == np.array(y)) == False
    
    # 모델이 맞추는 데 실패한 타깃의 실제 값
    pred_false_target = pd.Series(y).reset_index(drop=True)[pred_false_index]
    
    # 타깃 값(3,4,...,8) 별 갯수
    false_target_vc = pred_false_target.value_counts()
    real_target_vc = y.value_counts()
    
    # 타깃 값 별로 반올림한 예측값이 동일할 확률을 데이터프레임으로 저장
    df_acc = pd.DataFrame(1-(false_target_vc / real_target_vc)).rename(columns={'quality':'accuracy'})
    
    # 각 타깃 값(3,4,...,8) 별로 각각의 MSE를 계산하여 저장
    mask_target_by_unq = [(np.array(y)==unq_target) for unq_target in np.unique(y)]
    mse_by_target = []
    
    for idx in df_acc.index.tolist():
        mask = mask_target_by_unq[idx-3]
        target_masked = np.array(y)[mask]
        pred_masked = np.array(y_hat)[mask]
        mse = np.mean(np.square(pred_masked - target_masked))
        mse_by_target.append(mse)

    df_acc['mse_by_target'] = mse_by_target
    
    return df_acc.sort_index(), acc

In [115]:
df_acc, acc = result_analysis(pred, y_test_lof)
print(acc)
df_acc

0.6030534351145038


Unnamed: 0,accuracy,mse_by_target
3,0.0,1.737492
4,0.0,2.368812
5,0.758929,0.317865
6,0.61165,0.286689
7,0.3125,0.770513
8,0.0,2.146317


# Tuning SVMRegressor

In [87]:
# base pipeline
pipe_base = Pipeline([
                ('scale', MinMaxScaler()),
                ('regressor', svm.SVR())
                ])

# base parameter
param_base = [              
              {'regressor': [svm.SVR()],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()],
              }
             ]

### Tuning LOF

In [89]:
lof_param_mms = {'scaler':MinMaxScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_mms, best_neighbor_param_mms = LOF_tuning(red, lof_param_mms,
                                                        pipe_base, param_base,
                                                        seed=RANDOM_SEED,
                                                       stratify=False)

update: 0.4078308482033344
/ / update: 0.4020972780106876
/ 

In [90]:
lof_param_ss = {'scaler':StandardScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_ss, best_neighbor_param_ss = LOF_tuning(red, lof_param_ss,
                                                      pipe_base, param_base,
                                                      seed=RANDOM_SEED,
                                                     stratify=False)

update: 0.4062723245332293
/ update: 0.40103013734252463
/ update: 0.39465283240868637
/ 

In [91]:
lof_param_rs = {'scaler':RobustScaler(),
            'n_neighbors':range(10, 40)}
best_lof_data_rs, best_neighbor_param_rs = LOF_tuning(red, lof_param_rs,
                                                      pipe_base, param_base,
                                                      seed=RANDOM_SEED,
                                                     stratify=False)

update: 0.4121045755929818
/ update: 0.41001452578243025
/ update: 0.3990683812318899
/ update: 0.39596387373699277


In [92]:
# best n_neighbors = 22
print(best_neighbor_param_ss)
X_train_lof, X_test_lof, y_train_lof, y_test_lof = best_lof_data_ss

22


### Pipeline & Model Tuning

In [93]:
pipe_no_prep = Pipeline([
                ('scale', MinMaxScaler()),
                ('regressor', svm.SVR())
                ])

param_grid_no_prep = [              
              {'regressor': [svm.SVR()],
               'regressor__kernel':['rbf', 'linear','sigmoid'],
               'regressor__C': [0.001, 0.1, 0.1, 10, 100],
               'regressor__gamma':['auto', 'scale', 1, 0.1, 1e-2, 1e-3],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()]
              }
             ]

# non-stratified data
r_grd_no_prep = random_grid_search_with_pipeline(X_train_lof, X_test_lof, y_train_lof, y_test_lof,
                                pipe_no_prep, param_grid_no_prep, iter_num=1000, seed=RANDOM_SEED,
                                stratify=False)

Fitting 5 folds for each of 270 candidates, totalling 1350 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 104 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done 704 tasks      | elapsed:   10.1s
[Parallel(n_jobs=-1)]: Done 1137 tasks      | elapsed:  1.0min


{'scale': StandardScaler(copy=True, with_mean=True, with_std=True), 'regressor__kernel': 'rbf', 'regressor__gamma': 0.001, 'regressor__C': 100, 'regressor': SVR(C=100, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)}
Best CV MSE 0.40163059908892784
Test MSE 0.4949720689979241


[Parallel(n_jobs=-1)]: Done 1350 out of 1350 | elapsed:  3.1min finished


In [94]:
pipe = Pipeline([
                ('scale', MinMaxScaler()),
                ('poly', PolynomialFeatures()),
                ('feature_selection', RFE(svm.SVR())),
                ('dimension_reduce', PCA()),
                ('regressor', svm.SVR())
                ])

param_grid = [              
              {'regressor': [svm.SVR()],
               'regressor__kernel':['rbf', 'linear','sigmoid'],
               'regressor__C': [0.001, 0.1, 0.1, 10, 100],
               'regressor__gamma':['auto', 'scale', 1, 0.1, 1e-2, 1e-3],
               'scale':[MinMaxScaler(), StandardScaler(), RobustScaler()],
               'poly':[PolynomialFeatures()],
               'poly__degree':[2],
               'poly__interaction_only':[True, False],
               'poly__include_bias':[True, False],
              'feature_selection' : [RFE(svm.SVR())],
              'feature_selection__n_features_to_select' : randint(low=10, high=70),
              'dimension_reduce' : [PCA(random_state=RANDOM_SEED)],
              'dimension_reduce__n_components' : randint(low=10, high=50)
              }
             ]

# non-stratified data
r_grd = random_grid_search_with_pipeline(X_train_lof, X_test_lof, y_train_lof, y_test_lof,
                                pipe, param_grid, iter_num=1000, seed=RANDOM_SEED,
                                stratify=False)

Fitting 5 folds for each of 1000 candidates, totalling 5000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    3.1s
[Parallel(n_jobs=-1)]: Done 369 tasks      | elapsed:   21.9s
[Parallel(n_jobs=-1)]: Done 620 tasks      | elapsed:   50.4s
[Parallel(n_jobs=-1)]: Done 971 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 1422 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 1972 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 2622 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 3372 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 4222 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 5000 out of 5000 | elapsed:  5.5min finished


{'dimension_reduce': PCA(copy=True, iterated_power='auto', n_components=19, random_state=2021,
    svd_solver='auto', tol=0.0, whiten=False), 'dimension_reduce__n_components': 19, 'feature_selection': RFE(estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                  gamma='scale', kernel='rbf', max_iter=-1, shrinking=True,
                  tol=0.001, verbose=False),
    n_features_to_select=68, step=1, verbose=0), 'feature_selection__n_features_to_select': 68, 'poly': PolynomialFeatures(degree=2, include_bias=True, interaction_only=True,
                   order='C'), 'poly__degree': 2, 'poly__include_bias': True, 'poly__interaction_only': True, 'regressor': SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False), 'regressor__C': 10, 'regressor__gamma': 0.1, 'regressor__kernel': 'rbf', 'scale': MinMaxScaler(copy=True, feature_range=(0, 1))}
Best CV MSE 0.40403867175005387

In [95]:
best_lin_reg, pred, test_mse = get_best_grid_model(r_grd_no_prep.best_params_,
                                               X_train_lof, X_test_lof, y_train_lof, y_test_lof,
                                                  no_prep=True)
print(test_mse)

0.4949720689979241


In [96]:
df_acc, accuracy = result_analysis(pred, y_test_lof)

print(accuracy)
df_acc

0.5909090909090909


Unnamed: 0,accuracy,mse_by_target
3,0.0,4.496266
4,0.1,1.753694
5,0.763158,0.193075
6,0.621359,0.26966
7,0.148148,0.934924
8,0.0,2.211116
