# Day 09. Exercise 00
# Regularization

## 0. Imports

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

## 1. Preprocessing

1. Read the file `dayofweek.csv` that you used in the previous day to a dataframe.
2. Using `train_test_split` with parameters `test_size=0.2`, `random_state=21` get `X_train`, `y_train`, `X_test`, `y_test`. Use the additional parameter `stratify`.

In [2]:
df = pd.read_csv('../data/dayofweek.csv')

In [3]:
X = df.drop('dayofweek', axis=1)
y = df['dayofweek']

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=y)

## 2. Logreg regularization

### a. Default regularization

1. Train a baseline model with the only parameters `random_state=21`, `fit_intercept=False`.
2. Use stratified K-fold cross-validation with `10` splits to evaluate the accuracy of the model


The result of the code where you trained and evaluated the baseline model should be exactly like this (use `%%time` to get the info about how long it took to run the cell):

```
train -  0.62902   |   valid -  0.59259
train -  0.64633   |   valid -  0.62963
train -  0.63479   |   valid -  0.56296
train -  0.65622   |   valid -  0.61481
train -  0.63397   |   valid -  0.57778
train -  0.64056   |   valid -  0.59259
train -  0.64138   |   valid -  0.65926
train -  0.65952   |   valid -  0.56296
train -  0.64333   |   valid -  0.59701
train -  0.63674   |   valid -  0.62687
Average accuracy on crossval is 0.60165
Std is 0.02943
```

In [5]:
%%time
logreg = LogisticRegression(random_state=21, fit_intercept=False)
logreg.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(logreg, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.65926   |   valid - 0.65926
train - 0.62222   |   valid - 0.62222
train - 0.60000   |   valid - 0.60000
train - 0.64444   |   valid - 0.64444
train - 0.60741   |   valid - 0.60741
train - 0.60000   |   valid - 0.60000
train - 0.60000   |   valid - 0.60000
train - 0.54074   |   valid - 0.54074
train - 0.66418   |   valid - 0.66418
train - 0.61194   |   valid - 0.61194
Average accuracy on crossval is 0.61502
STD is 0.03399
CPU times: total: 516 ms
Wall time: 349 ms


### b. Optimizing regularization parameters

1. In the cells below try different values of penalty: `none`, `l1`, `l2` – you can change the values of solver too.

In [6]:
%%time
logreg_none = LogisticRegression(random_state=21, fit_intercept=False, penalty='l1', solver='liblinear')
logreg_none.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(logreg_none, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.64444   |   valid - 0.64444
train - 0.60000   |   valid - 0.60000
train - 0.60000   |   valid - 0.60000
train - 0.62963   |   valid - 0.62963
train - 0.60000   |   valid - 0.60000
train - 0.59259   |   valid - 0.59259
train - 0.59259   |   valid - 0.59259
train - 0.52593   |   valid - 0.52593
train - 0.66418   |   valid - 0.66418
train - 0.57463   |   valid - 0.57463
Average accuracy on crossval is 0.60240
STD is 0.03627
CPU times: total: 188 ms
Wall time: 180 ms


In [7]:
%%time
logreg_none = LogisticRegression(random_state=21, fit_intercept=False, penalty='l2')
logreg_none.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(logreg_none, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.65926   |   valid - 0.65926
train - 0.62222   |   valid - 0.62222
train - 0.60000   |   valid - 0.60000
train - 0.64444   |   valid - 0.64444
train - 0.60741   |   valid - 0.60741
train - 0.60000   |   valid - 0.60000
train - 0.60000   |   valid - 0.60000
train - 0.54074   |   valid - 0.54074
train - 0.66418   |   valid - 0.66418
train - 0.61194   |   valid - 0.61194
Average accuracy on crossval is 0.61502
STD is 0.03399
CPU times: total: 406 ms
Wall time: 304 ms


In [8]:
%%time
logreg_none = LogisticRegression(random_state=21, fit_intercept=False, solver='liblinear')
logreg_none.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(logreg_none, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.62222   |   valid - 0.62222
train - 0.59259   |   valid - 0.59259
train - 0.58519   |   valid - 0.58519
train - 0.60741   |   valid - 0.60741
train - 0.59259   |   valid - 0.59259
train - 0.58519   |   valid - 0.58519
train - 0.60000   |   valid - 0.60000
train - 0.55556   |   valid - 0.55556
train - 0.66418   |   valid - 0.66418
train - 0.58955   |   valid - 0.58955
Average accuracy on crossval is 0.59945
STD is 0.02701
CPU times: total: 93.8 ms
Wall time: 107 ms


## 3. SVM regularization

### a. Default regularization

1. Train a baseline model with the only parameters `probability=True`, `kernel='linear'`, `random_state=21`.
2. Use stratified K-fold cross-validation with `10` splits to evaluate the accuracy of the model.
3. The format of the result of the code where you trained and evaluated the baseline model should be similar to what you have got for the logreg.

In [9]:
svm = SVC(kernel='linear', probability=True, random_state=21)
svm.fit(x_train, y_train)
skf_svm = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(svm, x_train, y_train, cv=skf_svm, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.68148   |   valid - 0.68148
train - 0.64444   |   valid - 0.64444
train - 0.66667   |   valid - 0.66667
train - 0.65926   |   valid - 0.65926
train - 0.63704   |   valid - 0.63704
train - 0.68148   |   valid - 0.68148
train - 0.61481   |   valid - 0.61481
train - 0.57778   |   valid - 0.57778
train - 0.72388   |   valid - 0.72388
train - 0.64179   |   valid - 0.64179
Average accuracy on crossval is 0.65286
STD is 0.03800


### b. Optimizing regularization parameters

1. In the cells below try different values of the parameter `C`.

In [10]:
svm = SVC(kernel='linear', probability=True, random_state=21, C=0.1)
svm.fit(x_train, y_train)
skf_svm = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(svm, x_train, y_train, cv=skf_svm, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.55556   |   valid - 0.55556
train - 0.59259   |   valid - 0.59259
train - 0.54074   |   valid - 0.54074
train - 0.60000   |   valid - 0.60000
train - 0.57037   |   valid - 0.57037
train - 0.53333   |   valid - 0.53333
train - 0.54074   |   valid - 0.54074
train - 0.52593   |   valid - 0.52593
train - 0.58209   |   valid - 0.58209
train - 0.53731   |   valid - 0.53731
Average accuracy on crossval is 0.55787
STD is 0.02522


In [11]:
svm = SVC(kernel='linear', probability=True, random_state=21, C=3)
svm.fit(x_train, y_train)
skf_svm = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(svm, x_train, y_train, cv=skf_svm, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.68889   |   valid - 0.68889
train - 0.69630   |   valid - 0.69630
train - 0.67407   |   valid - 0.67407
train - 0.73333   |   valid - 0.73333
train - 0.64444   |   valid - 0.64444
train - 0.69630   |   valid - 0.69630
train - 0.65926   |   valid - 0.65926
train - 0.58519   |   valid - 0.58519
train - 0.76119   |   valid - 0.76119
train - 0.67910   |   valid - 0.67910
Average accuracy on crossval is 0.68181
STD is 0.04554


In [12]:
svm = SVC(kernel='linear', probability=True, random_state=21, C=8)
svm.fit(x_train, y_train)
skf_svm = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(svm, x_train, y_train, cv=skf_svm, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.70370   |   valid - 0.70370
train - 0.71111   |   valid - 0.71111
train - 0.72593   |   valid - 0.72593
train - 0.73333   |   valid - 0.73333
train - 0.68148   |   valid - 0.68148
train - 0.74815   |   valid - 0.74815
train - 0.68148   |   valid - 0.68148
train - 0.63704   |   valid - 0.63704
train - 0.78358   |   valid - 0.78358
train - 0.70149   |   valid - 0.70149
Average accuracy on crossval is 0.71073
STD is 0.03837


## 4. Tree

### a. Default regularization

1. Train a baseline model with the only parameter `max_depth=10` and `random_state=21`.
2. Use stratified K-fold cross-validation with `10` splits to evaluate the accuracy of the model.
3. The format of the result of the code where you trained and evaluated the baseline model should be similar to what you have got for the logreg.

In [13]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=10)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.77037   |   valid - 0.77037
train - 0.70370   |   valid - 0.70370
train - 0.72593   |   valid - 0.72593
train - 0.74815   |   valid - 0.74815
train - 0.68889   |   valid - 0.68889
train - 0.74074   |   valid - 0.74074
train - 0.60741   |   valid - 0.60741
train - 0.71111   |   valid - 0.71111
train - 0.79104   |   valid - 0.79104
train - 0.70896   |   valid - 0.70896
Average accuracy on crossval is 0.71963
STD is 0.04791


### b. Optimizing regularization parameters

1. In the cells below try different values of the parameter `max_depth`.
2. As a bonus, play with other regularization parameters trying to find the best combination.

In [14]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=5)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.60741   |   valid - 0.60741
train - 0.52593   |   valid - 0.52593
train - 0.60000   |   valid - 0.60000
train - 0.58519   |   valid - 0.58519
train - 0.51111   |   valid - 0.51111
train - 0.53333   |   valid - 0.53333
train - 0.51852   |   valid - 0.51852
train - 0.51111   |   valid - 0.51111
train - 0.56716   |   valid - 0.56716
train - 0.48507   |   valid - 0.48507
Average accuracy on crossval is 0.54448
STD is 0.04014


In [15]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=20)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.88889   |   valid - 0.88889
train - 0.86667   |   valid - 0.86667
train - 0.91111   |   valid - 0.91111
train - 0.92593   |   valid - 0.92593
train - 0.87407   |   valid - 0.87407
train - 0.89630   |   valid - 0.89630
train - 0.86667   |   valid - 0.86667
train - 0.88148   |   valid - 0.88148
train - 0.89552   |   valid - 0.89552
train - 0.87313   |   valid - 0.87313
Average accuracy on crossval is 0.88798
STD is 0.01862


In [16]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=40)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.90370   |   valid - 0.90370
train - 0.85926   |   valid - 0.85926
train - 0.90370   |   valid - 0.90370
train - 0.92593   |   valid - 0.92593
train - 0.88148   |   valid - 0.88148
train - 0.88889   |   valid - 0.88889
train - 0.88148   |   valid - 0.88148
train - 0.88148   |   valid - 0.88148
train - 0.88806   |   valid - 0.88806
train - 0.86567   |   valid - 0.86567
Average accuracy on crossval is 0.88797
STD is 0.01840


In [17]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=20, max_features=15)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.82222   |   valid - 0.82222
train - 0.83704   |   valid - 0.83704
train - 0.90370   |   valid - 0.90370
train - 0.88889   |   valid - 0.88889
train - 0.86667   |   valid - 0.86667
train - 0.89630   |   valid - 0.89630
train - 0.87407   |   valid - 0.87407
train - 0.82963   |   valid - 0.82963
train - 0.93284   |   valid - 0.93284
train - 0.78358   |   valid - 0.78358
Average accuracy on crossval is 0.86349
STD is 0.04267


In [18]:
DTC = DecisionTreeClassifier(random_state=21, max_depth=10, ccp_alpha=0.1)
DTC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(DTC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.22963   |   valid - 0.22963
train - 0.22963   |   valid - 0.22963
train - 0.22963   |   valid - 0.22963
train - 0.22963   |   valid - 0.22963
train - 0.23704   |   valid - 0.23704
train - 0.23704   |   valid - 0.23704
train - 0.23704   |   valid - 0.23704
train - 0.23704   |   valid - 0.23704
train - 0.23881   |   valid - 0.23881
train - 0.23881   |   valid - 0.23881
Average accuracy on crossval is 0.23443
STD is 0.00397


## 5. Random forest

### a. Default regularization

1. Train a baseline model with the only parameters `n_estimators=50`, `max_depth=14`, `random_state=21`.
2. Use stratified K-fold cross-validation with `10` splits to evaluate the accuracy of the model.
3. The format of the result of the code where you trained and evaluated the baseline model should be similar to what you have got for the logreg.

In [19]:
RFC = RandomForestClassifier(n_estimators=50, random_state=21, max_depth=14)
RFC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(RFC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.85185   |   valid - 0.85185
train - 0.85926   |   valid - 0.85926
train - 0.91852   |   valid - 0.91852
train - 0.91852   |   valid - 0.91852
train - 0.88148   |   valid - 0.88148
train - 0.86667   |   valid - 0.86667
train - 0.88889   |   valid - 0.88889
train - 0.87407   |   valid - 0.87407
train - 0.93284   |   valid - 0.93284
train - 0.86567   |   valid - 0.86567
Average accuracy on crossval is 0.88578
STD is 0.02673


### b. Optimizing regularization parameters

1. In the new cells try different values of the parameters `max_depth` and `n_estimators`.
2. As a bonus, play with other regularization parameters trying to find the best combination.

In [20]:
RFC = RandomForestClassifier(n_estimators=100, random_state=21, max_depth=14)
RFC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(RFC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.87407   |   valid - 0.87407
train - 0.85185   |   valid - 0.85185
train - 0.92593   |   valid - 0.92593
train - 0.91111   |   valid - 0.91111
train - 0.88889   |   valid - 0.88889
train - 0.86667   |   valid - 0.86667
train - 0.91111   |   valid - 0.91111
train - 0.87407   |   valid - 0.87407
train - 0.93284   |   valid - 0.93284
train - 0.86567   |   valid - 0.86567
Average accuracy on crossval is 0.89022
STD is 0.02668


In [21]:
RFC = RandomForestClassifier(n_estimators=50, random_state=21, max_depth=10)
RFC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(RFC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.80000   |   valid - 0.80000
train - 0.77037   |   valid - 0.77037
train - 0.82963   |   valid - 0.82963
train - 0.82222   |   valid - 0.82222
train - 0.77778   |   valid - 0.77778
train - 0.85185   |   valid - 0.85185
train - 0.76296   |   valid - 0.76296
train - 0.74815   |   valid - 0.74815
train - 0.88060   |   valid - 0.88060
train - 0.72388   |   valid - 0.72388
Average accuracy on crossval is 0.79674
STD is 0.04656


In [22]:
RFC = RandomForestClassifier(n_estimators=100, random_state=21, max_depth=20)
RFC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(RFC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.90370   |   valid - 0.90370
train - 0.85185   |   valid - 0.85185
train - 0.95556   |   valid - 0.95556
train - 0.92593   |   valid - 0.92593
train - 0.93333   |   valid - 0.93333
train - 0.92593   |   valid - 0.92593
train - 0.94815   |   valid - 0.94815
train - 0.88889   |   valid - 0.88889
train - 0.94030   |   valid - 0.94030
train - 0.88060   |   valid - 0.88060
Average accuracy on crossval is 0.91542
STD is 0.03152


In [23]:
RFC = RandomForestClassifier(n_estimators=100, random_state=21, max_depth=20, ccp_alpha=0.01)
RFC.fit(x_train, y_train)
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=21)
scores = cross_val_score(RFC, x_train, y_train, cv=skf, scoring='accuracy')

for i, score in enumerate(scores):
    print(f"train - {score:.5f}   |   valid - {score:.5f}")
print(f"Average accuracy on crossval is {scores.mean():.5f}")
print(f"STD is {scores.std():.5f}")

train - 0.62222   |   valid - 0.62222
train - 0.57037   |   valid - 0.57037
train - 0.64444   |   valid - 0.64444
train - 0.68889   |   valid - 0.68889
train - 0.57037   |   valid - 0.57037
train - 0.58519   |   valid - 0.58519
train - 0.55556   |   valid - 0.55556
train - 0.60000   |   valid - 0.60000
train - 0.63433   |   valid - 0.63433
train - 0.52239   |   valid - 0.52239
Average accuracy on crossval is 0.59938
STD is 0.04639


## 6. Predictions

1. Choose the best model and use it to make predictions for the test dataset.
2. Calculate the final accuracy.
3. Analyze: for which weekday your model makes the most errors (in % of the total number of samples of that class in your test dataset).
4. Save the model.

In [24]:
best_model = RandomForestClassifier(n_estimators=100, random_state=21, max_depth=20)
best_model.fit(x_train, y_train)

In [25]:
pred = best_model.predict(x_test)

In [26]:
print(accuracy_score(y_test, pred))

0.9319526627218935


In [27]:
error_calc = pd.DataFrame({'Actual': y_test, 'Predicted': pred})

error_counts = error_calc[error_calc['Actual'] != error_calc['Predicted']].groupby('Actual').size()
total_counts = error_calc.groupby('Actual').size()
error_rate = (error_counts / total_counts) * 100

# Find the weekday with the highest error rate
most_errors_weekday = error_rate.idxmax()
most_errors_rate = error_rate.max()

print(f'Weekday with most errors: {most_errors_weekday} ({most_errors_rate:.2f}%)')

Weekday with most errors: 0 (25.93%)


In [None]:
joblib.dump(best_model, '../data/best_model_ex00.pkl')

['../data/best_model_ex00.pkl']

: 