In [30]:
# 04_grid_random_search (n)
# 1. Zaimplementuj GridSearchCV oraz RandomizedSearchCV dla datasetu wine. Uwzględnij poniższe parametry:
# - estymator: LogisticRegression(solver="liblinear")
#  - parametr C:
#     - min 1
#     - max 10 000
#     - liczba wystąpień 1 000
#  - regularyzacja l1 oraz l2
# 2. Zaimplementuj GridSearchCV (jeden na wszystkie modele) w celu znalezienia najlepszego algorytmu oraz hyperparametrów dla datasetu z pkt.1:
#  - wykorzystaj estymatory:
#     - RandomForestClassifier
#     - KNeighborsClassifier
#     - LogisticRegression
# 3. Porównaj wyniki korzystając z  hyperopt-sklearn.

# Rozwiązanie prześlij jako printscreen (jpg, pdf).
# Nazwa pliku (bez polskich znaków):

# Nazwisko_Imie_04_stacj.jpg

## Grid Search

In [31]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [32]:
iris = load_iris()
features = iris.data
target = iris.target

In [33]:
logistic = LogisticRegression(solver="liblinear", max_iter=200)

In [34]:
penalty = ["l1", "l2"]

In [35]:
# 10 do potęgi 0, 4, 1000
C = np.logspace(0, 4, 1000)

In [36]:
hyperparameters = dict(C=C, penalty=penalty)

In [37]:
gridsearch = GridSearchCV(logistic, hyperparameters, cv=5, verbose=2, n_jobs=-1)

In [38]:
best_model = gridsearch.fit(features, target)

Fitting 5 folds for each of 2000 candidates, totalling 10000 fits


In [39]:
print(best_model.best_estimator_.get_params()['penalty'])
print(best_model.best_estimator_.get_params()['C'])

l1
5.926151812475554


In [40]:
#best_model.predict(features)

## Randomized Search

In [41]:
from sklearn.model_selection import RandomizedSearchCV

In [42]:
randomizedsearch = RandomizedSearchCV(
logistic,
hyperparameters,
random_state=1,
n_iter=100,
cv=5,
verbose=0,
n_jobs=-1
)

In [43]:
best_random_model = randomizedsearch.fit(features, target)

In [44]:
print(best_random_model.best_estimator_.get_params()['penalty'])
print(best_random_model.best_estimator_.get_params()['C'])

l2
182.92045048462938


## Pipeline

In [45]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

In [46]:
pipe = Pipeline([("classifier", RandomForestClassifier())])

In [47]:
search_space = [
    {"classifier": [logistic],
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(0,4, 10)},
    {"classifier": [RandomForestClassifier()],
    "classifier__n_estimators": [10, 100, 1000],
    "classifier__max_features": [1, 2, 3]}
]

In [48]:
gridsearch = GridSearchCV(pipe, search_space, cv=5, verbose=1, n_jobs=-1)

In [49]:
best_model = gridsearch.fit(features, target)

Fitting 5 folds for each of 29 candidates, totalling 145 fits


In [50]:
print(best_model.best_estimator_.get_params()["classifier"])

LogisticRegression(C=7.742636826811269, max_iter=200, penalty='l1',
                   solver='liblinear')


## Hyperopt

In [51]:
from sklearn.model_selection import train_test_split
from hpsklearn import HyperoptEstimator
from hpsklearn import any_classifier
from hpsklearn import any_preprocessing
from hyperopt import tpe

In [52]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.33, random_state=42)

In [53]:
model = HyperoptEstimator(
classifier=any_classifier("cla"),
preprocessing=any_preprocessing("pre"),
algo=tpe.suggest,
max_evals=20,
trial_timeout=30,
n_jobs=-1)

In [54]:
model.fit(X_train, y_train)

100%|████████████████| 1/1 [00:01<00:00,  1.50s/trial, best loss: 0.050000000000000044]
100%|████████████████| 2/2 [00:01<00:00,  1.67s/trial, best loss: 0.050000000000000044]
100%|████████████████| 3/3 [00:01<00:00,  1.42s/trial, best loss: 0.050000000000000044]
100%|████████████████| 4/4 [00:01<00:00,  1.85s/trial, best loss: 0.050000000000000044]
100%|████████████████| 5/5 [00:01<00:00,  1.59s/trial, best loss: 0.050000000000000044]
100%|████████████████| 6/6 [00:01<00:00,  1.54s/trial, best loss: 0.050000000000000044]
100%|████████████████| 7/7 [00:02<00:00,  2.17s/trial, best loss: 0.050000000000000044]
100%|████████████████| 8/8 [00:01<00:00,  1.41s/trial, best loss: 0.050000000000000044]
100%|████████████████| 9/9 [00:01<00:00,  1.48s/trial, best loss: 0.050000000000000044]
100%|███████████████████████████████| 10/10 [00:01<00:00,  1.47s/trial, best loss: 0.0]
100%|███████████████████████████████| 11/11 [00:02<00:00,  2.34s/trial, best loss: 0.0]
100%|███████████████████████████

In [55]:
acc = model.score(X_test, y_test)
print("Accuracy: %.3f" % acc)

Accuracy: 0.980


In [56]:
print(model.best_model())

{'learner': KNeighborsClassifier(metric='euclidean', n_jobs=-1, n_neighbors=32), 'preprocs': (Normalizer(norm='l1'),), 'ex_preprocs': ()}
