In [3]:
# 1 Užduotis
# Panaudokite Grid Search, Random Search ir Bayes optimizavimą vienam iš savo anksčiau naudotų 
# datasetų, jog surastumėte didžiausią tikslumą ir geriausius hiperparametrus

from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets  import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
X = iris.data
y = iris.target

model = RandomForestClassifier()

param_grid = {
    'n_estimators': [10, 50, 100, 200], # medziu kiekis
    'max_depth': [None, 10, 20], # sprendimu kiekis medyje
    'min_samples_split': [2, 5, 10] # minimalus pavyzdziu kiekis
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

print(f'Geriausi parametrai: {grid_search.best_params_}')
print(f'Geriausias tikslumas: {grid_search.best_score_}')


Geriausi parametrai: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Geriausias tikslumas: 0.9666666666666668


In [4]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Define the parameter grid for Random Search
param_distributions = {
    'n_estimators': [int(x) for x in np.linspace(start=10, stop=200, num=20)],
    'max_depth': [None] + [int(x) for x in np.linspace(10, 110, num=11)],
    'min_samples_split': [2, 5, 10, 15, 20]
}

# Initialize the model
model = RandomForestClassifier()

# Initialize RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_distributions, 
                                   n_iter=100, cv=5, scoring='accuracy', random_state=42, n_jobs=-1)

# Fit RandomizedSearchCV
random_search.fit(X, y)

print(f'Geriausi parametrai (Random Search): {random_search.best_params_}')
print(f'Geriausias tikslumas (Random Search): {random_search.best_score_}')


Geriausi parametrai (Random Search): {'n_estimators': 60, 'min_samples_split': 15, 'max_depth': 80}
Geriausias tikslumas (Random Search): 0.9666666666666668


In [5]:
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
from hyperopt.pyll.base import scope
from sklearn.model_selection import cross_val_score

# Define the search space
space = {
    'n_estimators': scope.int(hp.quniform('n_estimators', 10, 200, 1)),
    'max_depth': hp.choice('max_depth', [None] + [int(x) for x in np.linspace(10, 110, num=11)]),
    'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 20, 1))
}

# Define the objective function
def objective(params):
    model = RandomForestClassifier(**params)
    accuracy = cross_val_score(model, X, y, cv=5, scoring='accuracy').mean()
    return {'loss': -accuracy, 'status': STATUS_OK}

# Initialize Trials object
trials = Trials()

# Run optimization
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100, trials=trials, rstate=np.random.RandomState(42))

print(f'Geriausi parametrai (Bayesian Optimization): {best}')
# Convert hyperopt parameters to scikit-learn format
best_params = {
    'n_estimators': int(best['n_estimators']),
    'max_depth': [None] + [int(x) for x in np.linspace(10, 110, num=11)][best['max_depth']],
    'min_samples_split': int(best['min_samples_split'])
}
print(f'Geriausias tikslumas (Bayesian Optimization): {-trials.best_trial["result"]["loss"]}')


  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]


AttributeError: 'numpy.random.mtrand.RandomState' object has no attribute 'integers'