# **Eksplorasi HyperParameter Search**
<br>

**Setup**

In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

df = pd.read_csv('./Kaggle/resources/datasets/melb_data.csv')
df = df.dropna(axis=0)

features = ['Rooms', 'Bathroom', 'Landsize', 'BuildingArea', 
                        'YearBuilt', 'Lattitude', 'Longtitude']
y = df.Price
X = df[features]

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model = DecisionTreeRegressor()

# **Grid Search**

In [2]:
from sklearn.model_selection import GridSearchCV

# Param
param_grid = {'random_state': [0,1,4,12,24,38,42,48,61,78,80,87,97,100,123]}

# Search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_absolute_error')
grid_search.fit(X_train, y_train)

# Best Score
best_random_state = grid_search.best_params_['random_state']
print(f"Best random state: {best_random_state}")
print(f"Best random score: {grid_search.best_score_}")

Best random state: 87
Best random score: -259037.22653911595


# **Manual Search with Cross-Validation**

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np

# Param
random_state = [0,1,4,12,24,38,42,80,81,82,83,84,85,85,86,87,97]
result = {}

for i in random_state:
    model.set_params(random_state=i)
    scores = cross_val_score(model,X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
    result[i] = np.mean(scores)
    
# Best Score
best_random_state = min(result, key=result.get)
print(f"Best random state: {best_random_state}")
print(f"Best random score: {result}")

Best random state: 42
Best random score: {0: -261414.25274673887, 1: -262673.1178091832, 4: -260552.7668775536, 12: -261015.7791958054, 24: -264376.28950102435, 38: -261534.53661469728, 42: -265463.43696586683, 80: -262697.3142187807, 81: -263931.24647661374, 82: -262810.70697362174, 83: -259651.7806451613, 84: -263327.7442719076, 85: -263602.2020074771, 86: -260071.24380453024, 87: -259037.22653911595, 97: -260958.2043267706}


# **Randomized Search**<br>
- model: Estimator yang dioptimalkan.
- random_param: Parameter distribusi yang dioptimalkan.
- cv: Cross-validation fold.
- n_iter: Jumlah iterasi pencarian.
- n_jobs: Jumlah pekerjaan paralel.
- random_state: Seed untuk angka acak.
- scoring: Metrik evaluasi.

In [4]:
from sklearn.model_selection import RandomizedSearchCV
#param
random_param = {'random_state': np.random.randint(0, 2500, size=500)}

# Search Param
random_search = RandomizedSearchCV(model, random_param, cv=5, n_iter=10, n_jobs=-1, random_state=42, scoring='neg_mean_absolute_error')
random_search.fit(X_train, y_train)

best_random_state = random_search.best_params_['random_state']
print(f"Best random state: {best_random_state}")
print(f"Best score: {random_search.best_score_}")

Best random state: 652
Best score: -258631.71457666354
