# Hyperparameter
Hyperparameter are those parameters that are explicitly define by the user to control the learning process.
The best value can be determined either by the rule of thumb or by trial and error.

# Type of Hyperparameter

# 1. Manual Tuning:

This involves manually adjusting hyperparameters based on trial and error and observing the model's performance.

It can be effective for small models and understanding hyperparameter effects but becomes impractical for complex models or many hyperparameters

# 2. Exhaustive Search Methods

# Grid Search:
Systematically evaluates all possible combinations of hyperparameter values within a predefined grid.
Guarantees finding the best combination within the specified grid but can be computationally expensive.

# Random Search: 
Randomly samples hyperparameter values from the search space.
Often more efficient than grid search, especially in high-dimensional spaces, as it can explore a wider range of values more effectively.

In [1]:
import pandas as pd 

In [2]:
dataset = pd.read_csv("Polynomial_Regression.csv")
dataset.head(3)

Unnamed: 0,Level,Salary
0,1,1500.0
1,2,16970.0
2,3,70148.0


In [4]:
x = dataset.iloc[:,:-1]
y = dataset["Salary"]

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

In [7]:
from sklearn.tree import DecisionTreeRegressor

In [8]:
dt = DecisionTreeRegressor()
dt.fit(x_train,y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [9]:
dt.score(x_train,y_train)*100, dt.score(x_test,y_test)*100

(100.0, 99.9211468673978)

In [10]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV

In [16]:
df = {"criterion":['squared_error',"friedman_mse", "absolute_error","poisson"],
    "splitter":['best',"random"],
    "max_depth":[i for i in range(1,20)]}

In [17]:
gd = GridSearchCV(DecisionTreeRegressor(),param_grid=df)
gd.fit(x_train,y_train)

0,1,2
,estimator,DecisionTreeRegressor()
,param_grid,"{'criterion': ['squared_error', 'friedman_mse', ...], 'max_depth': [1, 2, ...], 'splitter': ['best', 'random']}"
,scoring,
,n_jobs,
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,15
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [18]:
gd.best_params_

{'criterion': 'squared_error', 'max_depth': 15, 'splitter': 'best'}

In [19]:
gd.best_score_

np.float64(0.9988819819873435)

In [15]:
rd = RandomizedSearchCV(DecisionTreeRegressor(),param_distributions=df,n_iter=20)
rd.fit(x_train,y_train)

0,1,2
,estimator,DecisionTreeRegressor()
,param_distributions,"{'criterion': ['squared_error', 'friedman_mse', ...], 'max_depth': [1, 2, ...], 'splitter': ['best', 'random']}"
,n_iter,20
,scoring,
,n_jobs,
,refit,True
,cv,
,verbose,0
,pre_dispatch,'2*n_jobs'
,random_state,

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,17
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [20]:
rd.best_params_

{'splitter': 'best', 'max_depth': 17, 'criterion': 'squared_error'}

In [21]:
rd.best_score_

np.float64(0.9988819819873435)