Title: GridSearchCV & RandomizedSearchCV

Task 1: GridSearchCV for Decision Trees<br>
Use GridSearchCV to tune max_depth and min_samples_split in Decision Tree for Iris.

In [None]:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

data = load_iris()
X = data.data
y = data.target

param_grid = {
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10]
}

model = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)

grid_search.fit(X, y)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

y_pred = best_model.predict(X)

accuracy = accuracy_score(y, y_pred)

print(f"Best Parameters: {best_params}")
print(f"Accuracy: {accuracy:.2f}")


Best Parameters: {'max_depth': 3, 'min_samples_split': 2}
Accuracy: 0.97


Task 2: RandomizedSearchCV for Random Forest<br>
Apply RandomizedSearchCV to optimize hyperparameters of Random Forest for customer churn.

In [2]:
# Write your code here
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

model = RandomForestClassifier(random_state=42)
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=5, random_state=42)

random_search.fit(X_train, y_train)

best_model = random_search.best_estimator_

y_val_pred = best_model.predict(X_val)

accuracy = accuracy_score(y_val, y_val_pred)

print(f"Best Parameters: {random_search.best_params_}")
print(f"Accuracy: {accuracy:.2f}")


Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_depth': None}
Accuracy: 0.87


Task 3: Fine-Tuning SVR with GridSearchCV<br>
Use GridSearchCV to find best parameters for Support Vector Regression on housing data.

In [3]:
# Write your code here
from sklearn.datasets import make_regression
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np

X, y = make_regression(n_samples=1000, n_features=5, noise=0.1, random_state=42)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

param_grid = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 1],
    'kernel': ['linear', 'rbf']
}

model = SVR()
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_

y_val_pred = best_model.predict(X_val)

rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))

print(f"Best Parameters: {grid_search.best_params_}")
print(f"Validation RMSE: {rmse:.2f}")


Best Parameters: {'C': 10, 'epsilon': 0.01, 'kernel': 'linear'}
Validation RMSE: 0.10
