In [7]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

from Features import FEATURE_LIST

In [8]:
# One seed shared by the train/validation split, the booster and the
# randomized search so that a fresh "Restart & Run All" reproduces the results.
SEED = 42

# Load the training data and keep only the columns listed in FEATURE_LIST.
df = pd.read_csv('../data/wine_data_train.csv')
df = df[FEATURE_LIST]

# Load the held-out test data, restricted to the same columns.
df_test = pd.read_csv('../data/wine_data_test.csv')
df_test = df_test[FEATURE_LIST]

# 80/20 train/validation split; "quality" is the regression target.
X_train, X_val, y_train, y_val = train_test_split(
    df.drop("quality", axis=1),
    df["quality"],
    test_size=0.2,
    random_state=SEED,
)

# Candidate hyper-parameters for AdaBoostRegressor (3 * 3 * 2 = 18 combinations).
# NOTE(review): the 'estimator' key is the scikit-learn >= 1.2 name of the
# base-learner parameter (older releases call it 'base_estimator') -- confirm
# against the installed version.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.001, 0.01, 0.1],
    'estimator': [DecisionTreeRegressor(), RandomForestRegressor()],
}

reg = AdaBoostRegressor(random_state=SEED)

# Randomized (not exhaustive) search: with the default n_iter only a sample of
# the combinations above is evaluated, each with 5-fold cross-validation.
# random_state pins which candidates are sampled; without it every run would
# explore a different subset and could report a different "best" model.
grid_search = RandomizedSearchCV(
    estimator=reg,
    param_distributions=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
    random_state=SEED,
)

# Fit the search on the training split only; X_val stays untouched for evaluation.
grid_search.fit(X_train, y_train)

TypeError: __init__() got an unexpected keyword argument 'param_grid'

In [None]:
# Retrieve the winning configuration and the corresponding fitted model
# from the hyper-parameter search.
best_params = grid_search.best_params_
reg = grid_search.best_estimator_

# Validation metrics. scikit-learn metric functions take (y_true, y_pred);
# MAE and MSE happen to be symmetric, but keeping the documented order avoids
# silent errors if an asymmetric metric is added later.
y_pred = reg.predict(X_val)
MAE = mean_absolute_error(y_val, y_pred)
MSE = mean_squared_error(y_val, y_pred)
r_2 = reg.score(X_val, y_val)

# Performance on the held-out test set.
X_test = df_test.drop("quality", axis=1)
y_test = df_test["quality"]
y_pred_test = reg.predict(X_test)
MAE_t = mean_absolute_error(y_test, y_pred_test)
MSE_t = mean_squared_error(y_test, y_pred_test)
r_2_t = reg.score(X_test, y_test)

# Plain-text report: best parameters followed by validation and test metrics.
message = (
    f'best parameter{best_params}\n'
    f'Validation R^2: {r_2}\n'
    f'Validation MAE: {MAE}\n'
    f'Validation MSE: {MSE}\n'
    f'Test R^2: {r_2_t}\n'
    f'Test MAE: {MAE_t}\n'
    f'Test MSE: {MSE_t}'
)
print(message)

# Persist the report. Create the output directory first so a fresh checkout
# without a results/ folder does not fail with FileNotFoundError.
results_path = Path("results/AdaBoost_results.txt")
results_path.parent.mkdir(parents=True, exist_ok=True)
with open(results_path, "w") as file:
    file.write(message)