In [None]:
import numpy as np
import pandas as pd
import time
import sklearn
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the processed TMDB sheet and discard columns that are not used as
# model inputs. Raw 'revenue' is removed because the log-scaled
# 'log_revenue' column serves as the regression target instead.
# NOTE(review): 'title' and 'budget' are dropped as well -- presumably a
# free-text field and a column superseded by a transformed one; confirm.
data = (
    pd.read_excel('../Data/ProcessedData/TMDB_processed.xlsx')
    .drop(columns=['title', 'revenue', 'budget'])
)

In [None]:
# Separate the regression target from the predictors: `y` is the
# 'log_revenue' column, `X` is every remaining column of `data`.
y = data['log_revenue']
X = data.drop(columns=['log_revenue'])

**GridSearchCV on XGBRegressor**

In [None]:
# Exhaustive hyperparameter search for XGBRegressor, scored by R^2.
#
# Grid used in the first run (kept for reference only, not executed):
#   max_depth:        [2, 4, 6]
#   n_estimators:     [100, 500, 1000]
#   colsample_bytree: [0.2, 0.6, 0.8]
#   min_child_weight: [3, 5, 7]
#   gamma:            [0.3, 0.5, 0.7]
#   subsample:        [0.4, 0.6, 0.8]
#
# Second run: max_depth, min_child_weight and gamma are pinned to a single
# value each, while the remaining parameters are searched on a finer grid.
# NOTE(review): the pinned values are presumably the first run's winners --
# confirm against the first run's output.
params = dict(
    max_depth=[4],
    n_estimators=[100, 300, 500],
    colsample_bytree=[0.6, 0.65, 0.7],
    min_child_weight=[5],
    gamma=[0.3],
    subsample=[0.75, 0.8, 0.85],
)

model = XGBRegressor()

# No `cv` argument is given, so GridSearchCV's default splitting is used.
clf = GridSearchCV(model, params, scoring='r2', verbose=1)

# Wall-clock timing of the complete search (every candidate, every fold).
start = time.time()
clf.fit(X, y)
end = time.time()

# Report how long the search took and the winning configuration.
for label, value in (
    ('Time taken to tune hyperparameters: ', end - start),
    ('Best parameters: ', clf.best_params_),
    ('Best Score: ', clf.best_score_),
):
    print(label, value)

**GridSearchCV on Random Forest Regressor**

In [None]:
# Exhaustive hyperparameter search for RandomForestRegressor, scored by R^2.
#
# Only max_depth and min_samples_leaf are actually searched; bootstrap,
# max_features and n_estimators are each pinned to a single value.
params = {'bootstrap': [False],
          'max_features': ['sqrt'],
          'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
          'n_estimators': [53],
          'min_samples_leaf': [1, 2, 4]}

# Fix the seed: even with bootstrap=False the forest draws a random feature
# subset at every split (max_features='sqrt'), so an unseeded estimator
# yields different scores -- and possibly different "best" parameters -- on
# every run, making the search results irreproducible.
model = RandomForestRegressor(random_state=42)

# No `cv` argument is given, so GridSearchCV's default splitting is used.
clf = GridSearchCV(estimator=model,
                   param_grid=params,
                   scoring='r2',
                   verbose=2)

# Wall-clock timing of the complete search (every candidate, every fold).
start = time.time()
clf.fit(X, y)
end = time.time()

# Report how long the search took and the winning configuration.
print('Time taken to tune hyperparameters: ', end - start)
print('Best parameters: ', clf.best_params_)
print('Best Score: ', clf.best_score_)