In [2]:
import numpy as np
import pandas as pd
import optuna
import seaborn as sns

In [5]:
# Load the "healthexp" example dataset through seaborn and preview its first rows.
healthxp = sns.load_dataset("healthexp")
healthxp.head(n=5)

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [8]:
# One-hot encode the frame. The result is bound back to the same name, so
# every later cell works with the encoded version (Country_* indicator
# columns in place of Country, as the displayed frame shows).
healthxp = pd.get_dummies(data=healthxp)
healthxp

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,False,False,True,False,False,False
1,1970,192.143,72.2,False,True,False,False,False,False
2,1970,123.993,71.9,False,False,False,True,False,False
3,1970,150.437,72.0,False,False,False,False,True,False
4,1970,326.961,70.9,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...
269,2020,6938.983,81.1,False,False,True,False,False,False
270,2020,5468.418,82.3,False,True,False,False,False,False
271,2020,5018.700,80.4,False,False,False,True,False,False
272,2020,4665.641,84.7,False,False,False,False,True,False


In [10]:
# Feature matrix: every column except the regression target.
X = healthxp.drop(columns=["Life_Expectancy"])
X.head(n=5)

Unnamed: 0,Year,Spending_USD,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,False,False,True,False,False,False
1,1970,192.143,False,True,False,False,False,False
2,1970,123.993,False,False,False,True,False,False
3,1970,150.437,False,False,False,False,True,False
4,1970,326.961,False,False,False,False,False,True


In [11]:
# Regression target: the Life_Expectancy column.
y = healthxp.loc[:, "Life_Expectancy"]

In [12]:
from sklearn.model_selection import train_test_split

In [21]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=34,
)

In [22]:
from sklearn.ensemble import RandomForestRegressor

In [23]:
# Baseline model: random forest with library-default hyperparameters and a
# fixed seed.
rfr = RandomForestRegressor(random_state=34)

In [30]:
# Train the baseline forest on the training split.
rfr.fit(X=X_train, y=y_train)

In [31]:
# Baseline predictions for the held-out test rows.
y_pred = rfr.predict(X=X_test)

In [32]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [33]:
# Baseline mean absolute error on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

0.24521818181817784

In [34]:
# Baseline mean squared error on the test split.
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.10628972727272501

In [35]:
# Baseline R^2 (coefficient of determination) on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.9909317700688961

In [36]:
import optuna

In [38]:
from sklearn.model_selection import cross_val_score

In [48]:
def objective(trial):
    """Optuna objective: cross-validated score of a random forest.

    Samples RandomForestRegressor hyperparameters from ``trial``, evaluates
    them with 5-fold cross-validation on ``X_train`` / ``y_train`` (read from
    the notebook namespace) and returns the mean of the fold scores.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``n_estimators``, ``max_depth``,
        ``min_samples_split`` and ``min_samples_leaf``.

    Returns
    -------
    float
        Mean ``neg_mean_squared_error`` across the 5 folds. The scores are
        negated MSE, so larger (closer to zero) is better and the study
        must be created with ``direction='maximize'``.
    """
    hyperparams = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 10, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 32),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 32),
    }

    # Seed the forest so a given set of hyperparameters always yields the
    # same score; without it, differences between trials are partly random
    # noise and the study cannot be reproduced. 34 matches the seed used for
    # the train/test split and the baseline model in this notebook.
    model = RandomForestRegressor(**hyperparams, n_jobs=-1, random_state=34)
    scores = cross_val_score(
        model, X_train, y_train, cv=5, scoring='neg_mean_squared_error'
    )
    return np.mean(scores)

In [52]:
# The objective returns negated MSE, hence direction='maximize'.
# The sampler is seeded so the sequence of suggested hyperparameters is the
# same on every Restart & Run All; TPESampler is Optuna's default
# single-objective sampler, so only the seeding changes, not the algorithm.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=34),
)
study.optimize(objective, n_trials=20)
best_params = study.best_params
print("Best parameters found: ", best_params)

[I 2024-02-05 16:44:14,806] A new study created in memory with name: no-name-ddd7ed98-6b41-4b01-ae1c-a16b4a505cdc
[I 2024-02-05 16:44:23,974] Trial 0 finished with value: -1.189354203730634 and parameters: {'n_estimators': 578, 'max_depth': 37, 'min_samples_split': 15, 'min_samples_leaf': 8}. Best is trial 0 with value: -1.189354203730634.
[I 2024-02-05 16:44:30,053] Trial 1 finished with value: -3.091252948143261 and parameters: {'n_estimators': 397, 'max_depth': 40, 'min_samples_split': 23, 'min_samples_leaf': 30}. Best is trial 0 with value: -1.189354203730634.
[I 2024-02-05 16:44:38,956] Trial 2 finished with value: -2.117843076576596 and parameters: {'n_estimators': 568, 'max_depth': 23, 'min_samples_split': 30, 'min_samples_leaf': 15}. Best is trial 0 with value: -1.189354203730634.
[I 2024-02-05 16:44:44,075] Trial 3 finished with value: -2.3642065672934356 and parameters: {'n_estimators': 321, 'max_depth': 41, 'min_samples_split': 16, 'min_samples_leaf': 19}. Best is trial 0 wi

Best parameters found:  {'n_estimators': 124, 'max_depth': 10, 'min_samples_split': 4, 'min_samples_leaf': 1}
