<a href="https://colab.research.google.com/github/douglasmmachado/MedicineConsumption/blob/main/notebooks/division_approach/5_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 5 - Forecasting and prediction



---



---



In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import math as m

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error,  mean_absolute_percentage_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV



df_h1_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/main/datasets/division_approach/clustered/df_h1_clustered.csv"
df_h2_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/main/datasets/division_approach/clustered/df_h2_clustered.csv"
df_h3_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/main/datasets/division_approach/clustered/df_h3_clustered.csv"
df_h4_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/main/datasets/division_approach/clustered/df_h4_clustered.csv"

df_h1 = pd.read_csv(df_h1_url)
df_h2 = pd.read_csv(df_h2_url)
df_h3 = pd.read_csv(df_h3_url)
df_h4 = pd.read_csv(df_h4_url)

# Convert numerical columns to string
for i, curr_df in enumerate([df_h1, df_h2, df_h3, df_h4]):
    numerical_columns = ['HOSPI_CODE_UCD']
    curr_df[numerical_columns] = curr_df[numerical_columns].astype('string')

medicines = ['3400892088310','3400892075761','3400892203645',
             '3400892065366','3400892052120','3400891996128',
             '3400893826706','3400893736135','3400893875490',
             '3400890837149','3400891235203','3400891225037',
             '3400891191226','3400892729589','3400892745848',
             '3400892697789','3400892761527','3400893022634',
             '3400892761695','3400892669236','3400892508566']

In [29]:
df_h1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1208 entries, 0 to 1207
Data columns (total 47 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   HOSPI_CODE_UCD     1208 non-null   string 
 1   LIT_HC             1208 non-null   float64
 2   LIT_HP             1208 non-null   float64
 3   N_UFS              1208 non-null   float64
 4   PN_MEDICAL         1208 non-null   float64
 5   POPULATION         1208 non-null   float64
 6   P_MEDICAL          1208 non-null   float64
 7   QUANTITY           1208 non-null   float64
 8   QUANTITY_MA        1208 non-null   float64
 9   SEJ_MCO            1208 non-null   float64
 10  SEJ_SLD            1208 non-null   float64
 11  SEJ_SSR            1208 non-null   float64
 12  YEAR               1208 non-null   float64
 13  MONTH_1.0          1208 non-null   int64  
 14  MONTH_2.0          1208 non-null   int64  
 15  MONTH_3.0          1208 non-null   int64  
 16  MONTH_4.0          1208 

In [30]:
df_h2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 697 entries, 0 to 696
Data columns (total 47 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   HOSPI_CODE_UCD     697 non-null    string 
 1   LIT_HC             697 non-null    float64
 2   LIT_HP             697 non-null    float64
 3   N_UFS              697 non-null    float64
 4   PN_MEDICAL         697 non-null    float64
 5   POPULATION         697 non-null    float64
 6   P_MEDICAL          697 non-null    float64
 7   QUANTITY           697 non-null    float64
 8   QUANTITY_MA        697 non-null    float64
 9   SEJ_MCO            697 non-null    float64
 10  SEJ_PSY            697 non-null    float64
 11  SEJ_SSR            697 non-null    float64
 12  YEAR               697 non-null    float64
 13  MONTH_1.0          697 non-null    int64  
 14  MONTH_2.0          697 non-null    int64  
 15  MONTH_3.0          697 non-null    int64  
 16  MONTH_4.0          697 non

In [31]:
df_h3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1397 entries, 0 to 1396
Data columns (total 49 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   HOSPI_CODE_UCD     1397 non-null   string 
 1   LIT_HC             1397 non-null   float64
 2   LIT_HP             1397 non-null   float64
 3   N_UFS              1397 non-null   float64
 4   PN_MEDICAL         1397 non-null   float64
 5   POPULATION         1397 non-null   float64
 6   P_MEDICAL          1397 non-null   float64
 7   QUANTITY           1397 non-null   float64
 8   QUANTITY_MA        1397 non-null   float64
 9   SEJ_HAD            1397 non-null   float64
 10  SEJ_MCO            1397 non-null   float64
 11  SEJ_PSY            1397 non-null   float64
 12  SEJ_SLD            1397 non-null   float64
 13  SEJ_SSR            1397 non-null   float64
 14  YEAR               1397 non-null   float64
 15  MONTH_1.0          1397 non-null   int64  
 16  MONTH_2.0          1397 

In [32]:
df_h4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 770 entries, 0 to 769
Data columns (total 48 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   HOSPI_CODE_UCD     770 non-null    string 
 1   LIT_HC             770 non-null    float64
 2   LIT_HP             770 non-null    float64
 3   N_UFS              770 non-null    float64
 4   PN_MEDICAL         770 non-null    float64
 5   POPULATION         770 non-null    float64
 6   P_MEDICAL          770 non-null    float64
 7   QUANTITY           770 non-null    float64
 8   QUANTITY_MA        770 non-null    float64
 9   SEJ_MCO            770 non-null    float64
 10  SEJ_PSY            770 non-null    float64
 11  SEJ_SLD            770 non-null    float64
 12  SEJ_SSR            770 non-null    float64
 13  YEAR               770 non-null    float64
 14  MONTH_1.0          770 non-null    int64  
 15  MONTH_2.0          770 non-null    int64  
 16  MONTH_3.0          770 non

## 5.1 - New database composition based on clusters

In [33]:
features = ['HOSPI_CODE_UCD', 'LIT_HC', 'LIT_HP', 'N_ETB', 'N_UFS', 'PN_MEDICAL',
       'POPULATION', 'P_MEDICAL', 'QUANTITY', 'SEJ_HAD',
       'SEJ_MCO', 'SEJ_PSY', 'SEJ_SLD', 'SEJ_SSR', 'MONTH_1.0', 'MONTH_2.0',
       'MONTH_3.0', 'MONTH_4.0', 'MONTH_5.0', 'MONTH_6.0', 'MONTH_7.0',
       'MONTH_8.0', 'MONTH_9.0', 'MONTH_10.0', 'MONTH_11.0', 'MONTH_12.0', 'CLUSTER']

In [34]:
features_h1 = list(set(features) - {"N_ETB", "SEJ_HAD", "SEJ_PSY"})
features_h2 = list(set(features) - {'SEJ_HAD', 'SEJ_SLD', 'N_ETB'})
features_h3 = list(set(features) - {'N_ETB'})
features_h4 = list(set(features) - {'SEJ_HAD', 'N_ETB'})

## Baseline for each medicine and hospital

In [35]:
import plotly.graph_objects as go

def plot_pred(y_pred, y_test, medicine):
    # Create a scatter plot for y_pred
    fig = go.Figure(data=go.Scatter(x=np.arange(len(y_pred)), y=y_pred, mode='markers', name='y_pred', marker=dict(symbol='circle')))

    # Add scatter plot for y_test
    fig.add_trace(go.Scatter(x=np.arange(len(y_test)), y=y_test, mode='markers', name='y_test', marker=dict(symbol='x')))

    # Set axes labels and title
    fig.update_layout(xaxis_title='Test samples', yaxis_title='Quantity',
                      title=f'y_pred and y_test for medicine: {medicine}')

    # Show the plot
    fig.show()


In [36]:
import plotly.graph_objects as go

def plot_mape(y_pred, y_test, medicine, epsilon=0.001):
    mape_array = np.abs(y_test - y_pred) / np.maximum(epsilon, np.abs(y_test))

    # Create a scatter plot for MAPE
    fig = go.Figure(data=go.Scatter(x=np.arange(len(y_pred)), y=mape_array, mode='markers', name='MAPE'))

    # Set axes labels and title
    fig.update_layout(xaxis_title='Test samples', yaxis_title='MAPE',
                      title=f'MAPE for medicine: {medicine}')

    # Show the plot
    fig.show()

In [37]:
def test_1_baseline(df, medicine, df_scores):
  print()
  print(100*'-')
  print('Medicine:' + str(medicine))

  X = df[df['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY', 'HOSPI_CODE_UCD'], axis=1).values

  scaler = StandardScaler()
  X_scaled = scaler.fit_transform(X)

  y = df[df['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].values

  X_scaled, y = shuffle(X_scaled, y, random_state=42)

  if m.ceil(len(X_scaled) * 0.1) == 1:
    print('Dataset too small')
    test_size = 2
  else:
    test_size = 0.1

  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,
                                                      test_size = test_size,
                                                      random_state = 42)
  print(f'Size of data set: {len(X)}')
  print(f'Size of training set: {len(X_train)}')
  print(f'Size of test set: {len(X_test)}')

  # Define the parameter distributions for RandomizedSearchCV
  param_grid = {
      'max_depth': np.arange(2, 8, 1),
      'n_estimators': np.arange(2, max(int(m.ceil(len(X_train)*0.1)),3), 1),
      'max_features': ['sqrt']
  }
  depth_len = param_grid['max_depth'].size
  estimators_len = param_grid['n_estimators'].size

  print(f'Size of grid search: {depth_len * estimators_len}')

  # Create the RandomizedSearchCV object
  grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=42),
                             param_grid=param_grid,
                             scoring = 'neg_mean_absolute_percentage_error',
                             cv = 5,
                             n_jobs = -1)

  ''' >3 : the fold and candidate parameter indexes
      are also displayed together with the starting time of the computation.
  '''
  # Fit the RandomizedSearchCV object to the data
  grid_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = grid_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Calculate R^2 score
  r2 = r2_score(y_test, y_pred)

  # Calculate MAE
  mae = mean_absolute_error(y_test, y_pred)

  # Calculate MAPE
  mape = mean_absolute_percentage_error(y_test, y_pred)

  # Calculate RMSE
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  # Print the best parameters, best score, and evaluation metrics
  print('Best Parameters: ', grid_search.best_params_)
  print('Training Score (MAPE): ', round(grid_search.best_score_, 3))
  print(10*'-' + 'Test scores' + 10*'-')
  print('R^2 Score:', round(r2, 3))
  print('MAE:', round(mae, 3))
  print('MAPE:', round(mape, 3))
  print('RMSE:', round(rmse, 3))
  print()


  # Create the new row as a DataFrame
  new_row = pd.DataFrame({'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                          'R2': [r2],
                          'RMSE': [rmse],
                          'MAE': [mae],
                          'MAPE': [mape]})

  # Append the new row to the DataFrame
  df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # plot pred x test
  plot_pred(y_pred, y_test, medicine)
  print()


  plot_mape(y_pred, y_test, medicine)
  print()

  # Return the updated DataFrame
  return df_scores


In [38]:
df_prediction_scores_h1 = pd.DataFrame(columns=['HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in medicines:

  df_prediction_scores_h1 = test_1_baseline(df_h1[features_h1], medicine, df_prediction_scores_h1)

df_prediction_scores_h1


----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Size of data set: 47
Size of training set: 42
Size of test set: 5
Size of grid search: 18
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -15.513
----------Test scores----------
R^2 Score: 0.595
MAE: 511.584
MAPE: 4.996
RMSE: 653.248








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Size of data set: 63
Size of training set: 56
Size of test set: 7
Size of grid search: 24
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.442
----------Test scores----------
R^2 Score: 0.796
MAE: 128.911
MAPE: 0.207
RMSE: 162.542








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -1.404
----------Test scores----------
R^2 Score: 0.85
MAE: 190.775
MAPE: 0.442
RMSE: 212.687








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.431
----------Test scores----------
R^2 Score: 0.238
MAE: 698.062
MAPE: 0.218
RMSE: 1139.577








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Size of data set: 54
Size of training set: 48
Size of test set: 6
Size of grid search: 18
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.518
----------Test scores----------
R^2 Score: -0.311
MAE: 410.855
MAPE: 0.156
RMSE: 536.882








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Size of data set: 64
Size of training set: 57
Size of test set: 7
Size of grid search: 24
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -10.585
----------Test scores----------
R^2 Score: 0.905
MAE: 4379.166
MAPE: 0.126
RMSE: 5870.62








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -32.505
----------Test scores----------
R^2 Score: 0.742
MAE: 605.948
MAPE: 6.798
RMSE: 769.993








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Size of data set: 59
Size of training set: 53
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.523
----------Test scores----------
R^2 Score: 0.892
MAE: 208.449
MAPE: 0.904
RMSE: 295.137








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -4.711
----------Test scores----------
R^2 Score: 0.938
MAE: 766.094
MAPE: 1.053
RMSE: 929.828








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Size of data set: 66
Size of training set: 59
Size of test set: 7
Size of grid search: 24
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -3.905
----------Test scores----------
R^2 Score: 0.495
MAE: 634.405
MAPE: 2.418
RMSE: 756.473








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Size of data set: 56
Size of training set: 50
Size of test set: 6
Size of grid search: 18
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.256
----------Test scores----------
R^2 Score: -0.045
MAE: 363.586
MAPE: 0.389
RMSE: 426.965








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -6.494
----------Test scores----------
R^2 Score: 0.67
MAE: 2882.954
MAPE: 21.041
RMSE: 4048.079








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.118
----------Test scores----------
R^2 Score: 0.334
MAE: 278.241
MAPE: 0.101
RMSE: 360.958








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Size of data set: 54
Size of training set: 48
Size of test set: 6
Size of grid search: 18
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -5.297
----------Test scores----------
R^2 Score: 0.691
MAE: 599.506
MAPE: 0.197
RMSE: 665.839








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.138
----------Test scores----------
R^2 Score: 0.169
MAE: 922.698
MAPE: 0.127
RMSE: 1052.248








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Size of data set: 63
Size of training set: 56
Size of test set: 7
Size of grid search: 24
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -1.273
----------Test scores----------
R^2 Score: 0.163
MAE: 287.584
MAPE: 5.077
RMSE: 363.75








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Size of data set: 59
Size of training set: 53
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.616
----------Test scores----------
R^2 Score: 0.619
MAE: 896.752
MAPE: 5.308
RMSE: 1214.338








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -8.291
----------Test scores----------
R^2 Score: 0.886
MAE: 346.482
MAPE: 0.293
RMSE: 442.881








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.191
----------Test scores----------
R^2 Score: -1.482
MAE: 242.029
MAPE: 0.14
RMSE: 284.283








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Size of data set: 33
Size of training set: 29
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -62.017
----------Test scores----------
R^2 Score: 0.944
MAE: 809.48
MAPE: 0.236
RMSE: 889.767








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Size of data set: 60
Size of training set: 54
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -3.157
----------Test scores----------
R^2 Score: 0.757
MAE: 640.668
MAPE: 0.562
RMSE: 677.099









Unnamed: 0,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892088310,0.594736,653.248406,511.584276,4.995967
1,CODE_UCD_3400892075761,0.796174,162.541843,128.910714,0.206923
2,CODE_UCD_3400892203645,0.849805,212.686801,190.775159,0.441506
3,CODE_UCD_3400892065366,0.238292,1139.577047,698.0625,0.217979
4,CODE_UCD_3400892052120,-0.311412,536.882289,410.854894,0.15596
5,CODE_UCD_3400891996128,0.905172,5870.620147,4379.166323,0.126162
6,CODE_UCD_3400893826706,0.741695,769.993375,605.947659,6.798433
7,CODE_UCD_3400893736135,0.892141,295.137233,208.449074,0.904096
8,CODE_UCD_3400893875490,0.938086,929.827864,766.093585,1.053473
9,CODE_UCD_3400890837149,0.4951,756.472927,634.404762,2.418064


In [39]:
df_prediction_scores_h2 = pd.DataFrame(columns=['HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in medicines:

  df_prediction_scores_h2 = test_1_baseline(df_h2[features_h2], medicine, df_prediction_scores_h2)

df_prediction_scores_h2


----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.108
----------Test scores----------
R^2 Score: 0.117
MAE: 178.07
MAPE: 0.061
RMSE: 220.321








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.117
----------Test scores----------
R^2 Score: 0.087
MAE: 212.365
MAPE: 0.094
RMSE: 246.222








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Size of data set: 33
Size of training set: 29
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.158
----------Test scores----------
R^2 Score: -2.995
MAE: 380.002
MAPE: 0.118
RMSE: 388.768








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.128
----------Test scores----------
R^2 Score: 0.543
MAE: 1205.502
MAPE: 0.175
RMSE: 1432.802








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.152
----------Test scores----------
R^2 Score: 0.356
MAE: 173.571
MAPE: 0.106
RMSE: 202.447








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.116
----------Test scores----------
R^2 Score: 0.029
MAE: 4908.037
MAPE: 0.068
RMSE: 6645.224








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.099
----------Test scores----------
R^2 Score: -1.317
MAE: 184.755
MAPE: 0.095
RMSE: 215.811








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Size of data set: 31
Size of training set: 27
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.122
----------Test scores----------
R^2 Score: -0.404
MAE: 828.938
MAPE: 0.224
RMSE: 855.809








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.06
----------Test scores----------
R^2 Score: 0.077
MAE: 424.188
MAPE: 0.048
RMSE: 452.253








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.109
----------Test scores----------
R^2 Score: -0.266
MAE: 275.762
MAPE: 0.101
RMSE: 376.929








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.143
----------Test scores----------
R^2 Score: -1.54
MAE: 207.312
MAPE: 0.206
RMSE: 286.447








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.152
----------Test scores----------
R^2 Score: 0.586
MAE: 1297.4
MAPE: 0.087
RMSE: 1447.789








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Size of data set: 33
Size of training set: 29
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.077
----------Test scores----------
R^2 Score: 0.624
MAE: 171.25
MAPE: 0.112
RMSE: 320.612








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Size of data set: 33
Size of training set: 29
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.158
----------Test scores----------
R^2 Score: 0.067
MAE: 275.693
MAPE: 0.06
RMSE: 401.008








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.153
----------Test scores----------
R^2 Score: 0.166
MAE: 651.792
MAPE: 0.223
RMSE: 785.41








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.249
----------Test scores----------
R^2 Score: 0.043
MAE: 127.33
MAPE: 0.155
RMSE: 149.707








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.068
----------Test scores----------
R^2 Score: -3.842
MAE: 1161.963
MAPE: 0.109
RMSE: 1382.224








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.122
----------Test scores----------
R^2 Score: -6.216
MAE: 750.729
MAPE: 0.139
RMSE: 807.258








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.064
----------Test scores----------
R^2 Score: -0.492
MAE: 1003.867
MAPE: 0.075
RMSE: 1335.705








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Size of data set: 34
Size of training set: 30
Size of test set: 4
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.075
----------Test scores----------
R^2 Score: 0.664
MAE: 321.042
MAPE: 0.039
RMSE: 367.824








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Size of data set: 23
Size of training set: 20
Size of test set: 3
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.438
----------Test scores----------
R^2 Score: -1.936
MAE: 12.87
MAPE: 0.271
RMSE: 13.991









Unnamed: 0,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892088310,0.117229,220.321067,178.070098,0.060649
1,CODE_UCD_3400892075761,0.086693,246.221519,212.365196,0.09412
2,CODE_UCD_3400892203645,-2.995292,388.768104,380.002273,0.118213
3,CODE_UCD_3400892065366,0.543373,1432.801968,1205.502273,0.175105
4,CODE_UCD_3400892052120,0.356091,202.447122,173.571429,0.10589
5,CODE_UCD_3400891996128,0.028788,6645.223808,4908.0375,0.067699
6,CODE_UCD_3400893826706,-1.317099,215.810738,184.755208,0.095074
7,CODE_UCD_3400893736135,-0.40394,855.808983,828.9375,0.223669
8,CODE_UCD_3400893875490,0.077161,452.252879,424.1875,0.048187
9,CODE_UCD_3400890837149,-0.26606,376.929292,275.762054,0.100627


In [40]:
df_prediction_scores_h3 = pd.DataFrame(columns=['HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in medicines:

  df_prediction_scores_h3 = test_1_baseline(df_h3[features_h3], medicine, df_prediction_scores_h3)

df_prediction_scores_h3


----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.069
----------Test scores----------
R^2 Score: -2.088
MAE: 587.517
MAPE: 0.138
RMSE: 695.965








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -0.11
----------Test scores----------
R^2 Score: 0.461
MAE: 245.129
MAPE: 0.092
RMSE: 271.725








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.114
----------Test scores----------
R^2 Score: -0.767
MAE: 455.704
MAPE: 0.094
RMSE: 522.42








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.068
----------Test scores----------
R^2 Score: -0.097
MAE: 549.203
MAPE: 0.079
RMSE: 659.246








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.135
----------Test scores----------
R^2 Score: 0.773
MAE: 171.732
MAPE: 0.178
RMSE: 224.86








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -0.058
----------Test scores----------
R^2 Score: 0.011
MAE: 4620.746
MAPE: 0.046
RMSE: 5350.936








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.113
----------Test scores----------
R^2 Score: 0.579
MAE: 637.347
MAPE: 0.108
RMSE: 874.015








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.079
----------Test scores----------
R^2 Score: -13.668
MAE: 798.583
MAPE: 0.172
RMSE: 917.115








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -0.064
----------Test scores----------
R^2 Score: 0.739
MAE: 314.489
MAPE: 0.024
RMSE: 353.112








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.113
----------Test scores----------
R^2 Score: 0.32
MAE: 237.145
MAPE: 0.065
RMSE: 301.451








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Size of data set: 59
Size of training set: 53
Size of test set: 6
Size of grid search: 24
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -1.71
----------Test scores----------
R^2 Score: 0.261
MAE: 1035.961
MAPE: 0.428
RMSE: 1394.443








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.132
----------Test scores----------
R^2 Score: 0.928
MAE: 442.294
MAPE: 0.047
RMSE: 517.461








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -0.115
----------Test scores----------
R^2 Score: 0.268
MAE: 581.368
MAPE: 0.095
RMSE: 749.855








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.083
----------Test scores----------
R^2 Score: 0.639
MAE: 188.697
MAPE: 0.029
RMSE: 224.414








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.341
----------Test scores----------
R^2 Score: 0.433
MAE: 109.857
MAPE: 0.284
RMSE: 117.869








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.188
----------Test scores----------
R^2 Score: 0.025
MAE: 321.477
MAPE: 0.212
RMSE: 373.974








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -0.062
----------Test scores----------
R^2 Score: 0.134
MAE: 1874.5
MAPE: 0.063
RMSE: 2067.97








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Size of data set: 46
Size of training set: 41
Size of test set: 5
Size of grid search: 18
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.073
----------Test scores----------
R^2 Score: -0.16
MAE: 282.245
MAPE: 0.084
RMSE: 375.929








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.053
----------Test scores----------
R^2 Score: -0.349
MAE: 1101.301
MAPE: 0.043
RMSE: 1134.201








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -0.064
----------Test scores----------
R^2 Score: 0.207
MAE: 369.653
MAPE: 0.033
RMSE: 572.771








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Size of data set: 68
Size of training set: 61
Size of test set: 7
Size of grid search: 30
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.123
----------Test scores----------
R^2 Score: -0.063
MAE: 607.673
MAPE: 0.126
RMSE: 634.305









Unnamed: 0,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892088310,-2.087801,695.964991,587.516931,0.138236
1,CODE_UCD_3400892075761,0.461337,271.724513,245.128925,0.091882
2,CODE_UCD_3400892203645,-0.766697,522.419656,455.704314,0.094275
3,CODE_UCD_3400892065366,-0.096935,659.246384,549.202664,0.079148
4,CODE_UCD_3400892052120,0.773463,224.860196,171.731695,0.178457
5,CODE_UCD_3400891996128,0.010579,5350.93639,4620.745762,0.046371
6,CODE_UCD_3400893826706,0.579055,874.014666,637.346817,0.108364
7,CODE_UCD_3400893736135,-13.66816,917.11464,798.582766,0.172098
8,CODE_UCD_3400893875490,0.739438,353.111702,314.488884,0.023721
9,CODE_UCD_3400890837149,0.320315,301.450744,237.144901,0.06504


In [41]:
df_prediction_scores_h4 = pd.DataFrame(columns=['HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in medicines:

  df_prediction_scores_h4 = test_1_baseline(df_h4[features_h4], medicine, df_prediction_scores_h4)

df_prediction_scores_h4


----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Size of data set: 22
Size of training set: 19
Size of test set: 3
Size of grid search: 6
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.291
----------Test scores----------
R^2 Score: -0.125
MAE: 2358.0
MAPE: 0.797
RMSE: 3000.582








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.102
----------Test scores----------
R^2 Score: 0.158
MAE: 800.332
MAPE: 0.078
RMSE: 1011.842








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.161
----------Test scores----------
R^2 Score: -0.015
MAE: 1078.214
MAPE: 0.102
RMSE: 1241.067








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.097
----------Test scores----------
R^2 Score: -0.049
MAE: 3187.5
MAPE: 0.156
RMSE: 3685.813








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.148
----------Test scores----------
R^2 Score: -0.47
MAE: 1030.401
MAPE: 0.121
RMSE: 1299.366








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.108
----------Test scores----------
R^2 Score: -2.046
MAE: 18273.542
MAPE: 0.093
RMSE: 22749.597








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.203
----------Test scores----------
R^2 Score: 0.017
MAE: 805.0
MAPE: 0.144
RMSE: 1034.522








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.169
----------Test scores----------
R^2 Score: -3.837
MAE: 1324.219
MAPE: 0.18
RMSE: 1503.218








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.132
----------Test scores----------
R^2 Score: 0.813
MAE: 1051.633
MAPE: 0.046
RMSE: 1075.732








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.144
----------Test scores----------
R^2 Score: -0.067
MAE: 2745.455
MAPE: 0.186
RMSE: 3387.658








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.193
----------Test scores----------
R^2 Score: 0.018
MAE: 1685.59
MAPE: 0.369
RMSE: 1881.866








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.095
----------Test scores----------
R^2 Score: -12.77
MAE: 4701.833
MAPE: 0.212
RMSE: 4963.377








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.109
----------Test scores----------
R^2 Score: -0.259
MAE: 743.879
MAPE: 0.089
RMSE: 872.68








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.15
----------Test scores----------
R^2 Score: -2.016
MAE: 3822.036
MAPE: 0.311
RMSE: 4436.398








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.194
----------Test scores----------
R^2 Score: 0.85
MAE: 1011.806
MAPE: 0.081
RMSE: 1053.073








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.147
----------Test scores----------
R^2 Score: -5.603
MAE: 1389.95
MAPE: 0.114
RMSE: 1401.796








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.35
----------Test scores----------
R^2 Score: -0.985
MAE: 1930.833
MAPE: 0.317
RMSE: 2148.482








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Dataset too small
Size of data set: 8
Size of training set: 6
Size of test set: 2
Size of grid search: 6
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.38
----------Test scores----------
R^2 Score: -270.548
MAE: 988.125
MAPE: 0.331
RMSE: 988.723








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.086
----------Test scores----------
R^2 Score: -2.026
MAE: 2184.324
MAPE: 0.22
RMSE: 2371.163








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Size of data set: 38
Size of training set: 34
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.188
----------Test scores----------
R^2 Score: -0.457
MAE: 4153.376
MAPE: 0.155
RMSE: 4936.617








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.164
----------Test scores----------
R^2 Score: -0.851
MAE: 1457.532
MAPE: 0.19
RMSE: 1720.274









Unnamed: 0,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892088310,-0.125124,3000.581944,2358.0,0.796576
1,CODE_UCD_3400892075761,0.158087,1011.842015,800.332341,0.077826
2,CODE_UCD_3400892203645,-0.014677,1241.067472,1078.214286,0.10207
3,CODE_UCD_3400892065366,-0.049212,3685.812597,3187.5,0.155625
4,CODE_UCD_3400892052120,-0.469889,1299.365501,1030.400794,0.120961
5,CODE_UCD_3400891996128,-2.046136,22749.59652,18273.541667,0.093047
6,CODE_UCD_3400893826706,0.017427,1034.522323,805.0,0.14366
7,CODE_UCD_3400893736135,-3.837125,1503.217597,1324.219426,0.17959
8,CODE_UCD_3400893875490,0.813051,1075.732162,1051.633126,0.046038
9,CODE_UCD_3400890837149,-0.067308,3387.658486,2745.454545,0.185565


## 5.2 - Building forecasting models based on clusters

In [42]:
def train_test_split_modified(df, random_state = 42, test_size = 0.1, Shuffle = False):
  df_iter = df.copy()

  np.random.seed(random_state)

  X_train = []
  y_train = []

  X_test = []
  y_test = []

  columns = df.columns

  df_train = pd.DataFrame(columns=columns)
  df_test = pd.DataFrame(columns=columns)
  i = 0
  for medicine in df_iter.HOSPI_CODE_UCD.unique():
    df_temp = df_iter[df_iter['HOSPI_CODE_UCD'] == medicine]
    i += len(df_temp)
    n_samples = len(df_temp)
    test_samples = max(m.ceil(n_samples * test_size), 2)

    if n_samples == 1:
      print('There is only one sample')
      test_samples = 1


    for _ in range(test_samples):
      random_index = np.random.choice(df_temp.index)
      random_row = df_temp.loc[random_index].to_frame().T

      # Concatenate the new row to the original DataFrame
      df_test = pd.concat([df_test, random_row], ignore_index=False)

      # Remove the selected index from df_temp
      df_temp = df_temp.drop(random_index)

    df_iter = df_iter.drop(df_iter[df_iter['HOSPI_CODE_UCD'] == medicine].index)

  # Get the indices of rows to exclude from df
  exclude_indices = df_test.index

  # Create df_train by dropping the rows specified by the exclude_indices from df
  df_train = df.drop(exclude_indices)

  if shuffle:
    X_train, y_train = shuffle(df_train.drop(['QUANTITY', 'CLUSTER'], axis=1), df_train.QUANTITY.values, random_state = random_state)
    X_test, y_test = shuffle(df_test.drop(['QUANTITY', 'CLUSTER'], axis=1).values, df_test.QUANTITY.values, random_state = random_state)

  return X_train, X_test, y_train, y_test

In [43]:
def test_2_clustering(df, df_scores, medicines):
  for cluster in df.CLUSTER.unique():
    print()
    print(100*'-')
    print(f'Cluster: {cluster}')

    # Perform the train-test split with shuffled samples
    X_train, X_test, y_train, y_test = train_test_split_modified(df[df['CLUSTER'] == cluster])
    print(f'Size of data set: {len(X_train) + len(X_test)}')
    print(f'Size of training set: {len(X_train)}')
    print(f'Size of test set: {len(X_test)}')

    df_test = pd.DataFrame(X_test, columns = df.drop(['QUANTITY', 'CLUSTER'], axis=1).copy().columns)
    df_test['QUANTITY'] = y_test

    # Define the parameter distributions for RandomizedSearchCV
    param_grid = {
        'max_depth': np.arange(2, 8, 1),
        'n_estimators': np.arange(2, max(int(m.ceil(len(X_train)*0.1)),3), 1),
        'max_features': ['sqrt']
    }

    depth_len = param_grid['max_depth'].size
    estimators_len = param_grid['n_estimators'].size

    print(f'Size of grid search: {depth_len * estimators_len}')

    # Create the RandomizedSearchCV object
    grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=42),
                              param_grid=param_grid,
                              scoring = 'neg_mean_absolute_percentage_error',
                              cv = 5,
                              n_jobs = -1)

    ''' >3 : the fold and candidate parameter indexes
        are also displayed together with the starting time of the computation.
    '''

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Fit the RandomizedSearchCV object to the data
    grid_search.fit(X_train_scaled, y_train)
    print("Finished training")

    # Get the best estimator
    best_estimator = grid_search.best_estimator_

    for medicine in df_test.HOSPI_CODE_UCD.unique():
      print()
      print(100*'-')
      print('Medicine:' + str(medicine))

      X_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY'], axis=1).copy().values

      scaler = StandardScaler()
      X_test_scaled = scaler.fit_transform(X_test_medicine)

      y_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

      # Make predictions using the best estimator
      y_pred = best_estimator.predict(X_test_scaled)

      # Calculate R^2 score
      r2 = r2_score(y_test_medicine, y_pred)

      # Calculate MAE
      mae = mean_absolute_error(y_test_medicine, y_pred)

      # Calculate MAPE
      mape = mean_absolute_percentage_error(y_test_medicine, y_pred)

      # Calculate RMSE
      rmse = np.sqrt(mean_squared_error(y_test_medicine, y_pred))

      # Print the best parameters, best score, and evaluation metrics

      # Print the best parameters, best score, and evaluation metrics
      print('Best Parameters: ', grid_search.best_params_)
      print('Training Score (MAPE): ', round(grid_search.best_score_, 3))
      print(10*'-' + 'Test scores' + 10*'-')
      print('R^2 Score:', round(r2, 3))
      print('MAE:', round(mae, 3))
      print('MAPE:', round(mape, 3))
      print('RMSE:', round(rmse, 3))
      print()


      # Create the new row as a DataFrame
      new_row = pd.DataFrame({'CLUSTER': [cluster],
                              'HOSPI_CODE_UCD': ['CODE_UCD_'+str(int(medicine))],
                              'R2': [r2],
                              'RMSE': [rmse],
                              'MAE': [mae],
                              'MAPE': [mape]})

      # Append the new row to the DataFrame
      df_scores = pd.concat([df_scores, new_row], ignore_index=True)

      plot_pred(y_pred, y_test_medicine, medicine)
      print()

      plot_mape(y_pred, y_test_medicine, medicine)
      print()

  # Return the updated DataFrame
  return df_scores

In [44]:
df_prediction_scores_h1_clustered = pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_clustered = test_2_clustering(df_h1[features_h1], df_prediction_scores_h1_clustered, medicines)

df_prediction_scores_h1_clustered


----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 1157
Size of training set: 1035
Size of test set: 122
Size of grid search: 612
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -256.17
MAE: 2227.402
MAPE: 1.411
RMSE: 4270.208








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -5.61
MAE: 2844.259
MAPE: 5.494
RMSE: 4048.73








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -6.988
MAE: 1425.704
MAPE: 0.412
RMSE: 1539.386








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -12.101
MAE: 1394.324
MAPE: 0.542
RMSE: 1791.762








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: 0.416
MAE: 2201.534
MAPE: 0.43
RMSE: 2480.71








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -10.492
MAE: 1376.887
MAPE: 9.598
RMSE: 1592.07








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -51.563
MAE: 867.454
MAPE: 0.341
RMSE: 1059.06








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -34.084
MAE: 1905.297
MAPE: 10.652
RMSE: 2525.149








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -151.556
MAE: 2955.685
MAPE: 2.963
RMSE: 4185.796








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -10.905
MAE: 5430.904
MAPE: 0.731
RMSE: 5529.5








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: 0.107
MAE: 3506.122
MAPE: 0.585
RMSE: 4205.823








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -3.971
MAE: 1629.995
MAPE: 6.1
RMSE: 2421.586








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -22.383
MAE: 1463.057
MAPE: 0.773
RMSE: 2077.044








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -0.814
MAE: 1755.821
MAPE: 0.476
RMSE: 1833.116








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -1.21
MAE: 1305.123
MAPE: 0.322
RMSE: 1789.149








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: 0.411
MAE: 755.276
MAPE: 1.408
RMSE: 987.003








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -26.155
MAE: 2363.302
MAPE: 1.541
RMSE: 3572.018








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -3.297
MAE: 1302.761
MAPE: 32.847
RMSE: 1749.92








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -1.671
MAE: 1304.081
MAPE: 0.466
RMSE: 1575.261








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -0.875
MAE: 3646.217
MAPE: 0.713
RMSE: 4087.939








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -9.619
----------Test scores----------
R^2 Score: -1.371
MAE: 1446.173
MAPE: 44.728
RMSE: 1901.747








----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 51
Size of training set: 45
Size of test set: 6
Size of grid search: 18
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -0.131
----------Test scores----------
R^2 Score: 0.739
MAE: 3785.002
MAPE: 0.087
RMSE: 5444.906









Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,0,CODE_UCD_3400892761695,-256.170045,4270.20816,2227.401715,1.41136
1,0,CODE_UCD_3400892761527,-5.609822,4048.730221,2844.259301,5.494029
2,0,CODE_UCD_3400892508566,-6.987752,1539.386135,1425.704361,0.411704
3,0,CODE_UCD_3400891191226,-12.100754,1791.762448,1394.323669,0.541811
4,0,CODE_UCD_3400892669236,0.416072,2480.709585,2201.534241,0.430017
5,0,CODE_UCD_3400892203645,-10.492386,1592.069599,1376.886562,9.59755
6,0,CODE_UCD_3400892052120,-51.563108,1059.060491,867.454187,0.341012
7,0,CODE_UCD_3400892075761,-34.084019,2525.148963,1905.296696,10.652153
8,0,CODE_UCD_3400892697789,-151.555908,4185.796268,2955.684767,2.962651
9,0,CODE_UCD_3400892745848,-10.904587,5529.499951,5430.903518,0.731254


In [45]:
df_prediction_scores_h2_clustered = pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_clustered = test_2_clustering(df_h2[features_h2], df_prediction_scores_h2_clustered, medicines)

df_prediction_scores_h2_clustered


----------------------------------------------------------------------------------------------------
Cluster: 1
There is only one sample
Size of data set: 479
Size of training set: 421
Size of test set: 58
Size of grid search: 246
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -11.828
MAE: 644.511
MAPE: 0.235
RMSE: 841.58








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -5.835
MAE: 705.626
MAPE: 0.336
RMSE: 751.76








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -110.517
MAE: 1582.041
MAPE: 1.425
RMSE: 1661.337








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -52.215
MAE: 2373.707
MAPE: 0.439
RMSE: 2469.308








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: 0.567
MAE: 376.802
MAPE: 0.131
RMSE: 404.541








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -76.8
MAE: 1081.1
MAPE: 0.257
RMSE: 1095.538








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: 0.67
MAE: 212.352
MAPE: 0.081
RMSE: 215.649








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -0.769
MAE: 930.17
MAPE: 0.174
RMSE: 1280.85








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -2.782
MAE: 530.169
MAPE: 0.191
RMSE: 638.407








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -16.11
MAE: 1351.758
MAPE: 0.763
RMSE: 1407.131








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -19.02
MAE: 2599.963
MAPE: 0.487
RMSE: 2688.652








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -46.467
MAE: 1490.648
MAPE: 1.549
RMSE: 1581.146








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -6.672
MAE: 845.241
MAPE: 0.398
RMSE: 992.793








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -93129.535
MAE: 2876.938
MAPE: 52.027
RMSE: 2877.198








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: -0.54
MAE: 694.749
MAPE: 0.377
RMSE: 870.903








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.839
----------Test scores----------
R^2 Score: nan
MAE: 3234.506
MAPE: 0.479
RMSE: 3234.506




R^2 score is not well-defined with less than two samples.








----------------------------------------------------------------------------------------------------
Cluster: 0
There is only one sample
Size of data set: 218
Size of training set: 191
Size of test set: 27
Size of grid search: 108
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -28.841
MAE: 2186.676
MAPE: 0.253
RMSE: 2393.055








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -51.284
MAE: 5252.703
MAPE: 0.335
RMSE: 5567.647








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -118.497
MAE: 3242.033
MAPE: 0.404
RMSE: 3622.982








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -36.079
MAE: 59369.944
MAPE: 0.785
RMSE: 61259.25








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -58.439
MAE: 4437.039
MAPE: 0.4
RMSE: 7593.759








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -97.147
MAE: 1615.377
MAPE: 0.184
RMSE: 1718.853








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: -32.277
MAE: 11450.051
MAPE: 0.557
RMSE: 13624.178








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.296
----------Test scores----------
R^2 Score: nan
MAE: 4155.736
MAPE: 0.582
RMSE: 4155.736




R^2 score is not well-defined with less than two samples.









Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,1,CODE_UCD_3400890837149,-11.827789,841.579879,644.511117,0.234965
1,1,CODE_UCD_3400893826706,-5.834532,751.760293,705.626179,0.335678
2,1,CODE_UCD_3400891235203,-110.51677,1661.336829,1582.040717,1.424721
3,1,CODE_UCD_3400892065366,-52.214883,2469.307797,2373.706597,0.439297
4,1,CODE_UCD_3400892745848,0.566847,404.540834,376.802016,0.131003
5,1,CODE_UCD_3400893736135,-76.800458,1095.537931,1081.100453,0.257141
6,1,CODE_UCD_3400892088310,0.670443,215.64923,212.352315,0.080579
7,1,CODE_UCD_3400892729589,-0.76885,1280.849959,930.169641,0.173627
8,1,CODE_UCD_3400892203645,-2.782277,638.407403,530.169081,0.191042
9,1,CODE_UCD_3400892052120,-16.109688,1407.131349,1351.758311,0.762614


In [46]:
df_prediction_scores_h3_clustered = pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_clustered = test_2_clustering(df_h3[features_h3], df_prediction_scores_h3_clustered, medicines)

df_prediction_scores_h3_clustered


----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 670
Size of training set: 587
Size of test set: 83
Size of grid search: 342
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -444.013
MAE: 96424.758
MAPE: 0.941
RMSE: 96542.614








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -32.848
MAE: 2560.855
MAPE: 0.711
RMSE: 2609.445








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -282.713
MAE: 18770.871
MAPE: 0.734
RMSE: 18885.465








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -15.324
MAE: 1760.165
MAPE: 0.366
RMSE: 1804.694








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -9.973
MAE: 3006.71
MAPE: 0.378
RMSE: 3052.914








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -11.217
MAE: 5059.521
MAPE: 0.471
RMSE: 5182.826








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: 0.762
MAE: 448.75
MAPE: 0.078
RMSE: 561.889








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -7.729
MAE: 947.412
MAPE: 0.16
RMSE: 1028.237








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -82.056
MAE: 3857.375
MAPE: 1.79
RMSE: 3886.617








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -12.693
MAE: 1679.517
MAPE: 0.361
RMSE: 1890.622








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -274.106
MAE: 4992.196
MAPE: 3.249
RMSE: 5068.163








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -16.268
MAE: 1537.445
MAPE: 0.218
RMSE: 1879.398








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -67.175
MAE: 7042.115
MAPE: 0.549
RMSE: 7053.12








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -169.359
MAE: 1134.925
MAPE: 0.222
RMSE: 1377.915








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -1599.775
MAE: 6678.992
MAPE: 19.048
RMSE: 6694.901








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -181.122
MAE: 4888.122
MAPE: 1.161
RMSE: 6035.645








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -5.313
MAE: 1081.02
MAPE: 0.17
RMSE: 1228.215








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -498.29
MAE: 3206.273
MAPE: 0.897
RMSE: 3256.753








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -113.446
MAE: 23787.76
MAPE: 0.797
RMSE: 23987.939








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -3379.066
MAE: 4321.714
MAPE: 1.903
RMSE: 4357.952








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.024
----------Test scores----------
R^2 Score: -3.665
MAE: 1801.906
MAPE: 0.393
RMSE: 1927.719








----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 727
Size of training set: 646
Size of test set: 81
Size of grid search: 378
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -131.837
MAE: 7028.65
MAPE: 0.498
RMSE: 8109.568








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -522.171
MAE: 4995.032
MAPE: 3.581
RMSE: 5725.949








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -139.487
MAE: 23575.414
MAPE: 0.77
RMSE: 23732.018








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -55.343
MAE: 2426.686
MAPE: 0.378
RMSE: 3312.688








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -10.868
MAE: 5362.565
MAPE: 0.481
RMSE: 5496.138








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -186.817
MAE: 4107.754
MAPE: 0.712
RMSE: 7695.662








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -549.301
MAE: 4012.253
MAPE: 1.706
RMSE: 4504.704








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -18.602
MAE: 3546.685
MAPE: 0.73
RMSE: 4353.644








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -44.437
MAE: 2834.874
MAPE: 0.423
RMSE: 3390.97








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -267.197
MAE: 4910.553
MAPE: 4.624
RMSE: 4942.65








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -91.375
MAE: 3250.102
MAPE: 0.853
RMSE: 4905.946








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -17.345
MAE: 3324.291
MAPE: 0.444
RMSE: 3830.981








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -111.286
MAE: 4731.308
MAPE: 2.376
RMSE: 5761.889








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -326.896
MAE: 5103.519
MAPE: 1.308
RMSE: 6105.415








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -109.941
MAE: 4689.693
MAPE: 0.866
RMSE: 6527.542








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -3245.601
MAE: 7139.28
MAPE: 22.921
RMSE: 9417.7








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -29.572
MAE: 4042.049
MAPE: 0.538
RMSE: 5319.724








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -71.417
MAE: 4135.889
MAPE: 0.411
RMSE: 4947.801








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -186.281
MAE: 86274.644
MAPE: 0.93
RMSE: 86422.415








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -16.087
MAE: 2075.575
MAPE: 0.448
RMSE: 2562.587








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.331
----------Test scores----------
R^2 Score: -121.445
MAE: 19588.049
MAPE: 0.735
RMSE: 19726.134









Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,1,CODE_UCD_3400891996128,-444.012975,96542.614149,96424.758432,0.941121
1,1,CODE_UCD_3400890837149,-32.848213,2609.444891,2560.85507,0.71074
2,1,CODE_UCD_3400892761695,-282.713466,18885.465065,18770.870818,0.733705
3,1,CODE_UCD_3400891235203,-15.323884,1804.69416,1760.165171,0.365912
4,1,CODE_UCD_3400891225037,-9.973188,3052.913849,3006.710486,0.377952
5,1,CODE_UCD_3400892669236,-11.216885,5182.825567,5059.520963,0.470749
6,1,CODE_UCD_3400893826706,0.762084,561.888873,448.75,0.078274
7,1,CODE_UCD_3400893736135,-7.729219,1028.237374,947.411829,0.160143
8,1,CODE_UCD_3400892697789,-82.055919,3886.617468,3857.375038,1.790278
9,1,CODE_UCD_3400891191226,-12.692587,1890.621541,1679.516627,0.361291


In [47]:
df_prediction_scores_h4_clustered = pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_clustered = test_2_clustering(df_h4[features_h4], df_prediction_scores_h4_clustered, medicines)

df_prediction_scores_h4_clustered


----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 731
Size of training set: 654
Size of test set: 77
Size of grid search: 384
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -13.456
MAE: 3641.25
MAPE: 0.443
RMSE: 4757.039








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -8.249
MAE: 4924.481
MAPE: 1.172
RMSE: 5324.816








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -11.713
MAE: 4878.207
MAPE: 0.695
RMSE: 5191.196








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -1.513
MAE: 5190.156
MAPE: 0.29
RMSE: 5590.133








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -187.331
MAE: 4727.676
MAPE: 0.697
RMSE: 5181.368








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -40.297
MAE: 10989.39
MAPE: 0.523
RMSE: 11148.877








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -4.834
MAE: 3021.979
MAPE: 0.361
RMSE: 4223.112








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -8.549
MAE: 4022.419
MAPE: 0.53
RMSE: 5019.194








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -5.288
MAE: 4657.702
MAPE: 0.267
RMSE: 5545.183








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -13.639
MAE: 9631.326
MAPE: 0.452
RMSE: 10247.529








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -5.439
MAE: 5146.429
MAPE: 1.684
RMSE: 5903.107








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -148.221
MAE: 14359.001
MAPE: 0.563
RMSE: 14397.817








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -23.648
MAE: 3443.101
MAPE: 0.416
RMSE: 4389.382








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -1.195
MAE: 1375.972
MAPE: 0.158
RMSE: 1543.704








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -25.574
MAE: 4193.23
MAPE: 0.669
RMSE: 4314.892








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -32.818
MAE: 17304.756
MAPE: 0.595
RMSE: 18090.996








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -0.633
MAE: 1623.763
MAPE: 0.151
RMSE: 1932.444








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -70.282
MAE: 8661.864
MAPE: 1.621
RMSE: 8837.353








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -515.022
MAE: 8510.289
MAPE: 5.003
RMSE: 8518.547








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.45
----------Test scores----------
R^2 Score: -0.162
MAE: 2413.477
MAPE: 0.18
RMSE: 2712.895








----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 39
Size of training set: 35
Size of test set: 4
Size of grid search: 12
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.11
----------Test scores----------
R^2 Score: 0.048
MAE: 19716.691
MAPE: 0.106
RMSE: 21591.77









Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,0,CODE_UCD_3400891191226,-13.455998,4757.038941,3641.249582,0.443226
1,0,CODE_UCD_3400891235203,-8.248903,5324.816152,4924.481057,1.172237
2,0,CODE_UCD_3400893736135,-11.71295,5191.195754,4878.207335,0.695377
3,0,CODE_UCD_3400890837149,-1.513035,5590.132907,5190.155891,0.290098
4,0,CODE_UCD_3400892508566,-187.330932,5181.368007,4727.675731,0.69688
5,0,CODE_UCD_3400893875490,-40.297323,11148.876941,10989.390152,0.52343
6,0,CODE_UCD_3400892052120,-4.834008,4223.111699,3021.978516,0.361222
7,0,CODE_UCD_3400892203645,-8.549324,5019.194197,4022.418735,0.530413
8,0,CODE_UCD_3400892729589,-5.287957,5545.18276,4657.702325,0.267195
9,0,CODE_UCD_3400892065366,-13.638617,10247.529398,9631.325986,0.452089
