<a href="https://colab.research.google.com/github/douglasmmachado/MedicineConsumption/blob/master/notebooks/division_approach/6_Forecasting_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 6 - Forecasting and prediction validation



---



---



In [113]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import math as m

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error,  mean_absolute_percentage_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle


df_h1_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h1_clustered.csv"
df_h2_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h2_clustered.csv"
df_h3_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h3_clustered.csv"
df_h4_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h4_clustered.csv"



df_h1 = pd.read_csv(df_h1_url)
df_h1['YEAR'] = df_h1['YEAR'].astype(int)
df_h1['MONTH'] = df_h1['MONTH'].astype(int)
df_h1['DATE'] = pd.to_datetime(df_h1['YEAR'].astype(str) + '-' + df_h1['MONTH'].astype(str), format='%Y-%m')

df_h2 = pd.read_csv(df_h2_url)
df_h2['YEAR'] = df_h2['YEAR'].astype(int)
df_h2['MONTH'] = df_h2['MONTH'].astype(int)
df_h2['DATE'] = pd.to_datetime(df_h2['YEAR'].astype(str) + '-' + df_h2['MONTH'].astype(str), format='%Y-%m')

df_h3 = pd.read_csv(df_h3_url)
df_h3['YEAR'] = df_h3['YEAR'].astype(int)
df_h3['MONTH'] = df_h3['MONTH'].astype(int)
df_h3['DATE'] = pd.to_datetime(df_h3['YEAR'].astype(str) + '-' + df_h3['MONTH'].astype(str), format='%Y-%m')

df_h4 = pd.read_csv(df_h4_url)
df_h4['YEAR'] = df_h4['YEAR'].astype(int)
df_h4['MONTH'] = df_h4['MONTH'].astype(int)
df_h4['DATE'] = pd.to_datetime(df_h4['YEAR'].astype(str) + '-' + df_h4['MONTH'].astype(str), format='%Y-%m')

In [71]:
df_h1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_SLD         525 non-null    float64       
 14  SEJ_SSR         525 non-null    float64       
 15  WEEK  

In [72]:
df_h2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_PSY         525 non-null    float64       
 14  SEJ_SSR         525 non-null    float64       
 15  WEEK  

In [73]:
df_h3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_HAD         525 non-null    float64       
 13  SEJ_MCO         525 non-null    float64       
 14  SEJ_PSY         525 non-null    float64       
 15  SEJ_SL

In [74]:
df_h4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_PSY         525 non-null    float64       
 14  SEJ_SLD         525 non-null    float64       
 15  SEJ_SS

## 6.1 - New database composition based on clusters

In [75]:
df_h1_cluster_0 = df_h1[df_h1['CLUSTER'] == 0].copy()
df_h1_cluster_1 = df_h1[df_h1['CLUSTER'] == 1].copy()
df_h1_cluster_2 = df_h1[df_h1['CLUSTER'] == 2].copy()
df_h1_cluster_3 = df_h1[df_h1['CLUSTER'] == 3].copy()

In [76]:
df_h2_cluster_0 = df_h2[df_h2['CLUSTER'] == 0].copy()
df_h2_cluster_1 = df_h2[df_h2['CLUSTER'] == 1].copy()
df_h2_cluster_2 = df_h2[df_h2['CLUSTER'] == 2].copy()
df_h2_cluster_3 = df_h2[df_h2['CLUSTER'] == 3].copy()

In [77]:
df_h3_cluster_0 = df_h3[df_h3['CLUSTER'] == 0].copy()
df_h3_cluster_1 = df_h3[df_h3['CLUSTER'] == 1].copy()
df_h3_cluster_2 = df_h3[df_h3['CLUSTER'] == 2].copy()
df_h3_cluster_3 = df_h3[df_h3['CLUSTER'] == 3].copy()

In [78]:
df_h4_cluster_0 = df_h4[df_h4['CLUSTER'] == 0].copy()
df_h4_cluster_1 = df_h4[df_h4['CLUSTER'] == 1].copy()
df_h4_cluster_2 = df_h4[df_h4['CLUSTER'] == 2].copy()
df_h4_cluster_3 = df_h4[df_h4['CLUSTER'] == 3].copy()

## 6.2 - Baseline score, Test 1

In [114]:
def test_1_baseline(df, medicine, df_scores, hospital = '-', unified = False):

  df = df.fillna(0)
  scaler = StandardScaler()

  X = df[df['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().values
  X_scaled = scaler.fit_transform(X)

  y = df[df['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

  X, y = shuffle(X, y, random_state=42)

  if unified:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,
                                                        test_size = 0.2,
                                                        random_state = 42)

  else:
    X_train = X_scaled
    X_test = X_scaled
    y_train = y
    y_test = y

  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  # Fit the RandomizedSearchCV object to the data
  randomized_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Calculate R^2 score
  r2 = r2_score(y_test, y_pred)

  # Calculate MAE
  mae = mean_absolute_error(y_test, y_pred)

  # Calculate MAPE
  mape = mean_absolute_percentage_error(y_test, y_pred)

  # Calculate RMSE
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  # Print the best parameters, best score, and evaluation metrics
  print('Medicine:' + str(medicine))
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print('R^2 Score:', round(r2, 3))
  print('MAE:', round(mae, 3))
  print('MAPE:', round(mape, 3))
  print('RMSE:', round(rmse, 3))
  print()


  # Create the new row as a DataFrame
  new_row = pd.DataFrame({'ID_SITE_RATTACHE': [hospital],
                          'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                          'R2': [r2],
                          'RMSE': [rmse],
                          'MAE': [mae],
                          'MAPE': [mape]})

  # Append the new row to the DataFrame
  df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # Return the updated DataFrame
  return df_scores


In [115]:
df_prediction_scores_h1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h1.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h1 = test_1_baseline(df_h1, medicine, df_prediction_scores_h1, hospital = 'HOSPI_1', unified = False)

df_prediction_scores_h1

Medicine:3400890837149
Best Parameters: {'n_estimators': 156, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 16}
Best Score: -0.22217793042634892
R^2 Score: 0.274
MAE: 133.958
MAPE: 0.047
RMSE: 183.876

Medicine:3400893826706
Best Parameters: {'n_estimators': 162, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 20}
Best Score: -2.5388417283274745
R^2 Score: 0.239
MAE: 323.606
MAPE: 0.112
RMSE: 457.942

Medicine:3400892729589
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.07926018137510557
R^2 Score: 0.168
MAE: 456.71
MAPE: 0.143
RMSE: 669.208

Medicine:3400892088310
Best Parameters: {'n_estimators': 2, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: -1.3834035012032795
R^2 Score: 0.157
MAE: 239.9
MAPE: 0.098
RMSE: 309.792

Medicine:3400892075761
Best Parameters: {'n_est

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_1,CODE_UCD_3400890837149,0.274086,183.87578,133.958212,0.04741232
1,HOSPI_1,CODE_UCD_3400893826706,0.239043,457.941687,323.605679,0.1121733
2,HOSPI_1,CODE_UCD_3400892729589,0.168232,669.208247,456.709734,0.1434478
3,HOSPI_1,CODE_UCD_3400892088310,0.156987,309.792277,239.9,0.09768513
4,HOSPI_1,CODE_UCD_3400892075761,0.175057,149.836235,109.310896,0.0853574
5,HOSPI_1,CODE_UCD_3400892745848,0.119046,1118.464417,910.930167,0.1375839
6,HOSPI_1,CODE_UCD_3400893736135,0.039848,284.847986,232.86,0.1180574
7,HOSPI_1,CODE_UCD_3400892697789,0.035549,210.614791,153.373772,0.1556787
8,HOSPI_1,CODE_UCD_3400892203645,0.132083,157.987564,128.660547,0.09290929
9,HOSPI_1,CODE_UCD_3400891235203,0.111012,598.618495,536.419115,7.196467e+17


In [116]:
df_prediction_scores_h2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h2.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h2 = test_1_baseline(df_h2, medicine, df_prediction_scores_h2, hospital = 'HOSPI_2', unified = False)

df_prediction_scores_h2

Medicine:3400890837149
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.3775767033400604
R^2 Score: 0.245
MAE: 261.013
MAPE: 0.106
RMSE: 335.681

Medicine:3400893826706
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.16848725302470538
R^2 Score: 0.299
MAE: 216.755
MAPE: 0.121
RMSE: 265.74

Medicine:3400892729589
Best Parameters: {'n_estimators': 172, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 2}
Best Score: -0.6520162506094216
R^2 Score: 0.296
MAE: 415.871
MAPE: 0.089
RMSE: 503.857

Medicine:3400892088310
Best Parameters: {'n_estimators': 172, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}
Best Score: -0.262998058467832
R^2 Score: 0.56
MAE: 202.369
MAPE: 0.074
RMSE: 264.461

Medicine:3400892075761
Best Parameters: {'n_estim

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_2,CODE_UCD_3400890837149,0.245237,335.680643,261.013027,0.1055289
1,HOSPI_2,CODE_UCD_3400893826706,0.298693,265.740476,216.755461,0.1208963
2,HOSPI_2,CODE_UCD_3400892729589,0.296399,503.857123,415.87052,0.08935227
3,HOSPI_2,CODE_UCD_3400892088310,0.560068,264.461104,202.369401,0.07388452
4,HOSPI_2,CODE_UCD_3400892075761,0.056785,222.860528,183.100298,0.0832208
5,HOSPI_2,CODE_UCD_3400892745848,0.087151,662.102023,493.953333,0.132782
6,HOSPI_2,CODE_UCD_3400893736135,0.565948,241.269911,199.573863,0.04852442
7,HOSPI_2,CODE_UCD_3400892697789,0.123949,239.91678,189.738443,0.4472305
8,HOSPI_2,CODE_UCD_3400892203645,0.107074,473.048953,367.422996,0.1177898
9,HOSPI_2,CODE_UCD_3400891235203,0.251636,114.011623,88.123917,0.06655938


In [117]:
df_prediction_scores_h3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h3.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h3 = test_1_baseline(df_h3, medicine, df_prediction_scores_h3, hospital = 'HOSPI_3', unified = False)

df_prediction_scores_h3

Medicine:3400890837149
Best Parameters: {'n_estimators': 80, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 16}
Best Score: -0.33623408618867023
R^2 Score: 0.153
MAE: 282.439
MAPE: 0.071
RMSE: 337.186

Medicine:3400893826706
Best Parameters: {'n_estimators': 4, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 30}
Best Score: -0.4506169275301735
R^2 Score: 0.007
MAE: 896.445
MAPE: 0.119
RMSE: 1042.113

Medicine:3400892729589
Best Parameters: {'n_estimators': 14, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 28}
Best Score: -0.07847870535858867
R^2 Score: 0.133
MAE: 389.746
MAPE: 0.059
RMSE: 486.664

Medicine:3400892088310
Best Parameters: {'n_estimators': 34, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_depth': 8}
Best Score: -0.21112061806876312
R^2 Score: 0.185
MAE: 256.971
MAPE: 0.062
RMSE: 365.449

Medicine:3400892075761
Best Parameters: {'n_e

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_3,CODE_UCD_3400890837149,0.153267,337.186057,282.43934,0.07084638
1,HOSPI_3,CODE_UCD_3400893826706,0.007195,1042.113163,896.445113,0.118795
2,HOSPI_3,CODE_UCD_3400892729589,0.132981,486.663893,389.74612,0.05935662
3,HOSPI_3,CODE_UCD_3400892088310,0.184712,365.449332,256.971237,0.06242373
4,HOSPI_3,CODE_UCD_3400892075761,0.132401,413.497281,315.807405,0.1224739
5,HOSPI_3,CODE_UCD_3400892745848,0.02523,143.294019,111.109471,0.3741871
6,HOSPI_3,CODE_UCD_3400893736135,0.213668,475.921122,407.130816,0.08292219
7,HOSPI_3,CODE_UCD_3400892697789,0.654155,200.88716,152.780626,0.1011606
8,HOSPI_3,CODE_UCD_3400892203645,0.074338,689.526583,524.920748,0.101918
9,HOSPI_3,CODE_UCD_3400891235203,0.085139,431.993005,345.137703,1.459344e+17


In [118]:
df_prediction_scores_h4 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h4.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h4 = test_1_baseline(df_h4, medicine, df_prediction_scores_h4, hospital = 'HOSPI_4', unified = False)

df_prediction_scores_h4

Medicine:3400890837149
Best Parameters: {'n_estimators': 2, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: -0.20280590418550695
R^2 Score: 0.282
MAE: 1323.436
MAPE: 0.082
RMSE: 1699.532

Medicine:3400893826706
Best Parameters: {'n_estimators': 6, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 10}
Best Score: -0.3894786720697251
R^2 Score: 0.069
MAE: 948.991
MAPE: 0.188
RMSE: 1103.41

Medicine:3400892729589
Best Parameters: {'n_estimators': 108, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 8}
Best Score: -0.20345983632316642
R^2 Score: 0.113
MAE: 2000.909
MAPE: 0.13
RMSE: 2681.98

Medicine:3400892088310
Best Parameters: {'n_estimators': 150, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
Best Score: -0.16758469896738354
R^2 Score: 0.08
MAE: 1381.614
MAPE: 0.302
RMSE: 1840.892

Medicine:3400892075761
Best Parameters: {'

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_4,CODE_UCD_3400890837149,0.282205,1699.531781,1323.435897,0.08245601
1,HOSPI_4,CODE_UCD_3400893826706,0.069404,1103.40999,948.991323,0.1879398
2,HOSPI_4,CODE_UCD_3400892729589,0.113266,2681.979724,2000.908676,0.1301504
3,HOSPI_4,CODE_UCD_3400892088310,0.079973,1840.891951,1381.613596,0.3023194
4,HOSPI_4,CODE_UCD_3400892075761,0.15797,1095.184269,882.037912,0.09991422
5,HOSPI_4,CODE_UCD_3400892745848,0.193248,2416.399607,1819.851292,0.1775477
6,HOSPI_4,CODE_UCD_3400893736135,0.190284,1250.819728,1043.987179,0.1402999
7,HOSPI_4,CODE_UCD_3400892697789,0.107139,1264.838231,895.04497,0.07657127
8,HOSPI_4,CODE_UCD_3400892203645,0.496089,884.119267,702.963556,0.07565269
9,HOSPI_4,CODE_UCD_3400891235203,0.105287,1216.910829,947.126394,0.1699248


## 6.3 - Clustering score, Test 2

In [120]:
def test_2_clustering(df, df_scores, hospital = '-', unified = False):

  df = df.fillna(0)
  cluster = df.CLUSTER.unique()[0]
  scaler = StandardScaler()

  X = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().values
  y = df['QUANTITY'].copy().values
  X,  y = shuffle(X, y, random_state = 42)
  if unified:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=42)
  else:
    X_train = X
    y_train = y

    X_test = X
    y_test = y


  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)

  # Fit the RandomizedSearchCV object to the data
  randomized_search.fit(X_train_scaled, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  print(f'Cluster: {cluster}')
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print(f'Data in train: {len(X_train)}')

  df_test = pd.DataFrame(X_test, columns = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().columns)
  df_test['QUANTITY'] = y_test

  for medicine in df_test.HOSPI_CODE_UCD.unique():
    X_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY'], axis=1).copy().values
    scaler = StandardScaler()
    X_test_scaled = scaler.fit_transform(X_test_medicine)

    y_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

    print()
    print(f'Data in test: {len(X_test_medicine)}')
    # Make predictions using the best estimator
    y_pred = best_estimator.predict(X_test_scaled)

    # Calculate R^2 score
    r2 = r2_score(y_test_medicine, y_pred)

    # Calculate MAE
    mae = mean_absolute_error(y_test_medicine, y_pred)

    # Calculate MAPE
    mape = mean_absolute_percentage_error(y_test_medicine, y_pred)

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(y_test_medicine, y_pred))

    # Print the best parameters, best score, and evaluation metrics
    print('Medicine:' + str(medicine))
    print('Medicines in cluster: ')
    print('R^2 Score:', round(r2, 3))
    print('MAE:', round(mae, 3))
    print('MAPE:', round(mape, 3))
    print('RMSE:', round(rmse, 3))
    print()


    # Create the new row as a DataFrame
    new_row = pd.DataFrame({'ID_SITE_RATTACHE': [hospital],
                            'CLUSTER': [cluster],
                            'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                            'R2': [r2],
                            'RMSE': [rmse],
                            'MAE': [mae],
                            'MAPE': [mape]})

    # Append the new row to the DataFrame
    df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # Return the updated DataFrame
  return df_scores


### Hospital 1 - Cluster 0

In [121]:
df_prediction_scores_h1_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_0 = test_2_clustering(df_h1_cluster_0, df_prediction_scores_h1_cluster_0, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9800677090193013
Data in train: 150

Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -115.149
MAE: 50099.411
MAPE: 0.898
RMSE: 50429.45


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -858.315
MAE: 6914.676
MAPE: 1.426
RMSE: 9087.237


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -3871.306
MAE: 6646.794
MAPE: 4.747
RMSE: 10552.825


Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -638.349
MAE: 4733.252
MAPE: 1.848
RMSE: 8531.44


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -476.971
MAE: 4863.255
MAPE: 1.682
RMSE: 7721.1


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -4167.813
MAE: 6174.223
MAPE: 4.216
RMSE: 10651.509



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400891996128.0,-115.149461,50429.449862,50099.410746,0.898397,0.0
1,HOSPI_1,CODE_UCD_3400892065366.0,-858.315386,9087.236723,6914.676402,1.426162,0.0
2,HOSPI_1,CODE_UCD_3400892203645.0,-3871.30643,10552.824527,6646.794336,4.747446,0.0
3,HOSPI_1,CODE_UCD_3400892088310.0,-638.348629,8531.440048,4733.252026,1.848152,0.0
4,HOSPI_1,CODE_UCD_3400892052120.0,-476.970834,7721.099633,4863.255253,1.682343,0.0
5,HOSPI_1,CODE_UCD_3400892075761.0,-4167.812582,10651.508874,6174.223472,4.216417,0.0


In [122]:
df_prediction_scores_h1_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_1 = test_2_clustering(df_h1_cluster_1, df_prediction_scores_h1_cluster_1, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9606519978305036
Data in train: 75

Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -23.538
MAE: 1288.243
MAPE: 0.642
RMSE: 1439.994


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -57.501
MAE: 5667.302
MAPE: 0.615
RMSE: 5730.259


Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -0.782
MAE: 563.645
MAPE: 0.176
RMSE: 700.796



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400893736135.0,-23.537777,1439.993947,1288.242899,0.642254,1.0
1,HOSPI_1,CODE_UCD_3400893875490.0,-57.501492,5730.258784,5667.302108,0.615497,1.0
2,HOSPI_1,CODE_UCD_3400893826706.0,-0.782065,700.796339,563.644547,0.175755,1.0


In [123]:
df_prediction_scores_h1_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_2 = test_2_clustering(df_h1_cluster_2, df_prediction_scores_h1_cluster_2, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 34, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9543771099702628
Data in train: 100

Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -77.868
MAE: 11368.758
MAPE: 0.712
RMSE: 11700.846


Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -247.185
MAE: 2278.093
MAPE: 0.787
RMSE: 3399.926


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -92.251
MAE: 5096.951
MAPE: 1.0507281690965774e+18
RMSE: 6130.979


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -181.082
MAE: 2574.961
MAPE: 0.945
RMSE: 3851.149



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400891225037.0,-77.868333,11700.846066,11368.758289,0.7120304,2.0
1,HOSPI_1,CODE_UCD_3400890837149.0,-247.184858,3399.925894,2278.093305,0.7873945,2.0
2,HOSPI_1,CODE_UCD_3400891235203.0,-92.251336,6130.979058,5096.951417,1.050728e+18,2.0
3,HOSPI_1,CODE_UCD_3400891191226.0,-181.082072,3851.149154,2574.960635,0.9448985,2.0


In [124]:
df_prediction_scores_h1_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_3 = test_2_clustering(df_h1_cluster_3, df_prediction_scores_h1_cluster_3, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8429799561362176
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -1.237
MAE: 918.382
MAPE: 0.228
RMSE: 1097.476


Data in test: 25
Medicine:3400892508566.0
Medicines in cluster: 
R^2 Score: -2.797
MAE: 1291.017
MAPE: 0.369
RMSE: 1581.635


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: 0.232
MAE: 1019.629
MAPE: 0.475
RMSE: 1311.764


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -250.811
MAE: 2809.913
MAPE: 2.588
RMSE: 3403.188


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -4.141
MAE: 2461.78
MAPE: 0.372
RMSE: 2701.883


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -319.621
MAE: 2791.665
MAPE: 1.643
RMSE: 3277.787


Data in test: 25
Medicine:3400892669236.0
Medicines in c

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400892729589.0,-1.237018,1097.475516,918.381844,0.2276042,3.0
1,HOSPI_1,CODE_UCD_3400892508566.0,-2.797254,1581.634899,1291.017399,0.368521,3.0
2,HOSPI_1,CODE_UCD_3400892761527.0,0.231917,1311.763692,1019.628502,0.4752143,3.0
3,HOSPI_1,CODE_UCD_3400892697789.0,-250.810957,3403.188333,2809.912735,2.58768,3.0
4,HOSPI_1,CODE_UCD_3400892745848.0,-4.140934,2701.883149,2461.780188,0.3722367,3.0
5,HOSPI_1,CODE_UCD_3400892761695.0,-319.620862,3277.787233,2791.664899,1.642967,3.0
6,HOSPI_1,CODE_UCD_3400892669236.0,0.356439,3657.482795,3342.482288,1.559758e+18,3.0
7,HOSPI_1,CODE_UCD_3400893022634.0,-23.316886,1773.493409,1402.368268,0.4227549,3.0


In [125]:
pd.concat([df_prediction_scores_h1_cluster_0,
           df_prediction_scores_h1_cluster_1,
           df_prediction_scores_h1_cluster_2,
           df_prediction_scores_h1_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400891996128.0,-115.149461,50429.449862,50099.410746,0.8983973,0.0
1,HOSPI_1,CODE_UCD_3400892065366.0,-858.315386,9087.236723,6914.676402,1.426162,0.0
2,HOSPI_1,CODE_UCD_3400892203645.0,-3871.30643,10552.824527,6646.794336,4.747446,0.0
3,HOSPI_1,CODE_UCD_3400892088310.0,-638.348629,8531.440048,4733.252026,1.848152,0.0
4,HOSPI_1,CODE_UCD_3400892052120.0,-476.970834,7721.099633,4863.255253,1.682343,0.0
5,HOSPI_1,CODE_UCD_3400892075761.0,-4167.812582,10651.508874,6174.223472,4.216417,0.0
0,HOSPI_1,CODE_UCD_3400893736135.0,-23.537777,1439.993947,1288.242899,0.6422544,1.0
1,HOSPI_1,CODE_UCD_3400893875490.0,-57.501492,5730.258784,5667.302108,0.6154972,1.0
2,HOSPI_1,CODE_UCD_3400893826706.0,-0.782065,700.796339,563.644547,0.175755,1.0
0,HOSPI_1,CODE_UCD_3400891225037.0,-77.868333,11700.846066,11368.758289,0.7120304,2.0


In [126]:
df_prediction_scores_h2_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_0 = test_2_clustering(df_h2_cluster_0, df_prediction_scores_h2_cluster_0, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 116, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 28}
Best Score: 0.9737867602026032
Data in train: 150

Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -32.219
MAE: 60087.57
MAPE: 0.855
RMSE: 61201.588


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -21.178
MAE: 6593.731
MAPE: 0.961
RMSE: 9544.754


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -649.658
MAE: 7993.375
MAPE: 2.27
RMSE: 12769.524


Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -1142.95
MAE: 8167.203
MAPE: 2.62
RMSE: 13485.676


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -3288.648
MAE: 8036.967
MAPE: 4.303
RMSE: 13077.044


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -3605.543
MAE: 9983.896
MAPE: 4.464
RMSE: 13780.769



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400891996128.0,-32.219115,61201.588087,60087.570358,0.854634,0.0
1,HOSPI_2,CODE_UCD_3400892065366.0,-21.178122,9544.753826,6593.731433,0.960877,0.0
2,HOSPI_2,CODE_UCD_3400892203645.0,-649.657766,12769.523585,7993.375041,2.270363,0.0
3,HOSPI_2,CODE_UCD_3400892088310.0,-1142.950398,13485.675654,8167.203117,2.620171,0.0
4,HOSPI_2,CODE_UCD_3400892052120.0,-3288.648213,13077.043729,8036.966913,4.302583,0.0
5,HOSPI_2,CODE_UCD_3400892075761.0,-3605.543015,13780.768894,9983.895696,4.463711,0.0


In [127]:
df_prediction_scores_h2_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_1 = test_2_clustering(df_h2_cluster_1, df_prediction_scores_h2_cluster_1, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 118, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 18}
Best Score: 0.9749903359907174
Data in train: 75

Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -4.676
MAE: 728.435
MAPE: 0.181
RMSE: 872.454


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -112.057
MAE: 4455.033
MAPE: 0.508
RMSE: 4516.97


Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -62.369
MAE: 2367.374
MAPE: 1.274
RMSE: 2526.057



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400893736135.0,-4.675725,872.454454,728.434712,0.180794,1.0
1,HOSPI_2,CODE_UCD_3400893875490.0,-112.057349,4516.970126,4455.033341,0.508288,1.0
2,HOSPI_2,CODE_UCD_3400893826706.0,-62.369301,2526.057038,2367.373543,1.273888,1.0


In [128]:
df_prediction_scores_h2_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_2 = test_2_clustering(df_h2_cluster_2, df_prediction_scores_h2_cluster_2, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 34, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.938436794720683
Data in train: 100

Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -11.783
MAE: 10781.853
MAPE: 0.689
RMSE: 11182.898


Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -110.846
MAE: 2834.046
MAPE: 1.02
RMSE: 4086.318


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -1818.827
MAE: 4574.486
MAPE: 3.26
RMSE: 5622.22


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -361.93
MAE: 2616.762
MAPE: 0.903
RMSE: 4000.469



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400891225037.0,-11.783333,11182.897941,10781.852536,0.689228,2.0
1,HOSPI_2,CODE_UCD_3400890837149.0,-110.846494,4086.317721,2834.046325,1.020244,2.0
2,HOSPI_2,CODE_UCD_3400891235203.0,-1818.826658,5622.220051,4574.485518,3.260302,2.0
3,HOSPI_2,CODE_UCD_3400891191226.0,-361.929754,4000.468775,2616.762263,0.903224,2.0


In [129]:
df_prediction_scores_h2_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_3 = test_2_clustering(df_h2_cluster_3, df_prediction_scores_h2_cluster_3, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 118, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 18}
Best Score: 0.97266860370285
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -24.707
MAE: 2227.384
MAPE: 0.451
RMSE: 3045.573


Data in test: 25
Medicine:3400892508566.0
Medicines in cluster: 
R^2 Score: -74287.514
MAE: 6718.506
MAPE: 9.344910903819555e+18
RMSE: 7236.742


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -28.29
MAE: 5398.491
MAPE: 0.446
RMSE: 5757.157


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -583.371
MAE: 5801.049
MAPE: 8.415
RMSE: 6196.412


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -24.139
MAE: 2839.845
MAPE: 0.714
RMSE: 3474.542


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -72.565
MAE: 8608.865
MAPE: 0.557
RMSE: 8845.639


Data in test: 25
Medicine:340089

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400892729589.0,-24.706917,3045.573405,2227.383675,0.4509768,3.0
1,HOSPI_2,CODE_UCD_3400892508566.0,-74287.513832,7236.741719,6718.505578,9.344911e+18,3.0
2,HOSPI_2,CODE_UCD_3400892761527.0,-28.290101,5757.157376,5398.490745,0.4460193,3.0
3,HOSPI_2,CODE_UCD_3400892697789.0,-583.371138,6196.411968,5801.048604,8.415075,3.0
4,HOSPI_2,CODE_UCD_3400892745848.0,-24.138771,3474.542029,2839.845069,0.713788,3.0
5,HOSPI_2,CODE_UCD_3400892761695.0,-72.564573,8845.638947,8608.865406,0.5566178,3.0
6,HOSPI_2,CODE_UCD_3400892669236.0,-10.400427,2668.912965,2320.664334,0.2911809,3.0
7,HOSPI_2,CODE_UCD_3400893022634.0,-14.647708,2557.835131,1879.245046,0.3571867,3.0


In [130]:
pd.concat([df_prediction_scores_h2_cluster_0,
           df_prediction_scores_h2_cluster_1,
           df_prediction_scores_h2_cluster_2,
           df_prediction_scores_h2_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400891996128.0,-32.219115,61201.588087,60087.570358,0.8546341,0.0
1,HOSPI_2,CODE_UCD_3400892065366.0,-21.178122,9544.753826,6593.731433,0.9608773,0.0
2,HOSPI_2,CODE_UCD_3400892203645.0,-649.657766,12769.523585,7993.375041,2.270363,0.0
3,HOSPI_2,CODE_UCD_3400892088310.0,-1142.950398,13485.675654,8167.203117,2.620171,0.0
4,HOSPI_2,CODE_UCD_3400892052120.0,-3288.648213,13077.043729,8036.966913,4.302583,0.0
5,HOSPI_2,CODE_UCD_3400892075761.0,-3605.543015,13780.768894,9983.895696,4.463711,0.0
0,HOSPI_2,CODE_UCD_3400893736135.0,-4.675725,872.454454,728.434712,0.1807937,1.0
1,HOSPI_2,CODE_UCD_3400893875490.0,-112.057349,4516.970126,4455.033341,0.5082878,1.0
2,HOSPI_2,CODE_UCD_3400893826706.0,-62.369301,2526.057038,2367.373543,1.273888,1.0
0,HOSPI_2,CODE_UCD_3400891225037.0,-11.783333,11182.897941,10781.852536,0.6892277,2.0


In [131]:
df_prediction_scores_unified_cluster = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_unified_cluster = test_2_clustering(pd.concat([df_h1,
                                                                   df_h2,
                                                                   df_h3,
                                                                   df_h4]), df_prediction_scores_unified_cluster, hospital = '-', unified = True)

df_prediction_scores_unified_cluster

Cluster: 2
Best Parameters: {'n_estimators': 150, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 28}
Best Score: 0.9829438834798914
Data in train: 1680

Data in test: 18
Medicine:3400892508566.0
Medicines in cluster: 
R^2 Score: -11.26
MAE: 7245.813
MAPE: 9.523121264430038e+17
RMSE: 9133.186


Data in test: 24
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -12.047
MAE: 5737.546
MAPE: 1.093
RMSE: 8078.514


Data in test: 24
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -2.488
MAE: 4866.629
MAPE: 0.542
RMSE: 9391.785


Data in test: 23
Medicine:3400892669236.0
Medicines in cluster: 
R^2 Score: 0.548
MAE: 5371.191
MAPE: 1.75788131005822e+17
RMSE: 5882.58


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -7.892
MAE: 5735.851
MAPE: 0.857
RMSE: 8789.486


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -11.34
MAE: 5811.732
MAPE: 1.214
RMSE: 10084.773


Data in test: 13
Medici

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,-,CODE_UCD_3400892508566.0,-11.259947,9133.186439,7245.812712,9.523121e+17,2.0
1,-,CODE_UCD_3400893736135.0,-12.046824,8078.51374,5737.54627,1.093199,2.0
2,-,CODE_UCD_3400892729589.0,-2.488232,9391.785207,4866.628669,0.5419175,2.0
3,-,CODE_UCD_3400892669236.0,0.548191,5882.580333,5371.191199,1.757881e+17,2.0
4,-,CODE_UCD_3400892203645.0,-7.892319,8789.485649,5735.850703,0.8572455,2.0
5,-,CODE_UCD_3400892052120.0,-11.33955,10084.772605,5811.731639,1.213964,2.0
6,-,CODE_UCD_3400892761695.0,0.941241,2393.997663,2112.131422,0.2766933,2.0
7,-,CODE_UCD_3400893875490.0,0.305623,5109.702905,4387.364119,0.4199861,2.0
8,-,CODE_UCD_3400892761527.0,-2.565096,10541.923528,4835.35751,0.4394565,2.0
9,-,CODE_UCD_3400890837149.0,-1.650624,8186.728738,3823.660803,0.3575448,2.0


In [132]:
df_prediction_scores_h3_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_0 = test_2_clustering(df_h3_cluster_0, df_prediction_scores_h3_cluster_0, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.990149248050012
Data in train: 150

Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -260.391
MAE: 86477.644
MAPE: 0.846
RMSE: 88147.313


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -641.193
MAE: 7755.421
MAPE: 1.006
RMSE: 14980.429


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -382.185
MAE: 7603.525
MAPE: 1.378
RMSE: 14029.078


Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -1751.302
MAE: 10944.879
MAPE: 2.357
RMSE: 16942.451


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -6539.507
MAE: 14208.333
MAPE: 9.282
RMSE: 19736.98


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -1859.384
MAE: 10339.518
MAPE: 3.697
RMSE: 19147.606



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400891996128.0,-260.391306,88147.313225,86477.643685,0.845882,0.0
1,HOSPI_3,CODE_UCD_3400892065366.0,-641.192695,14980.429481,7755.420911,1.006184,0.0
2,HOSPI_3,CODE_UCD_3400892203645.0,-382.185169,14029.078043,7603.525474,1.377901,0.0
3,HOSPI_3,CODE_UCD_3400892088310.0,-1751.302271,16942.450735,10944.878563,2.357469,0.0
4,HOSPI_3,CODE_UCD_3400892052120.0,-6539.506571,19736.98015,14208.333153,9.281973,0.0
5,HOSPI_3,CODE_UCD_3400892075761.0,-1859.384227,19147.606405,10339.517881,3.697477,0.0


In [133]:
df_prediction_scores_h3_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_1 = test_2_clustering(df_h3_cluster_1, df_prediction_scores_h3_cluster_1, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9494100244200917
Data in train: 75

Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -23.515
MAE: 2441.742
MAPE: 0.485
RMSE: 2657.353


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -42.146
MAE: 5985.118
MAPE: 0.434
RMSE: 6057.268


Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: 0.106
MAE: 736.787
MAPE: 0.095
RMSE: 988.894



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400893736135.0,-23.515141,2657.352764,2441.7422,0.484647,1.0
1,HOSPI_3,CODE_UCD_3400893875490.0,-42.145966,6057.267747,5985.117873,0.434311,1.0
2,HOSPI_3,CODE_UCD_3400893826706.0,0.106008,988.893843,736.786672,0.094948,1.0


In [134]:
df_prediction_scores_h3_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_2 = test_2_clustering(df_h3_cluster_2, df_prediction_scores_h3_cluster_2, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9460435111215494
Data in train: 100

Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -26.865
MAE: 6221.179
MAPE: 0.588
RMSE: 6355.493


Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -14.711
MAE: 1232.285
MAPE: 0.307
RMSE: 1452.424


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -56.325
MAE: 3331.646
MAPE: 4.6991498038436634e+17
RMSE: 3419.581


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -10.222
MAE: 1811.597
MAPE: 0.289
RMSE: 2013.571



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400891225037.0,-26.864903,6355.493285,6221.179069,0.588123,2.0
1,HOSPI_3,CODE_UCD_3400890837149.0,-14.710656,1452.424304,1232.284727,0.30659,2.0
2,HOSPI_3,CODE_UCD_3400891235203.0,-56.325476,3419.581114,3331.645587,4.69915e+17,2.0
3,HOSPI_3,CODE_UCD_3400891191226.0,-10.221642,2013.571231,1811.597286,0.2890733,2.0


In [135]:
df_prediction_scores_h3_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_3 = test_2_clustering(df_h3_cluster_3, df_prediction_scores_h3_cluster_3, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.9827729796503734
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -182.022
MAE: 4898.087
MAPE: 0.708
RMSE: 7070.762


Data in test: 25
Medicine:3400892508566.0
Medicines in cluster: 
R^2 Score: -91.01
MAE: 5591.995
MAPE: 1.155
RMSE: 7581.868


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -67.304
MAE: 17884.203
MAPE: 0.626
RMSE: 18464.332


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -1020.437
MAE: 8824.525
MAPE: 4.907
RMSE: 10917.345


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -6855.562
MAE: 9870.253
MAPE: 22.038
RMSE: 12017.945


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -166.85
MAE: 16149.267
MAPE: 0.626
RMSE: 16929.506


Data in test: 25
Medicine:340089266923

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400892729589.0,-182.021725,7070.762235,4898.087272,0.708292,3.0
1,HOSPI_3,CODE_UCD_3400892508566.0,-91.00972,7581.868401,5591.995222,1.15458,3.0
2,HOSPI_3,CODE_UCD_3400892761527.0,-67.303929,18464.331674,17884.203198,0.626195,3.0
3,HOSPI_3,CODE_UCD_3400892697789.0,-1020.436682,10917.345049,8824.525453,4.907324,3.0
4,HOSPI_3,CODE_UCD_3400892745848.0,-6855.561991,12017.945331,9870.253368,22.037945,3.0
5,HOSPI_3,CODE_UCD_3400892761695.0,-166.850471,16929.505688,16149.267126,0.625727,3.0
6,HOSPI_3,CODE_UCD_3400892669236.0,-57.041239,5653.940664,4664.689591,0.448938,3.0
7,HOSPI_3,CODE_UCD_3400893022634.0,-599.988248,9717.326441,8116.198115,2.27255,3.0


In [136]:
pd.concat([df_prediction_scores_h3_cluster_0,
           df_prediction_scores_h3_cluster_1,
           df_prediction_scores_h3_cluster_2,
           df_prediction_scores_h3_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400891996128.0,-260.391306,88147.313225,86477.643685,0.8458819,0.0
1,HOSPI_3,CODE_UCD_3400892065366.0,-641.192695,14980.429481,7755.420911,1.006184,0.0
2,HOSPI_3,CODE_UCD_3400892203645.0,-382.185169,14029.078043,7603.525474,1.377901,0.0
3,HOSPI_3,CODE_UCD_3400892088310.0,-1751.302271,16942.450735,10944.878563,2.357469,0.0
4,HOSPI_3,CODE_UCD_3400892052120.0,-6539.506571,19736.98015,14208.333153,9.281973,0.0
5,HOSPI_3,CODE_UCD_3400892075761.0,-1859.384227,19147.606405,10339.517881,3.697477,0.0
0,HOSPI_3,CODE_UCD_3400893736135.0,-23.515141,2657.352764,2441.7422,0.4846475,1.0
1,HOSPI_3,CODE_UCD_3400893875490.0,-42.145966,6057.267747,5985.117873,0.4343115,1.0
2,HOSPI_3,CODE_UCD_3400893826706.0,0.106008,988.893843,736.786672,0.0949483,1.0
0,HOSPI_3,CODE_UCD_3400891225037.0,-26.864903,6355.493285,6221.179069,0.588123,2.0


In [137]:
df_prediction_scores_h4_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_0 = test_2_clustering(df_h4_cluster_0, df_prediction_scores_h4_cluster_0, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.972273243991984
Data in train: 150

Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -55.795
MAE: 179028.009
MAPE: 0.877
RMSE: 180797.616


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -83.799
MAE: 13772.693
MAPE: 0.671
RMSE: 20838.384


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -544.576
MAE: 14335.995
MAPE: 1.412
RMSE: 29091.199


Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -286.375
MAE: 20252.898
MAPE: 2.933
RMSE: 32535.105


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -476.611
MAE: 17127.117
MAPE: 1.866
RMSE: 30078.053


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -501.561
MAE: 14287.224
MAPE: 1.462
RMSE: 26755.786



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400891996128.0,-55.794512,180797.616087,179028.009374,0.876736,0.0
1,HOSPI_4,CODE_UCD_3400892065366.0,-83.798841,20838.384267,13772.693064,0.671457,0.0
2,HOSPI_4,CODE_UCD_3400892203645.0,-544.576248,29091.199275,14335.995217,1.412215,0.0
3,HOSPI_4,CODE_UCD_3400892088310.0,-286.374553,32535.105431,20252.898396,2.933134,0.0
4,HOSPI_4,CODE_UCD_3400892052120.0,-476.611177,30078.052742,17127.116562,1.866089,0.0
5,HOSPI_4,CODE_UCD_3400892075761.0,-501.561045,26755.785826,14287.223602,1.46167,0.0


In [138]:
df_prediction_scores_h4_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_1 = test_2_clustering(df_h4_cluster_1, df_prediction_scores_h4_cluster_1, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 188, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 20}
Best Score: 0.9122716243832386
Data in train: 75

Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -2.696
MAE: 2100.563
MAPE: 0.298
RMSE: 2672.454


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -22.968
MAE: 12981.448
MAPE: 0.596
RMSE: 13208.621


Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -10.603
MAE: 3415.124
MAPE: 0.635
RMSE: 3896.129



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400893736135.0,-2.696271,2672.453716,2100.563298,0.298071,1.0
1,HOSPI_4,CODE_UCD_3400893875490.0,-22.968037,13208.62067,12981.448221,0.595802,1.0
2,HOSPI_4,CODE_UCD_3400893826706.0,-10.602563,3896.128523,3415.124489,0.635183,1.0


In [139]:
df_prediction_scores_h4_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_2 = test_2_clustering(df_h4_cluster_2, df_prediction_scores_h4_cluster_2, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.8953182576453924
Data in train: 100

Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -17.606
MAE: 11171.205
MAPE: 0.438
RMSE: 11693.944


Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -3.468
MAE: 3253.032
MAPE: 0.192
RMSE: 4240.215


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -52.787
MAE: 8760.294
MAPE: 1.527
RMSE: 9435.275


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -35.997
MAE: 5160.246
MAPE: 0.557
RMSE: 6486.786



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400891225037.0,-17.605538,11693.944354,11171.204819,0.438031,2.0
1,HOSPI_4,CODE_UCD_3400890837149.0,-3.468048,4240.215364,3253.031666,0.191814,2.0
2,HOSPI_4,CODE_UCD_3400891235203.0,-52.786766,9435.274949,8760.293943,1.527125,2.0
3,HOSPI_4,CODE_UCD_3400891191226.0,-35.997406,6486.786485,5160.245831,0.556861,2.0


In [140]:
df_prediction_scores_h4_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_3 = test_2_clustering(df_h4_cluster_3, df_prediction_scores_h4_cluster_3, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.879085292600281
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -3.339
MAE: 5055.853
MAPE: 0.288
RMSE: 5932.598


Data in test: 25
Medicine:3400892508566.0
Medicines in cluster: 
R^2 Score: -24.717
MAE: 4720.594
MAPE: 0.656
RMSE: 5815.756


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -22.858
MAE: 5963.738
MAPE: 0.965
RMSE: 6747.228


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -4.617
MAE: 2645.513
MAPE: 0.215
RMSE: 3172.33


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -0.266
MAE: 2441.228
MAPE: 0.211
RMSE: 3027.367


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -13.554
MAE: 4110.254
MAPE: 0.466
RMSE: 5166.614


Data in test: 25
Medicine:3400892669236.0
Medicines in

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400892729589.0,-3.338826,5932.59841,5055.852698,0.288048,3.0
1,HOSPI_4,CODE_UCD_3400892508566.0,-24.717126,5815.755834,4720.594143,0.6564001,3.0
2,HOSPI_4,CODE_UCD_3400892761527.0,-22.858309,6747.228219,5963.737893,0.9647868,3.0
3,HOSPI_4,CODE_UCD_3400892697789.0,-4.616563,3172.330437,2645.513115,0.2145869,3.0
4,HOSPI_4,CODE_UCD_3400892745848.0,-0.266289,3027.367319,2441.227853,0.2106325,3.0
5,HOSPI_4,CODE_UCD_3400892761695.0,-13.553574,5166.614063,4110.253876,0.4660381,3.0
6,HOSPI_4,CODE_UCD_3400892669236.0,-7.410125,13803.094355,13399.183718,0.5259088,3.0
7,HOSPI_4,CODE_UCD_3400893022634.0,-109.855226,10653.431461,10098.921296,2.751987e+19,3.0


In [141]:
pd.concat([df_prediction_scores_h4_cluster_0,
           df_prediction_scores_h4_cluster_1,
           df_prediction_scores_h4_cluster_2,
           df_prediction_scores_h4_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400891996128.0,-55.794512,180797.616087,179028.009374,0.8767357,0.0
1,HOSPI_4,CODE_UCD_3400892065366.0,-83.798841,20838.384267,13772.693064,0.671457,0.0
2,HOSPI_4,CODE_UCD_3400892203645.0,-544.576248,29091.199275,14335.995217,1.412215,0.0
3,HOSPI_4,CODE_UCD_3400892088310.0,-286.374553,32535.105431,20252.898396,2.933134,0.0
4,HOSPI_4,CODE_UCD_3400892052120.0,-476.611177,30078.052742,17127.116562,1.866089,0.0
5,HOSPI_4,CODE_UCD_3400892075761.0,-501.561045,26755.785826,14287.223602,1.46167,0.0
0,HOSPI_4,CODE_UCD_3400893736135.0,-2.696271,2672.453716,2100.563298,0.2980713,1.0
1,HOSPI_4,CODE_UCD_3400893875490.0,-22.968037,13208.62067,12981.448221,0.595802,1.0
2,HOSPI_4,CODE_UCD_3400893826706.0,-10.602563,3896.128523,3415.124489,0.6351828,1.0
0,HOSPI_4,CODE_UCD_3400891225037.0,-17.605538,11693.944354,11171.204819,0.4380307,2.0
