<a href="https://colab.research.google.com/github/douglasmmachado/MedicineConsumption/blob/master/notebooks/division_approach/6_Forecasting_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 6 - Forecasting and prediction validation



---



---



In [113]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import math as m

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error,  mean_absolute_percentage_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle


df_h1_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h1_clustered.csv"
df_h2_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h2_clustered.csv"
df_h3_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h3_clustered.csv"
df_h4_url ="https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/master/datasets/division_approach/clustered/df_h4_clustered.csv"



df_h1 = pd.read_csv(df_h1_url)
df_h1['YEAR'] = df_h1['YEAR'].astype(int)
df_h1['MONTH'] = df_h1['MONTH'].astype(int)
df_h1['DATE'] = pd.to_datetime(df_h1['YEAR'].astype(str) + '-' + df_h1['MONTH'].astype(str), format='%Y-%m')

df_h2 = pd.read_csv(df_h2_url)
df_h2['YEAR'] = df_h2['YEAR'].astype(int)
df_h2['MONTH'] = df_h2['MONTH'].astype(int)
df_h2['DATE'] = pd.to_datetime(df_h2['YEAR'].astype(str) + '-' + df_h2['MONTH'].astype(str), format='%Y-%m')

df_h3 = pd.read_csv(df_h3_url)
df_h3['YEAR'] = df_h3['YEAR'].astype(int)
df_h3['MONTH'] = df_h3['MONTH'].astype(int)
df_h3['DATE'] = pd.to_datetime(df_h3['YEAR'].astype(str) + '-' + df_h3['MONTH'].astype(str), format='%Y-%m')

df_h4 = pd.read_csv(df_h4_url)
df_h4['YEAR'] = df_h4['YEAR'].astype(int)
df_h4['MONTH'] = df_h4['MONTH'].astype(int)
df_h4['DATE'] = pd.to_datetime(df_h4['YEAR'].astype(str) + '-' + df_h4['MONTH'].astype(str), format='%Y-%m')

In [71]:
df_h1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_SLD         525 non-null    float64       
 14  SEJ_SSR         525 non-null    float64       
 15  WEEK  

In [72]:
df_h2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 18 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_PSY         525 non-null    float64       
 14  SEJ_SSR         525 non-null    float64       
 15  WEEK  

In [73]:
df_h3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_HAD         525 non-null    float64       
 13  SEJ_MCO         525 non-null    float64       
 14  SEJ_PSY         525 non-null    float64       
 15  SEJ_SL

In [74]:
df_h4.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525 entries, 0 to 524
Data columns (total 19 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   CODE_ATC        525 non-null    int64         
 1   DATE            525 non-null    datetime64[ns]
 2   HOSPI_CODE_UCD  525 non-null    int64         
 3   LIT_HC          525 non-null    float64       
 4   LIT_HP          525 non-null    float64       
 5   MONTH           525 non-null    int64         
 6   N_UFS           525 non-null    float64       
 7   PN_MEDICAL      525 non-null    float64       
 8   POPULATION      525 non-null    float64       
 9   P_MEDICAL       525 non-null    float64       
 10  QUANTITY        525 non-null    float64       
 11  QUANTITY_MA     525 non-null    float64       
 12  SEJ_MCO         525 non-null    float64       
 13  SEJ_PSY         525 non-null    float64       
 14  SEJ_SLD         525 non-null    float64       
 15  SEJ_SS

## 6.1 - New database composition based on clusters

In [75]:
df_h1_cluster_0 = df_h1[df_h1['CLUSTER'] == 0].copy()
df_h1_cluster_1 = df_h1[df_h1['CLUSTER'] == 1].copy()
df_h1_cluster_2 = df_h1[df_h1['CLUSTER'] == 2].copy()
df_h1_cluster_3 = df_h1[df_h1['CLUSTER'] == 3].copy()

In [76]:
df_h2_cluster_0 = df_h2[df_h2['CLUSTER'] == 0].copy()
df_h2_cluster_1 = df_h2[df_h2['CLUSTER'] == 1].copy()
df_h2_cluster_2 = df_h2[df_h2['CLUSTER'] == 2].copy()
df_h2_cluster_3 = df_h2[df_h2['CLUSTER'] == 3].copy()

In [77]:
df_h3_cluster_0 = df_h3[df_h3['CLUSTER'] == 0].copy()
df_h3_cluster_1 = df_h3[df_h3['CLUSTER'] == 1].copy()
df_h3_cluster_2 = df_h3[df_h3['CLUSTER'] == 2].copy()
df_h3_cluster_3 = df_h3[df_h3['CLUSTER'] == 3].copy()

In [78]:
df_h4_cluster_0 = df_h4[df_h4['CLUSTER'] == 0].copy()
df_h4_cluster_1 = df_h4[df_h4['CLUSTER'] == 1].copy()
df_h4_cluster_2 = df_h4[df_h4['CLUSTER'] == 2].copy()
df_h4_cluster_3 = df_h4[df_h4['CLUSTER'] == 3].copy()

## 6.2 - Baseline score, Test 1

In [114]:
def test_1_baseline(df, medicine, df_scores, hospital = '-', unified = False):

  df = df.fillna(0)
  scaler = StandardScaler()

  X = df[df['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().values
  X_scaled = scaler.fit_transform(X)

  y = df[df['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

  X, y = shuffle(X, y, random_state=42)

  if unified:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,
                                                        test_size = 0.4,
                                                        random_state = 42)

  else:
    X_train = X_scaled
    X_test = X_scaled
    y_train = y
    y_test = y

  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  # Fit the RandomizedSearchCV object to the data
  randomized_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Calculate R^2 score
  r2 = r2_score(y_test, y_pred)

  # Calculate MAE
  mae = mean_absolute_error(y_test, y_pred)

  # Calculate MAPE
  mape = mean_absolute_percentage_error(y_test, y_pred)

  # Calculate RMSE
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  # Print the best parameters, best score, and evaluation metrics
  print('Medicine:' + str(medicine))
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print('R^2 Score:', round(r2, 3))
  print('MAE:', round(mae, 3))
  print('MAPE:', round(mape, 3))
  print('RMSE:', round(rmse, 3))
  print()


  # Create the new row as a DataFrame
  new_row = pd.DataFrame({'ID_SITE_RATTACHE': [hospital],
                          'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                          'R2': [r2],
                          'RMSE': [rmse],
                          'MAE': [mae],
                          'MAPE': [mape]})

  # Append the new row to the DataFrame
  df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # Return the updated DataFrame
  return df_scores


In [115]:
df_prediction_scores_h1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h1.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h1 = test_1_baseline(df_h1, medicine, df_prediction_scores_h1, hospital = 'HOSPI_1', unified = False)

df_prediction_scores_h1

Medicine:3400890837149
Best Parameters: {'n_estimators': 156, 'min_samples_split': 6, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 16}
Best Score: -0.22217793042634892
R^2 Score: 0.274
MAE: 133.958
MAPE: 0.047
RMSE: 183.876

Medicine:3400893826706
Best Parameters: {'n_estimators': 162, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 20}
Best Score: -2.5388417283274745
R^2 Score: 0.239
MAE: 323.606
MAPE: 0.112
RMSE: 457.942

Medicine:3400892729589
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.07926018137510557
R^2 Score: 0.168
MAE: 456.71
MAPE: 0.143
RMSE: 669.208

Medicine:3400892088310
Best Parameters: {'n_estimators': 2, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: -1.3834035012032795
R^2 Score: 0.157
MAE: 239.9
MAPE: 0.098
RMSE: 309.792

Medicine:3400892075761
Best Parameters: {'n_est

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_1,CODE_UCD_3400890837149,0.274086,183.87578,133.958212,0.04741232
1,HOSPI_1,CODE_UCD_3400893826706,0.239043,457.941687,323.605679,0.1121733
2,HOSPI_1,CODE_UCD_3400892729589,0.168232,669.208247,456.709734,0.1434478
3,HOSPI_1,CODE_UCD_3400892088310,0.156987,309.792277,239.9,0.09768513
4,HOSPI_1,CODE_UCD_3400892075761,0.175057,149.836235,109.310896,0.0853574
5,HOSPI_1,CODE_UCD_3400892745848,0.119046,1118.464417,910.930167,0.1375839
6,HOSPI_1,CODE_UCD_3400893736135,0.039848,284.847986,232.86,0.1180574
7,HOSPI_1,CODE_UCD_3400892697789,0.035549,210.614791,153.373772,0.1556787
8,HOSPI_1,CODE_UCD_3400892203645,0.132083,157.987564,128.660547,0.09290929
9,HOSPI_1,CODE_UCD_3400891235203,0.111012,598.618495,536.419115,7.196467e+17


In [116]:
df_prediction_scores_h2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h2.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h2 = test_1_baseline(df_h2, medicine, df_prediction_scores_h2, hospital = 'HOSPI_2', unified = False)

df_prediction_scores_h2

Medicine:3400890837149
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.3775767033400604
R^2 Score: 0.245
MAE: 261.013
MAPE: 0.106
RMSE: 335.681

Medicine:3400893826706
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: -0.16848725302470538
R^2 Score: 0.299
MAE: 216.755
MAPE: 0.121
RMSE: 265.74

Medicine:3400892729589
Best Parameters: {'n_estimators': 172, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 2}
Best Score: -0.6520162506094216
R^2 Score: 0.296
MAE: 415.871
MAPE: 0.089
RMSE: 503.857

Medicine:3400892088310
Best Parameters: {'n_estimators': 172, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}
Best Score: -0.262998058467832
R^2 Score: 0.56
MAE: 202.369
MAPE: 0.074
RMSE: 264.461

Medicine:3400892075761
Best Parameters: {'n_estim

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_2,CODE_UCD_3400890837149,0.245237,335.680643,261.013027,0.1055289
1,HOSPI_2,CODE_UCD_3400893826706,0.298693,265.740476,216.755461,0.1208963
2,HOSPI_2,CODE_UCD_3400892729589,0.296399,503.857123,415.87052,0.08935227
3,HOSPI_2,CODE_UCD_3400892088310,0.560068,264.461104,202.369401,0.07388452
4,HOSPI_2,CODE_UCD_3400892075761,0.056785,222.860528,183.100298,0.0832208
5,HOSPI_2,CODE_UCD_3400892745848,0.087151,662.102023,493.953333,0.132782
6,HOSPI_2,CODE_UCD_3400893736135,0.565948,241.269911,199.573863,0.04852442
7,HOSPI_2,CODE_UCD_3400892697789,0.123949,239.91678,189.738443,0.4472305
8,HOSPI_2,CODE_UCD_3400892203645,0.107074,473.048953,367.422996,0.1177898
9,HOSPI_2,CODE_UCD_3400891235203,0.251636,114.011623,88.123917,0.06655938


In [117]:
df_prediction_scores_h3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h3.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h3 = test_1_baseline(df_h3, medicine, df_prediction_scores_h3, hospital = 'HOSPI_3', unified = False)

df_prediction_scores_h3

Medicine:3400890837149
Best Parameters: {'n_estimators': 80, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 16}
Best Score: -0.33623408618867023
R^2 Score: 0.153
MAE: 282.439
MAPE: 0.071
RMSE: 337.186

Medicine:3400893826706
Best Parameters: {'n_estimators': 4, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 30}
Best Score: -0.4506169275301735
R^2 Score: 0.007
MAE: 896.445
MAPE: 0.119
RMSE: 1042.113

Medicine:3400892729589
Best Parameters: {'n_estimators': 14, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 28}
Best Score: -0.07847870535858867
R^2 Score: 0.133
MAE: 389.746
MAPE: 0.059
RMSE: 486.664

Medicine:3400892088310
Best Parameters: {'n_estimators': 34, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_depth': 8}
Best Score: -0.21112061806876312
R^2 Score: 0.185
MAE: 256.971
MAPE: 0.062
RMSE: 365.449

Medicine:3400892075761
Best Parameters: {'n_e

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,HOSPI_3,CODE_UCD_3400890837149,0.153267,337.186057,282.43934,0.07084638
1,HOSPI_3,CODE_UCD_3400893826706,0.007195,1042.113163,896.445113,0.118795
2,HOSPI_3,CODE_UCD_3400892729589,0.132981,486.663893,389.74612,0.05935662
3,HOSPI_3,CODE_UCD_3400892088310,0.184712,365.449332,256.971237,0.06242373
4,HOSPI_3,CODE_UCD_3400892075761,0.132401,413.497281,315.807405,0.1224739
5,HOSPI_3,CODE_UCD_3400892745848,0.02523,143.294019,111.109471,0.3741871
6,HOSPI_3,CODE_UCD_3400893736135,0.213668,475.921122,407.130816,0.08292219
7,HOSPI_3,CODE_UCD_3400892697789,0.654155,200.88716,152.780626,0.1011606
8,HOSPI_3,CODE_UCD_3400892203645,0.074338,689.526583,524.920748,0.101918
9,HOSPI_3,CODE_UCD_3400891235203,0.085139,431.993005,345.137703,1.459344e+17


In [None]:
df_prediction_scores_h4 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in df_h4.HOSPI_CODE_UCD.unique():

  df_prediction_scores_h4 = test_1_baseline(df_h4, medicine, df_prediction_scores_h4, hospital = 'HOSPI_4', unified = False)

df_prediction_scores_h4

Medicine:3400890837149
Best Parameters: {'n_estimators': 2, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: -0.20280590418550695
R^2 Score: 0.282
MAE: 1323.436
MAPE: 0.082
RMSE: 1699.532

Medicine:3400893826706
Best Parameters: {'n_estimators': 6, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 10}
Best Score: -0.3894786720697251
R^2 Score: 0.069
MAE: 948.991
MAPE: 0.188
RMSE: 1103.41

Medicine:3400892729589
Best Parameters: {'n_estimators': 108, 'min_samples_split': 4, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 8}
Best Score: -0.20345983632316642
R^2 Score: 0.113
MAE: 2000.909
MAPE: 0.13
RMSE: 2681.98

Medicine:3400892088310
Best Parameters: {'n_estimators': 150, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'log2', 'max_depth': 30}
Best Score: -0.16758469896738354
R^2 Score: 0.08
MAE: 1381.614
MAPE: 0.302
RMSE: 1840.892

Medicine:3400892075761
Best Parameters: {'

In [None]:
df_prediction_scores_unified = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_unified = pd.concat([df_h1,df_h2,df_h3, df_h4])

for medicine in df_unified.HOSPI_CODE_UCD.unique():

  df_prediction_scores_unified = test_1_baseline(df_unified, medicine, df_prediction_scores_unified, hospital = '-', unified = True)

df_prediction_scores_unified

## 6.3 - Clustering score, Test 2

In [100]:
def test_2_clustering(df, df_scores, hospital = '-', unified = False):

  df = df.fillna(0)
  cluster = df.CLUSTER.unique()[0]
  scaler = StandardScaler()

  X = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().values
  y = df['QUANTITY'].copy().values
  X,  y = shuffle(X, y, random_state = 42)
  if unified:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=42)
  else:
    X_train = X
    y_train = y

    X_test = X
    y_test = y


  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)

  # Fit the RandomizedSearchCV object to the data
  randomized_search.fit(X_train_scaled, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  print(f'Cluster: {cluster}')
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print(f'Data in train: {len(X_train)}')

  df_test = pd.DataFrame(X_test, columns = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER'], axis=1).copy().columns)
  df_test['QUANTITY'] = y_test

  for medicine in df_test.HOSPI_CODE_UCD.unique():
    X_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY'], axis=1).copy().values
    scaler = StandardScaler()
    X_test_scaled = scaler.fit_transform(X_test_medicine)

    y_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

    print()
    print(f'Data in test: {len(X_test_medicine)}')
    # Make predictions using the best estimator
    y_pred = best_estimator.predict(X_test_scaled)

    # Calculate R^2 score
    r2 = r2_score(y_test_medicine, y_pred)

    # Calculate MAE
    mae = mean_absolute_error(y_test_medicine, y_pred)

    # Calculate MAPE
    mape = mean_absolute_percentage_error(y_test_medicine, y_pred)

    # Calculate RMSE
    rmse = np.sqrt(mean_squared_error(y_test_medicine, y_pred))

    # Print the best parameters, best score, and evaluation metrics
    print('Medicine:' + str(medicine))
    print('Medicines in cluster: ')
    print('R^2 Score:', round(r2, 3))
    print('MAE:', round(mae, 3))
    print('MAPE:', round(mape, 3))
    print('RMSE:', round(rmse, 3))
    print()


    # Create the new row as a DataFrame
    new_row = pd.DataFrame({'ID_SITE_RATTACHE': [hospital],
                            'CLUSTER': [cluster],
                            'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                            'R2': [r2],
                            'RMSE': [rmse],
                            'MAE': [mae],
                            'MAPE': [mape]})

    # Append the new row to the DataFrame
    df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # Return the updated DataFrame
  return df_scores


### Hospital 1 - Cluster 0

In [88]:
df_prediction_scores_h1_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_0 = test_2_clustering(df_h1_cluster_0, df_prediction_scores_h1_cluster_0, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9860001252032944
Data in train: 150

Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -477.284
MAE: 4341.182
MAPE: 1.709
RMSE: 7378.979


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -3346.423
MAE: 5774.492
MAPE: 3.963
RMSE: 9544.656


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -3063.739
MAE: 5990.862
MAPE: 4.269
RMSE: 9388.163


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -681.5
MAE: 6221.056
MAPE: 1.283
RMSE: 8098.54


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -446.407
MAE: 4781.059
MAPE: 1.652
RMSE: 7470.156


Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -114.787
MAE: 50039.473
MAPE: 0.898
RMSE: 50350.651



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400892088310.0,-477.283674,7378.978626,4341.182232,1.708845,0.0
1,HOSPI_1,CODE_UCD_3400892075761.0,-3346.423312,9544.655685,5774.492165,3.963178,0.0
2,HOSPI_1,CODE_UCD_3400892203645.0,-3063.739162,9388.162907,5990.86211,4.269109,0.0
3,HOSPI_1,CODE_UCD_3400892065366.0,-681.49954,8098.539955,6221.055651,1.282704,0.0
4,HOSPI_1,CODE_UCD_3400892052120.0,-446.406609,7470.155736,4781.058749,1.651672,0.0
5,HOSPI_1,CODE_UCD_3400891996128.0,-114.786762,50350.650601,50039.47283,0.897843,0.0


In [89]:
df_prediction_scores_h1_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_1 = test_2_clustering(df_h1_cluster_1, df_prediction_scores_h1_cluster_1, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9699407705540729
Data in train: 75

Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -1.115
MAE: 624.286
MAPE: 0.204
RMSE: 763.419


Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -51.876
MAE: 1886.336
MAPE: 0.918
RMSE: 2113.832


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -48.025
MAE: 5149.435
MAPE: 0.56
RMSE: 5245.646



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400893826706.0,-1.114786,763.419434,624.285526,0.204435,1.0
1,HOSPI_1,CODE_UCD_3400893736135.0,-51.875554,2113.832265,1886.335814,0.918217,1.0
2,HOSPI_1,CODE_UCD_3400893875490.0,-48.024869,5245.646064,5149.435308,0.559704,1.0


In [90]:
df_prediction_scores_h1_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_2 = test_2_clustering(df_h1_cluster_2, df_prediction_scores_h1_cluster_2, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9829074704074202
Data in train: 100

Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -232.244
MAE: 2106.618
MAPE: 0.724
RMSE: 3295.996


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -74.437
MAE: 4549.636
MAPE: 9.186927478955926e+17
RMSE: 5514.34


Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -87.106
MAE: 12070.663
MAPE: 0.755
RMSE: 12367.135


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -156.749
MAE: 2468.811
MAPE: 0.905
RMSE: 3584.594



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400890837149.0,-232.243535,3295.995556,2106.618107,0.7239941,2.0
1,HOSPI_1,CODE_UCD_3400891235203.0,-74.436651,5514.339503,4549.635625,9.186927e+17,2.0
2,HOSPI_1,CODE_UCD_3400891225037.0,-87.106172,12367.134962,12070.662748,0.7546377,2.0
3,HOSPI_1,CODE_UCD_3400891191226.0,-156.748901,3584.593531,2468.810681,0.9053158,2.0


In [91]:
df_prediction_scores_h1_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h1_cluster_3 = test_2_clustering(df_h1_cluster_3, df_prediction_scores_h1_cluster_3, hospital = 'HOSPI_1')

df_prediction_scores_h1_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8214988142733833
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -1.117
MAE: 896.576
MAPE: 0.218
RMSE: 1067.643


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -3.629
MAE: 2366.804
MAPE: 0.355
RMSE: 2563.808


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -246.243
MAE: 2741.717
MAPE: 2.521
RMSE: 3372.178


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: 0.144
MAE: 1098.155
MAPE: 0.604
RMSE: 1384.823


Data in test: 25
Medicine:3400893022634.0
Medicines in cluster: 
R^2 Score: -22.471
MAE: 1341.5
MAPE: 0.404
RMSE: 1742.392


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -330.385
MAE: 2852.756
MAPE: 1.684
RMSE: 3332.355


Data in test: 25
Medicine:3400892669236.0
Medicines in c

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400892729589.0,-1.117055,1067.643157,896.575749,0.2182333,3.0
1,HOSPI_1,CODE_UCD_3400892745848.0,-3.628924,2563.808263,2366.803796,0.355125,3.0
2,HOSPI_1,CODE_UCD_3400892697789.0,-246.242759,3372.177781,2741.716735,2.521108,3.0
3,HOSPI_1,CODE_UCD_3400892761527.0,0.143978,1384.822527,1098.155083,0.6039782,3.0
4,HOSPI_1,CODE_UCD_3400893022634.0,-22.471471,1742.391532,1341.500354,0.4042938,3.0
5,HOSPI_1,CODE_UCD_3400892761695.0,-330.38502,3332.355351,2852.756269,1.683773,3.0
6,HOSPI_1,CODE_UCD_3400892669236.0,0.36179,3642.245722,3360.944891,1.645985e+18,3.0
7,HOSPI_1,CODE_UCD_3400892508566.0,-2.604047,1540.872423,1232.6389,0.3512088,3.0


In [92]:
pd.concat([df_prediction_scores_h1_cluster_0,
           df_prediction_scores_h1_cluster_1,
           df_prediction_scores_h1_cluster_2,
           df_prediction_scores_h1_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_1,CODE_UCD_3400892088310.0,-477.283674,7378.978626,4341.182232,1.708845,0.0
1,HOSPI_1,CODE_UCD_3400892075761.0,-3346.423312,9544.655685,5774.492165,3.963178,0.0
2,HOSPI_1,CODE_UCD_3400892203645.0,-3063.739162,9388.162907,5990.86211,4.269109,0.0
3,HOSPI_1,CODE_UCD_3400892065366.0,-681.49954,8098.539955,6221.055651,1.282704,0.0
4,HOSPI_1,CODE_UCD_3400892052120.0,-446.406609,7470.155736,4781.058749,1.651672,0.0
5,HOSPI_1,CODE_UCD_3400891996128.0,-114.786762,50350.650601,50039.47283,0.897843,0.0
0,HOSPI_1,CODE_UCD_3400893826706.0,-1.114786,763.419434,624.285526,0.2044352,1.0
1,HOSPI_1,CODE_UCD_3400893736135.0,-51.875554,2113.832265,1886.335814,0.9182171,1.0
2,HOSPI_1,CODE_UCD_3400893875490.0,-48.024869,5245.646064,5149.435308,0.5597039,1.0
0,HOSPI_1,CODE_UCD_3400890837149.0,-232.243535,3295.995556,2106.618107,0.7239941,2.0


In [93]:
df_prediction_scores_h2_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_0 = test_2_clustering(df_h2_cluster_0, df_prediction_scores_h2_cluster_0, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 10, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9753097730392384
Data in train: 150

Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -206.474
MAE: 4091.766
MAPE: 1.365
RMSE: 5743.159


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -1040.586
MAE: 5936.128
MAPE: 2.708
RMSE: 7405.859


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -117.068
MAE: 4059.372
MAPE: 1.179
RMSE: 5439.571


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -2.912
MAE: 3392.25
MAPE: 0.568
RMSE: 4008.855


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -825.785
MAE: 4986.887
MAPE: 2.756
RMSE: 6555.883


Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -35.896
MAE: 63694.417
MAPE: 0.903
RMSE: 64499.429



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400892088310.0,-206.473885,5743.158914,4091.765527,1.365308,0.0
1,HOSPI_2,CODE_UCD_3400892075761.0,-1040.585869,7405.858859,5936.127637,2.70834,0.0
2,HOSPI_2,CODE_UCD_3400892203645.0,-117.068069,5439.570585,4059.372257,1.179043,0.0
3,HOSPI_2,CODE_UCD_3400892065366.0,-2.912334,4008.854695,3392.250098,0.568238,0.0
4,HOSPI_2,CODE_UCD_3400892052120.0,-825.785295,6555.883369,4986.887487,2.756359,0.0
5,HOSPI_2,CODE_UCD_3400891996128.0,-35.895586,64499.429472,63694.41738,0.903116,0.0


In [94]:
df_prediction_scores_h2_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_1 = test_2_clustering(df_h2_cluster_1, df_prediction_scores_h2_cluster_1, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 178, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.97664629840971
Data in train: 75

Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -61.444
MAE: 2335.015
MAPE: 1.252
RMSE: 2507.545


Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -5.457
MAE: 785.183
MAPE: 0.195
RMSE: 930.578


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -115.181
MAE: 4512.173
MAPE: 0.515
RMSE: 4578.947



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400893826706.0,-61.443897,2507.544752,2335.015377,1.25173,1.0
1,HOSPI_2,CODE_UCD_3400893736135.0,-5.457164,930.578468,785.183434,0.194941,1.0
2,HOSPI_2,CODE_UCD_3400893875490.0,-115.181133,4578.947073,4512.173459,0.51493,1.0


In [95]:
df_prediction_scores_h2_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_2 = test_2_clustering(df_h2_cluster_2, df_prediction_scores_h2_cluster_2, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 138, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 8}
Best Score: 0.9460215783332977
Data in train: 100

Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -83.912
MAE: 2466.133
MAPE: 0.893
RMSE: 3560.456


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -1417.924
MAE: 4164.944
MAPE: 2.979
RMSE: 4964.465


Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -11.905
MAE: 10916.182
MAPE: 0.695
RMSE: 11236.196


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -294.542
MAE: 2371.172
MAPE: 0.817
RMSE: 3610.02



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400890837149.0,-83.912044,3560.455745,2466.132833,0.892539,2.0
1,HOSPI_2,CODE_UCD_3400891235203.0,-1417.924094,4964.464506,4164.943564,2.979403,2.0
2,HOSPI_2,CODE_UCD_3400891225037.0,-11.905475,11236.19594,10916.182358,0.695361,2.0
3,HOSPI_2,CODE_UCD_3400891191226.0,-294.542482,3610.019569,2371.172359,0.81678,2.0


In [96]:
df_prediction_scores_h2_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h2_cluster_3 = test_2_clustering(df_h2_cluster_3, df_prediction_scores_h2_cluster_3, hospital = 'HOSPI_2')

df_prediction_scores_h2_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 148, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 30}
Best Score: 0.9729852726485635
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -25.948
MAE: 2333.374
MAPE: 0.474
RMSE: 3118.241


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -27.604
MAE: 3006.301
MAPE: 0.75
RMSE: 3706.264


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -592.205
MAE: 5826.799
MAPE: 8.606
RMSE: 6243.07


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -28.077
MAE: 5351.145
MAPE: 0.443
RMSE: 5736.16


Data in test: 25
Medicine:3400893022634.0
Medicines in cluster: 
R^2 Score: -15.862
MAE: 1992.339
MAPE: 0.38
RMSE: 2655.257


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -70.745
MAE: 8482.561
MAPE: 0.549
RMSE: 8735.531


Data in test: 25
Medicine:3400892669236.0
Medicines 

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400892729589.0,-25.948286,3118.240797,2333.373839,0.4742084,3.0
1,HOSPI_2,CODE_UCD_3400892745848.0,-27.603666,3706.264367,3006.300924,0.7498831,3.0
2,HOSPI_2,CODE_UCD_3400892697789.0,-592.204681,6243.069788,5826.799247,8.605797,3.0
3,HOSPI_2,CODE_UCD_3400892761527.0,-28.076841,5736.160299,5351.145301,0.4426493,3.0
4,HOSPI_2,CODE_UCD_3400893022634.0,-15.862374,2655.256834,1992.339303,0.3795602,3.0
5,HOSPI_2,CODE_UCD_3400892761695.0,-70.744543,8735.530542,8482.560913,0.5488819,3.0
6,HOSPI_2,CODE_UCD_3400892669236.0,-10.846693,2720.648321,2418.78114,0.3033603,3.0
7,HOSPI_2,CODE_UCD_3400892508566.0,-74738.997343,7258.698818,6665.422317,9.02462e+18,3.0


In [97]:
pd.concat([df_prediction_scores_h2_cluster_0,
           df_prediction_scores_h2_cluster_1,
           df_prediction_scores_h2_cluster_2,
           df_prediction_scores_h2_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_2,CODE_UCD_3400892088310.0,-206.473885,5743.158914,4091.765527,1.365308,0.0
1,HOSPI_2,CODE_UCD_3400892075761.0,-1040.585869,7405.858859,5936.127637,2.70834,0.0
2,HOSPI_2,CODE_UCD_3400892203645.0,-117.068069,5439.570585,4059.372257,1.179043,0.0
3,HOSPI_2,CODE_UCD_3400892065366.0,-2.912334,4008.854695,3392.250098,0.5682379,0.0
4,HOSPI_2,CODE_UCD_3400892052120.0,-825.785295,6555.883369,4986.887487,2.756359,0.0
5,HOSPI_2,CODE_UCD_3400891996128.0,-35.895586,64499.429472,63694.41738,0.9031161,0.0
0,HOSPI_2,CODE_UCD_3400893826706.0,-61.443897,2507.544752,2335.015377,1.25173,1.0
1,HOSPI_2,CODE_UCD_3400893736135.0,-5.457164,930.578468,785.183434,0.1949411,1.0
2,HOSPI_2,CODE_UCD_3400893875490.0,-115.181133,4578.947073,4512.173459,0.5149302,1.0
0,HOSPI_2,CODE_UCD_3400890837149.0,-83.912044,3560.455745,2466.132833,0.892539,2.0


In [101]:
df_prediction_scores_unified_cluster = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_unified_cluster = test_2_clustering(pd.concat([df_h1,
                                                                   df_h2,
                                                                   df_h3,
                                                                   df_h4]), df_prediction_scores_unified_cluster, hospital = '-', unified = True)

df_prediction_scores_unified_cluster

Cluster: 2
Best Parameters: {'n_estimators': 148, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 30}
Best Score: 0.9833660752083169
Data in train: 1680

Data in test: 23
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -1.468
MAE: 4275.202
MAPE: 1.59
RMSE: 6923.962


Data in test: 17
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -27.962
MAE: 6432.755
MAPE: 3.182589599574867e+17
RMSE: 12668.9


Data in test: 28
Medicine:3400893022634.0
Medicines in cluster: 
R^2 Score: -40.473
MAE: 7670.671
MAPE: 2.9334176744656717e+18
RMSE: 9571.901


Data in test: 22
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -0.268
MAE: 4583.073
MAPE: 0.459
RMSE: 5223.74


Data in test: 16
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -9.395
MAE: 5943.926
MAPE: 1.029
RMSE: 11136.308


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: 0.771
MAE: 4215.898
MAPE: 0.432
RMSE: 4801.525


Data in test: 26
Medic

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,-,CODE_UCD_3400892697789.0,-1.468257,6923.962269,4275.20168,1.589802,2.0
1,-,CODE_UCD_3400891235203.0,-27.962264,12668.899578,6432.7548,3.18259e+17,2.0
2,-,CODE_UCD_3400893022634.0,-40.473136,9571.900964,7670.671071,2.933418e+18,2.0
3,-,CODE_UCD_3400893875490.0,-0.267955,5223.739982,4583.073336,0.4590597,2.0
4,-,CODE_UCD_3400892075761.0,-9.394848,11136.308055,5943.925722,1.028725,2.0
5,-,CODE_UCD_3400892761527.0,0.771023,4801.524895,4215.898169,0.4322115,2.0
6,-,CODE_UCD_3400891225037.0,-2.836753,10523.568298,9116.297142,0.5819679,2.0
7,-,CODE_UCD_3400892065366.0,-1.006592,7327.562115,4423.63981,0.4767596,2.0
8,-,CODE_UCD_3400892761695.0,0.780427,4790.360542,3687.772425,0.3161963,2.0
9,-,CODE_UCD_3400893736135.0,-9.22245,8201.037279,5815.2152,0.9440176,2.0


In [102]:
df_prediction_scores_h3_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_0 = test_2_clustering(df_h3_cluster_0, df_prediction_scores_h3_cluster_0, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 34, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9920245124885779
Data in train: 150

Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -940.818
MAE: 8270.865
MAPE: 1.787
RMSE: 12420.958


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -1512.642
MAE: 9118.652
MAPE: 3.273
RMSE: 17271.288


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -233.77
MAE: 6086.587
MAPE: 1.114
RMSE: 10981.109


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -522.791
MAE: 7278.668
MAPE: 0.949
RMSE: 13529.157


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -4315.362
MAE: 11678.059
MAPE: 7.806
RMSE: 16033.707


Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -268.358
MAE: 88095.85
MAPE: 0.861
RMSE: 89480.55



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400892088310.0,-940.817502,12420.958134,8270.864872,1.786761,0.0
1,HOSPI_3,CODE_UCD_3400892075761.0,-1512.641816,17271.287988,9118.652174,3.272517,0.0
2,HOSPI_3,CODE_UCD_3400892203645.0,-233.770124,10981.108938,6086.58665,1.113506,0.0
3,HOSPI_3,CODE_UCD_3400892065366.0,-522.791351,13529.157271,7278.668197,0.948733,0.0
4,HOSPI_3,CODE_UCD_3400892052120.0,-4315.3618,16033.706568,11678.059461,7.805917,0.0
5,HOSPI_3,CODE_UCD_3400891996128.0,-268.358239,89480.549506,88095.850019,0.861458,0.0


In [103]:
df_prediction_scores_h3_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_1 = test_2_clustering(df_h3_cluster_1, df_prediction_scores_h3_cluster_1, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.9414717998745867
Data in train: 75

Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -0.083
MAE: 823.482
MAPE: 0.106
RMSE: 1088.439


Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -26.502
MAE: 2551.865
MAPE: 0.505
RMSE: 2814.596


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -41.005
MAE: 5891.528
MAPE: 0.428
RMSE: 5976.656



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400893826706.0,-0.083035,1088.439264,823.482469,0.106346,1.0
1,HOSPI_3,CODE_UCD_3400893736135.0,-26.502234,2814.595543,2551.864909,0.504736,1.0
2,HOSPI_3,CODE_UCD_3400893875490.0,-41.005213,5976.656068,5891.527626,0.427825,1.0


In [104]:
df_prediction_scores_h3_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_2 = test_2_clustering(df_h3_cluster_2, df_prediction_scores_h3_cluster_2, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 108, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: 0.9586569821339985
Data in train: 100

Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -28.823
MAE: 1717.682
MAPE: 0.429
RMSE: 2001.111


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -76.699
MAE: 3812.345
MAPE: 4.964312476115745e+17
RMSE: 3981.132


Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -23.337
MAE: 5739.586
MAPE: 0.544
RMSE: 5939.555


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -7.615
MAE: 1445.734
MAPE: 0.235
RMSE: 1764.229



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400890837149.0,-28.822899,2001.111482,1717.681853,0.4285763,2.0
1,HOSPI_3,CODE_UCD_3400891235203.0,-76.698922,3981.13194,3812.344709,4.964312e+17,2.0
2,HOSPI_3,CODE_UCD_3400891225037.0,-23.33699,5939.555264,5739.586405,0.5440071,2.0
3,HOSPI_3,CODE_UCD_3400891191226.0,-7.614546,1764.229152,1445.734439,0.2353143,2.0


In [105]:
df_prediction_scores_h3_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h3_cluster_3 = test_2_clustering(df_h3_cluster_3, df_prediction_scores_h3_cluster_3, hospital = 'HOSPI_3')

df_prediction_scores_h3_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 108, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: 0.9881804681283658
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -175.711
MAE: 4858.895
MAPE: 0.703
RMSE: 6947.796


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -6794.243
MAE: 9893.408
MAPE: 22.119
RMSE: 11964.086


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -1002.328
MAE: 8659.569
MAPE: 4.813
RMSE: 10820.14


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -65.555
MAE: 17619.631
MAPE: 0.617
RMSE: 18226.462


Data in test: 25
Medicine:3400893022634.0
Medicines in cluster: 
R^2 Score: -623.091
MAE: 8252.245
MAPE: 2.308
RMSE: 9902.337


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -163.463
MAE: 15903.91
MAPE: 0.616
RMSE: 16757.806


Data in test: 25
Medicine:34008926692

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400892729589.0,-175.711275,6947.795734,4858.8951,0.703358,3.0
1,HOSPI_3,CODE_UCD_3400892745848.0,-6794.243414,11964.086101,9893.407625,22.118565,3.0
2,HOSPI_3,CODE_UCD_3400892697789.0,-1002.328428,10820.139761,8659.569299,4.813101,3.0
3,HOSPI_3,CODE_UCD_3400892761527.0,-65.555394,18226.462214,17619.631315,0.617069,3.0
4,HOSPI_3,CODE_UCD_3400893022634.0,-623.090856,9902.337229,8252.244899,2.308218,3.0
5,HOSPI_3,CODE_UCD_3400892761695.0,-163.463054,16757.806499,15903.910379,0.616456,3.0
6,HOSPI_3,CODE_UCD_3400892669236.0,-57.257662,5664.472019,4789.622075,0.461434,3.0
7,HOSPI_3,CODE_UCD_3400892508566.0,-91.968553,7621.271334,5652.91353,1.160583,3.0


In [106]:
pd.concat([df_prediction_scores_h3_cluster_0,
           df_prediction_scores_h3_cluster_1,
           df_prediction_scores_h3_cluster_2,
           df_prediction_scores_h3_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_3,CODE_UCD_3400892088310.0,-940.817502,12420.958134,8270.864872,1.786761,0.0
1,HOSPI_3,CODE_UCD_3400892075761.0,-1512.641816,17271.287988,9118.652174,3.272517,0.0
2,HOSPI_3,CODE_UCD_3400892203645.0,-233.770124,10981.108938,6086.58665,1.113506,0.0
3,HOSPI_3,CODE_UCD_3400892065366.0,-522.791351,13529.157271,7278.668197,0.9487332,0.0
4,HOSPI_3,CODE_UCD_3400892052120.0,-4315.3618,16033.706568,11678.059461,7.805917,0.0
5,HOSPI_3,CODE_UCD_3400891996128.0,-268.358239,89480.549506,88095.850019,0.8614581,0.0
0,HOSPI_3,CODE_UCD_3400893826706.0,-0.083035,1088.439264,823.482469,0.1063459,1.0
1,HOSPI_3,CODE_UCD_3400893736135.0,-26.502234,2814.595543,2551.864909,0.504736,1.0
2,HOSPI_3,CODE_UCD_3400893875490.0,-41.005213,5976.656068,5891.527626,0.4278247,1.0
0,HOSPI_3,CODE_UCD_3400890837149.0,-28.822899,2001.111482,1717.681853,0.4285763,2.0


In [107]:
df_prediction_scores_h4_cluster_0 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_0 = test_2_clustering(df_h4_cluster_0, df_prediction_scores_h4_cluster_0, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_0

Cluster: 0
Best Parameters: {'n_estimators': 2, 'min_samples_split': 8, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 18}
Best Score: 0.9779564431605948
Data in train: 150

Data in test: 25
Medicine:3400892088310.0
Medicines in cluster: 
R^2 Score: -793.197
MAE: 30784.746
MAPE: 4.139
RMSE: 54086.906


Data in test: 25
Medicine:3400892075761.0
Medicines in cluster: 
R^2 Score: -1463.068
MAE: 23450.636
MAPE: 2.389
RMSE: 45667.148


Data in test: 25
Medicine:3400892203645.0
Medicines in cluster: 
R^2 Score: -1211.729
MAE: 19961.044
MAPE: 1.962
RMSE: 43372.627


Data in test: 25
Medicine:3400892065366.0
Medicines in cluster: 
R^2 Score: -261.276
MAE: 21588.206
MAPE: 1.05
RMSE: 36647.849


Data in test: 25
Medicine:3400892052120.0
Medicines in cluster: 
R^2 Score: -1146.726
MAE: 23890.13
MAPE: 2.54
RMSE: 46626.373


Data in test: 25
Medicine:3400891996128.0
Medicines in cluster: 
R^2 Score: -51.602
MAE: 169171.141
MAPE: 0.834
RMSE: 173996.375



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400892088310.0,-793.196838,54086.906267,30784.746349,4.138589,0.0
1,HOSPI_4,CODE_UCD_3400892075761.0,-1463.067738,45667.147504,23450.635513,2.388631,0.0
2,HOSPI_4,CODE_UCD_3400892203645.0,-1211.728736,43372.627497,19961.044452,1.962426,0.0
3,HOSPI_4,CODE_UCD_3400892065366.0,-261.276164,36647.849449,21588.20583,1.049966,0.0
4,HOSPI_4,CODE_UCD_3400892052120.0,-1146.726237,46626.372702,23890.130238,2.539727,0.0
5,HOSPI_4,CODE_UCD_3400891996128.0,-51.601893,173996.375085,169171.141361,0.834,0.0


In [108]:
df_prediction_scores_h4_cluster_1 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_1 = test_2_clustering(df_h4_cluster_1, df_prediction_scores_h4_cluster_1, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_1

Cluster: 1
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9271866581146281
Data in train: 75

Data in test: 25
Medicine:3400893826706.0
Medicines in cluster: 
R^2 Score: -10.313
MAE: 3375.687
MAPE: 0.629
RMSE: 3847.141


Data in test: 25
Medicine:3400893736135.0
Medicines in cluster: 
R^2 Score: -2.769
MAE: 2106.105
MAPE: 0.299
RMSE: 2698.462


Data in test: 25
Medicine:3400893875490.0
Medicines in cluster: 
R^2 Score: -23.346
MAE: 13093.958
MAPE: 0.601
RMSE: 13312.359



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400893826706.0,-10.312633,3847.141367,3375.686537,0.628714,1.0
1,HOSPI_4,CODE_UCD_3400893736135.0,-2.768565,2698.461738,2106.105242,0.298784,1.0
2,HOSPI_4,CODE_UCD_3400893875490.0,-23.345997,13312.358952,13093.958282,0.600997,1.0


In [109]:
df_prediction_scores_h4_cluster_2 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_2 = test_2_clustering(df_h4_cluster_2, df_prediction_scores_h4_cluster_2, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_2

Cluster: 2
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.9120003712097511
Data in train: 100

Data in test: 25
Medicine:3400890837149.0
Medicines in cluster: 
R^2 Score: -3.645
MAE: 3468.44
MAPE: 0.204
RMSE: 4323.399


Data in test: 25
Medicine:3400891235203.0
Medicines in cluster: 
R^2 Score: -51.148
MAE: 8581.887
MAPE: 1.497
RMSE: 9290.452


Data in test: 25
Medicine:3400891225037.0
Medicines in cluster: 
R^2 Score: -17.569
MAE: 11105.649
MAPE: 0.436
RMSE: 11682.366


Data in test: 25
Medicine:3400891191226.0
Medicines in cluster: 
R^2 Score: -36.582
MAE: 5056.711
MAPE: 0.544
RMSE: 6537.855



Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400890837149.0,-3.645075,4323.399261,3468.439965,0.204148,2.0
1,HOSPI_4,CODE_UCD_3400891235203.0,-51.148283,9290.452118,8581.886514,1.496975,2.0
2,HOSPI_4,CODE_UCD_3400891225037.0,-17.568712,11682.365766,11105.648817,0.43592,2.0
3,HOSPI_4,CODE_UCD_3400891191226.0,-36.582244,6537.855495,5056.711216,0.544318,2.0


In [110]:
df_prediction_scores_h4_cluster_3 = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_h4_cluster_3 = test_2_clustering(df_h4_cluster_3, df_prediction_scores_h4_cluster_3, hospital = 'HOSPI_4')

df_prediction_scores_h4_cluster_3

Cluster: 3
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8864955237961647
Data in train: 200

Data in test: 25
Medicine:3400892729589.0
Medicines in cluster: 
R^2 Score: -3.682
MAE: 5187.459
MAPE: 0.297
RMSE: 6162.806


Data in test: 25
Medicine:3400892745848.0
Medicines in cluster: 
R^2 Score: -0.597
MAE: 2784.172
MAPE: 0.242
RMSE: 3400.082


Data in test: 25
Medicine:3400892697789.0
Medicines in cluster: 
R^2 Score: -6.935
MAE: 3093.146
MAPE: 0.252
RMSE: 3770.548


Data in test: 25
Medicine:3400892761527.0
Medicines in cluster: 
R^2 Score: -23.713
MAE: 5916.309
MAPE: 0.951
RMSE: 6867.075


Data in test: 25
Medicine:3400893022634.0
Medicines in cluster: 
R^2 Score: -106.477
MAE: 9714.936
MAPE: 2.522027085247644e+19
RMSE: 10489.84


Data in test: 25
Medicine:3400892761695.0
Medicines in cluster: 
R^2 Score: -15.7
MAE: 4357.378
MAPE: 0.492
RMSE: 5534.493


Data in test: 25
Medicine:3400892669236

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400892729589.0,-3.682085,6162.805731,5187.458621,0.2967653,3.0
1,HOSPI_4,CODE_UCD_3400892745848.0,-0.597281,3400.082003,2784.171521,0.241576,3.0
2,HOSPI_4,CODE_UCD_3400892697789.0,-6.93456,3770.548399,3093.145785,0.2515874,3.0
3,HOSPI_4,CODE_UCD_3400892761527.0,-23.713393,6867.074578,5916.309262,0.951342,3.0
4,HOSPI_4,CODE_UCD_3400893022634.0,-106.476843,10489.84044,9714.935568,2.522027e+19,3.0
5,HOSPI_4,CODE_UCD_3400892761695.0,-15.699879,5534.493036,4357.378151,0.4921969,3.0
6,HOSPI_4,CODE_UCD_3400892669236.0,-7.529451,13900.670991,13496.362494,0.5332991,3.0
7,HOSPI_4,CODE_UCD_3400892508566.0,-25.796812,5936.582533,4627.137446,0.6406228,3.0


In [111]:
pd.concat([df_prediction_scores_h4_cluster_0,
           df_prediction_scores_h4_cluster_1,
           df_prediction_scores_h4_cluster_2,
           df_prediction_scores_h4_cluster_3])

Unnamed: 0,ID_SITE_RATTACHE,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE,CLUSTER
0,HOSPI_4,CODE_UCD_3400892088310.0,-793.196838,54086.906267,30784.746349,4.138589,0.0
1,HOSPI_4,CODE_UCD_3400892075761.0,-1463.067738,45667.147504,23450.635513,2.388631,0.0
2,HOSPI_4,CODE_UCD_3400892203645.0,-1211.728736,43372.627497,19961.044452,1.962426,0.0
3,HOSPI_4,CODE_UCD_3400892065366.0,-261.276164,36647.849449,21588.20583,1.049966,0.0
4,HOSPI_4,CODE_UCD_3400892052120.0,-1146.726237,46626.372702,23890.130238,2.539727,0.0
5,HOSPI_4,CODE_UCD_3400891996128.0,-51.601893,173996.375085,169171.141361,0.8339999,0.0
0,HOSPI_4,CODE_UCD_3400893826706.0,-10.312633,3847.141367,3375.686537,0.6287139,1.0
1,HOSPI_4,CODE_UCD_3400893736135.0,-2.768565,2698.461738,2106.105242,0.298784,1.0
2,HOSPI_4,CODE_UCD_3400893875490.0,-23.345997,13312.358952,13093.958282,0.6009966,1.0
0,HOSPI_4,CODE_UCD_3400890837149.0,-3.645075,4323.399261,3468.439965,0.2041481,2.0
