<a href="https://colab.research.google.com/github/douglasmmachado/MedicineConsumption/blob/master/notebooks/unified_approach/5_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 5 - Forecasting and prediction



---



---



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import math as m

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV



# Clustered consumption dataset published in the project repository.
df_url = "https://raw.githubusercontent.com/douglasmmachado/MedicineConsumption/master/datasets/unified_approach/clustered/df_clustered.csv"

# Load the data and force both calendar columns to integers in one pass.
df = pd.read_csv(df_url).astype({'YEAR': int, 'MONTH': int})

# Join the calendar columns into "<year>-<month>" labels, then parse them
# with the '%Y-%m' format to obtain the DATE column.
year_month = df['YEAR'].astype(str).str.cat(df['MONTH'].astype(str), sep='-')
df['DATE'] = pd.to_datetime(year_month, format='%Y-%m')

In [2]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2100 entries, 0 to 2099
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   CODE_ATC          2100 non-null   int64         
 1   DATE              2100 non-null   datetime64[ns]
 2   HOSPI_CODE_UCD    2100 non-null   int64         
 3   ID_SITE_RATTACHE  2100 non-null   object        
 4   LIT_HC            2100 non-null   float64       
 5   LIT_HP            2100 non-null   float64       
 6   MONTH             2100 non-null   int64         
 7   N_UFS             2100 non-null   float64       
 8   PN_MEDICAL        2100 non-null   float64       
 9   POPULATION        2100 non-null   float64       
 10  P_MEDICAL         2100 non-null   float64       
 11  QUANTITY          2100 non-null   float64       
 12  QUANTITY_MA       2100 non-null   float64       
 13  SEJ_MCO           2100 non-null   float64       
 14  SEJ_PSY           2100 n

## 5.1 - New database composition based on clusters

In [3]:
# One independent copy of the rows carrying each of the four cluster labels
# (0 to 3), unpacked in label order.
df_cluster_0, df_cluster_1, df_cluster_2, df_cluster_3 = (
    df[df['CLUSTER'] == cluster_label].copy() for cluster_label in range(4)
)

## 5.2 - Building forecasting models based on clusters

In [4]:
def plot_scatter(medicines, y_test, y_pred):
    """Overlay observed and predicted quantities on a single scatter chart.

    Parameters
    ----------
    medicines : x positions; the axis is labelled ``HOSPI_CODE_UCD``.
    y_test : observed values, drawn in red under the legend entry ``Test``.
    y_pred : predicted values, drawn in blue under the legend entry ``Pred``.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    plt.figure(figsize=(12, 6))

    # Both series share the marker size, so only colour and legend entry differ.
    series = ((y_test, 'red', 'Test'), (y_pred, 'blue', 'Pred'))
    for values, colour, legend_label in series:
        plt.scatter(medicines, values, s=20, color=colour, label=legend_label)

    plt.title('Scatter plot of ground truth and prediction per medicine')
    plt.xlabel('HOSPI_CODE_UCD')
    plt.ylabel('QUANTITY')
    plt.xticks(rotation=90)
    plt.grid()
    plt.legend(title="Categories")
    plt.show()

In [5]:
def visualize_decision_tree(regr, feature_names, estimator_index = 5):
    """Render one tree of a fitted forest to ``tree.png`` and return it as an image.

    Parameters
    ----------
    regr : fitted ensemble exposing its trees through ``estimators_``.
    feature_names : names used to label the split nodes, or None. Any
        iterable is accepted; it is converted to a plain list before export.
    estimator_index : position of the tree to draw. An index past the end of
        the forest falls back to the last tree, because the hyper-parameter
        grids used in this notebook allow forests with as few as two trees.

    Returns
    -------
    An ``Image`` built from ``tree.png``. The caller has to display it (or
    leave it as the last expression of a cell) for it to appear.

    Raises
    ------
    RuntimeError
        If the Graphviz ``dot`` command exits with a non-zero status.

    Side effects: writes ``tree.dot`` and ``tree.png`` in the working directory.
    """
    trees = regr.estimators_
    # Clamp instead of failing with IndexError on small forests.
    estimator = trees[min(estimator_index, len(trees) - 1)]

    # Export as dot file
    export_graphviz(estimator,
                    out_file='tree.dot',
                    feature_names=None if feature_names is None else list(feature_names),
                    rounded=True,
                    precision=2,
                    filled=True)

    # Convert to png; a failed conversion must not fall through to a stale
    # tree.png left behind by an earlier call.
    exit_status = call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])
    if exit_status != 0:
        raise RuntimeError(f'Graphviz "dot" failed with exit status {exit_status}')

    # Display in Jupyter Notebook
    return Image(filename='tree.png')

In [6]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import RandomizedSearchCV

def predict_medicine_consumption_cluster(df, df_scores, hospital='-', cluster='-'):
  """Tune a random forest on the supplied rows and append its hold-out scores.

  Missing values are replaced with 0. ``QUANTITY`` is the target and every
  column except ``QUANTITY``, ``DATE``, ``WEEK``, ``CLUSTER`` and
  ``ID_SITE_RATTACHE`` is used as a feature. A shuffled 80/20 split holds out
  the test rows; ``RandomizedSearchCV`` (100 candidates, 5-fold CV) selects
  the forest on the training part.

  Parameters
  ----------
  df : DataFrame holding the columns named above plus ``HOSPI_CODE_UCD``.
  df_scores : DataFrame the score rows are appended to (not modified in place).
  hospital, cluster : labels copied into the score rows and the printed report.

  Returns
  -------
  A new DataFrame: ``df_scores`` followed by one row per distinct
  ``HOSPI_CODE_UCD``. R2, RMSE and MAE are computed once on the whole test
  split, so every row added by one call carries the same three values.

  Side effects: prints the best parameters and the metrics, writes
  ``tree.dot`` / ``tree.png`` through ``visualize_decision_tree`` and displays
  the rendered tree.

  NOTE(review): the split shuffles rows at random, and ``df.info()`` lists a
  ``QUANTITY_MA`` column that stays among the features. If that column is
  derived from ``QUANTITY`` the reported scores are optimistic - confirm.
  """
  # Only `Image` is imported from IPython.display at file level.
  from IPython.display import display

  df = df.fillna(0)

  # Build the feature frame once and reuse it for both the matrix and the names.
  features = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER', 'ID_SITE_RATTACHE'], axis=1)
  feature_names = features.columns.tolist()
  X = features.values
  y = df['QUANTITY'].values

  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle = True)

  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  # Fit the RandomizedSearchCV object to the training data
  randomized_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Evaluation metrics on the held-out rows
  r2 = r2_score(y_test, y_pred)
  mae = mean_absolute_error(y_test, y_pred)
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  # One score row per medicine, built in a single frame and appended once
  # instead of growing df_scores with a concat per medicine.
  new_rows = pd.DataFrame({'ID_SITE_RATTACHE': hospital,
                           'CLUSTER': cluster,
                           'HOSPI_CODE_UCD': df['HOSPI_CODE_UCD'].unique(),
                           'R2': r2,
                           'RMSE': rmse,
                           'MAE': mae})
  df_scores = pd.concat([df_scores, new_rows], ignore_index=True)

  # Print the best parameters, best score, and evaluation metrics
  print(f'Hospital: {hospital} Cluster: {cluster}')
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print('R^2 Score:', r2)
  print('MAE:', mae)
  print('RMSE:', rmse)
  print()

  # The helper only returns the image; without display() it is silently
  # dropped because this call is not the last expression of a cell.
  display(visualize_decision_tree(best_estimator, feature_names))

  return df_scores

In [7]:
def predict_cluster(df, cluster, hospital = '-'):
  """List the medicines found in ``df``, then fit and score the cluster model.

  Parameters
  ----------
  df : DataFrame with a ``HOSPI_CODE_UCD`` column; passed on unchanged to
      ``predict_medicine_consumption_cluster``.
  cluster : label used in the printed header and forwarded to the model call.
  hospital : label forwarded to the model call (defaults to ``'-'``).

  Returns
  -------
  Whatever ``predict_medicine_consumption_cluster`` returns when it is given
  an empty score frame to extend.
  """
  print(f'Medicines in cluster {cluster}: ')
  print()
  for code in df['HOSPI_CODE_UCD'].unique():
    print(f'--- {code}')
  print()

  # Empty table that fixes the column layout of the scores.
  score_columns = ['ID_SITE_RATTACHE','CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE']
  empty_scores = pd.DataFrame(columns=score_columns)
  return predict_medicine_consumption_cluster(df, empty_scores, hospital=hospital, cluster=cluster)

### Cluster 0

In [8]:
# Fit and score the cluster-0 model; the bare name on the last line displays the score table.
df_scores_cluster_0 = predict_cluster(df_cluster_0, cluster = '0')
df_scores_cluster_0

Medicines in cluster 0: 

--- 3400891996128
--- 3400892052120
--- 3400892065366
--- 3400892075761
--- 3400892088310
--- 3400892203645

Hospital: - Cluster: 0
Best Parameters: {'n_estimators': 188, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 20}
Best Score: 0.9082149818669404
R^2 Score: 0.8477671660393529
MAE: 2489.711366362302
RMSE: 9986.711561329657



Unnamed: 0,ID_SITE_RATTACHE,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE
0,-,0,3400891996128,0.847767,9986.711561,2489.711366
1,-,0,3400892052120,0.847767,9986.711561,2489.711366
2,-,0,3400892065366,0.847767,9986.711561,2489.711366
3,-,0,3400892075761,0.847767,9986.711561,2489.711366
4,-,0,3400892088310,0.847767,9986.711561,2489.711366
5,-,0,3400892203645,0.847767,9986.711561,2489.711366


### Cluster 1


In [9]:
# Fit and score the cluster-1 model; the bare name on the last line displays the score table.
df_scores_cluster_1 = predict_cluster(df_cluster_1, cluster = '1')
df_scores_cluster_1

Medicines in cluster 1: 

--- 3400893736135
--- 3400893826706
--- 3400893875490

Hospital: - Cluster: 1
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.751674159370586
R^2 Score: 0.8636821222678303
MAE: 1415.2233314090856
RMSE: 2024.8598957336972



Unnamed: 0,ID_SITE_RATTACHE,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE
0,-,1,3400893736135,0.863682,2024.859896,1415.223331
1,-,1,3400893826706,0.863682,2024.859896,1415.223331
2,-,1,3400893875490,0.863682,2024.859896,1415.223331


### Cluster 2

In [10]:
# Fit and score the cluster-2 model; the bare name on the last line displays the score table.
df_scores_cluster_2 = predict_cluster(df_cluster_2, cluster = '2')
df_scores_cluster_2

Medicines in cluster 2: 

--- 3400890837149
--- 3400891191226
--- 3400891225037
--- 3400891235203

Hospital: - Cluster: 2
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.9212369507244033
R^2 Score: 0.910894189770744
MAE: 943.0866405710633
RMSE: 1569.2100198414091



Unnamed: 0,ID_SITE_RATTACHE,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE
0,-,2,3400890837149,0.910894,1569.21002,943.086641
1,-,2,3400891191226,0.910894,1569.21002,943.086641
2,-,2,3400891225037,0.910894,1569.21002,943.086641
3,-,2,3400891235203,0.910894,1569.21002,943.086641


### Cluster 3

In [11]:
# Fit and score the cluster-3 model; the bare name on the last line displays the score table.
df_scores_cluster_3 = predict_cluster(df_cluster_3, cluster = '3')
df_scores_cluster_3

Medicines in cluster 3: 

--- 3400892508566
--- 3400892669236
--- 3400892697789
--- 3400892729589
--- 3400892745848
--- 3400892761527
--- 3400892761695
--- 3400893022634

Hospital: - Cluster: 3
Best Parameters: {'n_estimators': 118, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 18}
Best Score: 0.8576507132605323
R^2 Score: 0.8956875098349905
MAE: 1343.9497280259777
RMSE: 1912.3774867004522



Unnamed: 0,ID_SITE_RATTACHE,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE
0,-,3,3400892508566,0.895688,1912.377487,1343.949728
1,-,3,3400892669236,0.895688,1912.377487,1343.949728
2,-,3,3400892697789,0.895688,1912.377487,1343.949728
3,-,3,3400892729589,0.895688,1912.377487,1343.949728
4,-,3,3400892745848,0.895688,1912.377487,1343.949728
5,-,3,3400892761527,0.895688,1912.377487,1343.949728
6,-,3,3400892761695,0.895688,1912.377487,1343.949728
7,-,3,3400893022634,0.895688,1912.377487,1343.949728


In [19]:
# Stack the four per-cluster score tables into one overview. ignore_index
# renumbers the rows so the labels no longer restart at 0 for every cluster.
pd.concat([df_scores_cluster_0, df_scores_cluster_1, df_scores_cluster_2, df_scores_cluster_3], ignore_index=True)

Unnamed: 0,ID_SITE_RATTACHE,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE
0,-,0,3400891996128,0.847767,9986.711561,2489.711366
1,-,0,3400892052120,0.847767,9986.711561,2489.711366
2,-,0,3400892065366,0.847767,9986.711561,2489.711366
3,-,0,3400892075761,0.847767,9986.711561,2489.711366
4,-,0,3400892088310,0.847767,9986.711561,2489.711366
5,-,0,3400892203645,0.847767,9986.711561,2489.711366
0,-,1,3400893736135,0.863682,2024.859896,1415.223331
1,-,1,3400893826706,0.863682,2024.859896,1415.223331
2,-,1,3400893875490,0.863682,2024.859896,1415.223331
0,-,2,3400890837149,0.910894,1569.21002,943.086641


In [12]:
def predict_medicine_consumption_cluster_partitioned(df, df_scores, medicine, hospital='-', cluster='-'):
  """Score a random forest on one held-out medicine after training on the others.

  Missing values are replaced with 0. Rows whose ``HOSPI_CODE_UCD`` equals
  ``medicine`` form the test set and all remaining rows form the training
  set. ``QUANTITY`` is the target; every column except ``QUANTITY``,
  ``DATE``, ``WEEK``, ``CLUSTER`` and ``ID_SITE_RATTACHE`` is a feature.
  ``RandomizedSearchCV`` (100 candidates, 5-fold CV) selects the forest on
  the training rows.

  Parameters
  ----------
  df : DataFrame holding the columns named above plus ``HOSPI_CODE_UCD``.
  df_scores : DataFrame the score row is appended to (not modified in place).
  medicine : ``HOSPI_CODE_UCD`` value to hold out.
  hospital, cluster : labels copied into the score row and the printed report.

  Returns
  -------
  A new DataFrame: ``df_scores`` followed by one row holding the R2, RMSE and
  MAE measured on the held-out medicine.

  Raises
  ------
  ValueError
      If ``medicine`` has no rows in ``df``, or if it is the only medicine
      present so that nothing is left to train on.

  NOTE(review): ``HOSPI_CODE_UCD`` itself stays among the features, so the
  model is evaluated on a code value it never saw during training - confirm
  this is intended.
  """
  df = df.fillna(0)

  # Compute the hold-out mask once and validate both sides of the partition.
  held_out = df['HOSPI_CODE_UCD'] == medicine
  if not held_out.any():
    raise ValueError(f'No rows found for medicine {medicine}; nothing to evaluate.')
  if held_out.all():
    raise ValueError(f'Medicine {medicine} is the only one present; no rows left to train on.')

  # Split the data into training and testing sets
  features = df.drop(['QUANTITY', 'DATE', 'WEEK', 'CLUSTER', 'ID_SITE_RATTACHE'], axis=1)
  X_train = features[~held_out].values
  X_test = features[held_out].values
  y_train = df.loc[~held_out, 'QUANTITY'].values
  y_test = df.loc[held_out, 'QUANTITY'].values

  # Define the parameter distributions for RandomizedSearchCV
  param_distributions = {
      'max_depth': np.arange(2, 31, 2),
      'n_estimators': np.arange(2, 201, 2),
      'max_features': ['sqrt', 'log2'],
      'min_samples_split': np.arange(2, 11, 2),
      'min_samples_leaf': np.arange(2, 5, 1)
  }

  # Create the RandomizedSearchCV object
  randomized_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),
                                          param_distributions=param_distributions,
                                          n_iter=100,
                                          cv=5,
                                          random_state=42)

  # Fit the RandomizedSearchCV object to the training data
  randomized_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = randomized_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Evaluation metrics on the held-out medicine
  r2 = r2_score(y_test, y_pred)
  mae = mean_absolute_error(y_test, y_pred)
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  new_row = pd.DataFrame({'ID_SITE_RATTACHE': [hospital],
                          'CLUSTER': [cluster],
                          'HOSPI_CODE_UCD': [medicine],
                          'R2': [r2],
                          'RMSE': [rmse],
                          'MAE': [mae]})

  # Append the new row to the DataFrame
  df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # Print the best parameters, best score, and evaluation metrics
  print(f'Hospital: {hospital} Cluster: {cluster}')
  print('Best Parameters:', randomized_search.best_params_)
  print('Best Score:', randomized_search.best_score_)
  print('R^2 Score:', r2)
  print('MAE:', mae)
  print('RMSE:', rmse)
  print()

  return df_scores

In [13]:
def predict_cluster_partitioned(df, hospital, cluster, medicine):
  """Announce the held-out medicine, then score it with the partitioned model.

  Parameters
  ----------
  df : DataFrame passed on unchanged to
      ``predict_medicine_consumption_cluster_partitioned``.
  hospital, cluster : labels forwarded to the model call.
  medicine : ``HOSPI_CODE_UCD`` value to hold out; also printed.

  Returns
  -------
  Whatever ``predict_medicine_consumption_cluster_partitioned`` returns when
  it is given an empty score frame to extend.
  """
  print('Evaluating medicine: ')
  print(f'--- {medicine}')
  print()

  # Same column layout as the seed frame in predict_cluster, including
  # ID_SITE_RATTACHE, so the hospital column is not tacked on at the end.
  df_scores = pd.DataFrame(columns=['ID_SITE_RATTACHE', 'CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE'])
  df_scores = predict_medicine_consumption_cluster_partitioned(df, df_scores, medicine, hospital=hospital, cluster=cluster)
  return df_scores

In [14]:
# Leave-one-medicine-out evaluation for cluster 0. Each iteration runs a full
# hyper-parameter search, so the per-medicine tables are collected in a list
# and concatenated once instead of re-copying the growing frame every pass.
# The empty seed frame keeps the result well-defined if the cluster has no medicines.
cluster_0_score_frames = [pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE'])]

for medicine in df_cluster_0.HOSPI_CODE_UCD.unique():
  df_scores_cluster_0_partitioned = predict_cluster_partitioned(df_cluster_0, '-', '0', medicine)
  cluster_0_score_frames.append(df_scores_cluster_0_partitioned)

# ignore_index gives every medicine its own row label instead of a repeated 0.
df_scores_cluster_0_complete = pd.concat(cluster_0_score_frames, ignore_index=True)

df_scores_cluster_0_complete

Evaluating medicine: 
--- 3400891996128

Hospital: - Cluster: 0
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.773665818737939
R^2 Score: -3.488536303227362
MAE: 73503.17418925848
RMSE: 82849.05347427288

Evaluating medicine: 
--- 3400892052120

Hospital: - Cluster: 0
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8505708405750653
R^2 Score: -332.065457611914
MAE: 15756.09676279985
RMSE: 16191.748530092942

Evaluating medicine: 
--- 3400892065366

Hospital: - Cluster: 0
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8636962009036248
R^2 Score: 0.1143368647947467
MAE: 1667.073515542384
RMSE: 2122.5853181909324

Evaluating medicine: 
--- 3400892075761

Hospital: - Cluster: 0
Best Parameters: {'n_estimators': 56, 'm

Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,ID_SITE_RATTACHE
0,0,3400891996128,-3.488536,82849.053474,73503.174189,-
0,0,3400892052120,-332.065458,16191.74853,15756.096763,-
0,0,3400892065366,0.114337,2122.585318,1667.073516,-
0,0,3400892075761,-1.145936,1266.065263,1117.063918,-
0,0,3400892088310,0.448012,1436.461011,1080.928257,-
0,0,3400892203645,0.40881,2522.094249,1967.090567,-


In [15]:
# Leave-one-medicine-out evaluation for cluster 1. Each iteration runs a full
# hyper-parameter search, so the per-medicine tables are collected in a list
# and concatenated once instead of re-copying the growing frame every pass.
# The empty seed frame keeps the result well-defined if the cluster has no medicines.
cluster_1_score_frames = [pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE'])]

for medicine in df_cluster_1.HOSPI_CODE_UCD.unique():
  df_scores_cluster_1_partitioned = predict_cluster_partitioned(df_cluster_1, '-', '1', medicine)
  cluster_1_score_frames.append(df_scores_cluster_1_partitioned)

# ignore_index gives every medicine its own row label instead of a repeated 0.
df_scores_cluster_1_complete = pd.concat(cluster_1_score_frames, ignore_index=True)

df_scores_cluster_1_complete

Evaluating medicine: 
--- 3400893736135

Hospital: - Cluster: 1
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.7710062415039423
R^2 Score: 0.5423224442962924
MAE: 1364.1191071913565
RMSE: 1699.2346107927215

Evaluating medicine: 
--- 3400893826706

Hospital: - Cluster: 1
Best Parameters: {'n_estimators': 104, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 22}
Best Score: 0.7724490117068411
R^2 Score: 0.2546652592729485
MAE: 1899.0241139993739
RMSE: 2243.145670089847

Evaluating medicine: 
--- 3400893875490

Hospital: - Cluster: 1
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.6346467540531838
R^2 Score: -0.7475620141826254
MAE: 6300.7545193559745
RMSE: 7333.865935459566



Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,ID_SITE_RATTACHE
0,1,3400893736135,0.542322,1699.234611,1364.119107,-
0,1,3400893826706,0.254665,2243.14567,1899.024114,-
0,1,3400893875490,-0.747562,7333.865935,6300.754519,-


In [16]:
# Leave-one-medicine-out evaluation for cluster 2. Each iteration runs a full
# hyper-parameter search, so the per-medicine tables are collected in a list
# and concatenated once instead of re-copying the growing frame every pass.
# The empty seed frame keeps the result well-defined if the cluster has no medicines.
cluster_2_score_frames = [pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE'])]

for medicine in df_cluster_2.HOSPI_CODE_UCD.unique():
  df_scores_cluster_2_partitioned = predict_cluster_partitioned(df_cluster_2, '-', '2', medicine)
  cluster_2_score_frames.append(df_scores_cluster_2_partitioned)

# ignore_index gives every medicine its own row label instead of a repeated 0.
df_scores_cluster_2_complete = pd.concat(cluster_2_score_frames, ignore_index=True)

df_scores_cluster_2_complete

Evaluating medicine: 
--- 3400890837149

Hospital: - Cluster: 2
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.9007505712102102
R^2 Score: -3.030187239145339
MAE: 1395.1536831539822
RMSE: 2066.3492323274686

Evaluating medicine: 
--- 3400891191226

Hospital: - Cluster: 2
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.91871108699998
R^2 Score: 0.2628953888207215
MAE: 2251.5110517515445
RMSE: 2661.38814986132

Evaluating medicine: 
--- 3400891225037

Hospital: - Cluster: 2
Best Parameters: {'n_estimators': 56, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2', 'max_depth': 8}
Best Score: 0.8276920757523033
R^2 Score: -1.6247074852971464
MAE: 10712.752716141347
RMSE: 12051.941694036583

Evaluating medicine: 
--- 3400891235203

Hospital: - Cluster: 2
Best Parameters: {'n_estimators': 88, '

Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,ID_SITE_RATTACHE
0,2,3400890837149,-3.030187,2066.349232,1395.153683,-
0,2,3400891191226,0.262895,2661.38815,2251.511052,-
0,2,3400891225037,-1.624707,12051.941694,10712.752716,-
0,2,3400891235203,-174.332211,6700.11063,6349.473876,-


In [17]:
# Leave-one-medicine-out evaluation for cluster 3. Each iteration runs a full
# hyper-parameter search, so the per-medicine tables are collected in a list
# and concatenated once instead of re-copying the growing frame every pass.
# The empty seed frame keeps the result well-defined if the cluster has no medicines.
cluster_3_score_frames = [pd.DataFrame(columns=['CLUSTER', 'HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE'])]

for medicine in df_cluster_3.HOSPI_CODE_UCD.unique():
  df_scores_cluster_3_partitioned = predict_cluster_partitioned(df_cluster_3, '-', '3', medicine)
  cluster_3_score_frames.append(df_scores_cluster_3_partitioned)

# ignore_index gives every medicine its own row label instead of a repeated 0.
df_scores_cluster_3_complete = pd.concat(cluster_3_score_frames, ignore_index=True)

df_scores_cluster_3_complete

Evaluating medicine: 
--- 3400892508566

Hospital: - Cluster: 3
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.8588191268545575
R^2 Score: 0.3732325660453555
MAE: 2019.7051624223425
RMSE: 2206.8662775133403

Evaluating medicine: 
--- 3400892669236

Hospital: - Cluster: 3
Best Parameters: {'n_estimators': 172, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 10}
Best Score: 0.851528987692469
R^2 Score: -4.250052492069923
MAE: 2328.283738357215
RMSE: 2827.0263319169217

Evaluating medicine: 
--- 3400892697789

Hospital: - Cluster: 3
Best Parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': 28}
Best Score: 0.8328716055294894
R^2 Score: -18.061804748577618
MAE: 1929.799422354273
RMSE: 2210.6462734613306

Evaluating medicine: 
--- 3400892729589

Hospital: - Cluster: 3
Best Parameters: {'n_estimators'

Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,ID_SITE_RATTACHE
0,3,3400892508566,0.373233,2206.866278,2019.705162,-
0,3,3400892669236,-4.250052,2827.026332,2328.283738,-
0,3,3400892697789,-18.061805,2210.646273,1929.799422,-
0,3,3400892729589,-0.105535,2029.198271,1634.346444,-
0,3,3400892745848,-0.076358,4838.878792,3840.699882,-
0,3,3400892761527,0.304122,3875.997134,3117.109218,-
0,3,3400892761695,0.489454,6605.118703,5319.885674,-
0,3,3400893022634,-0.684066,2500.510411,1763.611903,-


In [18]:
# Stack the four leave-one-medicine-out score tables into one overview.
# ignore_index renumbers the rows so that the labels are unique rather than
# repeated from the per-cluster tables.
pd.concat([df_scores_cluster_0_complete, df_scores_cluster_1_complete, df_scores_cluster_2_complete, df_scores_cluster_3_complete], ignore_index=True)

Unnamed: 0,CLUSTER,HOSPI_CODE_UCD,R2,RMSE,MAE,ID_SITE_RATTACHE
0,0,3400891996128,-3.488536,82849.053474,73503.174189,-
0,0,3400892052120,-332.065458,16191.74853,15756.096763,-
0,0,3400892065366,0.114337,2122.585318,1667.073516,-
0,0,3400892075761,-1.145936,1266.065263,1117.063918,-
0,0,3400892088310,0.448012,1436.461011,1080.928257,-
0,0,3400892203645,0.40881,2522.094249,1967.090567,-
0,1,3400893736135,0.542322,1699.234611,1364.119107,-
0,1,3400893826706,0.254665,2243.14567,1899.024114,-
0,1,3400893875490,-0.747562,7333.865935,6300.754519,-
0,2,3400890837149,-3.030187,2066.349232,1395.153683,-
