<a href="https://colab.research.google.com/github/douglasmmachado/MedicineConsumption/blob/main/notebooks/causal_method/unified_approach/5_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 5 - Forecasting and prediction



---



---



In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import math as m

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.tree import export_graphviz
from subprocess import call
from IPython.display import Image

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error,  mean_absolute_percentage_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.preprocessing import StandardScaler

from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV


df_agg_clusters_url = "https://raw.githubusercontent.com/douglasmmachado/MedicineConsumption/main/datasets/unified_approach/clustered/df_clustered.csv"
df_url = "https://raw.githubusercontent.com/douglasmmachado/ExploratoryDataAnalysis/main/datasets/unified_approach/df_ma.csv"
df_clustered_pca_url = "https://raw.githubusercontent.com/douglasmmachado/MedicineConsumption/main/datasets/unified_approach/clustered/df_clustered_pca.csv"

df = pd.read_csv(df_url)
df_agg_clusters = pd.read_csv(df_agg_clusters_url)
df_clustered_pca = pd.read_csv(df_clustered_pca_url)


# Convert numerical columns to string
for i, curr_df in enumerate([df, df_agg_clusters]):
    numerical_columns = ['HOSPI_CODE_UCD']
    curr_df[numerical_columns] = curr_df[numerical_columns].astype('string')


medicines = ['3400892088310','3400892075761','3400892203645',
             '3400892065366','3400892052120','3400891996128',
             '3400893826706','3400893736135','3400893875490',
             '3400890837149','3400891235203','3400891225037',
             '3400891191226','3400892729589','3400892745848',
             '3400892697789','3400892761527','3400893022634',
             '3400892761695','3400892669236','3400892508566']

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6636 entries, 0 to 6635
Data columns (total 39 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   DATE            6636 non-null   object 
 1   HOSPI_CODE_UCD  6636 non-null   string 
 2   LIT_HC          6636 non-null   float64
 3   LIT_HP          6636 non-null   float64
 4   N_ETB           6636 non-null   float64
 5   N_UFS           6636 non-null   float64
 6   PN_MEDICAL      6636 non-null   float64
 7   POPULATION      6636 non-null   float64
 8   P_MEDICAL       6636 non-null   float64
 9   QUANTITY        6636 non-null   float64
 10  QUANTITY_MA     6636 non-null   float64
 11  RESID           6636 non-null   float64
 12  SEASONAL        6636 non-null   float64
 13  SEJ_HAD         6636 non-null   float64
 14  SEJ_MCO         6636 non-null   float64
 15  SEJ_PSY         6636 non-null   float64
 16  SEJ_SLD         6636 non-null   float64
 17  SEJ_SSR         6636 non-null   f

In [4]:
df_agg_clusters.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6320 entries, 0 to 6319
Data columns (total 40 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   DATE            6320 non-null   object 
 1   HOSPI_CODE_UCD  6320 non-null   string 
 2   LIT_HC          6320 non-null   float64
 3   LIT_HP          6320 non-null   float64
 4   N_ETB           6320 non-null   float64
 5   N_UFS           6320 non-null   float64
 6   PN_MEDICAL      6320 non-null   float64
 7   POPULATION      6320 non-null   float64
 8   P_MEDICAL       6320 non-null   float64
 9   QUANTITY        6320 non-null   float64
 10  QUANTITY_MA     6320 non-null   float64
 11  RESID           6320 non-null   float64
 12  SEASONAL        6320 non-null   float64
 13  SEJ_HAD         6320 non-null   float64
 14  SEJ_MCO         6320 non-null   float64
 15  SEJ_PSY         6320 non-null   float64
 16  SEJ_SLD         6320 non-null   float64
 17  SEJ_SSR         6320 non-null   f

In [22]:
df_agg_clusters = df_agg_clusters[df_agg_clusters['QUANTITY'] > 0]
df_agg_clusters.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3867 entries, 4 to 6318
Data columns (total 40 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   DATE            3867 non-null   object 
 1   HOSPI_CODE_UCD  3867 non-null   string 
 2   LIT_HC          3867 non-null   float64
 3   LIT_HP          3867 non-null   float64
 4   N_ETB           3867 non-null   float64
 5   N_UFS           3867 non-null   float64
 6   PN_MEDICAL      3867 non-null   float64
 7   POPULATION      3867 non-null   float64
 8   P_MEDICAL       3867 non-null   float64
 9   QUANTITY        3867 non-null   float64
 10  QUANTITY_MA     3867 non-null   float64
 11  RESID           3867 non-null   float64
 12  SEASONAL        3867 non-null   float64
 13  SEJ_HAD         3867 non-null   float64
 14  SEJ_MCO         3867 non-null   float64
 15  SEJ_PSY         3867 non-null   float64
 16  SEJ_SLD         3867 non-null   float64
 17  SEJ_SSR         3867 non-null   f

In [23]:
df = df[(df['QUANTITY'] > 0)]
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4072 entries, 4 to 6634
Data columns (total 39 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   DATE            4072 non-null   object 
 1   HOSPI_CODE_UCD  4072 non-null   string 
 2   LIT_HC          4072 non-null   float64
 3   LIT_HP          4072 non-null   float64
 4   N_ETB           4072 non-null   float64
 5   N_UFS           4072 non-null   float64
 6   PN_MEDICAL      4072 non-null   float64
 7   POPULATION      4072 non-null   float64
 8   P_MEDICAL       4072 non-null   float64
 9   QUANTITY        4072 non-null   float64
 10  QUANTITY_MA     4072 non-null   float64
 11  RESID           4072 non-null   float64
 12  SEASONAL        4072 non-null   float64
 13  SEJ_HAD         4072 non-null   float64
 14  SEJ_MCO         4072 non-null   float64
 15  SEJ_PSY         4072 non-null   float64
 16  SEJ_SLD         4072 non-null   float64
 17  SEJ_SSR         4072 non-null   f

In [24]:
df_clustered_pca.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6320 entries, 0 to 6319
Data columns (total 40 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   DATE            6320 non-null   object 
 1   HOSPI_CODE_UCD  6320 non-null   int64  
 2   LIT_HC          6320 non-null   float64
 3   LIT_HP          6320 non-null   float64
 4   N_ETB           6320 non-null   float64
 5   N_UFS           6320 non-null   float64
 6   PN_MEDICAL      6320 non-null   float64
 7   POPULATION      6320 non-null   float64
 8   P_MEDICAL       6320 non-null   float64
 9   QUANTITY        6320 non-null   float64
 10  QUANTITY_MA     6320 non-null   float64
 11  RESID           6320 non-null   float64
 12  SEASONAL        6320 non-null   float64
 13  SEJ_HAD         6320 non-null   float64
 14  SEJ_MCO         6320 non-null   float64
 15  SEJ_PSY         6320 non-null   float64
 16  SEJ_SLD         6320 non-null   float64
 17  SEJ_SSR         6320 non-null   f

## 5.1 - New database composition based on clusters

## 5.2 - Building forecasting models based on clusters

In [10]:
import plotly.graph_objects as go

def plot_pred(y_pred, y_test, medicine):
    # Create a scatter plot for y_pred
    fig = go.Figure(data=go.Scatter(x=np.arange(len(y_pred)), y=y_pred, mode='markers', name='y_pred', marker=dict(symbol='circle')))

    # Add scatter plot for y_test
    fig.add_trace(go.Scatter(x=np.arange(len(y_test)), y=y_test, mode='markers', name='y_test', marker=dict(symbol='x')))

    # Set axes labels and title
    fig.update_layout(xaxis_title='Test samples', yaxis_title='Quantity',
                      title=f'y_pred and y_test for medicine: {medicine}')

    # Show the plot
    fig.show()


In [11]:
import plotly.graph_objects as go

def plot_mape(y_pred, y_test, medicine, epsilon=0.001):
    mape_array = np.abs(y_test - y_pred) / np.maximum(epsilon, np.abs(y_test))

    # Create a scatter plot for MAPE
    fig = go.Figure(data=go.Scatter(x=np.arange(len(y_pred)), y=mape_array, mode='markers', name='MAPE'))

    # Set axes labels and title
    fig.update_layout(xaxis_title='Test samples', yaxis_title='MAPE',
                      title=f'MAPE for medicine: {medicine}')

    # Show the plot
    fig.show()


In [26]:
def test_1_baseline(df, medicine, df_scores):
  print()
  print(100*'-')
  print('Medicine:' + str(medicine))

  X = df[df['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY', 'HOSPI_CODE_UCD', 'DATE', 'QUANTITY_MA', 'RESID', 'SEASONAL', 'TREND'], axis=1).values

  scaler = StandardScaler()
  X_scaled = scaler.fit_transform(X)

  y = df[df['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].values

  X_scaled, y = shuffle(X_scaled, y, random_state=42)

  if m.ceil(len(X_scaled) * 0.1) == 1:
    print('Dataset too small')
    test_size = 2
  else:
    test_size = 0.1

  # Split the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,
                                                      test_size = test_size,
                                                      random_state = 42)
  print(f'Size of data set: {len(X)}')
  print(f'Size of training set: {len(X_train)}')
  print(f'Size of test set: {len(X_test)}')

  # Define the parameter distributions for RandomizedSearchCV
  param_grid = {
      'max_depth': np.arange(2, 8, 1),
      'n_estimators': np.arange(2, max(int(m.ceil(len(X_train)*0.1)),3), 1),
      'max_features': ['sqrt']
  }
  depth_len = param_grid['max_depth'].size
  estimators_len = param_grid['n_estimators'].size

  print(f'Size of grid search: {depth_len * estimators_len}')

  # Create the RandomizedSearchCV object
  grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=42),
                             param_grid=param_grid,
                             scoring = 'neg_mean_absolute_percentage_error',
                             cv = 5,
                             n_jobs = -1)

  ''' >3 : the fold and candidate parameter indexes
      are also displayed together with the starting time of the computation.
  '''
  # Fit the RandomizedSearchCV object to the data
  grid_search.fit(X_train, y_train)

  # Get the best estimator
  best_estimator = grid_search.best_estimator_

  # Make predictions using the best estimator
  y_pred = best_estimator.predict(X_test)

  # Calculate R^2 score
  r2 = r2_score(y_test, y_pred)

  # Calculate MAE
  mae = mean_absolute_error(y_test, y_pred)

  # Calculate MAPE
  mape = mean_absolute_percentage_error(y_test, y_pred)

  # Calculate RMSE
  rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  # Print the best parameters, best score, and evaluation metrics
  print('Best Parameters: ', grid_search.best_params_)
  print('Training Score (MAPE): ', round(grid_search.best_score_, 3))
  print(10*'-' + 'Test scores' + 10*'-')
  print('R^2 Score:', round(r2, 3))
  print('MAE:', round(mae, 3))
  print('MAPE:', round(mape, 3))
  print('RMSE:', round(rmse, 3))
  print()


  # Create the new row as a DataFrame
  new_row = pd.DataFrame({'HOSPI_CODE_UCD': ['CODE_UCD_'+str(medicine)],
                          'R2': [r2],
                          'RMSE': [rmse],
                          'MAE': [mae],
                          'MAPE': [mape]})

  # Append the new row to the DataFrame
  df_scores = pd.concat([df_scores, new_row], ignore_index=True)

  # plot pred x test
  plot_pred(y_pred, y_test, medicine)
  print()


  plot_mape(y_pred, y_test, medicine)
  print()

  # Return the updated DataFrame
  return df_scores


In [27]:
df_prediction_scores = pd.DataFrame(columns=['HOSPI_CODE_UCD', 'R2', 'RMSE', 'MAE', 'MAPE'])

for medicine in medicines:
  df_prediction_scores = test_1_baseline(df, medicine, df_prediction_scores)

df_prediction_scores


----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Size of data set: 171
Size of training set: 153
Size of test set: 18
Size of grid search: 84
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -2.046
----------Test scores----------
R^2 Score: 0.766
MAE: 655.776
MAPE: 0.174
RMSE: 984.019








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Size of data set: 204
Size of training set: 183
Size of test set: 21
Size of grid search: 102
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -1.021
----------Test scores----------
R^2 Score: 0.702
MAE: 678.679
MAPE: 0.33
RMSE: 1526.633








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Size of data set: 200
Size of training set: 180
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 8}
Training Score (MAPE):  -0.687
----------Test scores----------
R^2 Score: 0.931
MAE: 735.497
MAPE: 1.108
RMSE: 984.944








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Size of data set: 201
Size of training set: 180
Size of test set: 21
Size of grid search: 96
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -1.09
----------Test scores----------
R^2 Score: 0.918
MAE: 871.008
MAPE: 0.673
RMSE: 1257.679








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Size of data set: 195
Size of training set: 175
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 5}
Training Score (MAPE):  -1.465
----------Test scores----------
R^2 Score: 0.948
MAE: 488.867
MAPE: 11.749
RMSE: 735.763








----------------------------------------------------------------------------------------------------
Medicine:3400891996128
Size of data set: 205
Size of training set: 184
Size of test set: 21
Size of grid search: 102
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 12}
Training Score (MAPE):  -3.119
----------Test scores----------
R^2 Score: 0.957
MAE: 8252.249
MAPE: 20.432
RMSE: 11776.003








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Size of data set: 201
Size of training set: 180
Size of test set: 21
Size of grid search: 96
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -16.124
----------Test scores----------
R^2 Score: 0.782
MAE: 959.205
MAPE: 2.888
RMSE: 1271.869








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Size of data set: 197
Size of training set: 177
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 12}
Training Score (MAPE):  -0.354
----------Test scores----------
R^2 Score: 0.874
MAE: 502.109
MAPE: 0.113
RMSE: 736.505








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Size of data set: 201
Size of training set: 180
Size of test set: 21
Size of grid search: 96
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -3.105
----------Test scores----------
R^2 Score: 0.799
MAE: 1600.887
MAPE: 0.597
RMSE: 2585.18








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Size of data set: 207
Size of training set: 186
Size of test set: 21
Size of grid search: 102
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.447
----------Test scores----------
R^2 Score: 0.587
MAE: 2075.746
MAPE: 4.326
RMSE: 3969.761








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Size of data set: 188
Size of training set: 169
Size of test set: 19
Size of grid search: 90
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.016
----------Test scores----------
R^2 Score: 0.632
MAE: 532.28
MAPE: 12.211
RMSE: 995.526








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Size of data set: 209
Size of training set: 188
Size of test set: 21
Size of grid search: 102
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 14}
Training Score (MAPE):  -4.803
----------Test scores----------
R^2 Score: 0.746
MAE: 2333.844
MAPE: 1.94
RMSE: 3430.639








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Size of data set: 200
Size of training set: 180
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -1.28
----------Test scores----------
R^2 Score: 0.933
MAE: 532.504
MAPE: 0.567
RMSE: 737.563








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Size of data set: 194
Size of training set: 174
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 8}
Training Score (MAPE):  -2.8
----------Test scores----------
R^2 Score: 0.837
MAE: 1190.874
MAPE: 1.834
RMSE: 2047.178








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Size of data set: 175
Size of training set: 157
Size of test set: 18
Size of grid search: 84
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 4}
Training Score (MAPE):  -1.317
----------Test scores----------
R^2 Score: -0.039
MAE: 2260.273
MAPE: 0.563
RMSE: 4799.022








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Size of data set: 204
Size of training set: 183
Size of test set: 21
Size of grid search: 102
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.783
----------Test scores----------
R^2 Score: 0.433
MAE: 942.391
MAPE: 3.421
RMSE: 2337.1








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Size of data set: 200
Size of training set: 180
Size of test set: 20
Size of grid search: 96
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -2.273
----------Test scores----------
R^2 Score: 0.957
MAE: 1899.991
MAPE: 0.798
RMSE: 2341.785








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Size of data set: 156
Size of training set: 140
Size of test set: 16
Size of grid search: 72
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 13}
Training Score (MAPE):  -18.817
----------Test scores----------
R^2 Score: 0.739
MAE: 632.661
MAPE: 64.81
RMSE: 842.85








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Size of data set: 201
Size of training set: 180
Size of test set: 21
Size of grid search: 96
Best Parameters:  {'max_depth': 4, 'max_features': 'sqrt', 'n_estimators': 8}
Training Score (MAPE):  -2.105
----------Test scores----------
R^2 Score: 0.985
MAE: 1018.034
MAPE: 1.585
RMSE: 1310.235








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Size of data set: 173
Size of training set: 155
Size of test set: 18
Size of grid search: 84
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -8.466
----------Test scores----------
R^2 Score: 0.975
MAE: 919.819
MAPE: 2.389
RMSE: 1173.202








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Size of data set: 190
Size of training set: 171
Size of test set: 19
Size of grid search: 96
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -0.879
----------Test scores----------
R^2 Score: 0.82
MAE: 740.37
MAPE: 0.257
RMSE: 1017.162









Unnamed: 0,HOSPI_CODE_UCD,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892088310,0.766258,984.018697,655.775996,0.174475
1,CODE_UCD_3400892075761,0.701579,1526.63256,678.678975,0.330208
2,CODE_UCD_3400892203645,0.93136,984.94378,735.496763,1.107614
3,CODE_UCD_3400892065366,0.917614,1257.679148,871.00759,0.672914
4,CODE_UCD_3400892052120,0.948413,735.762539,488.866909,11.749488
5,CODE_UCD_3400891996128,0.956614,11776.003342,8252.248558,20.432358
6,CODE_UCD_3400893826706,0.78164,1271.869451,959.204784,2.887557
7,CODE_UCD_3400893736135,0.873737,736.504814,502.10869,0.113065
8,CODE_UCD_3400893875490,0.799387,2585.180451,1600.886595,0.597244
9,CODE_UCD_3400890837149,0.58718,3969.761338,2075.746457,4.32637


In [38]:
def train_test_split_modified(df, random_state = 42, test_size = 0.1, Shuffle = False):
  df_iter = df.copy()

  np.random.seed(random_state)

  X_train = []
  y_train = []

  X_test = []
  y_test = []

  columns = df.columns

  df_train = pd.DataFrame(columns=columns)
  df_test = pd.DataFrame(columns=columns)
  i = 0
  for medicine in df_iter.HOSPI_CODE_UCD.unique():
    df_temp = df_iter[df_iter['HOSPI_CODE_UCD'] == medicine]
    i += len(df_temp)
    n_samples = len(df_temp)


    if n_samples == 1:
      print('Not enough samples in cluster')
      test_samples = 1
    else:
      test_samples = max(m.ceil(n_samples * test_size), 2)


    for _ in range(test_samples):
      random_index = np.random.choice(df_temp.index)
      random_row = df_temp.loc[random_index].to_frame().T

      # Concatenate the new row to the original DataFrame
      df_test = pd.concat([df_test, random_row], ignore_index=False)

      # Remove the selected index from df_temp
      df_temp = df_temp.drop(random_index)

    df_iter = df_iter.drop(df_iter[df_iter['HOSPI_CODE_UCD'] == medicine].index)

  # Get the indices of rows to exclude from df
  exclude_indices = df_test.index

  # Create df_train by dropping the rows specified by the exclude_indices from df
  df_train = df.drop(exclude_indices)

  if shuffle:
    X_train, y_train = shuffle(df_train.drop(['QUANTITY', 'CLUSTER'], axis=1), df_train.QUANTITY.values, random_state = random_state)
    X_test, y_test = shuffle(df_test.drop(['QUANTITY', 'CLUSTER'], axis=1).values, df_test.QUANTITY.values, random_state = random_state)

  return X_train, X_test, y_train, y_test

In [43]:
def test_2_clustering(df, df_scores, medicines):
  for cluster in df.CLUSTER.unique():
    print()
    print(100*'-')
    print(f'Cluster: {cluster}')

    # Perform the train-test split with shuffled samples
    X_train, X_test, y_train, y_test = train_test_split_modified(df[df['CLUSTER'] == cluster].drop(['DATE', 'QUANTITY_MA', 'RESID', 'SEASONAL', 'TREND'], axis=1))
    print(f'Size of data set: {len(X_train) + len(X_test)}')
    print(f'Size of training set: {len(X_train)}')
    print(f'Size of test set: {len(X_test)}')

    df_test = pd.DataFrame(X_test, columns = df.drop(['QUANTITY','CLUSTER','DATE', 'QUANTITY_MA', 'RESID', 'SEASONAL', 'TREND'], axis=1).copy().columns)
    df_test['QUANTITY'] = y_test


    # Define the parameter distributions for RandomizedSearchCV
    param_grid = {
        'max_depth': np.arange(2, 8, 1),
        'n_estimators': np.arange(2, max(int(m.ceil(len(X_train)*0.1)),3), 1),
        'max_features': ['sqrt']
    }

    depth_len = param_grid['max_depth'].size
    estimators_len = param_grid['n_estimators'].size

    print(f'Size of grid search: {depth_len * estimators_len}')

    # Create the RandomizedSearchCV object
    grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=42),
                              param_grid=param_grid,
                              scoring = 'neg_mean_absolute_percentage_error',
                              cv = 5,
                              n_jobs = -1)

    ''' >3 : the fold and candidate parameter indexes
        are also displayed together with the starting time of the computation.
    '''

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Fit the RandomizedSearchCV object to the data
    grid_search.fit(X_train_scaled, y_train)
    print("Finished training")

    # Get the best estimator
    best_estimator = grid_search.best_estimator_

    for medicine in df_test.HOSPI_CODE_UCD.unique():
      print()
      print(100*'-')
      print('Medicine:' + str(medicine))

      X_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY'], axis=1).copy().values

      scaler = StandardScaler()
      X_test_scaled = scaler.fit_transform(X_test_medicine)

      y_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

      # Make predictions using the best estimator
      y_pred = best_estimator.predict(X_test_scaled)

      # Calculate R^2 score
      r2 = r2_score(y_test_medicine, y_pred)

      # Calculate MAE
      mae = mean_absolute_error(y_test_medicine, y_pred)

      # Calculate MAPE
      mape = mean_absolute_percentage_error(y_test_medicine, y_pred)

      # Calculate RMSE
      rmse = np.sqrt(mean_squared_error(y_test_medicine, y_pred))

      # Print the best parameters, best score, and evaluation metrics

      # Print the best parameters, best score, and evaluation metrics
      print('Best Parameters: ', grid_search.best_params_)
      print('Training Score (MAPE): ', round(grid_search.best_score_, 3))
      print(10*'-' + 'Test scores' + 10*'-')
      print('R^2 Score:', round(r2, 3))
      print('MAE:', round(mae, 3))
      print('MAPE:', round(mape, 3))
      print('RMSE:', round(rmse, 3))
      print()


      # Create the new row as a DataFrame
      new_row = pd.DataFrame({'CLUSTER': [cluster],
                              'HOSPI_CODE_UCD': ['CODE_UCD_'+str(int(medicine))],
                              'R2': [r2],
                              'RMSE': [rmse],
                              'MAE': [mae],
                              'MAPE': [mape]})

      # Append the new row to the DataFrame
      df_scores = pd.concat([df_scores, new_row], ignore_index=True)

      plot_pred(y_pred, y_test_medicine, medicine)
      print()

      plot_mape(y_pred, y_test_medicine, medicine)
      print()

  # Return the updated DataFrame
  return df_scores

In [44]:
df_prediction_scores_agg = pd.DataFrame(columns=[ 'HOSPI_CODE_UCD', 'CLUSTER', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_agg = test_2_clustering(df_agg_clusters, df_prediction_scores_agg, medicines)

df_prediction_scores_agg


----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 2323
Size of training set: 2082
Size of test set: 241
Size of grid search: 1242
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.433
MAE: 3889.309
MAPE: 2.764
RMSE: 5243.081








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.665
MAE: 964.545
MAPE: 0.214
RMSE: 1679.668








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.554
MAE: 3744.349
MAPE: 0.321
RMSE: 5219.876








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.835
MAE: 1109.744
MAPE: 7.591
RMSE: 1334.973








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.651
MAE: 2378.071
MAPE: 0.271
RMSE: 2960.362








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.206
MAE: 1937.157
MAPE: 16.169
RMSE: 2400.454








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -1.018
MAE: 2085.948
MAPE: 5.787
RMSE: 2891.866








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.51
MAE: 1593.148
MAPE: 1.549
RMSE: 2003.739








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.655
MAE: 1911.802
MAPE: 4.146
RMSE: 2183.461








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.757
MAE: 2021.133
MAPE: 4.532
RMSE: 2875.329








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -1.231
MAE: 2010.422
MAPE: 58.663
RMSE: 2812.439








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -4.056
MAE: 3747.678
MAPE: 56.771
RMSE: 4467.493








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.212
MAE: 6576.533
MAPE: 0.41
RMSE: 8806.901








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -0.3
MAE: 10477.973
MAPE: 91.188
RMSE: 12034.231








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -1.221
MAE: 4427.39
MAPE: 5.295
RMSE: 5247.035








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.33
MAE: 2700.306
MAPE: 6.711
RMSE: 3346.9








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.744
MAE: 1194.102
MAPE: 1.044
RMSE: 1583.508








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: -0.543
MAE: 1726.2
MAPE: 0.454
RMSE: 2194.537








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.085
MAE: 2329.391
MAPE: 2.257
RMSE: 3002.656








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 6}
Training Score (MAPE):  -4.981
----------Test scores----------
R^2 Score: 0.238
MAE: 6591.217
MAPE: 2.812
RMSE: 8099.069








----------------------------------------------------------------------------------------------------
Cluster: 2
Not enough samples in cluster
Not enough samples in cluster
Size of data set: 1195
Size of training set: 1070
Size of test set: 125
Size of grid search: 630
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -3.633
MAE: 4226.691
MAPE: 0.442
RMSE: 4788.145








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -293.525
MAE: 2567.971
MAPE: 0.976
RMSE: 3675.88








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -14.844
MAE: 2444.361
MAPE: 0.356
RMSE: 3033.307








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -65.885
MAE: 3091.767
MAPE: 1.596
RMSE: 3736.764








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -24.241
MAE: 1679.167
MAPE: 0.342
RMSE: 1952.898








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -775.853
MAE: 3389.245
MAPE: 9.608
RMSE: 3470.731








----------------------------------------------------------------------------------------------------
Medicine:3400892203645
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -9.227
MAE: 1151.132
MAPE: 0.215
RMSE: 1357.254








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -81.567
MAE: 1972.293
MAPE: 0.557
RMSE: 2284.132








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -50.611
MAE: 1303.326
MAPE: 0.348
RMSE: 1899.503








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -730.458
MAE: 4127.726
MAPE: 3.987
RMSE: 4641.518








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -47.309
MAE: 8547.177
MAPE: 0.626
RMSE: 8738.534








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -4.135
MAE: 1758.049
MAPE: 0.32
RMSE: 1972.048








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -0.678
MAE: 1502.217
MAPE: 0.348
RMSE: 1855.506








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: 0.134
MAE: 1814.223
MAPE: 35.325
RMSE: 1969.34








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -20.401
MAE: 2414.111
MAPE: 0.574
RMSE: 2639.252








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -0.608
MAE: 1574.543
MAPE: 0.239
RMSE: 2077.827








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -27.833
MAE: 1867.958
MAPE: 0.312
RMSE: 2490.286








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: -109.332
MAE: 5979.15
MAPE: 0.582
RMSE: 6097.185








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: nan
MAE: 4398.173
MAPE: 0.56
RMSE: 4398.173




R^2 score is not well-defined with less than two samples.








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -1.747
----------Test scores----------
R^2 Score: nan
MAE: 3023.173
MAPE: 0.467
RMSE: 3023.173




R^2 score is not well-defined with less than two samples.








----------------------------------------------------------------------------------------------------
Cluster: 3
Size of data set: 122
Size of training set: 108
Size of test set: 14
Size of grid search: 54
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 7}
Training Score (MAPE):  -0.063
----------Test scores----------
R^2 Score: -2.629
MAE: 2195.446
MAPE: 0.086
RMSE: 2268.528








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'n_estimators': 7}
Training Score (MAPE):  -0.063
----------Test scores----------
R^2 Score: 0.329
MAE: 2006.255
MAPE: 0.072
RMSE: 2513.245








----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 179
Size of training set: 159
Size of test set: 20
Size of grid search: 84
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -1.133
MAE: 2729.367
MAPE: 0.153
RMSE: 3629.409








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -23.282
MAE: 3667.351
MAPE: 0.224
RMSE: 4281.21








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -9.575
MAE: 2681.267
MAPE: 0.308
RMSE: 2906.049








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -11.043
MAE: 3295.565
MAPE: 0.425
RMSE: 3690.234








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -10.976
MAE: 1143.161
MAPE: 0.094
RMSE: 1260.573








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'max_depth': 5, 'max_features': 'sqrt', 'n_estimators': 3}
Training Score (MAPE):  -0.162
----------Test scores----------
R^2 Score: -181.672
MAE: 4420.97
MAPE: 0.565
RMSE: 4433.121








----------------------------------------------------------------------------------------------------
Cluster: 4
Size of data set: 36
Size of training set: 32
Size of test set: 4
Size of grid search: 12
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'max_depth': 6, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.119
----------Test scores----------
R^2 Score: 0.164
MAE: 3046.922
MAPE: 0.184
RMSE: 3299.71








----------------------------------------------------------------------------------------------------
Cluster: 5
Size of data set: 12
Size of training set: 8
Size of test set: 4
Size of grid search: 6
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.123
----------Test scores----------
R^2 Score: -2.055
MAE: 3469.0
MAPE: 0.105
RMSE: 4229.7








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'max_depth': 2, 'max_features': 'sqrt', 'n_estimators': 2}
Training Score (MAPE):  -0.123
----------Test scores----------
R^2 Score: -22.892
MAE: 6683.5
MAPE: 0.297
RMSE: 6684.286









Unnamed: 0,HOSPI_CODE_UCD,CLUSTER,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892065366,0,0.433055,5243.081071,3889.309052,2.763921
1,CODE_UCD_3400891191226,0,0.665021,1679.668376,964.544618,0.214112
2,CODE_UCD_3400892729589,0,0.554111,5219.876216,3744.349213,0.320623
3,CODE_UCD_3400892052120,0,0.834798,1334.972724,1109.744214,7.590683
4,CODE_UCD_3400892745848,0,0.650616,2960.362305,2378.07112,0.270986
5,CODE_UCD_3400892508566,0,0.205655,2400.453849,1937.157071,16.16863
6,CODE_UCD_3400891235203,0,-1.017779,2891.865903,2085.948174,5.787366
7,CODE_UCD_3400892697789,0,0.51032,2003.738799,1593.147503,1.548932
8,CODE_UCD_3400892075761,0,0.65527,2183.461402,1911.80179,4.146268
9,CODE_UCD_3400890837149,0,0.757009,2875.32946,2021.133211,4.531936


In [None]:
df_prediction_scores_pca = pd.DataFrame(columns=[ 'HOSPI_CODE_UCD', 'CLUSTER', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_pca = test_2_clustering(df_clustered_pca, df_prediction_scores_pca, medicines)

df_prediction_scores_pca

In [45]:
from sklearn.neural_network import MLPRegressor

def test_2_clustering_MLPR(df, df_scores, medicines):
  for cluster in df.CLUSTER.unique():
    print()
    print(100*'-')
    print(f'Cluster: {cluster}')

    # Perform the train-test split with shuffled samples
    X_train, X_test, y_train, y_test = train_test_split_modified(df[df['CLUSTER'] == cluster].drop(['DATE', 'QUANTITY_MA', 'RESID', 'SEASONAL', 'TREND'], axis=1))
    print(f'Size of data set: {len(X_train) + len(X_test)}')
    print(f'Size of training set: {len(X_train)}')
    print(f'Size of test set: {len(X_test)}')

    df_test = pd.DataFrame(X_test, columns = df.drop(['QUANTITY','CLUSTER','DATE', 'QUANTITY_MA', 'RESID', 'SEASONAL', 'TREND'], axis=1).copy().columns)
    df_test['QUANTITY'] = y_test

    # Define the parameter grid for MLPRegressor
    param_grid = {
        'hidden_layer_sizes': [(100,), (50, 50), (20, 10)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'lbfgs'],
        'max_iter': [1000],
    }

    depth_len = len(param_grid['hidden_layer_sizes']) * len(param_grid['activation']) * \
                len(param_grid['solver']) * len(param_grid['max_iter'])

    print(f'Size of grid search: {depth_len}')

    # Create the MLPRegressor object
    mlp_regressor = MLPRegressor(random_state=42)

    # Create the GridSearchCV object
    grid_search = GridSearchCV(estimator=mlp_regressor, param_grid=param_grid,
                                scoring='neg_mean_absolute_percentage_error',
                                cv=5, n_jobs=-1, verbose=3)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Fit the GridSearchCV object to the data
    grid_search.fit(X_train_scaled, y_train)
    print("Finished training")

    # Get the best estimator
    best_estimator = grid_search.best_estimator_

    for medicine in df_test.HOSPI_CODE_UCD.unique():
        print()
        print(100*'-')
        print('Medicine:' + str(medicine))

        X_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine].drop(['QUANTITY'], axis=1).copy().values

        X_test_scaled = scaler.transform(X_test_medicine)

        y_test_medicine = df_test[df_test['HOSPI_CODE_UCD'] == medicine]['QUANTITY'].copy().values

        # Make predictions using the best estimator
        y_pred = best_estimator.predict(X_test_scaled)

        # Calculate R^2 score
        r2 = r2_score(y_test_medicine, y_pred)

        # Calculate MAE
        mae = mean_absolute_error(y_test_medicine, y_pred)

        # Calculate MAPE
        mape = mean_absolute_percentage_error(y_test_medicine, y_pred)

        # Calculate RMSE
        rmse = np.sqrt(mean_squared_error(y_test_medicine, y_pred))

        # Print the best parameters, best score, and evaluation metrics
        print('Best Parameters: ', grid_search.best_params_)
        print('Training Score (MAPE): ', round(grid_search.best_score_, 3))
        print(10*'-' + 'Test scores' + 10*'-')
        print('R^2 Score:', round(r2, 3))
        print('MAE:', round(mae, 3))
        print('MAPE:', round(mape, 3))
        print('RMSE:', round(rmse, 3))
        print()

        # Create the new row as a DataFrame
        new_row = pd.DataFrame({'CLUSTER': [cluster],
                                'HOSPI_CODE_UCD': ['CODE_UCD_'+str(int(medicine))],
                                'R2': [r2],
                                'RMSE': [rmse],
                                'MAE': [mae],
                                'MAPE': [mape]})

        # Append the new row to the DataFrame
        df_scores = pd.concat([df_scores, new_row], ignore_index=True)

        plot_pred(y_pred, y_test_medicine, medicine)
        print()

        plot_mape(y_pred, y_test_medicine, medicine)
        print()

  # Return the updated DataFrame
  return df_scores

In [46]:
df_prediction_scores_agg = pd.DataFrame(columns=[ 'HOSPI_CODE_UCD', 'CLUSTER', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_agg = test_2_clustering_MLPR(df_agg_clusters, df_prediction_scores_agg, medicines)

df_prediction_scores_agg


----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 2323
Size of training set: 2082
Size of test set: 241
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.248
MAE: 7794.453
MAPE: 1.113
RMSE: 10440.504




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.854
MAE: 3951.573
MAPE: 0.958
RMSE: 4902.776




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.705
MAE: 10207.183
MAPE: 0.967
RMSE: 12856.667




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.881
MAE: 3100.007
MAPE: 1.758
RMSE: 4504.118




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -2.769
MAE: 8333.917
MAPE: 0.978
RMSE: 9723.044




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.909
MAE: 2622.555
MAPE: 1.492
RMSE: 3721.287




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891235203
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.954
MAE: 2007.083
MAPE: 4.002
RMSE: 2845.552




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.484
MAE: 1992.716
MAPE: 0.874
RMSE: 3488.565




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.033
MAE: 3790.266
MAPE: 0.954
RMSE: 5302.191




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.836
MAE: 5348.123
MAPE: 1.253
RMSE: 7903.428




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -2.116
MAE: 2770.103
MAPE: 9.939
RMSE: 3324.223




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893022634


X does not have valid feature names, but StandardScaler was fitted with feature names




Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -2.58
MAE: 3227.955
MAPE: 1.552
RMSE: 3759.274








----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------


X does not have valid feature names, but StandardScaler was fitted with feature names




R^2 Score: -1.711
MAE: 12979.2
MAPE: 0.975
RMSE: 16338.013








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -3.529
MAE: 19859.031
MAPE: 6.725
RMSE: 22457.853




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  


X does not have valid feature names, but StandardScaler was fitted with feature names



{'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.368
MAE: 4130.981
MAPE: 0.957
RMSE: 5417.005








----------------------------------------------------------------------------------------------------
Medicine:3400892761695



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.892
MAE: 3882.319
MAPE: 1.074
RMSE: 5622.335








----------------------------------------------------------------------------------------------------
Medicine:3400892203645



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -0.911
MAE: 2986.644
MAPE: 0.864
RMSE: 4325.754








----------------------------------------------------------------------------------------------------
Medicine:3400893736135



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -3.631
MAE: 3366.259
MAPE: 0.95
RMSE: 3801.656








----------------------------------------------------------------------------------------------------
Medicine:3400892088310



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -1.347
MAE: 3643.371
MAPE: 0.869
RMSE: 4809.559








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.261
----------Test scores----------
R^2 Score: -2.038
MAE: 13262.612


X does not have valid feature names, but StandardScaler was fitted with feature names




MAPE: 0.967
RMSE: 16177.158








----------------------------------------------------------------------------------------------------
Cluster: 2
Not enough samples in cluster
Not enough samples in cluster
Size of data set: 1195
Size of training set: 1070
Size of test set: 125
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -16.46
MAE: 9049.276
MAPE: 0.943
RMSE: 9295.05




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -88.781
MAE: 2020.256
MAPE: 0.828
RMSE: 2029.52




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -73.94
MAE: 6558.742
MAPE: 0.925
RMSE: 6597.024




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400892697789
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -13.65
MAE: 1658.191
MAPE: 0.775
RMSE: 1748.847








----------------------------------------------------------------------------------------------------
Medicine:3400892088310
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -142.254
MAE: 4630.581
MAPE: 0.907
RMSE: 4652.385




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892745848
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -0.974
MAE: 150.337
MAPE: 0.45
RMSE: 174.937




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892203645



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -141.242
MAE: 5042.231
MAPE: 0.914
RMSE: 5061.675








----------------------------------------------------------------------------------------------------
Medicine:3400893022634
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -145.025
MAE: 3026.486
MAPE:


X does not have valid feature names, but StandardScaler was fitted with feature names



 0.829
RMSE: 3037.603








----------------------------------------------------------------------------------------------------
Medicine:3400890837149
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -135.845
MAE: 3080.496
MAPE: 0.847
RMSE: 3093.032




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892052120
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -7.376
MAE: 453.867
MAPE: 0.423
RMSE: 496.683




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -109.676


X does not have valid feature names, but StandardScaler was fitted with feature names




MAE: 13180.958
MAPE: 0.964
RMSE: 13226.733








----------------------------------------------------------------------------------------------------
Medicine:3400891191226
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE): 


X does not have valid feature names, but StandardScaler was fitted with feature names



 -0.944
----------Test scores----------
R^2 Score: -32.676
MAE: 4978.144
MAPE: 0.915
RMSE: 5050.193








----------------------------------------------------------------------------------------------------
Medicine:3400893736135
Best Parameters:  


X does not have valid feature names, but StandardScaler was fitted with feature names



{'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -8.819
MAE: 4300.249
MAPE: 0.913
RMSE: 4488.656








----------------------------------------------------------------------------------------------------
Medicine:3400891235203



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -1.038
MAE: 2315.899
MAPE: 10.361
RMSE: 3021.95








----------------------------------------------------------------------------------------------------
Medicine:3400892508566
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -49.8
MAE: 4024.739
MAPE: 0.912
RMSE: 4066.233




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893826706
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -14.26
MAE: 6234.791
MAPE: 0.935
RMSE: 6400.4




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400892729589
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -135.805
MAE: 5404.472
MAPE: 0.896
RMSE: 5424.408








----------------------------------------------------------------------------------------------------
Medicine:3400892669236



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: -286.051
MAE: 9816.995
MAPE: 0.952
RMSE: 9834.617








----------------------------------------------------------------------------------------------------
Medicine:3400892761527



X does not have valid feature names, but StandardScaler was fitted with feature names


R^2 score is not well-defined with less than two samples.



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: nan
MAE: 7798.742
MAPE: 0.993
RMSE: 7798.742










X does not have valid feature names, but StandardScaler was fitted with feature names


R^2 score is not well-defined with less than two samples.




----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -0.944
----------Test scores----------
R^2 Score: nan
MAE: 6424.027
MAPE: 0.992
RMSE: 6424.027








----------------------------------------------------------------------------------------------------
Cluster: 3
Size of data set: 122
Size of training set: 108
Size of test set: 14
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.083
----------Test scores----------
R^2 Score: -3.496
MAE: 2058.091
MAPE: 0.082
RMSE: 2524.826




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.083
----------Test scores----------
R^2 Score: 0.105
MAE: 2555.299
MAPE: 0.089
RMSE: 2902.062




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 179
Size of training set: 159
Size of test set: 20
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -2.662
MAE: 4054.748
MAPE: 0.238
RMSE: 4755.65




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -24.932
MAE: 4338.081
MAPE: 0.267
RMSE: 4424.226




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892669236
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -8.729
MAE: 2512.284
MAPE: 0.281
RMSE: 2787.319




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893875490
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -5.298
MAE: 2428.329
MAPE: 0.309
RMSE: 2668.774




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -0.568
MAE: 407.624
MAPE: 0.034
RMSE: 456.105




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892065366
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.22
----------Test scores----------
R^2 Score: -140.094
MAE: 3882.252
MAPE: 0.496
RMSE: 3896.084




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 4
Size of data set: 36
Size of training set: 32
Size of test set: 4
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037
Best Parameters:  {'activation': 'relu', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.155
----------Test scores----------
R^2 Score: -0.278
MAE: 3880.176
MAPE: 0.228
RMSE: 4080.652




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 5
Size of data set: 12
Size of training set: 8
Size of test set: 4
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892761527
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.088
----------Test scores----------
R^2 Score: -2.867
MAE: 4097.379
MAPE: 0.125
RMSE: 4758.667




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761695
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'lbfgs'}
Training Score (MAPE):  -0.088
----------Test scores----------
R^2 Score: -12.27
MAE: 4790.121
MAPE: 0.217
RMSE: 4981.498




X does not have valid feature names, but StandardScaler was fitted with feature names









Unnamed: 0,HOSPI_CODE_UCD,CLUSTER,R2,RMSE,MAE,MAPE
0,CODE_UCD_3400892065366,0,-1.248076,10440.504077,7794.453363,1.113324
1,CODE_UCD_3400891191226,0,-1.854001,4902.775939,3951.572621,0.958188
2,CODE_UCD_3400892729589,0,-1.704977,12856.667327,10207.182505,0.967373
3,CODE_UCD_3400892052120,0,-0.880574,4504.118133,3100.00651,1.757897
4,CODE_UCD_3400892745848,0,-2.768928,9723.044149,8333.916765,0.978477
5,CODE_UCD_3400892508566,0,-0.909013,3721.287473,2622.555379,1.491649
6,CODE_UCD_3400891235203,0,-0.953666,2845.55179,2007.083432,4.002285
7,CODE_UCD_3400892697789,0,-0.484307,3488.565471,1992.715509,0.873539
8,CODE_UCD_3400892075761,0,-1.032818,5302.191148,3790.266332,0.954298
9,CODE_UCD_3400890837149,0,-0.835884,7903.42805,5348.12347,1.252839


In [None]:
df_prediction_scores_pca = pd.DataFrame(columns=[ 'HOSPI_CODE_UCD', 'CLUSTER', 'R2', 'RMSE', 'MAE', 'MAPE'])

df_prediction_scores_pca = test_2_clustering_MLPR(df_clustered_pca, df_prediction_scores_pca, medicines)

df_prediction_scores_pca


----------------------------------------------------------------------------------------------------
Cluster: 0
Size of data set: 2440
Size of training set: 2180
Size of test set: 260
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400891225037.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.747
MAE: 6260.384
MAPE: 1.3042869419241098e+17
RMSE: 9426.293




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892745848.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.59
MAE: 2287.385
MAPE: 1.3043351503823277e+17
RMSE: 3595.572




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893736135.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.805
MAE: 1622.868
MAPE: 1.302657072220243e+17
RMSE: 2298.507




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.622
MAE: 4175.274
MAPE: 1.3036329130566912e+17
RMSE: 6601.419




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400893022634.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.598
MAE: 1599.066
MAPE: 4.351415384354005e+16
RMSE: 2453.827








----------------------------------------------------------------------------------------------------
Medicine:3400893875490.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.577
MAE: 3249.056
MAPE: 1.3025119750883058e+17
RMSE: 5193.785




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892669236.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.607
MAE: 3186.755
MAPE: 4.349556647377175e+16
RMSE: 5072.759




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400890837149.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -1.808
MAE: 1758.831
MAPE: 4.35776759966149e+16
RMSE: 2124.669




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891191226.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -1.124
MAE: 1460.657
MAPE: 273702170766550.16
RMSE: 1952.474




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892065366.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -1.332
MAE: 2877.271
MAPE: 4.348464958865238e+16
RMSE: 3729.386




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761695.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.332
MAE: 3336.419
MAPE: 4.354174845012771e+16
RMSE: 6583.067




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893826706.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -1.581
MAE: 1660.839
MAPE: 136264200036086.86
RMSE: 2083.733




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892203645.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.875
MAE: 1164.283
MAPE: 4.3537217995863704e+16
RMSE: 1616.649




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892508566.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.503
MAE: 1184.602
MAPE: 1.3019839780932565e+17
RMSE: 1869.71




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891235203.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.322
MAE: 541.95
MAPE: 1.7376985051173725e+17
RMSE: 852.319




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892697789.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -1.112
MAE: 596.79
MAPE: 4.348054339608494e+16
RMSE: 751.507




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892088310.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.468
MAE: 985.538
MAPE: 1.3034321155421626e+17
RMSE: 1551.607




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892729589.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -2.627
MAE: 3273.549
MAPE: 4.352093580321864e+16
RMSE: 3800.782




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.322
MAE: 455.732
MAPE: 4.356956366451137e+16
RMSE: 723.767




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892052120.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -7.322333777874806e+16
----------Test scores----------
R^2 Score: -0.271
MAE: 687.363
MAPE: 1.737212497428749e+17
RMSE: 1190.591




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 3
Size of data set: 1580
Size of training set: 1420
Size of test set: 160
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892745848.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -0.396
MAE: 205.703
MAPE: 356124549718216.5
RMSE: 302.341




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893736135.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -5.773
MAE: 4827.491
MAPE: 527677041235386.4
RMSE: 5216.092




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -0.802
MAE: 1240.803
MAPE: 1.0607465146208064e+17
RMSE: 1753.315




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892697789.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -2.389
MAE: 1386.927
MAPE: 5.259429686898969e+16
RMSE: 1609.812




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400891235203.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -0.53
MAE: 1150.249
MAPE: 1.757
RMSE: 1918.831








----------------------------------------------------------------------------------------------------
Medicine:3400892729589.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -90.19
MAE: 5901.999
MAPE: 0.984
RMSE: 5934.628




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891225037.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -26.413
MAE: 10096.999
MAPE: 0.99
RMSE: 10286.358




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892669236.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -0.965
MAE: 5280.141
MAPE: 1.5713997769735027e+17
RMSE: 7418.733




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891191226.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -2.572
MAE: 4084.138
MAPE: 625411270696816.8
RMSE: 4785.19




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892203645.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -39.444
MAE: 5054.499
MAPE: 0.981
RMSE: 5118.17




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400893022634.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -1.465
MAE: 2131.761
MAPE: 1.047633076358537e+17
RMSE: 2689.617








----------------------------------------------------------------------------------------------------
Medicine:3400893826706.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -2.889
MAE: 5917.124
MAPE: 5.2917344173437e+16
RMSE: 6824.541




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892508566.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -212.898
MAE: 5045.749
MAPE: 0.982
RMSE: 5057.585




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892088310.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -6.138
MAE: 4449.4
MAPE: 119196164793241.36
RMSE: 4785.499




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -6.887
MAE: 25683.442
MAPE: 305005812547624.9
RMSE: 27472.265




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400890837149.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -6.34
MAE: 2958.676
MAPE: 232356625961884.88
RMSE: 3170.697








----------------------------------------------------------------------------------------------------
Medicine:3400892761695.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -375.062
MAE: 26710.999
MAPE: 0.997
RMSE: 26746.584








----------------------------------------------------------------------------------------------------
Medicine:3400892065366.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -2.818
MAE: 5477.928
MAPE: 5.3161528254295864e+16
RMSE: 6335.267




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892052120.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -1.166
MAE: 957.266
MAPE: 5.242881265931915e+16
RMSE: 1239.728




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893875490.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.2527465018069748e+16
----------Test scores----------
R^2 Score: -143.72
MAE: 13778.499
MAPE: 0.993
RMSE: 13826.351




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 2
Size of data set: 1580
Size of training set: 1420
Size of test set: 160
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892745848.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.236
MAE: 2701.242
MAPE: 538735670762277.25
RMSE: 5408.987




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893736135.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.442
MAE: 2942.038
MAPE: 1598031103843717.5
RMSE: 4808.885




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.798
MAE: 4397.38
MAPE: 610551028340027.0
RMSE: 6273.021




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892697789.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.203
MAE: 2199.393
MAPE: 3470669580826802.5
RMSE: 4507.312




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891235203.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -1.994
MAE: 4582.351
MAPE: 1055103467068761.2
RMSE: 5481.433




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892729589.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -5.639
MAE: 14741.456
MAPE: 129072857205746.86
RMSE: 15938.134




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891225037.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -2.79
MAE: 18173.757


X does not have valid feature names, but StandardScaler was fitted with feature names




MAPE: 1761639208322394.0
RMSE: 21059.103








----------------------------------------------------------------------------------------------------
Medicine:3400892669236.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.523
MAE: 8079.307
MAPE: 559621869941127.9
RMSE: 13331.357




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891191226.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.806


X does not have valid feature names, but StandardScaler was fitted with feature names




MAE: 4124.822
MAPE: 352529060443731.0
RMSE: 5845.944








----------------------------------------------------------------------------------------------------
Medicine:3400892203645.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.686
MAE: 4442.375
MAPE: 590370292690413.5
RMSE: 6621.193




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893022634.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: 0.06
MAE: 561.763
MAPE: 2.7310342822732256e+17
RMSE: 1059.406




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893826706.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.069
MAE: 890.065
MAPE: 870317424191170.8
RMSE: 1780.841




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892508566.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.351
MAE: 1925.771
MAPE: 1520548754433600.2
RMSE: 3203.005




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892088310.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.786
MAE: 3847.575
MAPE: 2.660759974462094e+17
RMSE: 5383.713




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892761527.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.693
MAE: 3292.588
MAPE: 1547919237876395.5
RMSE: 4799.179




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400890837149.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.249


X does not have valid feature names, but StandardScaler was fitted with feature names




MAE: 3646.512
MAPE: 1755930251632270.0
RMSE: 7425.418








----------------------------------------------------------------------------------------------------
Medicine:3400892761695.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.795
MAE: 4188.708
MAPE: 963582234126671.0
RMSE: 5965.195


X does not have valid feature names, but StandardScaler was fitted with feature names












----------------------------------------------------------------------------------------------------
Medicine:3400892065366.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.897
MAE: 9977.57
MAPE: 1467486250774029.0
RMSE: 14200.438




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892052120.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -0.217
MAE: 2217.706
MAPE: 1502751438349080.0
RMSE: 4435.258




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893875490.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (50, 50), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -2.283021622815903e+16
----------Test scores----------
R^2 Score: -5.998
MAE: 19455.303
MAPE: 1251172710687.859
RMSE: 20958.012




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 5
Size of data set: 238
Size of training set: 198
Size of test set: 40
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
Finished training

----------------------------------------------------------------------------------------------------
Medicine:3400892508566.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.976
MAE: 1322.372
MAPE: 1213582209192067.2
RMSE: 1869.736




Stochastic Optimizer: Maximum iterations (1000) reached and the optimization hasn't converged yet.


X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892203645.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.96
MAE: 773.892
MAPE: 3.0597830286323624e+16
RMSE: 1084.883




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892088310.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -1.019
MAE: 1567.797
MAPE: 1.204
RMSE: 2187.419




X does not have valid feature names, but StandardScaler was fitted with feature names







X does not have valid feature names, but StandardScaler was fitted with feature names





----------------------------------------------------------------------------------------------------
Medicine:3400892745848.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.988
MAE: 2700.375
MAPE: 1.4779600297059648e+16
RMSE: 3814.269








----------------------------------------------------------------------------------------------------
Medicine:3400891225037.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.996
MAE: 8509.877
MAPE: 1.2542554106422636e+16
RMSE: 12030.846




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892075761.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.958
MAE: 745.806
MAPE: 2.344943708730836e+16
RMSE: 1047.392




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893826706.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.973
MAE: 1153.753
MAPE: 2.5492402176537668e+16
RMSE: 1623.667




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893875490.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.993
MAE: 4553.252
MAPE: 2.7735080412438544e+16
RMSE: 6430.568








----------------------------------------------------------------------------------------------------
Medicine:3400891235203.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -653.817
MAE: 1534.191
MAPE: 0.99
RMSE: 1535.364








----------------------------------------------------------------------------------------------------
Medicine:3400892729589.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.871
MAE: 235.036
MAPE: 3.558092951701572e+16
RMSE: 321.412








----------------------------------------------------------------------------------------------------
Medicine:3400892052120.0
Best Parameters: 


X does not have valid feature names, but StandardScaler was fitted with feature names



 {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.97
MAE: 1044.682
MAPE: 3.4135320127012292e+16
RMSE: 1466.723








----------------------------------------------------------------------------------------------------
Medicine:3400892761695.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -1.003
MAE: 8656.726
MAPE: 0.737
RMSE: 12232.373




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400893736135.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -0.985
MAE: 2083.105
MAPE: 3.3835593861819196e+16
RMSE: 2935.35








----------------------------------------------------------------------------------------------------
Medicine:3400890837149.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: 0.0
MAE: 11.474
MAPE: 5.167618693241123e+16
RMSE: 11.475








----------------------------------------------------------------------------------------------------
Medicine:3400893022634.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -17.702
MAE: 4091.686
MAPE: 0.996
RMSE: 4205.668








----------------------------------------------------------------------------------------------------
Medicine:3400892065366.0



X does not have valid feature names, but StandardScaler was fitted with feature names



Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -1.365
MAE: 165.839
MAPE: 1.151
RMSE: 203.787








----------------------------------------------------------------------------------------------------
Medicine:3400892761527.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------



X does not have valid feature names, but StandardScaler was fitted with feature names



R^2 Score: -1.94
MAE: 7388.705
MAPE: 0.996
RMSE: 9095.628








----------------------------------------------------------------------------------------------------
Medicine:3400892669236.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -2.002
MAE: 35.586
MAPE: 8796939694974705.0
RMSE: 47.644




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400891191226.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: -1.138
MAE: 118.357
MAPE: 1.277
RMSE: 146.203




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Medicine:3400892697789.0
Best Parameters:  {'activation': 'tanh', 'hidden_layer_sizes': (20, 10), 'max_iter': 1000, 'solver': 'adam'}
Training Score (MAPE):  -1.7350600623423034e+16
----------Test scores----------
R^2 Score: 0.0
MAE: 8.213
MAPE: 3.698860866973618e+16
RMSE: 8.376




X does not have valid feature names, but StandardScaler was fitted with feature names








----------------------------------------------------------------------------------------------------
Cluster: 1
Size of data set: 244
Size of training set: 204
Size of test set: 40
Size of grid search: 12
Fitting 5 folds for each of 12 candidates, totalling 60 fits
