In [9]:
# Import libraries 
import os
import pandas as pd
import numpy as np
import json

import mlflow

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from pathlib import Path
from functools import reduce

from datetime import datetime
from hts import HTSRegressor
import hts.functions
import collections
from hts.hierarchy import HierarchyTree
from sklearn.metrics import mean_squared_error

import warnings
warnings.simplefilter("ignore")

# settings
plt.style.use('seaborn')
plt.rcParams["figure.figsize"] = (20, 8)

## Utility functions 

In [34]:
# function to fix the ags 
def fix_ags5(x):
    if len((str(x))) == 4:
        return '0' + str(x)
    else: 
        return x

In [83]:
# Function to add the column to the main data
def add_column_to_main_data(data, cluster_data, col_name):
    
    cluster_data['ags5'] = cluster_data['ags5'].apply(fix_ags5)
    
    # Get the cluster and ags 5 and set ags 5 as index 
    cluster_info = cluster_data.set_index('ags5').to_dict()[col_name]
    
    data['cluster'] = '0'
    data['cluster'] = data['ags5'].map(cluster_info)

    # check if the cluster have been allotted correctly 
    print("original cluster data")
    print(cluster_data[col_name].value_counts())
    print("New data")
    print(data.drop_duplicates(subset=['ags5'])['cluster'].value_counts())
    
    return data 

## Read the data

In [74]:
# Read the data
df = pd.read_csv('data_from_2010_to_2019_unemployment_rate.csv', converters={'ags2': str, 'ags5': str})
df.shape

(48120, 3)

In [75]:
df.head()

Unnamed: 0,ags5,date,unemployment_rate
0,1001,2010-01-31,13.7
1,1001,2010-02-28,14.1
2,1001,2010-03-31,13.6
3,1001,2010-04-30,13.1
4,1001,2010-05-31,12.5


In [76]:
df.tail()

Unnamed: 0,ags5,date,unemployment_rate
48115,16077,2019-08-31,7.0
48116,16077,2019-09-30,6.5
48117,16077,2019-10-31,6.5
48118,16077,2019-11-30,6.3
48119,16077,2019-12-31,6.5


## Data Preparation

In [77]:
# Add AGS 2
def get_ags2(x):
    return x[0:2]

df['ags2'] = df['ags5'].apply(get_ags2)
df.head()

Unnamed: 0,ags5,date,unemployment_rate,ags2
0,1001,2010-01-31,13.7,1
1,1001,2010-02-28,14.1,1
2,1001,2010-03-31,13.6,1
3,1001,2010-04-30,13.1,1
4,1001,2010-05-31,12.5,1


## ML Flow Experiment Setup 

In [13]:
def train_heirarchical_cluster_model(data, agregate_col, params, cluster_type="cluster"):
    
    ''' Generate a run name '''
    run_name = 'hierarchical_' + '_'.join(list(params.values())[0:2])
    
    with mlflow.start_run(run_name=run_name):
        
        # Create a list of kreis
        kreis_list = list(data['ags5'].unique())
        
        ''' Generate the dataset from the cluster with the ags and total summation '''
        print("Generating the hierarchical dataset...")
    
        # Filter Data by relevant columns 
        relevant_cols = ['ags5', 'unemployment_rate', 'date']
        relevant_cols.append(agregate_col)
        df = data[relevant_cols]
    
        # Get bottom level data - ags5
        df_ags5 = df.pivot(index="date", columns="ags5", values="unemployment_rate")
        
        # Get middle level data - aggregate_col
        df_middle = df.groupby(["date", agregate_col]).sum().reset_index(drop=False).pivot(index="date", 
                                                                           columns=agregate_col, 
                                                                           values="unemployment_rate")
        
        print(f"Got {df_middle.shape[1]} clusters..")
        
        # Get the top level data
        df_total = df.groupby("date")["unemployment_rate"].sum().to_frame().rename(columns={"unemployment_rate": "total"})
        
        # Join the data frames
        hdf = df_ags5.join(df_middle).join(df_total)

        # Set the index in datetime format
        hdf.index = pd.to_datetime(hdf.index)
        
        print("The dataset size is", hdf.shape)
        
        # Create the hierarchical cluster set 
        cluster_set = df.groupby(agregate_col)['ags5'].apply(lambda x: list(set(x))).to_dict()
        
        # Add total to the dictionary
        cluster_set['total'] = list(cluster_set.keys())
    
        ''' Model Fitting '''
        
        # Get the params
        model_type = params['model']
        rev_type = params['revision_method']
        time_steps = params['time_steps']
        
        # Divide the data into train and test sets
        train_hdf = hdf.head(len(hdf) - time_steps)
        test_hdf = hdf.tail(time_steps)
        
        print(f"Fitting the model {model_type} with revision method {rev_type}.")
        
        # Fit the model 
        hts_model = HTSRegressor(model=model_type, revision_method=rev_type, n_jobs=0)
        hts_model.fit(train_hdf, cluster_set)
        
        print(f"Predicting for the next {time_steps} time steps.")
        
        # Get the predictions 
        preds = hts_model.predict(steps_ahead=time_steps)
        
        ''' Model Evaluation '''
        
        # Get the predicted vales 
        actual_preds = preds.tail(time_steps)
        
        # Check if there are negative values in the predictions 
        negative_pred = (actual_preds < 0).values.any()
        if negative_pred:
            print("There are negative values in the predictions.")
        else: 
            print("No negative values found in the predictions")
            
        # Check if the prediction and test have the same size
        assert actual_preds.shape[0] == test_hdf.shape[0]
        
        # Calculate the mse for each kreis
        total_mse = 0
        total_rmse = 0
        for kreis in kreis_list: 
            total_mse  += mean_squared_error(y_pred=actual_preds[kreis].values, y_true=test_hdf[kreis].values, squared=True)
            total_rmse += mean_squared_error(y_pred=actual_preds[kreis].values, y_true=test_hdf[kreis].values, squared=False)
#             print(total_mse, total_rmse)
        
        # Calculate average mse 
        average_mse = total_mse/len(kreis_list)
        average_rmse = total_rmse/len(kreis_list)
        print("The average error is:", average_mse)
        
        
        ''' Log experiment details in ML Flow '''
        # Log params
        mlflow.log_params(params)
        mlflow.log_params(internal_params)
        mlflow.log_param("Cluster Type", cluster_type)
        mlflow.log_param("Cluster Set", cluster_set)
        
        # Log metrics
        mlflow.log_metric("mse", average_mse)
        mlflow.log_metric("rmse", average_rmse)
        
        negative_pred = 1 if negative_pred else 0 
        mlflow.log_metric("negative_preds", negative_pred)        
        
        return preds
        
        
        

## Model Testing and Parameter tuning

In [65]:
# Set the params 
params = {
    'model':'sarimax',
    'revision_method':'BU',
    'time_steps': 12,
    'model_params': {
        'order': (2, 2, 2)
    }
}

# Run the function 
predictions = train_heirarchical_cluster_model(data=df,
                                 agregate_col='ags2', 
                                 params=params,
                                 cluster_type="ags2")

Generating the hierarchical dataset...
Got 16 clusters..
The dataset size is (120, 418)
Fitting the model sarimax with revision method BU.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 418/418 [02:24<00:00,  2.90it/s]
Fitting models:   8%|████▉                                                           | 32/418 [00:00<00:02, 163.91it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 418/418 [00:02<00:00, 183.24it/s]


No negative values found in the predictions
The average error is: 0.7136935475057907


Revision types to the model.

* **AHP** — average historical proportions (top-down approach),
* **PHA** — proportions of historical averages (top-down approach),
* **FP** — the forecasted proportions (top-down approach),
* **OLS** — the optimal combination using OLS,
* **WLSS** - optimal combination using structurally weighted OLS,
* **WLSV** - optimal combination using variance-weighted OLS.

### Run revision iterations

In [5]:
# Run all combinations for models 
model_types = ['sarimax']
revisions = ['BU', 'AHP', 'PHA', 'FP', 'OLS', 'WLSS', 'WLSV']

# Set the params 
params = {
    'model':'sarimax',
    'revision_method':'BU',
    'time_steps': 12,
    'model_params': {
        'order': (2, 1, 2)
    }
}

for m in model_types:
    for r in revisions:
        print(f"Model: {m} and Revision: {r}")
        
        # Change params 
        params['model'] = m
        params['revision_method'] = r
        
        # Run the prediction model  
        predictions = train_heirarchical_cluster_model(data=df,
                                         agregate_col='ags2', 
                                         params=params)

Model: sarimax and Revision: BU


NameError: name 'df' is not defined

## Models with custom clusters

In [15]:
# read the pca clusters by Amit 
df = pd.read_csv('./../Prakhar_drafts/data_from_2010_to_2019_unemployment_rate.csv', converters={'ags2': str, 'ags5': str})
df2 = pd.read_csv('./../final_dfs/for_modeling/df_final_stationery.csv', converters={'cluster': str, 'ags5': str}) 
df = pd.merge(df, df2[['cluster','ags5']], on = 'ags5')

In [19]:
# Run the code for hts

# Run all combinations for models 
model_types = ['prophet']
revisions = ['WLSS']

# Set the params 
params = {
    'model':'prophet',
    'revision_method':'BU',
    'time_steps': 12,
    'model_params': {
    }
}

for m in model_types:
    for r in revisions:
        print(f"Model: {m} and Revision: {r}")
        
        # Change params 
        params['model'] = m
        params['revision_method'] = r
        
        # Run the prediction model  
        predictions = train_heirarchical_cluster_model(data=df,
                                         agregate_col='cluster', 
                                         params=params,
                                         cluster_type="clusters by Amit")

Model: prophet and Revision: WLSS
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model prophet with revision method WLSS.


Fitting models:   0%|          | 0/405 [00:00<?, ?it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:   0%|          | 1/405 [00:00<01:05,  6.12it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:   0%|          | 2/405 [00:00<00:57,  6.97it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override t

Fitting models:   8%|▊         | 34/405 [00:03<00:45,  8.21it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:   9%|▊         | 35/405 [00:04<00:50,  7.34it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:   9%|▉         | 36/405 [00:04<00:51,  7.11it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:   9%|▉         | 37/405 [00:04<00:51,  7.12it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabli

Fitting models:  16%|█▋        | 66/405 [00:07<00:33,  9.97it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  17%|█▋        | 68/405 [00:08<00:36,  9.15it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  17%|█▋        | 69/405 [00:08<00:37,  9.08it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to

Fitting models:  24%|██▍       | 99/405 [00:13<01:01,  4.97it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  25%|██▍       | 100/405 [00:13<00:53,  5.65it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  25%|██▌       | 102/405 [00:13<00:47,  6.44it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True 

Fitting models:  32%|███▏      | 130/405 [00:18<00:43,  6.39it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  33%|███▎      | 132/405 [00:18<00:34,  7.88it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  33%|███▎      | 133/405 [00:18<00:35,  7.72it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  40%|████      | 162/405 [00:22<00:33,  7.17it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  40%|████      | 163/405 [00:22<00:32,  7.40it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  41%|████      | 165/405 [00:23<00:37,  6.47it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  49%|████▊     | 197/405 [00:25<00:16, 12.33it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  49%|████▉     | 199/405 [00:25<00:16, 12.72it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. R

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  57%|█████▋    | 231/405 [00:28<00:17,  9.94it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  58%|█████▊    | 233/405 [00:28<00:20,  8.55it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  58%|█████▊    | 234/405 [00:29<00:20,  8.18it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True

Fitting models:  65%|██████▌   | 265/405 [00:31<00:11, 12.64it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  66%|██████▌   | 267/405 [00:31<00:10, 13.78it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  66%|██████▋   | 269/405 [00:3

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  74%|███████▍  | 299/405 [00:35<00:27,  3.91it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  74%|███████▍  | 300/405 [00:36<00:25,  4.11it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  74%|███████▍  | 301/405 [00:36<00:22,  4.54it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  75%|███████▍  | 302/405 [00:36<00:20,  5.08it/s]INFO:fbprophet:Disab

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  82%|████████▏ | 332/405 [00:40<00:06, 10.71it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  82%|████████▏ | 334/405 [00:40<00:06, 10.69it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Ru

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  90%|█████████ | 366/405 [00:43<00:03, 12.10it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  91%|█████████ | 368/405 [00:43<00:03, 12.09it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. R

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  99%|█████████▉| 400/405 [00:46<00:00,  9.10it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models:  99%|█████████▉| 401/405 [00:46<00:00,  9.12it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Fitting models: 100%|█████████▉| 403/405 [00:46<00:00, 10.18it/s]INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True

Predicting for the next 12 time steps.


Fitting models: 100%|██████████| 405/405 [10:05<00:00,  1.50s/it]
INFO:hts.core.regressor:Reconciling forecasts using <hts.revision.RevisionMethod object at 0x7fc6446175e0>


There are negative values in the predictions.
The average error is: 10.992515571281032


NameError: name 'internal_params' is not defined

### Cluster 2: k-Modes Clusters

In [2]:
# Read the data 
cluster2 = pd.read_csv('kmodes3.csv', converters={'ags5':str, 'cluster':str})
print(cluster2.shape)
cluster2.head()

FileNotFoundError: [Errno 2] No such file or directory: 'kmodes3.csv'

In [92]:
cluster2_input = add_column_to_main_data(df, cluster2, 'cluster')

original cluster data
2    190
0    114
1     97
Name: cluster, dtype: int64
New data
2    190
0    114
1     97
Name: cluster, dtype: int64


In [94]:
# Run the code for hts

# Run all combinations for models 
model_types = ['sarimax']
revisions = ['BU', 'AHP', 'PHA', 'FP', 'OLS', 'WLSS', 'WLSV']

# Set the params 
params = {
    'model':'sarimax',
    'revision_method':'BU',
    'time_steps': 12,
    'model_params': {
        'order': (2, 1, 2)
    }
}

for m in model_types:
    for r in revisions:
        print(f"Model: {m} and Revision: {r}")
        
        # Change params 
        params['model'] = m
        params['revision_method'] = r
        
        # Run the prediction model  
        predictions = train_heirarchical_cluster_model(data=cluster2_input,
                                         agregate_col='cluster', 
                                         params=params,
                                         cluster_type="clusters by Cinny kModes")

Model: sarimax and Revision: BU
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method BU.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [03:07<00:00,  2.16it/s]
Fitting models:  10%|██████▍                                                         | 41/405 [00:00<00:01, 200.22it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:01<00:00, 206.12it/s]


No negative values found in the predictions
The average error is: 0.14641924858639638
Model: sarimax and Revision: AHP
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method AHP.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [02:49<00:00,  2.39it/s]
Fitting models:   6%|███▋                                                            | 23/405 [00:00<00:01, 225.61it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:01<00:00, 213.18it/s]


No negative values found in the predictions
The average error is: 0.4131294997850292
Model: sarimax and Revision: PHA
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method PHA.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [03:31<00:00,  1.92it/s]
Fitting models:   2%|█▌                                                               | 10/405 [00:00<00:03, 99.02it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:02<00:00, 146.27it/s]


No negative values found in the predictions
The average error is: 0.4528342008754722
Model: sarimax and Revision: FP
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method FP.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [02:49<00:00,  2.39it/s]
Fitting models:   6%|███▊                                                            | 24/405 [00:00<00:01, 236.97it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:01<00:00, 263.26it/s]


(3, 120)
No negative values found in the predictions
The average error is: 26.527558187863697
Model: sarimax and Revision: OLS
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method OLS.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [02:10<00:00,  3.10it/s]
Fitting models:   6%|███▉                                                            | 25/405 [00:00<00:01, 242.55it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:02<00:00, 176.87it/s]


There are negative values in the predictions.
The average error is: 148.35508389347328
Model: sarimax and Revision: WLSS
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method WLSS.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [02:55<00:00,  2.31it/s]
Fitting models:   5%|███                                                             | 19/405 [00:00<00:02, 187.87it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:01<00:00, 203.87it/s]


There are negative values in the predictions.
The average error is: 1.7256816016677556
Model: sarimax and Revision: WLSV
Generating the hierarchical dataset...
Got 3 clusters..
The dataset size is (120, 405)
Fitting the model sarimax with revision method WLSV.


Fitting models: 100%|████████████████████████████████████████████████████████████████| 405/405 [02:19<00:00,  2.91it/s]
Fitting models:   5%|███▎                                                            | 21/405 [00:00<00:01, 209.29it/s]

Predicting for the next 12 time steps.


Fitting models: 100%|███████████████████████████████████████████████████████████████| 405/405 [00:01<00:00, 225.78it/s]


There are negative values in the predictions.
The average error is: 1460.5643121908786


## Cluster 3 - tsne

In [None]:
# Read the tsne data
cluster3 = pd.read_csv('kmodes3.csv', converters={'ags5':str, 'cluster':str})
print(cluster3.shape)
cluster3.head()