In [1]:
import os
import io
import sys
import json
import time
import random
import logging

import numpy as np
import pandas as pd
import datetime

import plotly.io as pio
import streamlit as st
import seaborn as sns
import zipfile

import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

from utils.manager.login import *
from utils.inputs.validation import *
from utils.inputs.preprocess import *
from utils.inputs.ads import *
from utils.modeling.search import *
from utils.modeling.general import *
from utils.modeling.plot import *
from utils.analysis.tables import *
from utils.analysis.plot import *

# Configure the root logger so that the notebook's `logging.info(...)` calls
# are actually emitted, and so that re-running this cell is idempotent.
logger = logging.getLogger()

# The root logger defaults to WARNING, which drops INFO records before any
# handler sees them; lower it to match the stream handler below.
logger.setLevel(logging.INFO)

# Remove handlers installed by a previous run of this cell; otherwise every
# re-run would add another handler and duplicate each log line.
for _stale_handler in [
    h for h in logger.handlers
    if h.get_name() in ("notebook_null", "notebook_stream")
]:
    logger.removeHandler(_stale_handler)

# Add NullHandler with CRITICAL log level (it never emits anything; kept so the
# `null_handler` name stays available to the rest of the notebook)
null_handler = logging.NullHandler()
null_handler.set_name("notebook_null")
null_handler.setLevel(logging.CRITICAL)
logger.addHandler(null_handler)

# Add StreamHandler with INFO log level
stream_handler = logging.StreamHandler()
stream_handler.set_name("notebook_stream")
stream_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
stream_handler.setLevel(logging.INFO)
logger.addHandler(stream_handler)

# The root logger has no parent, so this flag has no practical effect here;
# kept for parity with the original configuration.
logger.propagate = False

## Inputs

In [2]:
# --- Data source --------------------------------------------------------
# CSV file handed to validate_input_file
uploaded_file = 'Agency Services_multi.csv'

# Passed to validate_input_file alongside the file
external_features = True

# --- Run options --------------------------------------------------------
# Country selection
country_name = "CA"

# Weekend option
weekend_drop = False

# Frequency code; determines forecast_period ("D", "W" or "M")
forecast_freq = "D"

# When True, find_optimal_window is used to trim the training data
data_selection = True

In [3]:
# Forecast horizon (number of periods) for each supported frequency code.
FORECAST_PERIODS = {"D": 92, "W": 26, "M": 12}

# Fail fast on an unsupported frequency instead of leaving `forecast_period`
# undefined and hitting a NameError in a later cell.
if forecast_freq not in FORECAST_PERIODS:
    raise ValueError(
        f"Unsupported forecast_freq {forecast_freq!r}; "
        f"expected one of {sorted(FORECAST_PERIODS)}"
    )

forecast_period = FORECAST_PERIODS[forecast_freq]

## Validation

In [4]:
# Validate the uploaded file and load it into `data_full`.
try:
    # Validate the input file
    data_full, error_message = validate_input_file(uploaded_file, external_features)
    # Check the reported error before touching `data_full`: logging its shape
    # first could fail on an unusable frame and hide the real validation message.
    # NOTE(review): assumes data_full may not be a DataFrame when error_message is set — confirm.
    if error_message:
        raise ValueError(error_message)
    logging.info(f"Data Size: {data_full.shape}")
except Exception as e:
    # Re-raise with context; `from e` records the original exception as the cause.
    raise ValueError(f"An error occurred while validating the file: {str(e)}") from e

In [5]:
# Display the frame returned by validate_input_file.
data_full

Unnamed: 0,ds,y,Insurance_Actual
0,2021-01-01,0.0,9315434.00
1,2021-02-01,0.0,9315434.00
2,2021-03-01,0.0,9315434.00
3,2021-04-01,228.0,9315434.00
4,2021-05-01,186.0,9315434.00
...,...,...,...
1426,2024-11-27,,31824538.23
1427,2024-11-28,,31824538.23
1428,2024-11-29,,31824538.23
1429,2024-11-30,,31824538.23


## Processing

In [6]:
# Split the validated data into `train_full` and `forecast_full`.
try:
    # Process the input file
    train_full, forecast_full, error_message = process_input_file(data_full)
    # Check the reported error before touching the frames: logging their shapes
    # first could fail on an unusable frame and hide the real processing message.
    # NOTE(review): assumes the frames may not be DataFrames when error_message is set — confirm.
    if error_message:
        raise ValueError(error_message)
    logging.info(f"Train Data Size: {train_full.shape}")
    logging.info(f"Forecast Data Size: {forecast_full.shape}")
except Exception as e:
    # Re-raise with context; `from e` records the original exception as the cause.
    raise ValueError(f"An error occurred while processing the file: {str(e)}") from e

In [7]:
# Display the training frame returned by process_input_file.
train_full

Unnamed: 0,ds,y,Insurance_Actual
0,2021-01-01,0.0,9315434.0
1,2021-01-02,232.0,6393191.0
2,2021-01-03,190.0,6619175.0
3,2021-01-04,116.0,7161452.0
4,2021-01-05,0.0,7674701.0
...,...,...,...
1066,2023-12-03,1.0,19675217.0
1067,2023-12-04,286.0,15853787.0
1068,2023-12-05,398.0,16167232.0
1069,2023-12-06,410.0,16550565.0


In [8]:
# Display the forecast frame returned by process_input_file.
forecast_full

Unnamed: 0,ds,y,Insurance_Actual
0,2023-01-08,0.0,19162757.00
1,2023-01-09,0.0,21046220.00
2,2023-01-10,0.0,23271787.00
3,2023-01-11,0.0,22844500.00
4,2023-01-12,0.0,24809700.00
...,...,...,...
699,2024-12-07,0.0,23964801.09
700,2024-12-08,0.0,25984869.89
701,2024-12-09,0.0,28553607.89
702,2024-12-10,0.0,32407222.93


## Automated Data Selection

In [9]:
# Choose the training subset: either let find_optimal_window pick the starting
# offset into `train_full`, or keep the full history.

# Rows skipped on top of the offset returned by find_optimal_window.
# NOTE(review): the original comment says these 180 days are "for feature
# engineering", yet adding them to the slice start drops 180 more rows rather
# than keeping them — confirm the intended direction.
FEATURE_ENGINEERING_OFFSET = 180

try:
    if data_selection:
        # Find optimal window
        optimal_window_size, error_message = find_optimal_window(train_full)

        if error_message:
            raise ValueError(error_message)

        logging.info(f"Optimal Window Size: {optimal_window_size}")

        optimal_window_size += FEATURE_ENGINEERING_OFFSET
    else:
        # No truncation. Defining the offset here keeps later cells that
        # display `optimal_window_size` working when data selection is off.
        optimal_window_size = 0

    # Positional slice: drop the first `optimal_window_size` rows.
    train_optimal = train_full.iloc[optimal_window_size:].copy(deep=True)

    # An empty slice would otherwise surface later as a missing start date.
    if train_optimal.empty:
        raise ValueError(
            f"No training rows remain after skipping the first {optimal_window_size} rows"
        )

    logging.info(f"Optimal Train Data Size: {train_optimal.shape}")

    # Earliest date in the selected training data
    optimal_window_date = train_optimal['ds'].min()
except Exception as e:
    # Re-raise with context; `from e` records the original exception as the cause.
    raise ValueError(f"An error occurred while finding the optimal window: {str(e)}") from e

3090 180
 60
180 360
180 270
60 90
90 120
270 540
360 540
270 360
120 150
150 180
360 720
540 720
360 450
180 210
720 900
450 540
210 240
540 810
720 1071
240 270
900 1071
270 300
300 330
540 630
330 360
360 390
810 1071
390 420
630 720
420 450
720 810
450 480
480 510
510 540
540 570
810 900
570 600
600 630
630 660
660 690
900 990
690 720
720 750
750 780
990 1071
780 810
810 840
840 870
870 900
900 930
930 960
960 990
990 1020
1020 1050
1050 1071


In [10]:
# Display the row offset used to slice train_full (includes the 180 added in the data-selection cell).
optimal_window_size

450

In [11]:
# Display the training subset produced by the data-selection cell.
train_optimal

Unnamed: 0,ds,y,Insurance_Actual
450,2022-03-27,0.0,20450979.25
451,2022-03-28,367.0,20450979.25
452,2022-03-29,371.0,20450979.25
453,2022-03-30,386.0,20450979.25
454,2022-03-31,321.0,20450979.25
...,...,...,...
1066,2023-12-03,1.0,19675217.00
1067,2023-12-04,286.0,15853787.00
1068,2023-12-05,398.0,16167232.00
1069,2023-12-06,410.0,16550565.00


In [12]:
# Inspect error_message as left by the most recent helper call that assigned it.
error_message