In [1]:
import os
import io
import sys
import json
import time
import random
import logging

import numpy as np
import pandas as pd
import datetime

import plotly.io as pio
import streamlit as st
import seaborn as sns
import zipfile

import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error

from utils.manager.login import *
from utils.inputs.validation import *
from utils.inputs.preprocess import *
from utils.inputs.ads import *
from utils.modeling.search import *
from utils.modeling.general import *
from utils.modeling.plot import *
from utils.analysis.tables import *
from utils.analysis.plot import *

# Configure the root logger so the notebook's logging.info(...) progress
# messages are printed with a timestamped format. Records propagated from
# library loggers (the original note mentions cmdstanpy) go through the same
# handlers.
logger = logging.getLogger()

# The root logger defaults to WARNING, which drops INFO records before they
# ever reach the handlers below; lower it so the progress messages show up.
logger.setLevel(logging.INFO)

# Re-running this cell must not stack duplicate handlers (every duplicate
# would print each message one more time), so remove the handlers a previous
# run of this cell installed before adding fresh ones.
for stale_handler in list(logger.handlers):
    if stale_handler.get_name() in ("notebook-null", "notebook-stream"):
        logger.removeHandler(stale_handler)

# Add NullHandler with CRITICAL log level (it never emits anything itself)
null_handler = logging.NullHandler()
null_handler.set_name("notebook-null")
null_handler.setLevel(logging.CRITICAL)
logger.addHandler(null_handler)

# Add StreamHandler with INFO log level
stream_handler = logging.StreamHandler()
stream_handler.set_name("notebook-stream")
stream_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
stream_handler.setLevel(logging.INFO)
logger.addHandler(stream_handler)

# The root logger has no parent, so this has no practical effect; kept for
# parity with the original configuration.
logger.propagate = False

## Inputs

In [2]:
# Run parameters for this notebook. The values are hard-coded here; the
# original comments referred to UI widgets (dropdown / radio / uploader).

# Country code for the run (not used in the cells shown in this section)
country_name = "CA"

# Weekend handling flag (presumably: drop weekend rows when True; TODO confirm)
weekend_drop = False

# Forecast frequency code; the next cell maps "D", "W" and "M" to a horizon
forecast_freq = "D"

# When True, the training set is truncated to an automatically chosen window
data_selection = True

# Passed to validate_input_file together with the input file
external_features = False

# Input CSV handed to validate_input_file (relative path)
uploaded_file = 'Agency Services.csv'

In [3]:
# Forecast horizon for each supported frequency code. Presumably counted in
# periods of that frequency (92 daily, 26 weekly, 12 monthly steps); confirm
# against the forecasting code that consumes forecast_period.
FORECAST_PERIOD_BY_FREQ = {"D": 92, "W": 26, "M": 12}

# Fail loudly on an unsupported code. Without this check forecast_period would
# stay undefined (or keep a stale value from an earlier run of this cell).
if forecast_freq not in FORECAST_PERIOD_BY_FREQ:
    raise ValueError(
        f"Unsupported forecast_freq {forecast_freq!r}; "
        f"expected one of {sorted(FORECAST_PERIOD_BY_FREQ)}"
    )

forecast_period = FORECAST_PERIOD_BY_FREQ[forecast_freq]

## Validation

In [4]:
# Validate the raw input file. On failure a ValueError is raised whose
# message is prefixed with the stage name.
try:
    # Validate the input file
    data_full, error_message = validate_input_file(uploaded_file, external_features)

    # Check the reported error before touching data_full: if validation failed
    # the returned data may not be usable, and logging its shape first would
    # replace the validator's message with an unrelated attribute error.
    if error_message:
        raise ValueError(error_message)

    logging.info(f"Data Size: {data_full.shape}")
except Exception as e:
    # Any failure above (including the ValueError raised for error_message)
    # is re-raised with the stage prefix; "from e" keeps the original
    # traceback attached as the explicit cause.
    raise ValueError(f"An error occurred while validating the file: {str(e)}") from e

## Processing

In [5]:
# Split the validated data into training and forecast frames. On failure a
# ValueError is raised whose message is prefixed with the stage name.
try:
    # Process the input file
    train_full, forecast_full, error_message = process_input_file(data_full)

    # Check the reported error before touching the returned frames: if
    # processing failed they may not be usable, and logging their shapes first
    # would replace the real message with an unrelated attribute error.
    if error_message:
        raise ValueError(error_message)

    logging.info(f"Train Data Size: {train_full.shape}")
    logging.info(f"Forecast Data Size: {forecast_full.shape}")
except Exception as e:
    # Any failure above (including the ValueError raised for error_message)
    # is re-raised with the stage prefix; "from e" keeps the original
    # traceback attached as the explicit cause.
    raise ValueError(f"An error occurred while processing the file: {str(e)}") from e

## Automated Data Selection

In [6]:
# Build train_optimal (the training data actually used) and record the first
# date it covers in optimal_window_date. With data_selection enabled the
# training set is truncated using the offset from find_optimal_window;
# otherwise the full training set is used unchanged.
try:
    if data_selection:
        # Extra offset added on top of the optimal window for feature
        # engineering. The original comment calls this "180 days"; it is
        # applied as a row offset, so that only holds for daily data.
        # NOTE(review): adding to the start offset drops MORE leading rows.
        # Confirm this matches the intent of reserving history for feature
        # engineering.
        FEATURE_ENGINEERING_OFFSET = 180

        # Find optimal window
        optimal_window_size, error_message = find_optimal_window(train_full)

        # Check the reported error before logging the result, so a failed
        # search surfaces its own message and the failing run's window size
        # is not logged as if it were valid.
        if error_message:
            raise ValueError(error_message)

        logging.info(f"Optimal Window Size: {optimal_window_size}")

        # Downstream cells may read optimal_window_size, so the offset is
        # folded into the same name, exactly as before.
        optimal_window_size += FEATURE_ENGINEERING_OFFSET

        # Truncate the train set: keep rows from the offset onward
        train_optimal = train_full[optimal_window_size:].copy(deep=True)
    else:
        # No automated selection: use the whole training set
        train_optimal = train_full.copy(deep=True)

    logging.info(f"Optimal Train Data Size: {train_optimal.shape}")

    # An offset at or beyond the end of the data leaves nothing to train on;
    # fail here rather than carrying an empty frame and a NaT start date
    # into the modeling cells.
    if train_optimal.empty:
        raise ValueError("no training rows remain after applying the optimal window")

    # Earliest date covered by the selected training data
    optimal_window_date = train_optimal['ds'].min()
except Exception as e:
    # Any failure above is re-raised with the stage prefix; "from e" keeps
    # the original traceback attached as the explicit cause.
    raise ValueError(f"An error occurred while finding the optimal window: {str(e)}") from e