# Log and Outputs Dir Config 

#### DON'T CHANGE

In [12]:
import logging
import os
import sys
import pandas as pd
import numpy as np
import optuna
from forecasting_pipeline import forecast_pipeline_debug, add_hist_range

class SuppressFilter(logging.Filter):
    """Logging filter that drops records from loggers whose name begins with a prefix.

    This inverts the usual ``logging.Filter`` behaviour: a record is rejected
    when ``record.name`` starts with the ``name`` given at construction, and
    accepted otherwise. With the default empty prefix every record is rejected,
    because every logger name starts with the empty string.
    """

    def __init__(self, name=''):
        # The base class stores the prefix on ``self.name``.
        logging.Filter.__init__(self, name)

    def filter(self, record):
        """Return False for records to suppress, True for records to keep."""
        if record.name.startswith(self.name):
            return False
        return True

## Set the current Notebook Path and Experiment Number

#### CHANGE

In [13]:

# --- CONFIGURATION (Stays in the notebook) ---
# Directory where the log and output files are saved. It is resolved to an
# absolute path *before* changing into it, so that later
# os.path.join(notebook_path, ...) calls still point at this directory; a
# relative value such as 'outputs' would otherwise be applied a second time on
# top of the new working directory.
notebook_path = os.path.abspath('.')  # <- Outputs would be saved here
os.chdir(notebook_path)
print(f"Current working directory: {os.getcwd()}")

# Label embedded in the log file name and the output file names.
EXPERIMENT_NUMBER = "Template"  # <- Change this for each experiment

Current working directory: C:\Users\aksha\FastAPI


#### DON'T CHANGE

In [14]:
# --- Step 1: Define the log file name and path ---
log_filename = f'exp_{EXPERIMENT_NUMBER}_pipeline.log'
log_file_path = os.path.join(notebook_path, log_filename)

# --- Step 2: Reset the root logger ---
logger = logging.getLogger()
# Detach AND close any handlers left over from a previous run of this cell, so
# a re-run neither leaks the old log file handle nor duplicates every message.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

logger.setLevel(logging.INFO)

# --- Step 3: Create the file and console handlers ---
# FileHandler owns its stream, so logging.shutdown() closes the log file.
# (A StreamHandler wrapped around open() is only flushed on shutdown and the
# file would stay open.) mode='w' starts a fresh log on every run, and the
# handler flushes after each record.
file_handler = logging.FileHandler(log_file_path, mode='w')
file_handler.setLevel(logging.INFO)
log_file = file_handler.stream  # kept so code that refers to log_file still works
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)

# --- Step 4: Formatter shared by both handlers ---
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

# --- Step 5: Filters that drop records from the 'cmdstanpy' and 'prophet' loggers ---
suppress_cmdstanpy_filter = SuppressFilter('cmdstanpy')
suppress_prophet_filter = SuppressFilter('prophet')

# --- Step 6: Wire everything together ---
# The filters sit on the handlers, so the suppressed loggers reach neither the
# log file nor the console output.
for log_handler in (file_handler, console_handler):
    log_handler.setFormatter(formatter)
    log_handler.addFilter(suppress_cmdstanpy_filter)
    log_handler.addFilter(suppress_prophet_filter)
    logger.addHandler(log_handler)

logger.info(f"Logging configured. Log file: {log_file_path}")

2025-10-13 17:04:24 - INFO - Logging configured. Log file: .\exp_Template_pipeline.log


# Data Loading

In [15]:
# Read the raw input data from CSV.
# NOTE(review): this is an absolute, machine-specific path -- consider deriving
# it from a configurable data directory so the notebook runs on other machines.
data_csv_path = "/Users/aksha/FastAPI/Data/Anonymized_PB-full.csv"
df_data = pd.read_csv(data_csv_path)

### Ensure Date column is named 'date'

In [16]:
# Standardise the date column name: the following cells select it as lowercase 'date'.
# rename() returns a new frame instead of mutating in place, so re-running this
# cell is harmless.
df_data = df_data.rename(columns={"Date": "date"})

# rename() silently does nothing when there is no "Date" column, so fail here
# with a clear message instead of a less specific KeyError further down.
if "date" not in df_data.columns:
    raise KeyError(
        "Expected a 'Date' or 'date' column in the input data; found: "
        + ", ".join(map(str, df_data.columns))
    )

### Main Config: 

#### CHANGE

#### Key Hierarchy and Target Variable

In [17]:
# EDIT THESE AS PER DATA
# Hierarchy columns that are joined into the per-series key.
# Optional: a predefined key column can be used instead.
KEY_COLS = [
    'Channel',
    'Chain',
    'Depot',
    'SubCat',
    'SKU',
]
# Column holding the values passed to the pipeline as the target.
TARGET_COL = 'UnitsSold'

# DON'T CHANGE THESE: fixed names for the key and date columns.
key_col = 'key'
DATE_COL = 'date'

In [18]:
# DON'T CHANGE THESE: fixed names for the date and key columns.
# This repeats the assignments made at the end of the preceding cell, so
# running it again leaves both values unchanged.
DATE_COL, key_col = 'date', 'key'

### - Creating key
### - Creating Product Mapping
### - Cleaning Duplicates
### - Adding Historical Range values for each key
### - Formatting dtypes for input dataframe

#### DON'T CHANGE (IDEALLY)

In [19]:
# Nothing to change here as such


def joiner(x):
    """Join the values of one row into a single '_'-separated string."""
    return '_'.join(map(str, x))


# Build the per-series key from the hierarchy columns.
df_data[key_col] = df_data[KEY_COLS].apply(joiner, axis=1)
df_data[key_col] = df_data[key_col].astype(str)

# One row per distinct key together with its hierarchy levels.
product_mapping = df_data[[key_col] + KEY_COLS].drop_duplicates().reset_index(drop=True)

# Work on a copy restricted to the three columns used from here on.
df = df_data[[key_col, DATE_COL, TARGET_COL]].copy()

# Make the target numeric BEFORE aggregating, so sum() adds numbers rather than
# concatenating strings; unparseable values become NaN.
df[TARGET_COL] = pd.to_numeric(df[TARGET_COL], errors='coerce')

# Collapse duplicate (key, date) rows into one by summing the target. groupby
# is not in-place: the result has to be assigned back, otherwise the duplicates
# stay in df.
#   dropna=False -> rows with a missing date are kept rather than silently dropped.
#   min_count=1  -> a group whose values are all missing stays NaN instead of becoming 0.
df = (
    df.groupby([key_col, DATE_COL], dropna=False)[TARGET_COL]
    .sum(min_count=1)
    .reset_index()
)

# Project helper, called with the key and date column names.
# NOTE(review): presumably adds the 'hist_range' column used later -- confirm in forecasting_pipeline.
df = add_hist_range(df, key_col=key_col, date_col=DATE_COL)

# Final dtypes for the pipeline input: dates parsed as YYYY-MM-DD, keys as strings.
df[DATE_COL] = pd.to_datetime(df[DATE_COL], format='%Y-%m-%d')
df[key_col] = df[key_col].astype(str)

  df[DATE_COL] = pd.to_datetime(df[DATE_COL])


### Define key-wise Seasonality, each key must have either a 'Y' or 'N' value

#### CHANGE

In [None]:
# Placeholder seasonality: every unique key gets a random 'Y' or 'N' flag.
# The global NumPy seed is fixed first so the same flags are drawn on each run.
np.random.seed(42)

key_to_seasonal = {}
for series_key in df['key'].unique():
    # One draw per key, in order of first appearance.
    key_to_seasonal[series_key] = np.random.choice(['Y', 'N'])

# Attach each key's flag to all of its rows.
df['seasonal'] = df['key'].map(key_to_seasonal)

#### DON'T CHANGE

In [None]:
# Column-name settings handed to the forecast function. DON'T CHANGE THESE.
# ("output_dir" is intentionally left out; it used to be notebook_path.)
parameters = dict(
    date_col=DATE_COL,
    target_col=TARGET_COL,
    key_col="key",
    seasonal_col="seasonal",
    hist_range_col="hist_range",
)

# Pipeline Run

### CHANGE THE CUTOFFS HERE AND FORECAST HORIZON

In [None]:
import warnings
# logging.getLogger('prophet').setLevel(logging.WARNING)
# logging.getLogger('cmdstanpy').setLevel(logging.WARNING)
# Only show Optuna messages at WARNING level and above.
optuna.logging.set_verbosity(optuna.logging.WARNING)


# --- Debug subset: run the pipeline on a small, reproducible sample of keys ---
# Keys are sampled from the DISTINCT key values. Sampling df['key'] directly
# picks rows, which favours keys with more rows and can return the same key
# twice. The fixed random_state makes a re-run select the same keys.
N_SAMPLE_KEYS = 2
SAMPLE_SEED = 42
unique_keys = df['key'].drop_duplicates()
sampled_keys = unique_keys.sample(
    n=min(N_SAMPLE_KEYS, len(unique_keys)),  # never request more keys than exist
    random_state=SAMPLE_SEED,
)
# Alternative subset: df_test = df[df['hist_range'] == '<6'].copy().reset_index(drop=True)
df_test = df[df['key'].isin(sampled_keys)].copy().reset_index(drop=True)


# 1. Run the pipeline
results, detailed_results, feature_importance_df = forecast_pipeline_debug(
    df=df_test,
    parameters=parameters,
    validation_cutoff="2024-05-01",  # Train up to this date (included)
    test_cutoff="2024-08-01",        # Validation cut-off (included)
    forecast_cutoff="2024-11-01",    # Test cut-off (included)
    forecasting_horizon=3,           # Number of months to forecast after forecast_cutoff
)


# 2. Save the results from the notebook
logger.info("Pipeline finished. Saving output files...")
results.to_csv(os.path.join(notebook_path, f"{EXPERIMENT_NUMBER}_pipeline_output.csv"), index=False)
detailed_results.to_csv(os.path.join(notebook_path, f"{EXPERIMENT_NUMBER}_detailed_pipeline_output.csv"), index=False)
# Feature importances are written only when there is something to write;
# the None check avoids an AttributeError if the pipeline returned no frame.
if feature_importance_df is not None and not feature_importance_df.empty:
    feature_importance_df.to_excel(os.path.join(notebook_path, f"{EXPERIMENT_NUMBER}_feature_importances.xlsx"), index=True)

logger.info("===================================")
logger.info(">>> FORECASTING PIPELINE COMPLETE <<<")
logger.info("===================================")
# Flush and close all logging handlers now that the run is over.
logging.shutdown()

# Analyze the Results as Needed

In [None]:
# Sum the actuals and each forecast variant per date and plot one line per column.
# min_count=1 leaves a date as NaN when a column has no values for it, rather
# than reporting a total of 0.
plot_columns = [
    'actual_value',
    'fcst_best_raw_model_unadjusted',
    'fcst_best_raw_model_adjusted',
    'fcst_best_adj_model_adjusted',
]
totals_by_date = results.groupby('date')[plot_columns].sum(min_count=1)
totals_by_date.plot()