Use the `newpycaret` environment to run this notebook.

In [2]:
# Packages
import os
from datetime import datetime

import numpy as np
import pandas as pd
import snowflake.connector
from neuralprophet import NeuralProphet, set_log_level, set_random_seed
from sklearn.impute import SimpleImputer

2. Load Data
_______________________________________________________________

### To do: copy the two lines below into a new cell, enter your credentials, and run it to save them in the kernel environment. Then delete that cell so the credentials are not stored in the notebook.
%env snowflakeuser=<your_snowflake_username> <br>
%env snowflakepass=<your_snowflake_password>

In [4]:
# Query Snowflake for the net sales history and load it into df_in.

# Snowflake connection parameters. The user name and password are read from
# environment variables so they never appear in the notebook itself.
# NOTE(review): the ACCOUNTADMIN role is very broad for a read-only reporting
# query - confirm whether a narrower role can be used.
connection_params = {
    "user": os.environ['snowflakeuser'],
    "password": os.environ['snowflakepass'],
    "account": "zib52348.us-east-1",
    "role": "ACCOUNTADMIN",
    "warehouse": "REPORTING",
    "database": "ANALYTICS",
    "schema": "FORECASTING",
}

# Read the SQL text first, so a missing query file fails before any
# connection is opened.
with open('net_sales_query.sql', 'r') as query:
    net_sales_sql = query.read()

# Establish a connection to Snowflake
conn = snowflake.connector.connect(**connection_params)
try:
    # Run the query and load the full result set into a DataFrame
    df_in = pd.read_sql_query(net_sales_sql, conn)
finally:
    # Close the connection even when the query raises
    conn.close()

  df_in = pd.read_sql_query(query.read(),conn)



In [5]:
### 1. Data prep ###

# Parse the month field as a date. The conversion is written onto df_in
# itself, so the raw pull carries a datetime MONTH column from here on.
df_in["MONTH"] = pd.to_datetime(df_in["MONTH"])

# ***** ADJUSTABLE ***** #
# Forecasts are wanted for Jan - Dec 2023, so every month after Dec 2022 is held out
training_cutoff = pd.Timestamp(2022, 12, 1)
in_training_window = df_in["MONTH"] <= training_cutoff

# Keep only the fields of interest, ordered by series and then by month.
# NOTE(review): an earlier note mentioned removing some stray months (and
# testing without July); no such filter is applied in this cell.
df = (
    df_in.loc[in_training_window, ["DEP_ENT", "MONTH", "NET_SALES"]]
    .sort_values(["DEP_ENT", "MONTH"])
)


### 2. Spot checks ###

# Data types of the three columns
print(df.dtypes, "-----", sep="\n")

# Number of unique time series: 14 different department-entity combinations
print(df["DEP_ENT"].nunique(), "-----", sep="\n")

# Months available per dep-ent: 3 dep-ent do not have all months of data
print(df.groupby(["DEP_ENT"]).size(), "-----", sep="\n")

# Null count per column: no nulls
print(df.isnull().sum())

DEP_ENT              object
MONTH        datetime64[ns]
NET_SALES           float64
dtype: object
-----
14
-----
DEP_ENT
160_155    84
170_155    84
200_155    84
200_310    84
210_155    84
210_165    84
210_310    84
220_155    84
220_310    78
240_155    84
250_155    84
250_165    50
250_310    84
260_155    47
dtype: int64
-----
DEP_ENT      0
MONTH        0
NET_SALES    0
dtype: int64


# Multiple Series with Neural Prophet
https://neuralprophet.com/tutorials/tutorial09.html

In [7]:
### Additional data prep ###

# Series of net sales indexed by (DEP_ENT, MONTH)
df_s = df.set_index(['DEP_ENT', 'MONTH'])['NET_SALES']

# Back to a flat frame with one row per (DEP_ENT, MONTH); values are cast to
# float so the imputed column always has a floating dtype.
df_d = df_s.astype(float).to_frame().reset_index()

# Impute missing sales with the mean of the SAME series. A single mean pooled
# over every department-entity would mix series of very different scale.
# The pooled mean is only used as a fallback for a series with no observed
# values at all.
series_mean = df_d.groupby('DEP_ENT')['NET_SALES'].transform('mean')
overall_mean = df_d['NET_SALES'].mean()
df_d['NET_SALES'] = df_d['NET_SALES'].fillna(series_mean).fillna(overall_mean)

# Rename to the ds / y / ID column names used by the NeuralProphet cells below
df_d = df_d.rename(columns={'MONTH': 'ds', 'NET_SALES': 'y', 'DEP_ENT': 'ID'})

In [10]:
# Disable logging messages unless there is an error
set_log_level("ERROR")

# Declare number of forecasting periods ahead
fh = 12

# Seed applied before every fit so that re-running the notebook reproduces the
# same forecasts, and so one series' result does not depend on the others.
random_seed = 42

# Confidence interval: 90% -> 5% in each tail
confidence_level = 0.9
boundaries = round((1 - confidence_level) / 2, 2)
# NeuralProphet only accepts quantile values in between 0 and 1.
# The upper bound is rounded because 0.9 + 0.05 is not exactly 0.95 in
# floating point.
quantiles = [boundaries, round(confidence_level + boundaries, 2)]

# Dictionaries keyed by ID holding the fitted model and its forecast
id_models = {}
id_forecasts = {}

# Group data by ID (one group per department-entity series)
grouped = df_d.groupby('ID')

# Loop through each group (ID) and create models and forecasts
for group_name, group_data in grouped:
    # Reset the random state so each model is trained reproducibly
    set_random_seed(random_seed)

    # Initialize and train the model on monthly (month-start) data
    model = NeuralProphet(
        quantiles=quantiles,
    )
    model.fit(group_data, freq="MS")

    # Store the model for the current ID
    id_models[group_name] = model

    # Future frame: every historic row plus the next `fh` periods
    future = model.make_future_dataframe(
        group_data,
        periods=fh,
        n_historic_predictions=len(group_data),
    )

    # Predict over the history and the next `fh` periods
    forecast = model.predict(future)

    # Store the forecast for the current ID
    id_forecasts[group_name] = forecast

Missing logger folder: c:\Users\crudek\Github\practical\python_fcast\lightning_logs


Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")



Predicting: 1it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 5it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/204 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 4it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 6it [00:00, ?it/s]

Finding best initial lr:   0%|          | 0/204 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 3it [00:00, ?it/s]

In [9]:
# Columns kept from each forecast: the date, the point forecast and the
# 5% / 95% quantile columns
pred_cols = ["ds", "yhat1", "yhat1 5.0%", "yhat1 95.0%"]

# One frame per ID (dep-ent) holding only the last `fh` rows of its forecast,
# i.e. the predictions in the future, tagged with the ID they belong to.
# .assign returns a new frame, so no column is written onto a slice.
future_preds = [
    forecast.tail(fh)[pred_cols].assign(ID=group_name)
    for group_name, forecast in id_forecasts.items()
]

# Build the master list of predictions with a single concat instead of
# growing a DataFrame inside the loop; stays an empty frame if there are no
# forecasts.
all_np_preds = pd.concat(future_preds) if future_preds else pd.DataFrame()

In [53]:
# Initial sales pull, reshaped to the ID / y / ds layout of the predictions.
# rename() returns a new frame here: renaming df_in in place would break a
# re-run of the earlier cells that still expect the MONTH / NET_SALES /
# DEP_ENT column names.
sales = (
    df_in
    .rename(columns={'MONTH': 'ds', 'NET_SALES': 'y', 'DEP_ENT': 'ID'})
    .loc[:, ["ID", "y", "ds"]]
)

# Make sure ds is a datetime so the comparison and the merge below do not
# depend on another cell having converted the column already
sales = sales.assign(ds=pd.to_datetime(sales["ds"]))

# only include sales data for the full months we have (strictly before the
# first day of the current month)
first_of_month = datetime.today().replace(day=1).date()
sales = sales[sales['ds'] < pd.to_datetime(first_of_month)]

# combine prediction data and original sales data; the left join keeps every
# prediction row, with y missing where no sales row matches
merged = pd.merge(all_np_preds, sales, how='left', on=['ID', 'ds'])

In [63]:
### Budget ###

# Read the SQL text first, so a missing query file fails before any
# connection is opened.
with open('budget_query.sql', 'r') as query:
    budget_sql = query.read()

# Establish a connection to Snowflake
conn = snowflake.connector.connect(**connection_params)
try:
    # Run the query and load the full result set into a DataFrame
    budg = pd.read_sql_query(budget_sql, conn)
finally:
    # Close the connection even when the query raises
    conn.close()

# Convert month to datetime, keep the columns of interest and rename them to
# the ds / ID names used by the prediction data. Each step returns a new
# frame, so nothing is renamed in place on a column-selected slice.
budg = (
    budg
    .assign(MONTH=lambda frame: pd.to_datetime(frame["MONTH"]))
    .loc[:, ["MONTH", "BUDGET_AMOUNT", "DEP_ENT"]]
    .rename(columns={'MONTH': 'ds', 'DEP_ENT': 'ID'})
)

  budg = pd.read_sql_query(query.read(),conn)



In [64]:
# Attach the budget data to the prediction/sales rows. The left join on
# ID and ds keeps every row of `merged`.
merged2 = merged.merge(budg, on=['ID', 'ds'], how='left')

In [65]:
# Save the combined predictions, sales and budget table as a CSV file
results_path = 'neuro_prophet_results.csv'
merged2.to_csv(results_path)