# In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 0.  Imports & basic config
# ──────────────────────────────────────────────────────────────────────────────
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets

# Handy helpers
def dim(n):
    """Return the rule-of-thumb embedding width for a feature with ``n`` levels.

    The width is half the cardinality (integer division) plus five,
    capped at 100.
    """
    suggested = n // 2 + 5
    return suggested if suggested < 100 else 100

# -----------------------------------------------------------------------------
# 1.  Load the saved model
# -----------------------------------------------------------------------------
MODEL_PATH   = r" ... Sales_Forecasting_LSTM_Model.h5"
# The model is only used for inference here, so compile=False skips restoring
# the training configuration and makes loading faster.
model        = load_model(MODEL_PATH, compile=False)

# -----------------------------------------------------------------------------
# 2.  Load raw data & recreate preprocessing objects
# -----------------------------------------------------------------------------
CSV_PATH     = r" ... csv"
df           = pd.read_csv(CSV_PATH)

# ───── column taxonomy (same names as at training) ───────────────────────────
time_varying_categorical = ['Rain?','Name','Puasa','Public Holiday','Day','Month']
static_categorical       = ['Store_No','State','CODE (subcluster 1)',
                            'CODE FY26 1 (subcluster 2)',
                            'CODE FY26 2 (subcluster 3)']
categorical_cols         = time_varying_categorical + static_categorical
numeric_cols             = ['Net_Amount','TC','Days_after_Opening',
                            'Average Daily Temperature (°C)']

# ───── tidy up NA / space fillers exactly as before ──────────────────────────
# Missing and empty-string cells are replaced by an explicit placeholder so
# every row carries a usable value in these columns.
df['CODE (subcluster 1)'] = df['CODE (subcluster 1)'].fillna('blank').replace('', 'blank')
df['Name']               = df['Name'].fillna('no PH').replace('', 'no PH')
df['Puasa']              = df['Puasa'].fillna(0).replace('', 0)
df['Public Holiday']     = df['Public Holiday'].fillna(0).replace('', 0)

# Strip stray whitespace before mapping to 0/1.  The raw export pads some
# labels (e.g. ' No'); keying the mapping on the exact padded spelling would
# turn every other spelling ('No', 'Yes ', ...) into NaN without any warning.
# Labels that are neither 'Yes' nor 'No' after stripping still become NaN.
df['Rain?']              = (df['Rain?'].astype(str).str.strip()
                                       .map({'Yes': 1, 'No': 0}))   # => 0/1

# ───── label encoders (refitted on full dataframe) ───────────────────────────
# Calendar columns are fitted on their complete vocabulary so that day/month
# names absent from the data can still be encoded later on.
_calendar_vocab = {
    'Month': [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    ],
    'Day': [
        'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'
    ],
}

# A categorical column is label-encoded when it is static or has at least
# seven distinct values.
embed_cols = [
    name for name in categorical_cols
    if name in static_categorical or df[name].nunique() >= 7
]

encoders = {}
for name in embed_cols:
    encoder = LabelEncoder()
    encoder.fit(_calendar_vocab.get(name, df[name]))
    df[name + '_enc'] = encoder.transform(df[name])
    encoders[name] = encoder

# Binary columns are already 0/1; the remaining small-cardinality 'one-hot'
# columns are not needed by our model (they were simply left as numeric 0/1).

# ───── numeric scaling (refit on full dataframe) ─────────────────────────────
scaler = MinMaxScaler().fit(df[numeric_cols])
df[numeric_cols] = scaler.transform(df[numeric_cols])

# -----------------------------------------------------------------------------
# 3.  Shared variables & convenience functions
# -----------------------------------------------------------------------------
# Number of time steps in each input sequence; must match training.
window = 14

# Columns (in this order) that make up the sequential numeric tensor given to
# the LSTM.
time_numeric_cols = [
    'Net_Amount',
    'TC',
    'Days_after_Opening',
    'Average Daily Temperature (°C)',
    'Rain?',
    'Puasa',
    'Public Holiday',
]

# Encoded counterparts of the static categoricals that were label-encoded.
static_cols = [f'{name}_enc' for name in static_categorical if name in embed_cols]

# Inverse-scale helpers (two versions – by *index* & by *name*)
def inverse_scale_idx(scaled_vec, col_idx):
    """Map scaled values of one ``numeric_cols`` column back to original units.

    ``scaled_vec`` is a sequence of scaled values and ``col_idx`` is the
    position of the column within ``numeric_cols``.  The fitted scaler
    inverts whole rows, so the values are placed into a zero-filled matrix
    with one column per entry of ``numeric_cols``; only the requested column
    of the inverted matrix is returned.
    """
    n_rows = len(scaled_vec)
    padded = np.zeros((n_rows, len(numeric_cols)))
    padded[:, col_idx] = scaled_vec
    restored = scaler.inverse_transform(padded)
    return restored[:, col_idx]

def inverse_scale_name(scaled_vec, col_name):
    """Undo the scaling of ``scaled_vec`` for the column called ``col_name``.

    Thin wrapper around ``inverse_scale_idx`` that resolves the column's
    position in ``numeric_cols``; ``list.index`` raises ``ValueError`` when
    ``col_name`` is not one of the scaled columns.
    """
    position = numeric_cols.index(col_name)
    return inverse_scale_idx(scaled_vec, position)

# Build the dict of inputs expected by Keras functional model
def build_input_dict(seq_num, seq_name, seq_day, seq_month, static_vec):
    """Assemble the named-input dict for a single-sample model call.

    Each sequence array gets a leading axis of length one; the numeric
    sequence is cast to float32 and the encoded sequences to int32.  Every
    entry of ``static_cols`` contributes one ``(1, 1)`` int32 array, keyed by
    the column name without ``'_enc'`` plus ``'_in'``, and read from the
    matching position of ``static_vec``.
    """
    def as_batch(arr, dtype):
        # Prepend an axis of length one and cast to the requested dtype.
        return arr[np.newaxis].astype(dtype)

    feeds = {
        'num_in'      : as_batch(seq_num,   np.float32),
        'name_seq_in' : as_batch(seq_name,  np.int32),
        'day_seq_in'  : as_batch(seq_day,   np.int32),
        'month_seq_in': as_batch(seq_month, np.int32),
    }
    for pos, enc_col in enumerate(static_cols):
        key = enc_col.replace('_enc', '') + '_in'
        feeds[key] = static_vec[[pos]].reshape(1, 1).astype(np.int32)
    return feeds

# -----------------------------------------------------------------------------
# 4.  Autoregressive forecasting (shared by the global and per-store runs)
# -----------------------------------------------------------------------------
# Positions of the two predicted targets inside each row of the sequential
# numeric tensor.  They are looked up in `time_numeric_cols` because that list
# defines the column layout of `seq_num`, and are computed once instead of on
# every forecast step.
j_net = time_numeric_cols.index('Net_Amount')
j_tc  = time_numeric_cols.index('TC')


def _autoregressive_forecast(history, horizon):
    """Roll the model forward ``horizon`` daily steps from the end of ``history``.

    ``history`` must already be sorted by ``'Date'`` (ascending); its last
    ``window`` rows seed the input sequences and the static inputs are read
    from its final row.  After every step the predicted (still scaled)
    Net_Amount and TC are written into a copy of the most recent numeric row,
    which is appended to the window while the oldest row is dropped.  All
    other numeric features are carried forward unchanged from that most
    recent row.

    Returns a DataFrame with columns ``Date``, ``Net_Amount`` and ``TC``; the
    two value columns are converted back to original units.
    """
    tail = history.tail(window)
    last_date = tail['Date'].max()

    seq_num    = tail[time_numeric_cols].values.astype(np.float32)
    seq_name   = tail['Name_enc'].values.astype(np.int32)
    seq_day    = tail['Day_enc'].values.astype(np.int32)
    seq_month  = tail['Month_enc'].values.astype(np.int32)
    static_vec = tail.iloc[-1][static_cols].values.astype(np.int32)

    dates, net_vals, tc_vals = [], [], []
    for step in range(1, horizon + 1):
        yhat = model.predict(build_input_dict(seq_num, seq_name,
                                              seq_day, seq_month,
                                              static_vec), verbose=0)[0]
        net_s, tc_s = yhat

        curr_date = last_date + pd.Timedelta(days=step)
        dates.append(curr_date)
        net_vals.append(inverse_scale_name([net_s], 'Net_Amount')[0])
        tc_vals.append(inverse_scale_name([tc_s], 'TC')[0])

        # Advance the calendar sequences to the date that was just predicted
        # (the encoders expect weekday / month *names*, not numbers).
        seq_day   = np.roll(seq_day,   -1)
        seq_month = np.roll(seq_month, -1)
        seq_day[-1]   = encoders['Day'].transform([curr_date.day_name()])[0]
        seq_month[-1] = encoders['Month'].transform([curr_date.month_name()])[0]

        # Advance the holiday-name sequence too, so it stays aligned with the
        # other windows.  The future holiday is unknown, so the most recent
        # code is repeated, mirroring how the numeric exogenous features are
        # carried forward below.
        latest_name  = seq_name[-1]
        seq_name     = np.roll(seq_name, -1)
        seq_name[-1] = latest_name

        # Advance the numeric tensor, overwriting Net & TC with the *scaled*
        # predictions.
        new_row        = seq_num[-1].copy()
        new_row[j_net] = net_s
        new_row[j_tc]  = tc_s
        seq_num        = np.vstack([seq_num[1:], new_row])

    return pd.DataFrame({"Date": dates,
                         "Net_Amount": net_vals,
                         "TC": tc_vals})


# -----------------------------------------------------------------------------
# 4-A.  ***GLOBAL*** 200-step autoregressive forecast
# -----------------------------------------------------------------------------
future_horizon = 200
df['Date']     = pd.to_datetime(df['Date'])          # ensure datetime
# Stable sort: rows sharing a date keep their file order, so the seed window
# taken by tail() is deterministic.
df = df.sort_values('Date', kind='mergesort')

# NOTE(review): the seed window is the last `window` rows of the *whole*
# frame.  If several stores report on the same date, these rows may belong to
# different stores rather than to `window` consecutive days – confirm this is
# the intended "global" input.
df_forecast_global = _autoregressive_forecast(df, future_horizon)
print("Global 200-day forecast ready → df_forecast_global")

# -----------------------------------------------------------------------------
# 4-B.  Per-store 21-day forecasts  (interactive Plotly)
# -----------------------------------------------------------------------------
future_horizon = 21
all_forecasts  = []

for store_id, one in df.groupby('Store_No', sort=True):
    if len(one) < window:        # skip tiny history
        continue

    store_fc = _autoregressive_forecast(one.sort_values('Date'), future_horizon)
    store_fc['Store_No'] = store_id
    all_forecasts.append(store_fc)

df_forecast_store = pd.concat(all_forecasts, ignore_index=True)
print("Per-store 21-day forecasts ready → df_forecast_store")

# -----------------------------------------------------------------------------
# 5.  OPTIONAL: compare against actuals CSV & show interactive widget
# -----------------------------------------------------------------------------
# – skip this block if you **only** want future forecasts –
ACTUALS_CSV = r" ... csv"

# Read the actuals and bring the column names in line with the forecast frame.
df_actual = pd.read_csv(ACTUALS_CSV)
df_actual = df_actual.rename(columns={'Store No': 'Store_No',
                                      'Net Amount': 'Net_Amount'})
df_actual['Date'] = pd.to_datetime(df_actual['Date'])
# The actuals file stores Net_Amount with the opposite sign; flip it.
df_actual['Net_Amount'] = -df_actual['Net_Amount']
df_actual = df_actual.sort_values(['Store_No', 'Date'])

def make_fig(store_id):
    """Show forecast vs. actual Net_Amount and TC for one store.

    Draws two stacked panels sharing the x axis: Net_Amount on top and TC
    below.  Forecasts are orange and actuals blue; only the top panel's
    traces appear in the legend.
    """
    act = df_actual.query("Store_No == @store_id")
    fc  = df_forecast_store.query("Store_No == @store_id")

    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        subplot_titles=("Net_Amount", "TC"),
                        vertical_spacing=0.08)

    # (subplot row, data column, forecast label, actual label, extra kwargs)
    panels = (
        (1, 'Net_Amount', 'Predicted Net', 'True Net', {}),
        (2, 'TC',         'Predicted TC',  'True TC',  {'showlegend': False}),
    )
    for row, column, pred_label, true_label, extra in panels:
        for frame, label, colour in ((fc, pred_label, 'orange'),
                                     (act, true_label, 'blue')):
            trace = go.Scatter(x=frame['Date'], y=frame[column],
                               mode='lines+markers', name=label,
                               line=dict(color=colour), **extra)
            fig.add_trace(trace, row=row, col=1)

    fig.update_layout(height=600, width=900,
                      hovermode='x unified',
                      title=f"Store {store_id}: 21-Day Forecast vs Actual")
    fig.update_xaxes(title_text="Date", row=2, col=1)
    for row, axis_title in ((1, "RM"), (2, "Transactions")):
        fig.update_yaxes(title_text=axis_title, row=row, col=1)
    fig.show()

# Dropdown listing every store that has a forecast; choosing an entry calls
# make_fig with that store id and renders the result in `out`.
store_widget = widgets.Dropdown(
    description='Select Store_No:',
    options=sorted(df_forecast_store['Store_No'].unique()),
    style={'description_width': 'initial'},
)
out = widgets.interactive_output(make_fig, {'store_id': store_widget})
display(store_widget, out)

# -----------------------------------------------------------------------------
# 6. 95 % CIs, as in step 12
# -----------------------------------------------------------------------------
# Merge predictions & actuals on dates you already have both to estimate σ.
# The inner join keeps only (Store_No, Date) pairs present in BOTH tables.
df_merged = (df_actual.rename(columns={'Net_Amount':'Net_Amount_act',
                                       'TC':'TC_act'})
                      .merge(df_forecast_store.rename(columns={'Net_Amount':'Net_Amount_pred',
                                                                'TC':'TC_pred'}),
                             on=['Store_No','Date'], how='inner'))

# The sample standard deviation (ddof=1) needs at least two residuals.  With
# fewer it is NaN, which would otherwise go unnoticed, so say so explicitly.
if len(df_merged) < 2:
    print(f"WARNING: only {len(df_merged)} overlapping forecast/actual row(s) – "
          "σ and the CI half-widths below are NaN.")

# One σ per target, pooled over all stores and dates; the half-width is
# 1.96·σ, i.e. a constant band around every forecast point.
sigma_net = np.std(df_merged['Net_Amount_act'] - df_merged['Net_Amount_pred'], ddof=1)
sigma_tc  = np.std(df_merged['TC_act']         - df_merged['TC_pred'], ddof=1)
ci_half_net = 1.96 * sigma_net
ci_half_tc  = 1.96 * sigma_tc
print(f"Global 95 % CI half-widths → Net={ci_half_net:.0f}  |  TC={ci_half_tc:.1f}")

df_all = pd.concat(all_forecasts, ignore_index=True)
# Interactive figure:  "True"  (blue)  vs  "Predicted"  (orange)

def make_fig(store_id):
    """Show true vs. predicted Net_Amount and TC for one store, with error bars.

    Draws two stacked panels sharing the x axis.  Predicted traces (orange)
    carry vertical error bars of constant half-width ``ci_half_net`` /
    ``ci_half_tc``; actuals are drawn in blue.  Only the top panel's traces
    appear in the legend.
    """
    # Rows for the selected store only.
    act = df_actual.query("Store_No == @store_id")
    fc  = df_all.query("Store_No == @store_id")

    fig = make_subplots(
        rows=2, cols=1, shared_xaxes=True,
        subplot_titles=("Net_Amount", "TC"),
        vertical_spacing=0.08
    )

    def constant_error_bars(half_width):
        # The same half-width is repeated for every forecast point.
        return dict(
            type='data',
            array=[half_width] * len(fc),
            visible=True,
            thickness=1.5,
            width=4,
        )

    # (subplot row, data column, predicted label, true label, half-width,
    #  extra trace kwargs)
    panels = (
        (1, 'Net_Amount', 'Predicted Net', 'True Net', ci_half_net, {}),
        (2, 'TC',         'Predicted TC',  'True TC',  ci_half_tc,
         {'showlegend': False}),
    )
    for row, column, pred_label, true_label, half_width, extra in panels:
        predicted = go.Scatter(
            x=fc['Date'], y=fc[column],
            mode='lines+markers',
            name=pred_label,
            line=dict(color='orange'),
            error_y=constant_error_bars(half_width),
            **extra
        )
        observed = go.Scatter(
            x=act['Date'], y=act[column],
            mode='lines+markers',
            name=true_label,
            line=dict(color='blue'),
            marker=dict(color='blue'),
            **extra
        )
        fig.add_trace(predicted, row=row, col=1)
        fig.add_trace(observed, row=row, col=1)

    # Layout tweaks
    fig.update_layout(
        height=600, width=900,
        hovermode='x unified',
        title=f"Store {store_id}: True vs Predicted (21-Day Horizon)",
        legend_title_text=''
    )
    fig.update_xaxes(title_text="Date", row=2, col=1)
    for row, axis_title in ((1, "RM"), (2, "Transactions")):
        fig.update_yaxes(title_text=axis_title, row=row, col=1)

    fig.show()

# Widget – same as before, now listing the stores found in df_all and bound
# to the make_fig defined just above.
store_widget = widgets.Dropdown(
    description='Select Store_No:',
    options=sorted(df_all['Store_No'].unique()),
    style={'description_width': 'initial'},
)
out = widgets.interactive_output(make_fig, {'store_id': store_widget})
display(store_widget, out)