In [7]:
import numpy as np
import pandas as pd
import random
import copy

sales_dir = '../assets/processed_sales.csv'
cal_dir = '../assets/calendar_week.csv'
events_dir = '../assets/events.csv'
zscore_dir = '../assets/Z_scores.csv'
dd_coeff_dir = '../assets/Coefficients.csv'
prices_dir = '../assets/prices.csv'

### Historical Analysis

In [8]:
def fitness_demand_sensitive(ga_output: pd.DataFrame, sku_list: list, start: float, period: int) -> float:
    global sales, cal_week, events, zscore, all_skus

    histr = start - 1
    end =  start + period

    idx_frame = [(SKU, Time_ID) for SKU in sku_list for Time_ID in range(start, end)]
    idx_frame = pd.DataFrame(idx_frame, columns=['SKU', 'Time_ID'])

    sales_hist = sales[(sales['SKU'].isin(sku_list)) & (sales['Time_ID']>=histr-period-5) & (sales['Time_ID']<=histr)].copy()

    ## Preapare GA dataframe
    ga_df = ga_output.copy()

    ## Create Competitor Matrix
    comp_matrix_columns = [f'{sku}_{promo}' for sku in all_skus for promo in ['Discount', 'Display', 'Feature', 'Sales']]
    comp_matrix = pd.DataFrame(columns=comp_matrix_columns, index=range(len(sku_list)*period))
    comp_matrix = pd.concat([idx_frame, comp_matrix], axis=1)
    for sku in sku_list:
        for promo in ['Discount', 'Display', 'Feature']:
            neg = 1 #-1 if promo in ['Display', 'Feature'] else 1
            tmp = list(ga_df[ga_df['SKU']==sku][promo] * neg) * period
            tmp = pd.DataFrame(tmp)
            comp_matrix[sku + "_" + promo] = tmp
            comp_matrix.loc[comp_matrix['SKU'] == sku, [sku + "_" + promo]] = 0
    comp_matrix.fillna(0, inplace=True)

    # comp_matrix.head()


    revenue = []

    ## Iterate through each week through the demand function
    ## Obtain sales prediction and feed back into historical sales for picking\
    for week in range(start, end):

        dd_coeff_val = dd_coeff[sku_list].values
        year = time_year[time_year["Time_ID"] == week]["Year"].values[0]
        ga_tmp = ga_df[ga_df["Time_ID"] == week].copy()
        for promo in ["Discount", "Feature", "Display"]:
            merge = sales_hist[sales_hist["Time_ID"] == week - 1][
                ["SKU", promo]
            ].copy()
            merge = merge.rename(columns={promo: promo + "lag"})
            ga_tmp = pd.merge(ga_tmp, merge, on=["SKU"], how="left")

        ### CHANGE Log_sls to Sales
        merge = sales_hist[sales_hist["Time_ID"] == week - 1][
            ["SKU", "Sales", "Lag8w_avg_sls"]
        ].copy()
        merge = merge.rename(
            columns={"Sales": "Saleslag", "Lag8w_avg_sls": "Sales_mov_avg"}
        )
        ga_tmp = pd.merge(ga_tmp, merge, on=["SKU"], how="left")

        events_tmp = (
            events[events["Time_ID"] == week]
            .drop(columns="Time_ID")
            .copy()
        )
        events_tmp = pd.concat([events_tmp] * len(sku_list), ignore_index=True)
        ga_tmp = pd.concat([ga_tmp, events_tmp], axis=1)

        comp_tmp = (
            comp_matrix[comp_matrix["Time_ID"] == week]
            .drop(columns="Time_ID")
            .copy()
        )
        for sku in sku_list:
            tmp = sales_hist[
                (sales_hist["SKU"] == sku) & (sales_hist["Time_ID"] == week - 1)
            ]["Sales"].item()
            comp_tmp[sku + "_Sales"] = tmp
            comp_tmp.loc[comp_tmp["SKU"] == sku, [sku + "_Sales"]] = 0

        ga_tmp = pd.merge(ga_tmp, comp_tmp, on=["SKU"], how="left")

        ga_val = ga_tmp.drop(columns=["SKU", "Time_ID"]).values

        sales_output = np.diag(ga_val.dot(dd_coeff_val))

        ### ADD MODEL BIAS
        bias_tmp = dd_bias[dd_bias["SKU"].isin(sku_list)]["bias"].values
        sales_output = sales_output + bias_tmp
        sales_output[sales_output < 0] = 0

        prices_tmp = prices[
            (prices["SKU"].isin(sku_list)) & (prices["Year"] == year)
        ]["med_price"].values

        discounts_tmp = 2 - (ga_tmp['Discount'].values * zscore_tmp['Std_deviation'].values + zscore_tmp['Mean'].values)
        other_costs_tmp = ga_tmp['Feature'].values * 5 + ga_tmp['Display'].values * 10

        revenue.append(sum(sales_output * prices_tmp * discounts_tmp - other_costs_tmp))

        ## Prep for historical insert
        prep_tmp = sales_hist[sales_hist["Time_ID"] == week - 1][
            ["SKU", "Lag7w_sum_sls"]
        ].copy()
        hist_prep = sales_hist[sales_hist["Time_ID"] == week - 7][["SKU", "Sales"]]
        hist_prep = hist_prep.rename(columns={"Sales": "Lag7w_sls"})
        hist_prep = pd.merge(hist_prep, prep_tmp, on=["SKU"], how="left")
        hist_prep = hist_prep.drop(columns=["SKU"])

        ## Build historical insert
        hist_insert = ga_tmp[["SKU", "Time_ID", "Discount", "Display", "Feature"]]
        hist_insert["Year"] = year
        hist_insert["Sales"] = sales_output
        # hist_insert["Log_sls"] = -np.log(hist_insert["Sales"]) ## NOT USED ANYMORE
        hist_insert = pd.concat([hist_insert, hist_prep], axis=1)
        hist_insert["Lag8w_avg_sls"] = (
            hist_insert["Lag7w_sum_sls"] + hist_insert["Sales"]
        ) / 8
        hist_insert["Lag7w_sum_sls_upd"] = (
            hist_insert["Lag7w_sum_sls"]
            - hist_insert["Lag7w_sls"]
            + hist_insert["Sales"]
        )
        hist_insert = hist_insert[
            [
                "SKU",
                "Time_ID",
                "Year",
                "Sales",
                "Discount",
                "Display",
                "Feature",
                # "Log_sls",  ## NOT USED ANYMORE
                "Lag8w_avg_sls",
                "Lag7w_sum_sls_upd",
            ]
        ]
        hist_insert = hist_insert.rename(
            columns={"Lag7w_sum_sls_upd": "Lag7w_sum_sls"}
        )
        hist_insert.fillna(0, inplace=True)

        ## Insert results into historical
        sales_hist = pd.concat([sales_hist, hist_insert], ignore_index=True)

        # print(sales_output)

    return sum(revenue)

##### Forecasted Actual Revenue

In [9]:
import warnings
warnings.filterwarnings('ignore')

sales = pd.read_csv(sales_dir)
cal_week = pd.read_csv(cal_dir)
events = pd.read_csv(events_dir)
zscore = pd.read_csv(zscore_dir)
dd_coeff = pd.read_csv(dd_coeff_dir).drop(columns='Unnamed: 0')
prices = pd.read_csv(prices_dir)

all_skus = sorted(sales['SKU'].unique())

time_year = sales[['Time_ID', 'Year']].copy()
time_year = time_year.drop_duplicates()
zscore_tmp= pd.read_csv(zscore_dir)
zscore_tmp = zscore_tmp.rename(columns={'Unnamed: 0': 'SKU'})
zscore_tmp = zscore_tmp.sort_values(by='SKU')
zscore = zscore_tmp[['SKU', 'Mean', 'Std_deviation']]
dd_bias = zscore_tmp[['SKU', 'bias']]

sku_list = all_skus
start = 1376
period = 8


df_full = pd.read_csv("../assets/processed_sales.csv")
 
df_filtered = df_full[(df_full['Time_ID'] >= start) & (df_full['Time_ID'] <= start + period)]
df_filtered = df_filtered[df_filtered['SKU'].isin(sku_list)]
df_filtered = df_filtered.sort_values(by = ['Time_ID', 'SKU'], ascending = [True, True])
df_filtered = df_filtered[['SKU', 'Time_ID', 'Discount', 'Feature', 'Display']]

revenue = fitness_demand_sensitive(df_filtered, sku_list, start, period)
cost = df_filtered[['Display', 'Feature']].sum().sum() * 20

print(f'Forecast Actual: {revenue}')

Forecast Actual: 80149.33991616753


##### Actual Revenue

In [11]:
# Selecct data within period
df_actual = df_full[(df_full['Time_ID'] >= start) & (df_full['Time_ID'] <= start + period -1)]
df_actual = df_actual[df_actual['SKU'].isin(sku_list)]

# Get price from processed data
df_price = pd.read_csv("../assets/combined_milk_final.csv")
df_price = df_price[df_price['Store_ID'] == 236117]
prices_filtered = df_price[['Time_ID','SKU', 'Sales', 'Price']]
df_actual = df_actual[['SKU', 'Time_ID', 'Discount', 'Feature', 'Display']]
df_actual = df_actual.merge(prices_filtered, on = ['Time_ID', 'SKU'])
df_actual = df_actual.merge(zscore, on = 'SKU')
df_actual['Discount_updated'] = df_actual['Discount'] * df_actual['Std_deviation'] + df_actual['Mean'] - 1

df_actual['Revenue'] = df_actual['Sales']*df_actual['Price']

# Calculate promotion metrics
avg_weekly_feature = df_actual.groupby(['Time_ID'])['Feature'].sum().mean()
avg_weekly_display = df_actual.groupby(['Time_ID'])['Display'].sum().mean()

avg_weekly_discount = df_actual[df_actual['Discount_updated'] > 0].groupby('Time_ID').size().mean()
avg_discount_values = df_actual[df_actual['Discount_updated'] > 0].groupby('Time_ID')['Discount'].mean().mean()

revenue = sum(df_actual['Revenue'])

print(f'Actual Revenue: {revenue}')
print(f"Average Weekly Features: {avg_weekly_feature}")
print(f"Average Weekly Display: {avg_weekly_display}")
print(f"Average Weekly Discount: {avg_weekly_discount}")
print(f"Average Weekly Discount Amount: {avg_discount_values}")

Actual Revenue: 80213.26306342347
Average Weekly Features: 4.875
Average Weekly Display: 0.0
Average Weekly Discount: 18.75
Average Weekly Discount Amount: 0.35073869212754794
