In [105]:
import numpy as np
import pandas as pd
import random
import copy

sales_dir = 'processed_sales.csv'
cal_dir = 'calendar_week.csv'
events_dir = 'events.csv'
zscore_dir = 'Z_scores.csv'
dd_coeff_dir = 'Coefficients.csv'
prices_dir = 'prices.csv'

sales = pd.read_csv(sales_dir)
cal_week = pd.read_csv(cal_dir)
events = pd.read_csv(events_dir)
zscore_tmp= pd.read_csv(zscore_dir)
zscore_tmp = zscore_tmp.rename(columns={'Unnamed: 0': 'SKU'})
zscore_tmp = zscore_tmp.sort_values(by='SKU')
zscore = zscore_tmp[['SKU', 'Mean', 'Std_deviation']]
dd_coeff = pd.read_csv(dd_coeff_dir).drop(columns='Unnamed: 0')
dd_bias = zscore_tmp[['SKU', 'bias']]
prices = pd.read_csv(prices_dir)

all_skus = sorted(sales['SKU'].unique())

time_year = sales[['Time_ID', 'Year']].copy()
time_year = time_year.drop_duplicates()

## To comment out later parameters come from GA function
# sku_list = ['7_1_42365_22800', '88_6_99998_59504', '88_6_99998_59509','88_6_99998_59597', '7_1_42365_26400']
sku_list = all_skus

start = 1375
period = 8

## To comment later ga_output comes from GA function
ga_output = {
    'Discount': np.random.choice(np.arange(5, 55, 5)/100, len(sku_list)*period) ,  # Random discounts
    'Feature': np.random.choice([0, 1], len(sku_list)*period),  # Random features
    'Display': np.random.choice([0, 1], len(sku_list)*period) # Random displays
}
## To comment later ga_output comes from GA function
ga_output = {
    'Discount': [10] * len(sku_list)*period,
    'Feature': [1] * len(sku_list)*period,
    'Display': [1] * len(sku_list)*period
}
ga_output = pd.DataFrame(ga_output)

In [106]:
def fitness_demand(ga_output: pd.DataFrame, sku_list: list, start: float, period: int) -> float:
    global sales, cal_week, events, zscore, all_skus

    histr = start - 1
    end =  start + period

    sku_list = sorted(sku_list)
    idx_frame = [(SKU, Time_ID) for Time_ID in range(start, end) for SKU in sku_list]
    idx_frame = pd.DataFrame(idx_frame, columns=['SKU', 'Time_ID'])

    sales_hist = sales[(sales['SKU'].isin(sku_list)) & (sales['Time_ID']>=histr-period-5) & (sales['Time_ID']<=histr)].copy()

    ## Preapare GA dataframe
    ga_df = ga_output.copy()
    ga_df['Discount'] = 1 + ga_df['Discount']
    ga_df = pd.concat([idx_frame, ga_df], axis=1)
    ga_df_tmp = ga_df.copy()
    ga_df = pd.merge(ga_df, zscore, on=['SKU'], how='left')
    ga_df['z_disc'] = (ga_df['Discount'] - ga_df['Mean']) / ga_df['Std_deviation']
    # ga_df['z_disc'] = ga_df['z_disc'] * -1
    ga_df['z_disc'] = ga_df['z_disc']
    ga_df = ga_df[['SKU', 'Time_ID', 'z_disc', 'Feature', 'Display']]
    ga_df = ga_df.rename(columns={'z_disc': 'Discount'})

    ## Create Competitor Matrix
    comp_matrix_columns = [f'{sku}_{promo}' for sku in all_skus for promo in ['Discount', 'Display', 'Feature', 'Sales']]
    comp_matrix = pd.DataFrame(columns=comp_matrix_columns, index=range(len(sku_list)*period))
    comp_matrix = pd.concat([idx_frame, comp_matrix], axis=1)
    for sku in sku_list:
        for promo in ['Discount', 'Display', 'Feature']:
            neg = 1 #-1 if promo in ['Display', 'Feature'] else 1
            tmp = list(ga_df[ga_df['SKU']==sku][promo]) * period
            tmp = pd.DataFrame(tmp)
            comp_matrix[sku + "_" + promo] = tmp
            comp_matrix.loc[comp_matrix['SKU'] == sku, [sku + "_" + promo]] = 0
    comp_matrix.fillna(0, inplace=True)


    zscore_tmp_ = zscore[zscore['SKU'].isin(sku_list)]

    # comp_matrix.head()


    revenue = []

    ## Iterate through each week through the demand function
    ## Obtain sales prediction and feed back into historical sales for picking\

    for week in range(start, end):

        dd_coeff_val = dd_coeff[sku_list].values
        year = time_year[time_year['Time_ID']==week]['Year'].values[0]
        ga_tmp = ga_df[ga_df['Time_ID']==week].copy()
        for promo in ['Discount', 'Feature', 'Display']:
            merge = sales_hist[sales_hist['Time_ID']==week-1][['SKU', promo]].copy()
            merge = merge.rename(columns={promo: promo+"lag"})
            ga_tmp = pd.merge(ga_tmp, merge, on=['SKU'], how='left')

        # merge = sales_hist[sales_hist['Time_ID']==week-1][['SKU', 'Log_sls', 'Lag8w_avg_sls']].copy()
        merge = sales_hist[sales_hist['Time_ID']==week-1][['SKU', 'Sales', 'Lag8w_avg_sls']].copy()
        merge['Lag8w_avg_sls'] = merge['Lag8w_avg_sls']
        # merge = merge.rename(columns={'Log_sls': 'Saleslag', 'Lag8w_avg_sls': 'Sales_mov_avg'})
        merge = merge.rename(columns={'Sales': 'Saleslag', 'Lag8w_avg_sls': 'Sales_mov_avg'})
        ga_tmp = pd.merge(ga_tmp, merge, on=['SKU'], how='left')

        events_tmp = events[events['Time_ID']==week].drop(columns = 'Time_ID').copy()
        events_tmp = pd.concat([events_tmp]*len(sku_list), ignore_index=True)
        ga_tmp = pd.concat([ga_tmp, events_tmp], axis=1)

        comp_tmp = comp_matrix[comp_matrix['Time_ID']==week].drop(columns = 'Time_ID').copy()
        for sku in sku_list:
            tmp = sales_hist[(sales_hist['SKU']==sku) & (sales_hist['Time_ID']==week-1)]['Sales'].item()
            comp_tmp[sku+"_Sales"] = tmp
            comp_tmp.loc[comp_tmp['SKU'] == sku, [sku + "_Sales"]] = 0

        ga_tmp = pd.merge(ga_tmp, comp_tmp, on=['SKU'], how='left')

        ga_val = ga_tmp.drop(columns=['SKU', 'Time_ID']).values

        sales_output = np.diag(ga_val.dot(dd_coeff_val))


        """change"""
        bias_tmp = dd_bias[dd_bias['SKU'].isin(sku_list)]['bias'].values
        sales_output = sales_output + bias_tmp
        sales_output[sales_output < 0] = 0
        # sales_output = np.exp(abs(sales_output))
        """"""
        prices_tmp = prices[(prices['SKU'].isin(sku_list)) & (prices['Year']==year)]['med_price'].values

        discounts_tmp = 2 - (ga_tmp['Discount'].values * zscore_tmp_['Std_deviation'].values + zscore_tmp_['Mean'].values)
        other_costs_tmp = ga_tmp['Feature'].values * 5 + ga_tmp['Display'].values * 10

        revenue.append(sum(sales_output * prices_tmp * discounts_tmp - other_costs_tmp))
        # revenue.append(sum(sales_output * prices_tmp))

        ## Prep for historical insert
        prep_tmp = sales_hist[sales_hist['Time_ID']==week-1][['SKU', 'Lag7w_sum_sls']].copy()
        hist_prep = sales_hist[sales_hist['Time_ID']==week-7][['SKU', 'Sales']]
        hist_prep = hist_prep.rename(columns={'Sales': 'Lag7w_sls'})
        hist_prep = pd.merge(hist_prep, prep_tmp, on=['SKU'], how='left')
        hist_prep = hist_prep.drop(columns=['SKU'])

        ## Build historical insert
        hist_insert = ga_tmp[['SKU', 'Time_ID', 'Discount', 'Display', 'Feature']]
        hist_insert['Year'] = year
        hist_insert['Sales'] = sales_output
        hist_insert['Log_sls'] =  -np.log(hist_insert['Sales'])  ## NOT USED ANYMORE
        hist_insert = pd.concat([hist_insert, hist_prep], axis=1)
        hist_insert['Lag8w_avg_sls'] = ( hist_insert['Lag7w_sum_sls'] + hist_insert['Sales'] ) / 8
        hist_insert['Lag7w_sum_sls_upd'] = hist_insert['Lag7w_sum_sls'] - hist_insert['Lag7w_sls'] + hist_insert['Sales']
        hist_insert = hist_insert[['SKU', 'Time_ID', 'Year', 'Sales', 'Discount', 'Display', 'Feature', 'Log_sls', 'Lag8w_avg_sls', 'Lag7w_sum_sls_upd']]
        hist_insert = hist_insert.rename(columns={'Lag7w_sum_sls_upd': 'Lag7w_sum_sls'})
        hist_insert.fillna(0, inplace=True)

        ## Insert results into historical
        sales_hist = pd.concat([sales_hist, hist_insert], ignore_index=True)

    # print(sales_output)

    return sum(revenue)

#### TESTING FUNCITON

In [107]:
revenue = fitness_demand(ga_output, sku_list, start, period)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist_insert['Year'] = year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist_insert['Sales'] = sales_output
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hist_insert['Log_sls'] =  -np.log(hist_insert['Sales'])  ## NOT USED ANYMORE
A value is trying to be set on a copy of a slice from a DataFrame

In [108]:
revenue

86798.51949604199

In [None]:
85544.40886746236