In [1]:
import pandas as pd
import numpy as np
import datetime
from calendar import monthrange
from functools import reduce
# import reconcilation
import warnings
warnings.filterwarnings('ignore')

# Input

In [2]:
# Read each DPS reference table and lower-case its header in the same
# statement, so every frame is ready for use as soon as its name is bound.
PRODUCT = pd.read_csv('data/DPS_PRODUCT.csv').rename(columns=str.lower)
LOCATION = pd.read_csv('data/DPS_LOCATION.csv').rename(columns=str.lower)
CUSTOMER = pd.read_csv('data/DPS_CUSTOMER.csv').rename(columns=str.lower)
DISTR_CHANNEL = pd.read_csv('data/DPS_DISTR_CHANNEL.csv').rename(columns=str.lower)
PROMO = pd.read_csv('data/DPS_PROMO.csv').rename(columns=str.lower)
PROMO_TYPE = pd.read_csv('data/DPS_PROMO_TYPE.csv').rename(columns=str.lower)
PRICE = pd.read_csv('data/DPS_PRICE.csv').rename(columns=str.lower)
STOCK = pd.read_csv('data/DPS_STOCK.csv').rename(columns=str.lower)

In [3]:
# Preview the first rows of the distribution-channel table (columns are already lower-cased).
DISTR_CHANNEL.head()

Unnamed: 0,distr_channel_lvl_id1,distr_channel_lvl_nm1,distr_channel_lvl_desc1,distr_channel_id,distr_channel_nm,distr_channel_desc,open_dttm,close_dttm,modified_dttm,delete_flg
0,2,nm 2,desc 2,1,nm 1,desc 1,01JAN1960:00:00:01,,01JUL2021:14:31:06,0


# Config_Parameters

In [4]:
# Lookup from hierarchy key to its table. The keys are reused to build column
# names such as '<key>_id' and '<key>_lvl_id<N>' in generate_data / alert_1,
# and the insertion order defines the order of the cross joins there.
hierarchies = {
    'product' : PRODUCT,
    'location' : LOCATION,
    'customer' : CUSTOMER,
    'distr_channel' : DISTR_CHANNEL
}
# Settings of the alert itself: KPI name, the hierarchy level used per
# dimension, the time level, and the table/column the alert reads from.
config_alert = {
    'tgt_type': 'POS',
    'alert_id': list(range(1, 10000)),
    'al_product_lvl': 7,
    'al_location_lvl': 5,
    'al_customer_lvl': 3,
    'al_distr_channel_lvl': 1,
    'al_time_lvl': 'WEEK',
    'alert_threshold_val': 100,
    'Input_table': 'ACC_AGG_HYBRID_FORECAST_',
    'Input_column': 'hybrid_forecast_value',
}

# Alert-wide thresholds and granularity switches.
config_config = {
    'IB_ALERT_GRANULARITY': 'WEEK',
    'IB_ALERT_FORECAST_LIST': datetime.timedelta(days=14),
    'IB_ALERT_MINCRITICAL_VALUE': 0.1,
    'IB_ALERT_MAXCRITICAL_RATIO': 1.3,
    'IB_ALERT_MINCRITICAL_RATIO': 1.1,
    'IB_ALERT_BASE_PAST_PERIOD': 'WEEK',
}

# General step parameters (status filter, horizons, minimum values).
config = {
    'IB_FF_ACTIVE_STATUS_LIST': 'active',
    'IB_FC_HORIZ': datetime.timedelta(days=14),
    'IB_ALERT_MIN_VAL': 0.1,
    'IB_ALERT_MIN_OBS': 10,
    'IB_MAX_NP_HISTORY': datetime.timedelta(days=30),
}

# Last day with known history and the length of the forecast horizon.
IB_HIST_END_DT = datetime.datetime(2022, 7, 15)
IB_FCST_HORIZON = datetime.timedelta(days=30)

In [5]:
# Display the history end date and the forecast horizon as a tuple.
IB_HIST_END_DT, IB_FCST_HORIZON

(datetime.datetime(2022, 7, 15, 0, 0), datetime.timedelta(days=30))

In [6]:
def generate_data(config_alert : dict,
                  hierarchies : dict,
                  IB_HIST_END_DT : datetime.datetime, 
                  PROMO : pd.DataFrame,
                  seed : int = None) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """
    Generate synthetic input tables for the alert step.
    
    Parameters
    ----------
    config_alert : dict
        Alert configuration. Reads 'al_time_lvl' (its first letter is used as
        the pandas frequency alias, e.g. 'WEEK' -> 'W') and 'al_<key>_lvl' for
        'product', 'location', 'customer' and 'distr_channel'.
    hierarchies : dict
        Dictionary containing matches of key names with the relevant hierarchical
        tables. Each table must provide the columns '<key>_id' and
        '<key>_lvl_id<N>' for the level N configured in config_alert.
    IB_HIST_END_DT : datetime
        Last known date (i.e. sales and stock information is known)
    PROMO : pd.DataFrame
        PROMO table. Must contain 'promo_id', 'promo_type' and the four
        '<key>_id' columns, and at least one row.
    seed : int, optional
        When given, NumPy's global random generator is re-seeded with it so
        that the generated tables are reproducible. The default (None) leaves
        the global random state untouched.
       
    Returns
    -------
    pd.DataFrame
        ACC_AGG_HYBRID_FORECAST_ used as input data of the algorithm
    pd.DataFrame
        RESTORED_DEMAND demand information regarding 
        the past till last known day of the history.
    pd.DataFrame
        FORECAST_FLAG used as input data of the algorithm
    """
    if seed is not None:
        np.random.seed(seed)

    hierarchy_keys = ['product', 'location', 'customer', 'distr_channel']

    # One period per frequency step over the 52 weeks after the history end;
    # the one-day shift moves the weekly default (Sunday) anchor to Monday.
    freq = config_alert['al_time_lvl'][0]
    timerange = pd.date_range(IB_HIST_END_DT, IB_HIST_END_DT + datetime.timedelta(weeks=52), freq=freq)
    timerange += datetime.timedelta(1)

    # --- ACC_AGG_HYBRID_FORECAST_: periods x distinct ids of the configured levels
    ACC_AGG_HYBRID_FORECAST_ = pd.DataFrame(timerange, columns=['period_dt'])
    for key in hierarchy_keys:
        column_name = f"{key}_lvl_id{config_alert[f'al_{key}_lvl']}"
        keys_df = hierarchies[key][[column_name]].drop_duplicates()
        ACC_AGG_HYBRID_FORECAST_ = pd.merge(ACC_AGG_HYBRID_FORECAST_, keys_df, how='cross')

    n_forecast = ACC_AGG_HYBRID_FORECAST_.shape[0]
    ACC_AGG_HYBRID_FORECAST_['segment_name'] = "Name of segment " + ACC_AGG_HYBRID_FORECAST_.index.astype(str)
    ACC_AGG_HYBRID_FORECAST_['vf_forecast_value'] = np.abs(np.random.normal(500, 300, n_forecast))
    ACC_AGG_HYBRID_FORECAST_['demand_type'] = np.random.choice(['promo', 'regular'], n_forecast)
    ACC_AGG_HYBRID_FORECAST_['assortment_type'] = np.random.choice(['new', 'old'], n_forecast)
    ACC_AGG_HYBRID_FORECAST_['ml_forecast_value'] = np.abs(np.random.normal(500, 300, n_forecast))
    ACC_AGG_HYBRID_FORECAST_['hybrid_forecast_value'] = np.abs(np.random.normal(500, 300, n_forecast))

    # --- RESTORED_DEMAND: last 15 days of history x all leaf ids.
    # The ids come from the `hierarchies` argument, not from notebook globals,
    # so the function depends only on its parameters.
    RESTORED_DEMAND = pd.DataFrame({
        'period_dt': pd.date_range(IB_HIST_END_DT - datetime.timedelta(days=14), IB_HIST_END_DT)
    })
    for key in hierarchy_keys:
        RESTORED_DEMAND = pd.merge(RESTORED_DEMAND, hierarchies[key][f"{key}_id"], how='cross')

    n_demand = RESTORED_DEMAND.shape[0]
    RESTORED_DEMAND['stock_qty'] = np.abs(np.random.normal(500, 300, n_demand))
    RESTORED_DEMAND['sales_qty'] = np.random.randint(1000, size=n_demand)
    RESTORED_DEMAND['sales_qty_r'] = RESTORED_DEMAND['sales_qty'] + np.random.normal(50, 30, n_demand)
    RESTORED_DEMAND['promo_flg'] = np.random.choice([0, 1], p=[0.8, 0.2], size=n_demand)
    RESTORED_DEMAND['promo_type'] = np.nan

    # Force the promo flag for the key combination of the first PROMO row.
    # .iloc[0] is positional, so it also works when PROMO has a non-default index.
    forced_promo = pd.Series(True, index=RESTORED_DEMAND.index)
    for key in hierarchy_keys:
        id_column = f"{key}_id"
        forced_promo &= RESTORED_DEMAND[id_column] == PROMO[id_column].iloc[0]
    RESTORED_DEMAND.loc[forced_promo, 'promo_flg'] = 1

    has_promo = RESTORED_DEMAND['promo_flg'] == 1
    RESTORED_DEMAND.loc[has_promo, 'promo_type'] = np.random.choice(
        PROMO['promo_type'].tolist(), size=has_promo.sum()
    )

    # Keep one promo_id per promo_type: joining on a non-unique key would
    # otherwise duplicate demand rows for every promo sharing the same type.
    promo_lookup = PROMO[['promo_id', 'promo_type']].drop_duplicates(subset='promo_type')
    RESTORED_DEMAND = pd.merge(RESTORED_DEMAND, promo_lookup, on='promo_type', how='left')

    n_demand = RESTORED_DEMAND.shape[0]
    RESTORED_DEMAND['deficit_flg1'] = np.random.choice([0, 1], p=[0.8, 0.2], size=n_demand)
    RESTORED_DEMAND['deficit_flg2'] = np.random.choice([0, 1], p=[0.8, 0.2], size=n_demand)

    # --- FORECAST_FLAG: periods x all leaf ids with a random status
    FORECAST_FLAG = pd.DataFrame(timerange, columns=['period_dt'])
    # End of a period = first date of the configured frequency on or after its start.
    FORECAST_FLAG['period_end_dt'] = FORECAST_FLAG['period_dt'].apply(
        lambda start : pd.date_range(start, periods=1, freq=freq)[0]
    )

    for key in hierarchies:
        key_df = hierarchies[key][f"{key}_id"]
        FORECAST_FLAG = pd.merge(FORECAST_FLAG, key_df, how='cross')
    FORECAST_FLAG['status'] = np.random.choice(['active', 'blocked', 'out-of-sale'], size = FORECAST_FLAG.shape[0])
    
    return ACC_AGG_HYBRID_FORECAST_, RESTORED_DEMAND, FORECAST_FLAG

In [7]:
# Build the three generated input tables in one call; arguments are passed by
# name so the mapping to generate_data's parameters is explicit.
(
    ACC_AGG_HYBRID_FORECAST_,
    RESTORED_DEMAND,
    FORECAST_FLAG,
) = generate_data(
    config_alert=config_alert,
    hierarchies=hierarchies,
    IB_HIST_END_DT=IB_HIST_END_DT,
    PROMO=PROMO,
)

In [8]:
# Display the generated forecast table (pandas truncates the rendering of long frames).
ACC_AGG_HYBRID_FORECAST_

Unnamed: 0,period_dt,product_lvl_id7,location_lvl_id5,customer_lvl_id3,distr_channel_lvl_id1,segment_name,vf_forecast_value,demand_type,assortment_type,ml_forecast_value,hybrid_forecast_value
0,2022-07-18,70001,500015,3000000,2,Name of segment 0,504.525867,promo,old,874.173459,159.086944
1,2022-07-18,70001,500014,3000000,2,Name of segment 1,646.607944,promo,old,353.307388,200.122143
2,2022-07-18,70001,500016,3000000,2,Name of segment 2,365.263969,regular,new,631.268931,570.952938
3,2022-07-18,70001,500013,3000000,2,Name of segment 3,664.792099,regular,old,510.240452,589.742552
4,2022-07-18,70001,500017,3000000,2,Name of segment 4,311.589664,regular,old,746.472756,823.801243
...,...,...,...,...,...,...,...,...,...,...,...
21575,2023-07-10,70083,500015,3000000,2,Name of segment 21575,803.456959,regular,old,192.202161,186.665224
21576,2023-07-10,70083,500014,3000000,2,Name of segment 21576,976.986688,promo,old,1051.820243,1386.787465
21577,2023-07-10,70083,500016,3000000,2,Name of segment 21577,322.292494,promo,new,561.047313,544.993817
21578,2023-07-10,70083,500013,3000000,2,Name of segment 21578,140.837413,promo,new,222.544678,646.451659


In [9]:
# Drop the helper promo columns by rebinding rather than mutating in place.
# errors='ignore' keeps this cell re-runnable: a second execution, when the
# columns are already gone, no longer raises a KeyError.
RESTORED_DEMAND = RESTORED_DEMAND.drop(columns=['promo_type', 'promo_id'], errors='ignore')

In [10]:
# Display the generated demand table after the promo helper columns were removed.
RESTORED_DEMAND

Unnamed: 0,period_dt,product_id,location_id,customer_id,distr_channel_id,stock_qty,sales_qty,sales_qty_r,promo_flg,deficit_flg1,deficit_flg2
0,2022-07-01,80001,600002,6000002,1,1123.355266,999,1078.026894,0,0,1
1,2022-07-01,80001,600002,6000003,1,264.287939,261,288.830063,0,0,0
2,2022-07-01,80001,600002,6000004,1,409.698245,228,280.139032,0,1,0
3,2022-07-01,80001,600002,6000005,1,328.149554,908,995.992985,1,0,0
4,2022-07-01,80001,600002,6000006,1,690.781491,953,973.686618,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
273895,2022-07-15,80083,600012,6000017,1,777.815957,704,731.089475,0,0,1
273896,2022-07-15,80083,600012,6000018,1,593.662287,660,737.894405,0,0,1
273897,2022-07-15,80083,600012,6000019,1,248.386953,653,668.122381,0,1,0
273898,2022-07-15,80083,600012,6000020,1,411.704006,179,225.592128,0,0,0


In [11]:
# Display the generated forecast-flag table (period start/end, ids and status).
FORECAST_FLAG

Unnamed: 0,period_dt,period_end_dt,product_id,location_id,customer_id,distr_channel_id,status
0,2022-07-18,2022-07-24,80001,600002,6000002,1,active
1,2022-07-18,2022-07-24,80001,600002,6000003,1,active
2,2022-07-18,2022-07-24,80001,600002,6000004,1,out-of-sale
3,2022-07-18,2022-07-24,80001,600002,6000005,1,blocked
4,2022-07-18,2022-07-24,80001,600002,6000006,1,active
...,...,...,...,...,...,...,...
949515,2023-07-10,2023-07-16,80083,600012,6000017,1,blocked
949516,2023-07-10,2023-07-16,80083,600012,6000018,1,out-of-sale
949517,2023-07-10,2023-07-16,80083,600012,6000019,1,out-of-sale
949518,2023-07-10,2023-07-16,80083,600012,6000020,1,active


In [21]:
def alert_1(config_alert : dict,
            hierarchies : dict,
            config : dict,
            config_config : dict, 
            IB_HIST_END_DT :  datetime.datetime,
            IB_FCST_HORIZON : datetime.timedelta,
            ACC_AGG_HYBRID_FORECAST_ : pd.DataFrame, 
            RESTORED_DEMAND : pd.DataFrame,
            FORECAST_FLAG : pd.DataFrame) -> pd.DataFrame:
    
    """
    Build the alert table T1 from the aggregated hybrid forecast.

    The result is a copy of ACC_AGG_HYBRID_FORECAST_ extended with the alert
    columns (kpi_nm, alert_type_id, input_table, stat_* and placeholder
    columns). The input frame itself is left unmodified.
    
    Parameters
    ----------
    config_alert : dict
        Alert configuration. Reads 'alert_id', 'Input_table', 'Input_column'
        (falls back to 'hybrid_forecast_value'), 'tgt_type' and the four
        'al_<key>_lvl' entries.
    hierarchies : dict
        Dictionary containing matches of key names with the relevant hierarchical tables
    config : dict
        Configuration parameters of the step (currently not used here)
    config_config : dict
        Configuration parameters of the step (currently not used here)
    IB_HIST_END_DT : datetime
        Last known date (i.e. sales and stock information is known);
        currently not used here
    IB_FCST_HORIZON : timedelta
        Length of the forecast horizon; currently not used here
    ACC_AGG_HYBRID_FORECAST_ : pd.DataFrame
        Generated forecast table; must contain the configured input column and,
        when the hierarchy check runs, the configured level id columns
    RESTORED_DEMAND : pd.DataFrame
        Generated table (currently not used here)
    FORECAST_FLAG : pd.DataFrame
        Generated table (currently not used here)
       
    Returns
    -------
    pd.DataFrame
        T1, the forecast rows with the alert columns appended
    """
    hierarchy_keys = ['product', 'location', 'customer', 'distr_channel']

    # Work on a copy so the caller's forecast table does not silently pick up
    # the alert columns.
    T1 = ACC_AGG_HYBRID_FORECAST_.copy()

    # Left-join the distinct hierarchy ids of the configured levels. The keys
    # are de-duplicated, so the join preserves the number and order of rows.
    enriched = T1
    if (config_alert['alert_id'][0] == 1
            and config_alert['Input_table'] == 'ACC_AGG_HYBRID_FORECAST_'):
        for key in hierarchy_keys:
            level = config_alert[f'al_{key}_lvl']
            if key == 'product' and level == 8:
                # Level 8 of the product hierarchy is addressed by product_id.
                column_name = 'product_id'
            else:
                column_name = f"{key}_lvl_id{level}"
            keys_df = hierarchies[key][[column_name]].drop_duplicates()
            enriched = pd.merge(enriched, keys_df, how='left')

    # The statistic is the mean of the configured input column over all rows.
    # When no hierarchy join ran, `enriched` is T1 itself and the column
    # 'forecast_value' therefore also appears in the result.
    input_column = config_alert.get('Input_column', 'hybrid_forecast_value')
    forecast_mean = enriched[input_column].mean()
    enriched['forecast_value'] = forecast_mean

    # TODO: the join with the active FORECAST_FLAG rows (status filter and
    # horizon window from IB_HIST_END_DT / IB_FCST_HORIZON) is not implemented
    # yet; nothing from FORECAST_FLAG or RESTORED_DEMAND reaches T1 so far.

    highest_level = max(config_alert[f'al_{key}_lvl'] for key in hierarchy_keys)

    T1['kpi_nm'] = config_alert['tgt_type']
    # NOTE(review): the label depends only on the configured levels ('nareg'
    # whenever the highest level is non-zero). The names suggest it should
    # distinguish NaN from zero forecasts per row - confirm the intended rule.
    T1['alert_type_id'] = 'nareg' if highest_level else 'zeroreg'
    T1['input_table'] = config_alert['Input_table']
    T1['stat_nom_nm'] = 'forecast_value'
    T1['stat_den_nm'] = np.nan
    # Assigned as a scalar so the value does not depend on index alignment
    # between T1 and the joined frame.
    T1['stat_nom_val'] = forecast_mean
    T1['stat_den_val'] = 1
    T1['alert_threshold'] = np.nan
    T1['alert_start_val'] = np.nan
    T1['store_location_id'] = np.nan

    return T1

# ACC_AGG_HYBRID_FORECAST_ = ACC_AGG_HYBRID_FORECAST_[ACC_AGG_HYBRID_FORECAST_['forecast_value'] >= config['IB_ALERT_MIN_VAL']]

In [22]:
# Run the alert step on the generated tables; arguments are passed by name so
# the mapping to alert_1's parameters is explicit.
T1 = alert_1(
    config_alert=config_alert,
    hierarchies=hierarchies,
    config=config,
    config_config=config_config,
    IB_HIST_END_DT=IB_HIST_END_DT,
    IB_FCST_HORIZON=IB_FCST_HORIZON,
    ACC_AGG_HYBRID_FORECAST_=ACC_AGG_HYBRID_FORECAST_,
    RESTORED_DEMAND=RESTORED_DEMAND,
    FORECAST_FLAG=FORECAST_FLAG,
)

In [23]:
# Preview the first rows of the alert table returned by alert_1.
T1.head()

Unnamed: 0,period_dt,product_lvl_id7,location_lvl_id5,customer_lvl_id3,distr_channel_lvl_id1,segment_name,vf_forecast_value,demand_type,assortment_type,ml_forecast_value,...,kpi_nm,alert_type_id,input_table,stat_nom_nm,stat_den_nm,stat_nom_val,stat_den_val,alert_threshold,alert_start_val,store_location_id
0,2022-07-18,70001,500015,3000000,2,Name of segment 0,504.525867,promo,old,874.173459,...,POS,nareg,ACC_AGG_HYBRID_FORECAST_,forecast_value,,511.39863,1,,,
1,2022-07-18,70001,500014,3000000,2,Name of segment 1,646.607944,promo,old,353.307388,...,POS,nareg,ACC_AGG_HYBRID_FORECAST_,forecast_value,,511.39863,1,,,
2,2022-07-18,70001,500016,3000000,2,Name of segment 2,365.263969,regular,new,631.268931,...,POS,nareg,ACC_AGG_HYBRID_FORECAST_,forecast_value,,511.39863,1,,,
3,2022-07-18,70001,500013,3000000,2,Name of segment 3,664.792099,regular,old,510.240452,...,POS,nareg,ACC_AGG_HYBRID_FORECAST_,forecast_value,,511.39863,1,,,
4,2022-07-18,70001,500017,3000000,2,Name of segment 4,311.589664,regular,old,746.472756,...,POS,nareg,ACC_AGG_HYBRID_FORECAST_,forecast_value,,511.39863,1,,,


In [None]:
# T2 = FORECAST_FLAG
# one = datetime.timedelta(days=1)
# min_start_dt = max(IB_HIST_END_DT + one, min(FORECAST_FLAG['period_start_dt']))
# max_end_dt = min(IB_HIST_END_DT + IB_FCST_HORIZON, max(FORECAST_FLAG['period_end_dt']))
# freq = config_alert['al_time_lvl'][0]
# timerange = pd.date_range(min_start_dt, max_end_dt, freq=freq)
# timerange += datetime.timedelta(1)
# T2 = pd.DataFrame(timerange, columns=['period_dt'])