# Promotion Feature
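This notebook works with the orders data: it flags each sale as a promotion when its price is below the item's usual price, and then aggregates that `promotion` flag per item and time group.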

## Loading our data...

In [1]:
import numpy as np
import pandas as pd
from utils import read_data, process_time, merge_data, promo_detector, promo_detector_fixed
import seaborn as sns
import matplotlib.pyplot as plt
import sys

# NOTE(review): this path is appended after `from utils import ...` above has already run,
# so it cannot influence that import — confirm whether it is still needed.
sys.path.append("../../main/datasets/")

In [2]:
!ls  ../../main/datasets/

1.0v.zip


In [3]:
# read_data (imported from utils) returns three objects, unpacked here as infos, items and orders.
infos, items, orders = read_data("../../main/datasets/")
# Print the three shapes as a quick check that the load worked.
print("Sanity checks...", infos.shape, items.shape, orders.shape)

Sanity checks... (10463, 3) (10463, 8) (2181955, 5)


In [4]:
# Preview the first rows of the raw orders table.
orders.head()

Unnamed: 0,time,transactID,itemID,order,salesPrice
0,2018-01-01 00:01:56,2278968,450,1,17.42
1,2018-01-01 00:01:56,2278968,83,1,5.19
2,2018-01-01 00:07:11,2255797,7851,2,20.47
3,2018-01-01 00:09:24,2278968,450,1,17.42
4,2018-01-01 00:09:24,2278968,83,1,5.19


## Preprocessing our orders

These cells were taken from Bruno's "1.1-First Model" notebook, which can be found in this repository.

In [5]:
# The return value is discarded, so process_time presumably adds its time-derived columns
# to `orders` in place (promo_detector_fixed below reads orders.group_backwards) — TODO confirm.
process_time(orders)

In [6]:
def promo_detector_fixed(orders, aggregation=True, mode=True, n_groups=13):
    """Flag every sale whose price is below the item's most frequent price.

    For each time group ``i`` in ``1..n_groups`` the reference price of an item is
    the most frequent ``salesPrice`` among the rows with ``group_backwards <= i``.
    Rows of group ``i`` are then marked ``promotion = 1`` when their price is
    strictly lower than that reference, and ``0`` otherwise.

    NOTE: this definition shadows the ``promo_detector_fixed`` imported from
    ``utils`` at the top of the notebook.

    NOTE(review): in the sample output of this notebook, smaller ``group_backwards``
    values belong to later dates (2018-06-29 -> 1, 2018-01-01 -> 13), so
    ``group_backwards <= i`` makes the reference price of a period depend on
    later periods as well. Confirm this look-ahead is intended.

    Parameters
    -------------
    orders : A pandas DataFrame with all the sales. Must contain the columns
             'itemID', 'salesPrice' and 'group_backwards'. It is not modified.

    aggregation : bool; when False the helper column 'salesPriceMode' is dropped
                  from the result.

    mode : Currently unused; kept so existing calls keep working.

    n_groups : int; number of time groups to process (groups 1..n_groups).
               Defaults to 13, the value that used to be hard-coded.
               Must be at least 1, otherwise pandas.concat raises ValueError.

    Returns
    -------------
    A new pandas DataFrame with the rows of groups 1..n_groups (items without a
    reference price are dropped by the inner merge), plus the columns
    'salesPriceMode' (unless aggregation is False) and 'promotion'.
    """

    def _most_frequent(prices):
        # value_counts() sorts by frequency (descending), so the first label is the mode.
        return prices.value_counts().index[0]

    # Drop stale outputs of a previous run: leaving them in would make the merge
    # produce suffixed columns ('salesPriceMode_x', ...) and break the comparison below.
    base = orders.drop(columns=['salesPriceMode', 'promotion'], errors='ignore')

    flagged_groups = []
    for group in range(1, n_groups + 1):
        reference_prices = (
            base.loc[base.group_backwards <= group]
            .groupby(['itemID'])
            .agg(salesPriceMode=('salesPrice', _most_frequent))
        )
        group_orders = pd.merge(base.loc[base.group_backwards == group], reference_prices,
                                how='inner', left_on='itemID', right_on='itemID')
        # 1 when the sale is cheaper than the item's usual price, 0 otherwise.
        group_orders['promotion'] = (
            group_orders['salesPrice'] < group_orders['salesPriceMode']).astype('int64')
        flagged_groups.append(group_orders)

    # Concatenate once instead of growing a DataFrame inside the loop.
    new_df = pd.concat(flagged_groups)

    if not aggregation:
        new_df = new_df.drop(columns='salesPriceMode')
    return new_df

In [7]:
# Per-sale promotion flags from the notebook-local promo_detector_fixed defined in the previous cell.
new_df = promo_detector_fixed(orders)

In [23]:
# Order the flagged sales by time group, then by item (mutates new_df in place).
# NOTE(review): this cell's execution count (23) is out of sequence with its neighbours (7 and 8);
# re-run the notebook top to bottom to confirm the results are reproducible.
new_df.sort_values(by=['group_backwards', 'itemID'], inplace=True)

In [8]:
# Rebind `orders` to the output of promo_detector (imported from utils); judging by the
# sanity-check output below, the result carries the salesPriceMode and promotion columns.
# NOTE(review): the aggregation further down consumes this `orders`, not `new_df` from
# promo_detector_fixed — confirm which detector is meant to feed the feature.
orders = promo_detector(orders)

In [18]:
# Sanity checking...
# Show only the sales that were flagged as promotions.
orders.loc[orders['promotion'] == 1]

Unnamed: 0,time,transactID,itemID,order,salesPrice,days,days_backwards,group_backwards,salesPriceMode,promotion
574,2018-02-18 20:44:40,3330,7851,1,18.72,49,132,10,20.47,1
575,2018-02-19 15:30:18,2293923,7851,1,18.20,50,131,10,20.47,1
813,2018-04-30 13:07:35,2263766,7851,1,16.38,120,61,5,20.47,1
1256,2018-01-01 00:51:59,2278968,19,1,77.64,1,180,13,79.68,1
1257,2018-01-01 00:56:54,2278968,19,1,77.64,1,180,13,79.68,1
...,...,...,...,...,...,...,...,...,...,...
2180253,2018-06-29 14:45:16,2291000,7367,1,6.33,180,1,1,7.92,1
2180259,2018-06-29 17:28:01,2260280,7367,1,6.33,180,1,1,7.92,1
2180260,2018-06-29 17:46:14,2260280,7367,1,6.33,180,1,1,7.92,1
2180546,2018-06-29 22:45:39,2256386,7367,2,6.33,180,1,1,7.92,1


In [9]:
def promotionAggregation(orders, items, promotionMode='mean', timeScale='group_backwards', salesPriceMode='mean',
                         verbose=True):
    """The 'promotion' feature is, originally, given by sale. This function aggregates it into the selected
    time scale and attaches the item information.

    Parameters
    -------------
    orders : A pandas DataFrame with all the sales. Must contain the columns named by
             timeScale, 'itemID', 'order', 'promotion' and 'salesPrice'.

    items: A pandas DataFrame with the infos about all items; joined on 'itemID'.
           It is not modified.

    promotionMode : A pandas aggregation compatible data type;
                    The aggregation mode of the 'promotion' feature
    timeScale : A String with the name of the column containing the time signature.
                E.g.: 'group_backwards'
    salesPriceMode : A pandas aggregation compatible data type;
                    The aggregation mode of the 'salesPrice' feature
    verbose : bool; when True (the default, matching the previous behaviour) the
              aggregated frame is printed before its columns are renamed.
              Pass False to silence the output.

    Returns
    -------------
    A pandas DataFrame with one row per (timeScale, itemID) holding 'orderSum',
    'promotion_<promotionMode>' and 'salesPrice_<salesPriceMode>', left-joined
    with the columns of items.
    """

    def _mode_label(mode):
        # Strings are used verbatim. Callables contribute their __name__, so the column
        # label does not embed a repr such as '<function f at 0x...>'.
        if isinstance(mode, str):
            return mode
        return getattr(mode, '__name__', str(mode))

    aggregated = orders.groupby([timeScale, 'itemID'], as_index=False).agg(
        {'order': 'sum', 'promotion': promotionMode, 'salesPrice': salesPriceMode})

    if verbose:
        print(aggregated)

    aggregated = aggregated.rename(columns={
        'order': 'orderSum',
        'promotion': f'promotion_{_mode_label(promotionMode)}',
        'salesPrice': f'salesPrice_{_mode_label(salesPriceMode)}',
    })

    # merge returns a new frame and leaves items untouched, so no defensive copy is needed.
    return pd.merge(aggregated, items, how='left', left_on=['itemID'], right_on=['itemID'])


In [10]:
# Aggregate with the defaults: mean promotion rate and mean sales price per (group_backwards, itemID).
df = promotionAggregation(orders, items)

       group_backwards  itemID  order  promotion   salesPrice
0                    1       1      3   0.000000     3.430000
1                    1       3    140   0.000000    14.040000
2                    1       4    145   0.000000    14.100000
3                    1       5      1   1.000000     7.480000
4                    1       7      1   0.000000    34.390000
...                ...     ...    ...        ...          ...
39510               13    9887     12   0.000000  1397.550000
39511               13    9938     20   0.000000  1451.670000
39512               13    9986      2   0.000000    12.020000
39513               13    9999      7   0.000000    59.890000
39514               13   10000    200   0.005155    28.549433

[39515 rows x 5 columns]


In [40]:
# Inspect the aggregated rows of a single item (itemID == 1).
df.loc[df.itemID == 1]

Unnamed: 0,group_backwards,itemID,orderSum,promotion_mean,salesPrice_mean,brand,manufacturer,customerRating,category1,category2,category3,recommendedRetailPrice
0,1,1,3,0.0,3.43,0,1,4.38,1,1,1,8.84
9199,3,1,31,0.0,3.11,0,1,4.38,1,1,1,8.84
13487,4,1,3,0.0,3.11,0,1,4.38,1,1,1,8.84
17395,5,1,299,0.0,3.11,0,1,4.38,1,1,1,8.84
21137,6,1,2,0.0,3.11,0,1,4.38,1,1,1,8.84
24470,7,1,1,0.0,3.11,0,1,4.38,1,1,1,8.84
27638,8,1,1,0.0,3.11,0,1,4.38,1,1,1,8.84
30552,9,1,3,0.0,3.11,0,1,4.38,1,1,1,8.84
32928,10,1,35,0.0,3.11,0,1,4.38,1,1,1,8.84
34806,11,1,313,0.0,3.11,0,1,4.38,1,1,1,8.84
