In [3]:
import pandas as pd

df_train = pd.read_csv('data/orders0206_train.csv', sep='|', parse_dates=['time'])
df_test = pd.read_csv('data/orders0206_train.csv', sep='|', parse_dates=['time'])

df_info = pd.read_csv('data/infos.csv', sep='|', index_col='itemID')
product_prices = df_info['simulationPrice'].to_dict()

In [4]:
df_info.head()

Unnamed: 0_level_0,simulationPrice,promotion
itemID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3.43,
2,9.15,
3,14.04,
4,14.1,
5,7.48,


In [6]:
df_train.head()

Unnamed: 0,time,transactID,itemID,order,salesPrice
0,2018-01-01 00:01:56,2278968,450,1,17.42
1,2018-01-01 00:01:56,2278968,83,1,5.19
2,2018-01-01 00:07:11,2255797,7851,2,20.47
3,2018-01-01 00:09:24,2278968,450,1,17.42
4,2018-01-01 00:09:24,2278968,83,1,5.19


Evaluation Method

In [4]:
from collections import defaultdict

def evaluate_result(y: dict, y_pred: dict):
    monetary_value = 0
    y_pred = defaultdict(int, y_pred)  # return prediction of 0 for items without prediction
    
    for item, demand in y.items():
        predicted_demand = y_pred[item]
        price = product_prices[item]
        monetary_value += price * min(demand, predicted_demand)
        if predicted_demand > demand:
            monetary_value -= .6 * price * (predicted_demand - demand)

    print(monetary_value)

Baseline Models

In [5]:
# actual demand
y = df_test.groupby(by='itemID')['order'].sum().to_dict()

# baseline 1 (average demand of previous 14 days)
y_baseline1 = df_train[df_train['time'] >= '2018-05-19'].groupby(by='itemID')['order'].sum().to_dict()

# baseline 2 (average demand of previous half year)
total_orders = df_train.groupby(by='itemID')['order'].sum().to_dict()
total_observed_days = (df_train['time'].dt.normalize().max() - df_train['time'].dt.normalize().min()).days
y_baseline2 = {item: orders / total_observed_days * 14 for item, orders in total_orders.items()}  # 14-day avg. demand



# baseline 3 (average demand of previous 14 days)
y_baseline3 = df_train[(df_train['time'] >= '2018-05-19') & (df_train['time'] < '2018-05-19')].groupby(by='itemID')['order'].sum().to_dict()

BaselineEvaluation

In [6]:
# perfect result
evaluate_result(y, y)

# baseline 1
evaluate_result(y, y_baseline1)

# baseline 2 (average of previous half year)
evaluate_result(y, y_baseline2)

66271234.590000115
8904189.420000017
6144352.875894056
