In [1]:
import pandas as pd

df_train = pd.read_csv('data/orders0206_train.csv', sep='|', parse_dates=['time'])
df_test = pd.read_csv('data/orders0206_test.csv', sep='|', parse_dates=['time'])

df_info = pd.read_csv('data/infos.csv', sep='|', index_col='itemID')
product_prices = df_info['simulationPrice'].to_dict()

In [2]:
from collections import defaultdict

def evaluate_result(y: dict, y_pred: dict):
    monetary_value = 0
    y_pred = defaultdict(int, y_pred)  # return prediction of 0 for items without prediction
    
    for item in set(y_pred).difference(set(y)):
        y[item] = 0  # make sure that all items for which a demand has been predicted are contained in the actual demands
    
    for item, demand in y.items():
        predicted_demand = y_pred[item]
        price = product_prices[item]
        monetary_value += price * min(demand, predicted_demand)
        if predicted_demand > demand:
            monetary_value -= .6 * price * (predicted_demand - demand)
            
    return monetary_value

In [3]:
# actual demand
y = df_test.groupby(by='itemID')['order'].sum().to_dict()

# baseline 1 (average demand of previous 14 days)
y_baseline1 = df_train[df_train['time'] >= '2018-05-19'].groupby(by='itemID')['order'].sum().to_dict()

# baseline 2 (average demand of previous half year)
total_orders = df_train.groupby(by='itemID')['order'].sum().to_dict()
total_observed_days = (df_train['time'].dt.normalize().max() - df_train['time'].dt.normalize().min()).days
y_baseline2 = {item: orders / total_observed_days * 14 for item, orders in total_orders.items()}  # 14-day avg. demand

In [4]:
# perfect result
print(f'Perfect Result: {evaluate_result(y, y):.2f}')

# baseline 1
print(f'Baseline 1: {evaluate_result(y, y_baseline1):.2f}')

# baseline 2
print(f'Baseline 2: {evaluate_result(y, y_baseline2):.2f}')

Perfect Result: 7895975.87
Baseline 1: -3727365.60
Baseline 2: -1672504.21
