In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

In [155]:
tsb = pd.read_csv('Uni_APU_2_tsb.csv')
ets = pd.read_csv('Uni_APU_2_ets.csv')
mean = pd.read_csv('Uni_APU_2_mean.csv')

In [14]:
infos = pd.read_pickle('infos.pkl')
items = pd.read_pickle('items.pkl')
sorted_orders = pd.read_pickle('sorted_orders.pkl')
daily_orders = pd.read_csv('daily_orders.csv')

In [164]:
tsb.rename({'demandPrediction': 'demand_tsb'}, axis = 1, inplace = True)

In [165]:
tsb['demand_ets'] = ets['demandPrediction']

In [174]:
tsb['demand_mean'] = (tsb['demand_tsb'] + tsb['demand_ets']) / 2

In [181]:
tsb.sum()

itemID         5.111543e+07
demand_tsb     2.157761e+05
demand_ets     3.811917e+05
demand_mean    2.984839e+05
dtype: float64

Right off the bat, I like that `demand_mean` predicts the expected amount of **297933** items, which follows the increasing demand trend. The TSB predictions are underforecasts and ETS ones are overforecasts but their mean seems to be just the right amount.

In [182]:
tsb.to_csv('forecasts.csv', index = False)

In [183]:
df = pd.read_csv('forecasts.csv')

In [184]:
df

Unnamed: 0,itemID,demand_tsb,demand_ets,demand_mean
0,1,49.158814,53.863913,51.511363
1,2,0.364125,0.392323,0.378224
2,3,29.292589,25.701549,27.497069
3,4,17.916622,15.419364,16.667993
4,5,13.570625,14.085350,13.827988
...,...,...,...,...
9835,10450,11.057287,30.379691,20.718489
9836,10459,0.041685,0.077831,0.059758
9837,10460,0.015735,0.077828,0.046781
9838,10462,0.021403,0.077829,0.049616


In [185]:
price_df = pd.DataFrame(sorted_orders.groupby('itemID')['salesPrice'].mean())
price_df.reset_index(inplace = True)
items_df = pd.DataFrame(items['itemID'], columns = ['itemID'])

In [186]:
df['price'] = price_df['salesPrice']
df = items_df.join(df.set_index('itemID'), on = 'itemID')
df['promotion'] = infos['promotion']

In [187]:
df

Unnamed: 0,itemID,demand_tsb,demand_ets,demand_mean,price,promotion
0,1,49.158814,53.863913,51.511363,3.111661,
1,2,0.364125,0.392323,0.378224,9.150000,
2,3,29.292589,25.701549,27.497069,12.733253,
3,4,17.916622,15.419364,16.667993,13.798895,
4,5,13.570625,14.085350,13.827988,7.735556,
...,...,...,...,...,...,...
10458,10459,0.041685,0.077831,0.059758,14.710000,
10459,10460,0.015735,0.077828,0.046781,325.670000,
10460,10461,,,,,
10461,10462,0.021403,0.077829,0.049616,304.300000,


In [188]:
df['price'].fillna(value = 0, inplace = True)

In [189]:
df_copy = df.copy()

In [190]:
df.fillna(-1, inplace = True)

In [191]:
df['promotion'] = [str(x) for x in df['promotion']]

In [192]:
for i in tqdm(list(range(len(df)))):
    
    item = df.iloc[i,:]
    
    # for those items in the promotion, round up. otherwise, round normally
    if item['promotion'] != '-1':
        df.loc[i,'demand_mean'] = np.ceil(df.loc[i,'demand_mean'])
    else:
        df.loc[i,'demand_mean'] = round(df.loc[i,'demand_mean'])
        
    # for those unsold items not in the promotion, predict 0. otherwise predict 1
    if (item['demand_mean'] == -1) and (item['promotion'] == '-1'):
        df.loc[i,'demand_mean'] = 0
    elif (item['demand_mean'] == -1) and (item['promotion'] != '-1'):
        df.loc[i,'demand_mean'] = 1
    else:
        pass
    
    # for one day promotion items that are predicted 0, make them 1
    if (item['demand_mean'] == 0) and (len(item['promotion']) == 10):
        df.loc[i,'demand_mean'] = 1
    # for two day promotion items that are predicted <= 1, make them 2
    elif (item['demand_mean'] <= 1) and (len(item['promotion']) == 21):
        df.loc[i,'demand_mean'] = 2
    # for three day promotion items that are predicted <= 2, make them 3
    elif (item['demand_mean'] <= 2) and (len(item['promotion']) == 32):
        df.loc[i,'demand_mean'] = 3
    else:
        pass


100%|██████████████████████████████████████████████████████████████████████████| 10463/10463 [00:07<00:00, 1361.22it/s]


In [193]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10463 entries, 0 to 10462
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   itemID       10463 non-null  int64  
 1   demand_tsb   10463 non-null  float64
 2   demand_ets   10463 non-null  float64
 3   demand_mean  10463 non-null  float64
 4   price        10463 non-null  float64
 5   promotion    10463 non-null  object 
dtypes: float64(4), int64(1), object(1)
memory usage: 490.6+ KB


In [194]:
(df_copy['demand_mean'] * df_copy['price']).sum()

8386552.693521747

In [200]:
df['demand_mean'].sum()

299604.0

Can do the same preprocessing for `demand_tsb` and `demand_ets` but nah.

In [207]:
df['demand_mean'] = [int(x) for x in df['demand_mean']]

In [210]:
submission = df[['itemID', 'demand_mean']]

In [212]:
submission.rename({'demand_mean': 'demandPrediction'}, axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [214]:
submission

Unnamed: 0,itemID,demandPrediction
0,1,52
1,2,0
2,3,27
3,4,17
4,5,14
...,...,...
10458,10459,0
10459,10460,0
10460,10461,0
10461,10462,0


In [216]:
submission.to_csv('veryfinal.csv', sep = '|', index = False)

In [217]:
test = pd.read_csv('../DMC-2020-Task/Submissions/Uni_APU_2.csv', sep = '|')

In [220]:
test

Unnamed: 0,itemID,demandPrediction
0,1,52
1,2,0
2,3,27
3,4,17
4,5,14
...,...,...
10458,10459,0
10459,10460,0
10460,10461,0
10461,10462,0


## Final iteration of the forecasts, using the mean of TSB and ETS, was submitted on 17-June-2020 12:08 PM