## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import datetime

## Loading data

In [2]:
def dateparse(dates):
    """Parse 'YYYY-MM-DD' text into pandas timestamps.

    ``pd.datetime`` was deprecated in pandas 1.0 and later removed, so the
    parsing goes through ``pd.to_datetime`` with the same explicit format.
    Accepts a single string or an array-like of strings.
    """
    return pd.to_datetime(dates, format='%Y-%m-%d')

In [3]:
# Load the demand table indexed by 'day' and parse that index explicitly.
# read_csv's `date_parser` argument is deprecated since pandas 2.0, so the
# conversion is done with pd.to_datetime, which works on old and new versions.
demand = pd.read_csv('data/demand.csv', index_col='day')
demand.index = pd.to_datetime(demand.index, format='%Y-%m-%d')

In [4]:
# Load the supply table indexed by 'day' and parse that index explicitly.
# read_csv's `date_parser` argument is deprecated since pandas 2.0, so the
# conversion is done with pd.to_datetime, which works on old and new versions.
supply = pd.read_csv('data/supply.csv', index_col='day')
supply.index = pd.to_datetime(supply.index, format='%Y-%m-%d')

## Add shift unit to Supply

In [5]:
# Turn each slot's 'HH:MM' start time into a timestamp so its hour can be read.
supply['hour'] = pd.to_datetime(arg=supply['timeslot_from'], format='%H:%M')

In [6]:
# Label every slot with its shift from the slot's start hour:
#   hour < 11 -> 'MS' (morning shift), hour > 16 -> 'ES' (evening shift),
#   anything in between stays NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
supply['shift'] = [
    'MS' if start.hour < 11 else ('ES' if start.hour > 16 else np.nan)
    for start in supply['hour']
]

## Create history demand features for prediction

In [7]:
# Morning-shift demand in date order, extended with lag columns
# pickups-1 .. pickups-7 holding num_pickups of the 1..7 preceding rows
# (the shift is by row, not by calendar day).
ms_demands = demand.loc[demand['shift'] == 'MS'].sort_index()
for lag in range(1, 8):
    ms_demands["pickups-{}".format(lag)] = ms_demands['num_pickups'].shift(lag)
ms_demands.head()

Unnamed: 0_level_0,shift,num_pickups,pickups-1,pickups-2,pickups-3,pickups-4,pickups-5,pickups-6,pickups-7
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-12-01,MS,67,,,,,,,
2017-12-02,MS,48,67.0,,,,,,
2017-12-04,MS,86,48.0,67.0,,,,,
2017-12-06,MS,74,86.0,48.0,67.0,,,,
2017-12-08,MS,56,74.0,86.0,48.0,67.0,,,


In [8]:
# Evening-shift demand in date order, extended with lag columns
# pickups-1 .. pickups-7 holding num_pickups of the 1..7 preceding rows
# (the shift is by row, not by calendar day).
es_demands = demand.loc[demand['shift'] == 'ES'].sort_index()
for lag in range(1, 8):
    es_demands["pickups-{}".format(lag)] = es_demands['num_pickups'].shift(lag)
es_demands.head()

Unnamed: 0_level_0,shift,num_pickups,pickups-1,pickups-2,pickups-3,pickups-4,pickups-5,pickups-6,pickups-7
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-12-04,ES,60,,,,,,,
2017-12-06,ES,40,60.0,,,,,,
2017-12-11,ES,49,40.0,60.0,,,,,
2017-12-13,ES,29,49.0,40.0,60.0,,,,
2017-12-18,ES,52,29.0,49.0,40.0,60.0,,,


In [9]:
# Stack the morning-shift rows on top of the evening-shift rows.
demand_history = pd.concat(objs=[ms_demands, es_demands])

In [10]:
# Count the missing values in every column (the lag columns start with NaN).
demand_history.isna().sum(axis=0)

shift           0
num_pickups     0
pickups-1       2
pickups-2       4
pickups-3       6
pickups-4       8
pickups-5      10
pickups-6      12
pickups-7      14
dtype: int64

In [11]:
# Keep only the rows in which every column, including all lag columns, is filled.
demand_history = demand_history.dropna(axis=0, how='any')

## Find Mean of avail_area_ratio per day in supply data
The mean should reflect the availability that was displayed to customers on average over the day.

### For Morning Shift

In [12]:
# Convert the calculated_datetime column to datetimes in place.
supply['calculated_datetime'] = supply['calculated_datetime'].pipe(pd.to_datetime)

In [13]:
# One row per distinct day found in the supply index (all shifts).
ms_feature_per_day = pd.DataFrame(supply.index.unique())

In [14]:
# Morning-shift supply rows, ordered by day.
ms_supply = supply.loc[supply['shift'] == 'MS'].sort_index()

In [15]:
# Group the morning-shift rows by their day.
ms_supply_grouped = ms_supply.groupby(by=['day'])

In [16]:
# Mean avail_area_ratio of every morning-shift day, in the groupby's iteration order.
ms_mean_avail_area_of_day = [
    day_rows['avail_area_ratio'].mean() for _, day_rows in ms_supply_grouped
]

In [17]:
# Attach the morning-shift mean to each day by matching on the day itself.
# ms_feature_per_day lists the days of *all* supply rows in their order of
# appearance, while the groupby yields one sorted entry per morning-shift day,
# so a positional assignment would mislabel days whenever those orders differ
# and would fail outright when a day has no morning-shift rows.
# Days without morning-shift supply get NaN here.
ms_mean_by_day = ms_supply_grouped['avail_area_ratio'].mean()
ms_feature_per_day['mean_avail_area_of_day'] = ms_feature_per_day['day'].map(ms_mean_by_day)
ms_feature_per_day['shift'] = 'MS'

### For Evening Shift

In [18]:
# Evening-shift supply rows in day order, the distinct days they cover, and
# the mean avail_area_ratio of each of those days.
es_supply = supply.loc[supply['shift'] == 'ES'].sort_index()
es_feature_per_day = pd.DataFrame(es_supply.index.unique())
es_supply_grouped = es_supply.groupby(by=['day'])

es_mean_avail_area_of_day = [
    day_rows['avail_area_ratio'].mean() for _, day_rows in es_supply_grouped
]

In [19]:
# Add the per-day means and the shift label to the evening-shift day table.
es_feature_per_day = es_feature_per_day.assign(
    mean_avail_area_of_day=es_mean_avail_area_of_day,
    shift='ES',
)

### Concat mean feature

In [20]:
# Stack the morning-shift feature rows on top of the evening-shift ones.
feature_per_day = pd.concat(objs=[ms_feature_per_day, es_feature_per_day])

## Merge supply and demand

In [21]:
# Left-join the supply features onto the demand history by day and shift,
# keeping every demand row even when no supply feature matches.
features_and_target = demand_history.merge(
    feature_per_day,
    how='left',
    on=['day', 'shift'],
)

In [22]:
# Replace every remaining missing value (e.g. unmatched supply features) with 0.
features_and_target = features_and_target.fillna(value=0)

In [23]:
# Use the day as the row index again.
features_and_target = features_and_target.set_index(keys='day')

In [24]:
# Order the rows chronologically by the 'day' index level.
features_and_target = features_and_target.sort_values(by='day')

# Save Features

In [25]:
# Write the feature table to disk; to_csv includes the index and the header
# row by default.
features_and_target.to_csv('data/features_and_target.csv')