# Generate Weekday/Hour Offset Mapping

This notebook builds a lookup table that maps each (weekday, hour) pair to a "shifted" weekday, so that a "trading day" starts at 17:00 on one calendar day and runs through 16:59 on the following day.

Oanda trading hours:  Sunday 17:05 through Friday 16:59 (New York time)

## Load useful libraries

In [1]:
import pandas as pd
import datetime
import pytz
import os

In [2]:
import boilerplate
from timeseries.models import Interval, Instrument, PriceType, Candlestick
from utilities.make_dataframe_from_db import make_candlestick_dataframe
from utilities.date_and_time_related_calculations import compute_datetime_information

In [3]:
# Let Django's synchronous ORM calls run inside this notebook. Presumably needed
# because the notebook kernel runs an event loop -- TODO confirm.
os.environ.update(DJANGO_ALLOW_ASYNC_UNSAFE='true')

## User settings

In [4]:
# Selection criteria for the candlesticks to load. The database must already
# hold candlestick values for this price type / instrument / interval.
instrument_name = 'EUR/USD'
interval_name = 'Minute'
price_type_name = 'mid'

In [5]:
# Timezone in which weekdays and hours are evaluated (the trading hours quoted
# at the top of this notebook are given in New York time).
tz = pytz.timezone('US/Eastern')

# Directory that receives the lookup table; APP_HOME must be set in the environment.
output_directory = f"{os.environ['APP_HOME']}/badassdatascience/forecasting/deep_learning/output"

In [6]:
# Echo the resolved path so a wrong APP_HOME is noticed before anything is saved.
print(output_directory)

/home/emily/Desktop/projects/test/badass-data-science/badassdatascience/forecasting/deep_learning/output


## Generate the mapping from the data

In [7]:
# Load every candlestick matching the user settings and pass it through the
# date/time helper together with `tz`. The 'weekday_tz' and 'hour_tz' columns
# used below are presumably added by that helper -- TODO confirm.
df_from_data = compute_datetime_information(
    make_candlestick_dataframe(price_type_name, instrument_name, interval_name),
    tz,
)

# One row per distinct (weekday, hour) pair, ordered by weekday and then hour.
df_shifted_weekday_from_data = (
    df_from_data[['weekday_tz', 'hour_tz']]
    .drop_duplicates()
    .sort_values(by=['weekday_tz', 'hour_tz'])
    .copy()
)

# Each row takes the weekday found 7 rows further down (24 - 17 = 7). With one
# row per traded hour this moves the day boundary from midnight to 17:00.
# The final 7 rows have nothing to look ahead to and are filled with 0.
df_shifted_weekday_from_data['weekday_shifted'] = (
    df_shifted_weekday_from_data['weekday_tz']
    .shift(periods=-7)
    .fillna(0.)
    .astype('int32')
)

# A look-ahead that lands on weekday 6 is folded back into trading day 4.
df_shifted_weekday_from_data['weekday_shifted'] = [
    4 if shifted == 6 else shifted
    for shifted in df_shifted_weekday_from_data['weekday_shifted']
]

print(df_shifted_weekday_from_data.to_string())

            weekday_tz  hour_tz  weekday_shifted
1220846400           0        0                0
1220850000           0        1                0
1220853600           0        2                0
1220857200           0        3                0
1220860800           0        4                0
1220864400           0        5                0
1220868000           0        6                0
1220871600           0        7                0
1220875200           0        8                0
1220878800           0        9                0
1220882400           0       10                0
1220886000           0       11                0
1220889600           0       12                0
1220893200           0       13                0
1220896800           0       14                0
1220900400           0       15                0
1220904000           0       16                0
1220907600           0       17                1
1220911200           0       18                1
1220914800          

## Generate the mapping manually

Building the mapping a second time, independently of the data, gives us a reference to check the data-derived mapping against:

In [8]:
# Hand-built (weekday, hour) -> shifted-weekday table.
# Hours 0-16 of a weekday stay on that weekday's trading day; hours 17-23
# already belong to the next trading day.
weekday_list = []
hour_list = []
shifted_list = []

# Weekdays 0-3: all 24 hours are present.
for day in range(4):
    weekday_list += [day] * 24
    hour_list += range(24)
    shifted_list += [day] * 17 + [day + 1] * 7

# Weekday 4: only hours 0-16 are present, all on trading day 4.
weekday_list += [4] * 17
hour_list += range(17)
shifted_list += [4] * 17

# Weekday 6: only hours 17-23 are present, and they open trading day 0.
weekday_list += [6] * 7
hour_list += range(17, 24)
shifted_list += [0] * 7

df_shifted_weekday_manually_constructed = pd.DataFrame(
    {
        'weekday_tz': weekday_list,
        'hour_tz': hour_list,
        'weekday_shifted': shifted_list,
    }
)

## QA

#### Test for dataframe equality

In [9]:
# Compare the data-derived table with the hand-built one, column by column.
for column_name in ('weekday_tz', 'hour_tz', 'weekday_shifted'):
    values_from_data = list(df_shifted_weekday_from_data[column_name])
    values_manual = list(df_shifted_weekday_manually_constructed[column_name])

    # Strict check: same values in the same order AND the same length.
    print(values_from_data == values_manual)

    # Pairwise check: 1 if every zipped pair matches, else 0. zip() stops at
    # the shorter list, so a pure length mismatch is not detected here.
    print(min([int(left == right) for left, right in zip(values_from_data, values_manual)]))

False
0
False
1
False
0


#### Aggregations

In [10]:
# Copy of the hand-built table with a constant column to count on.
df_test = df_shifted_weekday_manually_constructed.assign(dummy_variable=1)

# Number of (weekday, hour) rows per calendar weekday.
df_test.groupby('weekday_tz')['dummy_variable'].count().reset_index()

Unnamed: 0,weekday_tz,dummy_variable
0,0,24
1,1,24
2,2,24
3,3,24
4,4,17
5,6,7


In [11]:
# Number of rows per hour of the day.
df_agg_hour_tz = df_test.groupby('hour_tz')['dummy_variable'].count().reset_index()

df_agg_hour_tz

Unnamed: 0,hour_tz,dummy_variable
0,0,5
1,1,5
2,2,5
3,3,5
4,4,5
5,5,5
6,6,5
7,7,5
8,8,5
9,9,5


In [12]:
# Equal min and max means every hour of the day occurs the same number of times.
hourly_counts = df_agg_hour_tz['dummy_variable']
print(min(hourly_counts), max(hourly_counts))

5 5


In [13]:
# Number of rows per shifted weekday, i.e. how many hours each trading day spans.
df_test.groupby('weekday_shifted')['dummy_variable'].count().reset_index()

Unnamed: 0,weekday_shifted,dummy_variable
0,0,24
1,1,24
2,2,24
3,3,24
4,4,24


## Save mapping

In [14]:
# Write the hand-built table (not the data-derived one) as the lookup CSV.
# index=False keeps the DataFrame's row index out of the file.
df_shifted_weekday_manually_constructed.to_csv(
    path_or_buf=output_directory + '/df_weekday_shift_lookup_table.csv',
    index=False,
)