In [1]:
import os
import pandas as pd
from typing import List
from datetime import datetime
import numpy as np

from lib.utils import get_nyse_date_tups, get_oxford_dfs
import pandas_market_calendars as mcal

In [5]:
# Study window passed to the project helper get_nyse_date_tups.
start = '2018-10-11'
end = '2023-10-09'

# Same window requested twice: once with unix=True, once with unix=False.
unix_tups = get_nyse_date_tups(start, end, unix=True)

# Keep only the first element of every non-unix tuple.
# NOTE(review): presumably that element is the session date/open — confirm in lib.utils.
_date_tups = get_nyse_date_tups(start, end, unix=False)
market_days = [session[0] for session in _date_tups]

In [3]:
# Load the input frames through the project helper. Later cells iterate over
# `dfs` and read the 'datetime', 'c' and 'ticker' columns of each frame.
# NOTE(review): the meaning of the argument `2` is defined in lib.utils, not here — confirm.
dfs = get_oxford_dfs(2)

In [6]:
def filter_market_hours(dfs, trading_days=None):
    """Keep only rows inside the 09:30-16:00 window on the given trading days.

    Parameters
    ----------
    dfs : iterable of pd.DataFrame
        Each frame needs a datetime-typed ``datetime`` column (it is accessed
        through the ``.dt`` accessor).
    trading_days : sequence of date-likes, optional
        Anything ``pd.to_datetime`` accepts. When omitted, the notebook-level
        ``market_days`` list is used, which matches the original behaviour.

    Returns
    -------
    list of pd.DataFrame
        Exactly one filtered frame per input frame, in input order, each with
        a fresh 0..n-1 index.
    """
    days = market_days if trading_days is None else trading_days

    # Loop-invariant conversions, computed once instead of once per frame.
    session_open = pd.to_datetime('09:30:00').time()
    session_close = pd.to_datetime('16:00:00').time()
    valid_dates = pd.to_datetime(days).date

    market_hours = []
    for df in dfs:
        stamps = df['datetime'].dt
        # Half-open window: 09:30:00 is kept, 16:00:00 is dropped.
        in_session = (stamps.time >= session_open) & (stamps.time < session_close)
        # Drops weekends/holidays, i.e. any date not listed in `days`.
        on_trading_day = stamps.date.isin(valid_dates)
        # Append once per input frame. The previous version appended the same
        # frame twice, doubling the output and breaking alignment with tickers.
        market_hours.append(df[in_session & on_trading_day].reset_index(drop=True))
    return market_hours

# Apply the session/trading-day filter to every loaded frame.
market_hours = filter_market_hours(dfs)

In [7]:
# Independent copy of the first filtered frame for ad-hoc inspection;
# it is not referenced by any other cell shown here.
test = market_hours[0].copy()

In [8]:
def create_aggs_log_ret(df: pd.DataFrame):
    """Aggregate a price frame into one first-to-last log return per daily bin.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a ``datetime`` column (used as the time index) and a
        ``c`` column holding the price used for the return.

    Returns
    -------
    pd.DataFrame
        Columns ``datetime`` and ``log_ret``. ``datetime`` is the right edge
        of each 1-day bin anchored at 09:30, so a bin's return is stamped
        09:30 on the following calendar day.
        NOTE(review): this means a Friday session is labelled Saturday 09:30 —
        confirm that this labelling is intended.
    """
    def _log_ret(x: pd.Series):
        # log(last price / first price) inside the bin.
        return np.log(x.iloc[-1] / x.iloc[0])

    # An explicit Timedelta replaces the string '9H30T'; the 'H'/'T' unit
    # aliases are deprecated in recent pandas, while the Timedelta form is
    # equivalent and version-independent.
    bin_offset = pd.Timedelta(hours=9, minutes=30)

    aggs = df.set_index('datetime').groupby(pd.Grouper(freq='D'))\
        .resample('1D', offset=bin_offset, label='right')\
        .agg({'c': _log_ret})
    # The result carries a two-level index (calendar-day group, bin label);
    # only the bin label is kept.
    aggs.index = aggs.index.droplevel(0)
    aggs = aggs.reset_index().rename(columns={'c': 'log_ret'})
    return aggs[["datetime", "log_ret"]]

In [9]:
# One log-return frame per filtered input frame, in the same order.
all_log_ret = list(map(create_aggs_log_ret, market_hours))

In [10]:
# Display the log-return frame computed for the first input frame.
all_log_ret[0]

Unnamed: 0,datetime,log_ret
0,2018-10-12 09:30:00-04:00,-0.021181
1,2018-10-13 09:30:00-04:00,0.005331
2,2018-10-16 09:30:00-04:00,-0.008121
3,2018-10-17 09:30:00-04:00,0.020100
4,2018-10-18 09:30:00-04:00,0.002849
...,...,...
1251,2023-10-04 09:30:00-04:00,0.012908
1252,2023-10-05 09:30:00-04:00,0.002673
1253,2023-10-06 09:30:00-04:00,0.003964
1254,2023-10-07 09:30:00-04:00,0.005733


In [98]:
# Label each loaded frame by the 'ticker' value found in its first row.
tickers = [frame['ticker'].iloc[0] for frame in dfs]

In [99]:
# Output directory for the per-ticker CSV files.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
base_path = "/Users/beneverman/Documents/Coding/QuantHive/IDVF-Oxford-v1/data/processed-5yr-93-minute/raw_log_ret"
# makedirs also creates missing parent directories and does not fail if the
# directory already exists (os.mkdir raises when a parent is missing).
os.makedirs(base_path, exist_ok=True)

# Frames and tickers are paired by position, so a length mismatch would write
# returns under the wrong ticker. Fail before any file is written.
if len(all_log_ret) != len(tickers):
    raise ValueError(
        f"all_log_ret has {len(all_log_ret)} frames but tickers has {len(tickers)} entries"
    )

for ticker, agg in zip(tickers, all_log_ret):
    # The ticker column is added to the frame itself (in place), as before.
    agg['ticker'] = ticker
    # NOTE(review): the file name says "65min" while create_aggs_log_ret bins by 1 day — confirm.
    path = f"{base_path}/{ticker}_65min.csv"
    agg.to_csv(path, index=False)

In [100]:
# Wide table: one 'datetime' column plus one log-return column per ticker.
# Series are aligned on their timestamps rather than on row position, so a
# ticker with missing or extra bins gets NaN instead of silently shifting its
# values against the first ticker's dates.
# zip() pairs frames with tickers by position and stops at the shorter list;
# a repeated ticker keeps its first column position with the last frame's values.
_ret_by_ticker = {
    ticker: df.set_index('datetime')['log_ret']
    for df, ticker in zip(all_log_ret, tickers)
}
mega_df = (
    pd.concat(_ret_by_ticker, axis=1)
    .rename_axis('datetime')  # make sure the index comes back as 'datetime'
    .reset_index()
)

In [101]:
# Display the first 100 rows of the combined table.
mega_df.head(100)

Unnamed: 0,datetime,TMO,ABT,HD,MCD,PG,CAT,DIS,CCI,JNJ,...,COST,IBM,ADP,AVGO,WMT,AMGN,INTU,AXP,MMC,CB
0,2018-10-11 10:35:00-04:00,0.002044,0.000290,0.007970,-0.003542,-0.015252,0.003827,-0.000490,-0.009682,-0.010058,...,0.000760,-0.001296,-0.012762,0.006567,-0.007732,-0.026439,0.020094,0.012177,0.005559,-0.013216
1,2018-10-11 11:40:00-04:00,-0.012958,-0.005812,-0.010241,-0.007665,-0.005534,-0.008440,-0.003210,-0.007077,-0.007419,...,-0.003389,-0.007346,-0.009383,-0.009835,-0.003572,-0.003722,-0.008705,-0.007352,-0.004068,-0.015409
2,2018-10-11 12:45:00-04:00,0.011864,0.009495,0.005513,0.001582,-0.003369,0.004187,0.004273,0.001215,0.002866,...,0.010420,0.003169,0.005284,0.003212,0.000000,0.008531,0.000877,0.004544,0.001974,0.001637
3,2018-10-11 13:50:00-04:00,-0.010124,-0.006878,-0.005410,-0.004801,-0.003759,-0.005166,-0.003026,-0.001871,-0.003086,...,-0.003204,-0.006559,-0.005000,-0.004676,-0.001789,-0.009610,-0.006060,-0.002754,-0.003210,-0.003745
4,2018-10-11 14:55:00-04:00,-0.007333,-0.006271,-0.017991,-0.007464,-0.005163,-0.006255,-0.008510,-0.012835,-0.010583,...,-0.008888,-0.009318,-0.008487,-0.013015,-0.007933,-0.004655,-0.010946,-0.009342,-0.010448,-0.009196
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2018-11-01 16:00:00-04:00,0.001756,0.002141,-0.000111,-0.002113,0.006496,-0.000160,0.000129,-0.000091,0.001208,...,0.002442,0.002571,0.002487,0.001611,0.001891,0.001194,0.005431,0.000914,-0.004012,-0.000956
96,2018-11-02 10:35:00-04:00,-0.002111,-0.003257,-0.011604,-0.004373,-0.002674,-0.012378,-0.009302,-0.004500,-0.009292,...,0.004246,-0.007725,-0.010008,-0.015634,-0.001493,-0.026150,0.000656,-0.004453,0.002934,0.002140
97,2018-11-02 11:40:00-04:00,0.000913,-0.003054,0.000000,-0.005656,-0.001116,0.007308,-0.002255,-0.006647,-0.001284,...,-0.008501,-0.000773,-0.002989,0.002264,-0.002829,-0.001304,-0.000094,-0.002599,-0.007530,-0.004647
98,2018-11-02 12:45:00-04:00,-0.004235,-0.003924,-0.005409,0.000201,-0.004255,-0.004211,-0.001870,0.001480,0.000857,...,-0.002057,-0.006895,-0.003537,-0.008791,-0.000298,-0.001377,0.000000,-0.003088,-0.008426,-0.003305


In [102]:
# Persist the combined table as CSV.
# NOTE(review): unlike the per-ticker export, no index=False is passed here, so the
# row index is written as an extra unnamed first column — confirm readers expect that.
# NOTE(review): absolute, machine-specific path; the parent directory must already exist.
mega_df.to_csv("/Users/beneverman/Documents/Coding/QuantHive/IDVF-Oxford-v1/data/processed-5yr-93-minute/65min_log_ret.csv")