In [75]:
import pandas as pd
from typing import Tuple

In [2]:
# Input CSV locations. The paths are relative, so they resolve against the
# directory the notebook kernel is running in.
# Each dataset comes in a "sliding open" and a "true open" variant.
# Intraday bar data, loaded with intraday_open_csv_to_df.
CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_sliding_open.csv'
CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_true_open.csv'
# Overnight change data per contract and date, loaded with overnight_changes_csv_to_df.
CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_sliding_open.csv'
CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_true_open.csv'

In [3]:
def intraday_open_csv_to_df(filename) -> pd.DataFrame:
    '''
    Read an enriched intraday contract CSV into a dataframe.

    Only the columns named in this function are loaded; any other column in
    the file is ignored. 'DateTime' is parsed into datetime values, every
    other column keeps the type pandas infers for it.

    filename: path (or any other input accepted by pandas.read_csv) of the CSV to read.
    Returns the loaded rows as a dataframe.
    '''
    wanted_columns = [
        'Symbol', 'DateTime', 'Open Minutes Offset',
        'Open', 'High', 'Low', 'Close', 'Volume',
        'Price Change From Intraday Open', 'Expiration Date', 'DTE',
    ]
    return pd.read_csv(filename, usecols=wanted_columns, parse_dates=['DateTime'])

In [4]:
def overnight_changes_csv_to_df(filename) -> pd.DataFrame:
  '''
  Read an overnight-changes-by-contract CSV into a dataframe.

  Only the symbol, the date and the three change columns are loaded;
  'Date' is parsed into datetime values.

  filename: path (or any other input accepted by pandas.read_csv) of the CSV to read.
  Returns the loaded rows as a dataframe.
  '''
  wanted_columns = ['Symbol', 'Date', '12:59 Change', '13:04 Change', 'Last Bar Change']
  return pd.read_csv(filename, usecols=wanted_columns, parse_dates=['Date'])

In [5]:
# Load the intraday bar data for both variants (sliding open first, then true open)
intraday_sliding_open_df, intraday_true_open_df = map(
  intraday_open_csv_to_df,
  (CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH, CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH),
)
# Load the matching overnight change data, in the same sliding/true order
overnight_sliding_open_df, overnight_true_open_df = map(
  overnight_changes_csv_to_df,
  (CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH, CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH),
)

In [63]:
# Start every "all bars" result frame as its own empty dataframe: one per
# overnight change column (12:59, 13:04, last bar) and per sign of the change.
(
  true_open_twelve_fifty_nine_all_bars_positive_change_df,
  true_open_twelve_fifty_nine_all_bars_negative_change_df,
  true_open_thirteen_oh_four_all_bars_positive_change_df,
  true_open_thirteen_oh_four_all_bars_negative_change_df,
  true_open_last_bar_all_bars_positive_change_df,
  true_open_last_bar_all_bars_negative_change_df,
) = (pd.DataFrame() for _ in range(6))

In [6]:
# Distinct contract symbols found in the true-open intraday data
unique_symbols = list(intraday_true_open_df['Symbol'].unique())

In [7]:
# Keep only the overnight rows that actually have a 12:59 change value (NaN rows are dropped)
overnight_true_open_twelve_fifty_nine_df = overnight_true_open_df.dropna(subset=['12:59 Change'])
overnight_true_open_twelve_fifty_nine_df

Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
134,LEG09,2008-08-05,-0.500,,-0.425
143,LEG09,2008-08-18,0.050,,0.000
144,LEG09,2008-08-19,-0.575,,-0.150
152,LEG09,2008-08-29,-0.225,,0.300
156,LEG09,2008-09-05,-0.475,-0.475,-0.325
...,...,...,...,...,...
25816,LEZ20,2020-12-14,-0.175,,0.025
25820,LEZ20,2020-12-18,0.550,0.600,0.125
25822,LEZ20,2020-12-22,-0.175,,0.050
25825,LEZ20,2020-12-28,-0.150,-0.150,-0.150


In [8]:
# Split the 12:59 rows by the sign of the change.
# A change of exactly 0 is placed in the "positive" frame (>= 0); only strictly negative values go to the "negative" frame.
overnight_true_open_twelve_fifty_nine_positive_change_df = overnight_true_open_twelve_fifty_nine_df.loc[
  overnight_true_open_twelve_fifty_nine_df['12:59 Change'].ge(0)
]
overnight_true_open_twelve_fifty_nine_negative_change_df = overnight_true_open_twelve_fifty_nine_df.loc[
  overnight_true_open_twelve_fifty_nine_df['12:59 Change'].lt(0)
]
display(
  overnight_true_open_twelve_fifty_nine_positive_change_df,
  overnight_true_open_twelve_fifty_nine_negative_change_df,
)

Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
143,LEG09,2008-08-18,0.050,,0.000
165,LEG09,2008-09-18,0.675,0.550,0.250
166,LEG09,2008-09-19,1.625,1.750,0.750
167,LEG09,2008-09-22,0.475,0.425,0.300
169,LEG09,2008-09-24,0.225,0.325,0.150
...,...,...,...,...,...
25804,LEZ20,2020-11-25,0.100,0.175,0.175
25809,LEZ20,2020-12-03,0.050,-0.050,0.175
25810,LEZ20,2020-12-04,0.325,0.400,0.000
25820,LEZ20,2020-12-18,0.550,0.600,0.125


Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
134,LEG09,2008-08-05,-0.500,,-0.425
144,LEG09,2008-08-19,-0.575,,-0.150
152,LEG09,2008-08-29,-0.225,,0.300
156,LEG09,2008-09-05,-0.475,-0.475,-0.325
162,LEG09,2008-09-15,-0.150,,-0.600
...,...,...,...,...,...
25814,LEZ20,2020-12-10,-0.250,-0.250,-0.225
25815,LEZ20,2020-12-11,-0.200,-0.200,0.000
25816,LEZ20,2020-12-14,-0.175,,0.025
25822,LEZ20,2020-12-22,-0.175,,0.050


In [89]:
def split_intraday_activity_by_overnight_change_for_symbol(
  contract_symbol: str,
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
  ) -> Tuple[pd.DataFrame, pd.DataFrame]:
    '''
    Split the intraday rows of one contract symbol by the overnight change of the day they fall on.

    contract_symbol: value of the 'Symbol' column to select in all three dataframes.
    overnight_positive_change_from_close_df: rows ('Symbol', datetime 'Date') for the days counted as a positive overnight change.
    overnight_negative_change_from_close_df: rows ('Symbol', datetime 'Date') for the days counted as a negative overnight change.
    intraday_df: intraday rows with a 'Symbol' column and a datetime 'DateTime' column.

    Return a tuple (positive_change_rows, negative_change_rows). Both are subsets of intraday_df's rows
    for contract_symbol and keep intraday_df's index and columns. Rows on a day listed in neither
    overnight dataframe appear in neither result; if the symbol is unknown both results are empty.
    '''
    # Calendar dates on which this symbol is listed in the positive overnight change dataframe
    dates_of_positive_change_series = overnight_positive_change_from_close_df[
        overnight_positive_change_from_close_df['Symbol'] == contract_symbol
    ]['Date'].dt.date
    # Calendar dates on which this symbol is listed in the negative overnight change dataframe
    dates_of_negative_change_series = overnight_negative_change_from_close_df[
        overnight_negative_change_from_close_df['Symbol'] == contract_symbol
    ]['Date'].dt.date
    # Restrict the intraday data to the symbol being analysed
    intraday_for_symbol_df = intraday_df[intraday_df['Symbol'] == contract_symbol]
    # Calendar date of every intraday row. Computed once and reused for both lookups,
    # because .dt.date builds one Python date object per row.
    intraday_dates_series = intraday_for_symbol_df['DateTime'].dt.date
    # Intraday rows that fall on a day with a positive overnight change
    intraday_minutes_positive_change_df = intraday_for_symbol_df[intraday_dates_series.isin(dates_of_positive_change_series)]
    # Intraday rows that fall on a day with a negative overnight change
    intraday_minutes_negative_change_df = intraday_for_symbol_df[intraday_dates_series.isin(dates_of_negative_change_series)]
    return (intraday_minutes_positive_change_df, intraday_minutes_negative_change_df)

In [91]:
# Collect the per-symbol splits in lists and concatenate once after the loop.
# Rebuilding the result frames from scratch keeps this cell idempotent: re-running
# it does not append the same rows a second time.
positive_change_frames = []
negative_change_frames = []
# NOTE(review): only the first symbol is processed because of the [0:1] slice.
# Change it to `unique_symbols` if the "all bars" frames should cover every contract.
for symbol in unique_symbols[0:1]:
  print(f"Splitting intraday minutes for symbol {symbol}")
  intraday_minutes_positive_change_df, intraday_minutes_negative_change_df = split_intraday_activity_by_overnight_change_for_symbol(
    contract_symbol=symbol,
    overnight_positive_change_from_close_df=overnight_true_open_twelve_fifty_nine_positive_change_df,
    overnight_negative_change_from_close_df=overnight_true_open_twelve_fifty_nine_negative_change_df,
    intraday_df=intraday_true_open_df
    )
  positive_change_frames.append(intraday_minutes_positive_change_df)
  # The negative half was previously returned but never stored anywhere
  negative_change_frames.append(intraday_minutes_negative_change_df)

# pd.concat raises on an empty list, so fall back to an empty dataframe when no symbol was processed
true_open_twelve_fifty_nine_all_bars_positive_change_df = (
  pd.concat(positive_change_frames) if positive_change_frames else pd.DataFrame()
)
true_open_twelve_fifty_nine_all_bars_negative_change_df = (
  pd.concat(negative_change_frames) if negative_change_frames else pd.DataFrame()
)

In [64]:
for symbol in unique_symbols:
  # NOTE(review): every name assigned below is overwritten on each pass and nothing is accumulated,
  # so after the loop only the frames of the last symbol remain.
  # True-open intraday rows that belong to the symbol handled in this iteration
  true_open_minute_bars_df = intraday_true_open_df.loc[intraday_true_open_df['Symbol'] == symbol]
  # Calendar dates on which this symbol appears in the positive 12:59 change frame
  dates_of_positive_change_series = overnight_true_open_twelve_fifty_nine_positive_change_df.loc[
    overnight_true_open_twelve_fifty_nine_positive_change_df['Symbol'] == symbol, 'Date'
  ].dt.date
  # Calendar dates on which this symbol appears in the negative 12:59 change frame
  dates_of_negative_change_series = overnight_true_open_twelve_fifty_nine_negative_change_df.loc[
    overnight_true_open_twelve_fifty_nine_negative_change_df['Symbol'] == symbol, 'Date'
  ].dt.date
  # Intraday rows of this symbol that fall on a day with a negative 12:59 change
  true_open_minute_bars_negative_change_df = true_open_minute_bars_df.loc[
    true_open_minute_bars_df['DateTime'].dt.date.isin(dates_of_negative_change_series)
  ]
  # Intraday rows of this symbol that fall on a day with a positive 12:59 change
  true_open_minute_bars_positive_change_df = true_open_minute_bars_df.loc[
    true_open_minute_bars_df['DateTime'].dt.date.isin(dates_of_positive_change_series)
  ]
  

In [79]:
# Ad-hoc spot check: show the rows of `true_open_minute_bars_df` that fall on two hand-picked days.
# Despite their names, first_index and second_index hold datetime.date values, not index positions.
first_index = pd.Timestamp(year=2008, month=8, day=5).date()
second_index = pd.Timestamp(year=2008, month=8, day=19).date()
temp_series = pd.Series([first_index, second_index])
true_open_minute_bars_df.loc[true_open_minute_bars_df['DateTime'].dt.date.isin(temp_series)]

Unnamed: 0,Symbol,DateTime,Open Minutes Offset,Open,High,Low,Close,Volume,Price Change From Intraday Open,Expiration Date,DTE
242,LEG09,2008-08-05 10:05:00,0,109.075,109.425,109.075,109.425,3,0.35,2009-02-27,206
243,LEG09,2008-08-05 10:06:00,1,109.4,109.45,109.4,109.45,3,0.375,2009-02-27,206
244,LEG09,2008-08-05 10:08:00,3,109.5,109.5,109.5,109.5,4,0.425,2009-02-27,206
245,LEG09,2008-08-05 10:11:00,6,109.225,109.225,109.225,109.225,1,0.15,2009-02-27,206
246,LEG09,2008-08-05 10:12:00,7,109.15,109.15,109.0,109.075,16,0.0,2009-02-27,206
247,LEG09,2008-08-05 10:13:00,8,109.075,109.075,109.025,109.025,15,-0.05,2009-02-27,206
248,LEG09,2008-08-05 10:16:00,11,108.95,108.95,108.95,108.95,2,-0.125,2009-02-27,206
249,LEG09,2008-08-05 10:18:00,13,108.925,108.925,108.925,108.925,2,-0.15,2009-02-27,206
250,LEG09,2008-08-05 10:19:00,14,108.95,108.95,108.9,108.9,5,-0.175,2009-02-27,206
251,LEG09,2008-08-05 10:21:00,16,108.9,108.9,108.9,108.9,1,-0.175,2009-02-27,206
