In [15]:
import pandas as pd
from typing import Tuple, NamedTuple, List
from collections import namedtuple

In [2]:
# Input CSV locations, relative to the notebook's working directory.
# The two "intraday" files are read with intraday_open_csv_to_df and the two
# "overnight changes" files with overnight_changes_csv_to_df.
# NOTE(review): the "sliding open" vs "true open" distinction comes from the
# upstream processing that produced these files and is not defined in this notebook.
CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_sliding_open.csv'
CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_true_open.csv'
CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_sliding_open.csv'
CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_true_open.csv'

In [3]:
def intraday_open_csv_to_df(filename) -> pd.DataFrame:
    '''
    Read an intraday contract CSV into a DataFrame.

    Only the columns named below are loaded; any other column in the file is
    skipped. The 'DateTime' column is parsed into datetimes by pandas.

    filename is handed to pd.read_csv unchanged.
    '''
    wanted_columns = [
        'Symbol',
        'DateTime',
        'Open Minutes Offset',
        'Open',
        'High',
        'Low',
        'Close',
        'Volume',
        'Price Change From Intraday Open',
        'Expiration Date',
        'DTE',
    ]
    return pd.read_csv(filename, usecols=wanted_columns, parse_dates=['DateTime'])

In [4]:
def overnight_changes_csv_to_df(filename) -> pd.DataFrame:
    '''
    Read an overnight-changes CSV into a DataFrame.

    Only the symbol, date and the three change columns named below are loaded;
    the 'Date' column is parsed into datetimes by pandas.

    filename is handed to pd.read_csv unchanged.
    '''
    wanted_columns = [
        'Symbol',
        'Date',
        '12:59 Change',
        '13:04 Change',
        'Last Bar Change',
    ]
    return pd.read_csv(filename, usecols=wanted_columns, parse_dates=['Date'])

In [5]:
# Intraday bars, one frame per input file variant.
intraday_sliding_open_df = intraday_open_csv_to_df(
    filename=CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH
)
intraday_true_open_df = intraday_open_csv_to_df(
    filename=CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH
)

# Overnight changes per contract and date, again one frame per variant.
overnight_sliding_open_df = overnight_changes_csv_to_df(
    filename=CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH
)
overnight_true_open_df = overnight_changes_csv_to_df(
    filename=CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH
)

In [6]:
# Six independent, initially empty frames: a positive/negative pair for each of
# the three overnight change columns (12:59, 13:04, last bar). They are meant to
# be populated later in the notebook.
(
    true_open_twelve_fifty_nine_all_bars_positive_change_df,
    true_open_twelve_fifty_nine_all_bars_negative_change_df,
    true_open_thirteen_oh_four_all_bars_positive_change_df,
    true_open_thirteen_oh_four_all_bars_negative_change_df,
    true_open_last_bar_all_bars_positive_change_df,
    true_open_last_bar_all_bars_negative_change_df,
) = (pd.DataFrame() for _ in range(6))

In [7]:
# Distinct contract symbols present in the true-open intraday data, in order of first appearance.
unique_symbols = list(intraday_true_open_df['Symbol'].unique())

In [8]:
# Keep only the rows that actually have a 12:59 change value.
overnight_true_open_twelve_fifty_nine_df = overnight_true_open_df.dropna(subset=['12:59 Change'])
overnight_true_open_twelve_fifty_nine_df

Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
134,LEG09,2008-08-05,-0.500,,-0.425
143,LEG09,2008-08-18,0.050,,0.000
144,LEG09,2008-08-19,-0.575,,-0.150
152,LEG09,2008-08-29,-0.225,,0.300
156,LEG09,2008-09-05,-0.475,-0.475,-0.325
...,...,...,...,...,...
25816,LEZ20,2020-12-14,-0.175,,0.025
25820,LEZ20,2020-12-18,0.550,0.600,0.125
25822,LEZ20,2020-12-22,-0.175,,0.050
25825,LEZ20,2020-12-28,-0.150,-0.150,-0.150


In [9]:
# Split the 12:59 rows by sign. A change of exactly zero lands in the "positive" frame.
overnight_true_open_twelve_fifty_nine_positive_change_df = overnight_true_open_twelve_fifty_nine_df.loc[
    lambda frame: frame['12:59 Change'] >= 0
]
overnight_true_open_twelve_fifty_nine_negative_change_df = overnight_true_open_twelve_fifty_nine_df.loc[
    lambda frame: frame['12:59 Change'] < 0
]
display(
    overnight_true_open_twelve_fifty_nine_positive_change_df,
    overnight_true_open_twelve_fifty_nine_negative_change_df,
)

Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
143,LEG09,2008-08-18,0.050,,0.000
165,LEG09,2008-09-18,0.675,0.550,0.250
166,LEG09,2008-09-19,1.625,1.750,0.750
167,LEG09,2008-09-22,0.475,0.425,0.300
169,LEG09,2008-09-24,0.225,0.325,0.150
...,...,...,...,...,...
25804,LEZ20,2020-11-25,0.100,0.175,0.175
25809,LEZ20,2020-12-03,0.050,-0.050,0.175
25810,LEZ20,2020-12-04,0.325,0.400,0.000
25820,LEZ20,2020-12-18,0.550,0.600,0.125


Unnamed: 0,Symbol,Date,12:59 Change,13:04 Change,Last Bar Change
134,LEG09,2008-08-05,-0.500,,-0.425
144,LEG09,2008-08-19,-0.575,,-0.150
152,LEG09,2008-08-29,-0.225,,0.300
156,LEG09,2008-09-05,-0.475,-0.475,-0.325
162,LEG09,2008-09-15,-0.150,,-0.600
...,...,...,...,...,...
25814,LEZ20,2020-12-10,-0.250,-0.250,-0.225
25815,LEZ20,2020-12-11,-0.200,-0.200,0.000
25816,LEZ20,2020-12-14,-0.175,,0.025
25822,LEZ20,2020-12-22,-0.175,,0.050


In [10]:
def split_intraday_activity_by_overnight_change_for_symbol(
  contract_symbol: str,
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
  ) -> NamedTuple:
    '''
    Partition one contract's intraday rows by which overnight frame lists that day.

    For contract_symbol, the calendar dates found in the positive overnight frame and
    in the negative overnight frame are looked up ('Symbol' and 'Date' columns). The
    intraday rows for the same symbol ('Symbol' and 'DateTime' columns) are then
    matched against those dates, ignoring the time of day.

    Returns an 'intraday_minute_changes' namedtuple with two DataFrames:
    positive_change_df (rows on dates from the positive frame) and
    negative_change_df (rows on dates from the negative frame). Intraday rows whose
    date is in neither overnight frame are dropped from both.
    '''
    def calendar_dates_for_symbol(overnight_df: pd.DataFrame) -> pd.Series:
        # Dates (time of day stripped) on which this symbol appears in the given overnight frame
        rows_for_symbol = overnight_df['Symbol'] == contract_symbol
        return overnight_df.loc[rows_for_symbol, 'Date'].dt.date

    positive_dates = calendar_dates_for_symbol(overnight_positive_change_from_close_df)
    negative_dates = calendar_dates_for_symbol(overnight_negative_change_from_close_df)

    # Restrict the intraday data to this symbol, then reduce each row's timestamp to its calendar date
    symbol_bars_df = intraday_df.loc[intraday_df['Symbol'] == contract_symbol]
    bar_dates = symbol_bars_df['DateTime'].dt.date

    result_type = namedtuple('intraday_minute_changes', ['positive_change_df', 'negative_change_df'])
    return result_type(
        positive_change_df=symbol_bars_df.loc[bar_dates.isin(positive_dates)],
        negative_change_df=symbol_bars_df.loc[bar_dates.isin(negative_dates)],
    )

In [13]:
def split_intraday_activity_by_overnight_change_all_symbols(
  symbols: List[str],
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
) -> NamedTuple:
    '''
    Split the intraday rows of every symbol by overnight change and stack the results.

    Each symbol in symbols is passed to
    split_intraday_activity_by_overnight_change_for_symbol together with the two
    overnight frames and intraday_df. The per-symbol positive frames are concatenated
    into one DataFrame, and likewise the negative frames, in the order the symbols
    were given.

    Returns an 'intraday_minute_changes' namedtuple with the fields
    positive_change_df and negative_change_df. When symbols is empty both fields
    are empty DataFrames.
    '''
    # Collect the per-symbol pieces and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything accumulated so far on each pass.
    positive_parts = []
    negative_parts = []
    for symbol in symbols:
        print(f"Splitting intraday minutes for symbol {symbol}")
        changes_for_symbol = split_intraday_activity_by_overnight_change_for_symbol(
            contract_symbol=symbol,
            overnight_positive_change_from_close_df=overnight_positive_change_from_close_df,
            overnight_negative_change_from_close_df=overnight_negative_change_from_close_df,
            intraday_df=intraday_df
        )
        positive_parts.append(changes_for_symbol.positive_change_df)
        negative_parts.append(changes_for_symbol.negative_change_df)

    # pd.concat raises on an empty list, so fall back to an empty frame explicitly
    open_bars_where_close_was_positive_df = pd.concat(positive_parts) if positive_parts else pd.DataFrame()
    open_bars_where_close_was_negative_df = pd.concat(negative_parts) if negative_parts else pd.DataFrame()

    intraday_minute_changes = namedtuple('intraday_minute_changes', ['positive_change_df', 'negative_change_df'])
    return intraday_minute_changes(
        open_bars_where_close_was_positive_df,
        open_bars_where_close_was_negative_df,
    )

SyntaxError: invalid syntax (3120068425.py, line 5)

In [12]:
# Split the true-open intraday bars of every symbol by the sign of that day's
# 12:59 overnight change. The per-symbol pieces are collected in lists and
# concatenated once: this avoids re-copying the growing frame on every pass and
# makes the cell safe to re-run (the results are rebuilt rather than appended to).
positive_change_parts = []
negative_change_parts = []
for symbol in unique_symbols:
  print(f"Splitting intraday minutes for symbol {symbol}")
  intraday_minute_changes = split_intraday_activity_by_overnight_change_for_symbol(
    contract_symbol=symbol,
    overnight_positive_change_from_close_df=overnight_true_open_twelve_fifty_nine_positive_change_df,
    overnight_negative_change_from_close_df=overnight_true_open_twelve_fifty_nine_negative_change_df,
    intraday_df=intraday_true_open_df
    )
  positive_change_parts.append(intraday_minute_changes.positive_change_df)
  negative_change_parts.append(intraday_minute_changes.negative_change_df)

# pd.concat raises on an empty list, so fall back to an empty frame when there are no symbols
true_open_twelve_fifty_nine_all_bars_positive_change_df = (
  pd.concat(positive_change_parts) if positive_change_parts else pd.DataFrame()
)
true_open_twelve_fifty_nine_all_bars_negative_change_df = (
  pd.concat(negative_change_parts) if negative_change_parts else pd.DataFrame()
)

Splitting intraday minutes for symbol LEG09
Splitting intraday minutes for symbol LEG10
Splitting intraday minutes for symbol LEG11
Splitting intraday minutes for symbol LEG12
Splitting intraday minutes for symbol LEG13
Splitting intraday minutes for symbol LEG14
Splitting intraday minutes for symbol LEG15
Splitting intraday minutes for symbol LEG16
Splitting intraday minutes for symbol LEG17
Splitting intraday minutes for symbol LEG18
Splitting intraday minutes for symbol LEG19
Splitting intraday minutes for symbol LEG20
Splitting intraday minutes for symbol LEJ08
Splitting intraday minutes for symbol LEJ09
Splitting intraday minutes for symbol LEJ10
Splitting intraday minutes for symbol LEJ11
Splitting intraday minutes for symbol LEJ12
Splitting intraday minutes for symbol LEJ13
Splitting intraday minutes for symbol LEJ14
Splitting intraday minutes for symbol LEJ15
Splitting intraday minutes for symbol LEJ16
Splitting intraday minutes for symbol LEJ17
Splitting intraday minutes for s

In [14]:
# Show the true-open intraday bars gathered for days whose 12:59 overnight change was >= 0.
true_open_twelve_fifty_nine_all_bars_positive_change_df

Unnamed: 0,Symbol,DateTime,Open Minutes Offset,Open,High,Low,Close,Volume,Price Change From Intraday Open,Expiration Date,DTE
317,LEG09,2008-08-18 10:05:00,0,107.000,107.000,107.000,107.000,1,0.000,2009-02-27,193
318,LEG09,2008-08-18 10:06:00,1,107.100,107.100,107.075,107.075,3,0.075,2009-02-27,193
319,LEG09,2008-08-18 10:08:00,3,107.100,107.100,107.100,107.100,1,0.100,2009-02-27,193
320,LEG09,2008-08-18 10:09:00,4,107.325,107.600,107.325,107.600,9,0.600,2009-02-27,193
321,LEG09,2008-08-18 10:12:00,7,107.325,107.325,107.325,107.325,1,0.325,2009-02-27,193
...,...,...,...,...,...,...,...,...,...,...,...
811850,LEZ20,2020-12-31 10:15:00,45,112.875,112.875,112.875,112.875,3,0.475,2020-12-31,0
811851,LEZ20,2020-12-31 10:17:00,47,112.900,112.900,112.900,112.900,2,0.500,2020-12-31,0
811852,LEZ20,2020-12-31 10:18:00,48,112.900,112.900,112.900,112.900,1,0.500,2020-12-31,0
811853,LEZ20,2020-12-31 10:20:00,50,112.900,112.900,112.900,112.900,7,0.500,2020-12-31,0


In [None]:
# Spot check of the date-based filter: select every true-open intraday bar that
# falls on one of two hand-picked calendar dates.
first_index = pd.Timestamp(2008, 8, 5).date()
second_index = pd.Timestamp(2008, 8, 19).date()
temp_series = pd.Series([first_index, second_index])
# The frame was previously referenced as `true_open_minute_bars_df`, a name that is
# never defined in this notebook; the true-open minute bars are loaded as
# `intraday_true_open_df`.
intraday_true_open_df[intraday_true_open_df['DateTime'].dt.date.isin(temp_series)]