In [1]:
import pandas as pd
from typing import Tuple, NamedTuple, List
from collections import namedtuple
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Paths to the processed futures-contract CSV files read by this notebook.
# They are relative paths, so they resolve against the directory the notebook is run from.
# The two "contract_open_enriched" files are loaded with intraday_open_csv_to_df and the two
# "overnight_changes_by_contract" files with overnight_changes_csv_to_df.
# NOTE(review): "sliding open" vs "true open" presumably differ in how the session open is
# determined upstream - confirm against the pipeline that writes these files.
CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_sliding_open.csv'
CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_true_open.csv'
CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_sliding_open.csv'
CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_true_open.csv'

In [3]:
def intraday_open_csv_to_df(filename) -> pd.DataFrame:
  '''
  Read an enriched intraday contract CSV into a dataframe.
  Only the columns listed below are loaded (anything else in the file is ignored) and 'DateTime' is parsed into datetimes
  '''
  columns_to_load = [
    'Symbol', 'DateTime', 'Open Minutes Offset',
    'Open', 'High', 'Low', 'Close', 'Volume',
    'Price Change From Intraday Open', 'Expiration Date', 'DTE',
  ]
  return pd.read_csv(filename, usecols=columns_to_load, parse_dates=['DateTime'])

In [4]:
def overnight_changes_csv_to_df(filename) -> pd.DataFrame:
  '''
  Read an overnight changes CSV into a dataframe.
  Loads 'Symbol', 'Date' (parsed into datetimes) and the three close bar change columns; any other column in the file is ignored
  '''
  close_bar_change_columns = ['12:59 Change', '13:04 Change', 'Last Bar Change']
  return pd.read_csv(
    filename,
    usecols=['Symbol', 'Date', *close_bar_change_columns],
    parse_dates=['Date'],
  )

In [5]:
def filter_and_split_overnight_changes(
  overnight_changes_df: pd.DataFrame,
  close_bar_column_name: str # For example '12:59 Change'
) -> NamedTuple:
  '''
  Drop the rows that have no value in the given close bar column, then partition what is left by sign.
  Returns an 'overnight_changes' named tuple: positive_change_df holds the rows whose value is >= 0 (zero counts as positive)
  and negative_change_df holds the rows whose value is < 0
  '''
  rows_with_change_df = overnight_changes_df.dropna(subset=[close_bar_column_name])
  change_values = rows_with_change_df[close_bar_column_name]
  overnight_changes = namedtuple('overnight_changes', ['positive_change_df', 'negative_change_df'])
  return overnight_changes(
    positive_change_df=rows_with_change_df[change_values >= 0],
    negative_change_df=rows_with_change_df[change_values < 0],
  )

In [6]:
def split_intraday_activity_by_overnight_change_for_symbol(
  contract_symbol: str,
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
  ) -> NamedTuple:
    '''
    Partition one contract symbol's intraday minute bars by the sign of that day's overnight change.
    A bar lands in positive_change_df when its calendar date appears, for this symbol, in the positive overnight changes
    and in negative_change_df when it appears in the negative overnight changes. Bars on any other date are dropped.
    Returns an 'intraday_minute_changes' named tuple of the two dataframes
    '''
    def change_dates_for_symbol(overnight_df: pd.DataFrame) -> pd.Series:
        # Calendar dates (time of day stripped) on which this symbol has a row in the given overnight dataset
        rows_for_symbol = overnight_df[overnight_df['Symbol'] == contract_symbol]
        return rows_for_symbol['Date'].dt.date

    positive_change_dates = change_dates_for_symbol(overnight_positive_change_from_close_df)
    negative_change_dates = change_dates_for_symbol(overnight_negative_change_from_close_df)

    # Only this symbol's bars are of interest; compare on calendar date so every minute of a matching day is kept
    bars_for_symbol_df = intraday_df[intraday_df['Symbol'] == contract_symbol]
    bar_dates = bars_for_symbol_df['DateTime'].dt.date

    intraday_minute_changes = namedtuple('intraday_minute_changes', ['positive_change_df', 'negative_change_df'])
    return intraday_minute_changes(
        positive_change_df=bars_for_symbol_df[bar_dates.isin(positive_change_dates)],
        negative_change_df=bars_for_symbol_df[bar_dates.isin(negative_change_dates)],
    )

In [7]:
def split_intraday_activity_by_overnight_change_all_symbols(
  symbols: List[str],
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
) -> NamedTuple:
  '''
  Split the intraday minutes for a list of symbols into those days minutes which correspond to a positive overnight close change
  and those days minutes that correspond to a negative overnight close change.

  Each symbol is split with split_intraday_activity_by_overnight_change_for_symbol and the per symbol results are stacked,
  in the order the symbols were given, keeping the row index of intraday_df.

  Returns an 'intraday_minute_bars_split' named tuple with fields positive_change_df and negative_change_df.
  Both are empty dataframes when symbols is empty
  '''
  positive_change_frames = []
  negative_change_frames = []
  for symbol in symbols:
    print(f"Splitting intraday minutes for symbol {symbol}")
    intraday_minute_changes = split_intraday_activity_by_overnight_change_for_symbol(
      contract_symbol=symbol,
      overnight_positive_change_from_close_df=overnight_positive_change_from_close_df,
      overnight_negative_change_from_close_df=overnight_negative_change_from_close_df,
      intraday_df=intraday_df
    )
    positive_change_frames.append(intraday_minute_changes.positive_change_df)
    negative_change_frames.append(intraday_minute_changes.negative_change_df)

  # Concatenate once at the end: growing a dataframe with pd.concat inside the loop copies every accumulated row
  # again on each iteration. pd.concat raises on an empty list, so fall back to an empty dataframe in that case
  open_bars_where_close_was_positive_df = pd.concat(positive_change_frames) if positive_change_frames else pd.DataFrame()
  open_bars_where_close_was_negative_df = pd.concat(negative_change_frames) if negative_change_frames else pd.DataFrame()

  intraday_minute_bars_split = namedtuple('intraday_minute_bars_split', ['positive_change_df', 'negative_change_df'])
  return intraday_minute_bars_split(open_bars_where_close_was_positive_df, open_bars_where_close_was_negative_df)

In [8]:
def calculate_average_intraday_price_change_grouped_by_open_minutes_offset(intraday_minute_bars_df: NamedTuple) -> pd.DataFrame:
  '''
  Group the intraday minute bars by their Open Minutes Offset and calculate the mean price change for each minute.

  intraday_minute_bars_df is a named tuple with positive_change_df and negative_change_df attributes, each holding
  'Open Minutes Offset' and 'Price Change From Intraday Open' columns.

  Returns a single dataframe with one row per Open Minutes Offset found in either input, sorted by offset, with columns
  'Open Minutes Offset', 'Avg Intraday Price Change When Overnight Change >= 0' and
  'Avg Intraday Price Change When Overnight Change < 0'. An average is NaN where that side has no bars for the offset
  '''
  offset_column = 'Open Minutes Offset'
  price_change_column = 'Price Change From Intraday Open'

  # Keep the offset as the index of each averaged series so the two sides are lined up by offset below
  positive_average_by_offset = intraday_minute_bars_df.positive_change_df.groupby(offset_column)[price_change_column].mean()
  negative_average_by_offset = intraday_minute_bars_df.negative_change_df.groupby(offset_column)[price_change_column].mean()

  # Building the frame from offset-indexed series aligns on the offset itself. Pairing the two results by row position
  # would attach averages to the wrong minute whenever one side is missing an offset that the other side has
  averages_by_offset_df = pd.DataFrame({
    'Avg Intraday Price Change When Overnight Change >= 0': positive_average_by_offset,
    'Avg Intraday Price Change When Overnight Change < 0': negative_average_by_offset
  })
  return averages_by_offset_df.sort_index().rename_axis(offset_column).reset_index()

In [9]:
def generate_figure(intraday_price_changes_split_df: pd.DataFrame, fig_title: str) -> go.Figure:
  '''
  Build a figure with two lines+markers traces plotted against 'Open Minutes Offset': the average intraday price change
  when the overnight change was >= 0, followed by the average when it was < 0. The axes are labelled and fig_title is
  used as the figure title. The figure is returned without being shown
  '''
  minutes_after_open = intraday_price_changes_split_df['Open Minutes Offset']
  # (legend name, averaged column) for each trace, in the order the traces are added
  trace_specs = (
    ('When Overnight Change >= 0', 'Avg Intraday Price Change When Overnight Change >= 0'),
    ('When Overnight Change < 0', 'Avg Intraday Price Change When Overnight Change < 0'),
  )

  fig = go.Figure()
  for legend_name, average_column in trace_specs:
    trace = go.Scatter(
      x=minutes_after_open,
      y=intraday_price_changes_split_df[average_column],
      mode='lines+markers',
      name=legend_name,
    )
    fig.add_trace(trace)

  fig.update_xaxes(title_text='Minutes After Open')
  fig.update_yaxes(title_text='Avg Price Change From Open')
  fig.update_layout(title_text=fig_title)
  return fig

In [10]:
# Load the intraday minute bars first (sliding open, then true open) ...
intraday_sliding_open_df, intraday_true_open_df = (
  intraday_open_csv_to_df(intraday_csv_path)
  for intraday_csv_path in (
    CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH,
    CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH,
  )
)
# ... followed by the overnight changes, in the same sliding open / true open order
overnight_sliding_open_df, overnight_true_open_df = (
  overnight_changes_csv_to_df(overnight_csv_path)
  for overnight_csv_path in (
    CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH,
    CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH,
  )
)

Segment the overnight true-open dataset into three datasets, one for each close bar (12:59, 13:04, and the last bar of the day). Each of those datasets is further divided into two dataframes: one containing the non-negative (>= 0) overnight changes for that bar, and the other containing the negative overnight changes for that bar.

In [11]:
# Every distinct contract symbol present in the true open intraday data, in order of first appearance
unique_symbols = list(intraday_true_open_df['Symbol'].unique())

In [12]:
# One positive/negative split of the true open overnight changes per close bar column
(
  overnight_twelve_fifty_nine_true_open_changes,
  overnight_thirteen_oh_four_true_open_changes,
  overnight_last_bar_true_open_changes,
) = (
  filter_and_split_overnight_changes(
    overnight_changes_df=overnight_true_open_df,
    close_bar_column_name=close_bar_column_name,
  )
  for close_bar_column_name in ('12:59 Change', '13:04 Change', 'Last Bar Change')
)

Segment the intraday true-open dataset into three datasets, one associated with each type of close. Each of those datasets is further split into two dataframes: one containing the intraday changes associated with a non-negative (>= 0) overnight change, and the other containing the intraday changes associated with a negative overnight change.

In [13]:
def _split_true_open_intraday_minute_bars(progress_message, overnight_changes):
  '''
  Print progress_message, then split the true open intraday bars of every symbol in unique_symbols by the
  positive / negative overnight changes held in overnight_changes
  '''
  print(progress_message)
  return split_intraday_activity_by_overnight_change_all_symbols(
    symbols=unique_symbols,
    overnight_positive_change_from_close_df=overnight_changes.positive_change_df,
    overnight_negative_change_from_close_df=overnight_changes.negative_change_df,
    intraday_df=intraday_true_open_df,
  )

true_open_twelve_fifty_nine_intraday_minute_bars_split = _split_true_open_intraday_minute_bars(
  "Gathering Intraday split data for true open @ 12:59",
  overnight_twelve_fifty_nine_true_open_changes,
)
true_open_thirteen_oh_four_intraday_minute_bars_split = _split_true_open_intraday_minute_bars(
  "Gathering Intraday split data for true open @ 13:04",
  overnight_thirteen_oh_four_true_open_changes,
)
true_open_last_bar_intraday_minute_bars_split = _split_true_open_intraday_minute_bars(
  "Gathering Intraday split data for true open @ last bar of day",
  overnight_last_bar_true_open_changes,
)

Gathering Intraday split data for true open @ 12:59
Splitting intraday minutes for symbol LEG09
Splitting intraday minutes for symbol LEG10
Splitting intraday minutes for symbol LEG11
Splitting intraday minutes for symbol LEG12
Splitting intraday minutes for symbol LEG13
Splitting intraday minutes for symbol LEG14
Splitting intraday minutes for symbol LEG15
Splitting intraday minutes for symbol LEG16
Splitting intraday minutes for symbol LEG17
Splitting intraday minutes for symbol LEG18
Splitting intraday minutes for symbol LEG19
Splitting intraday minutes for symbol LEG20
Splitting intraday minutes for symbol LEJ08
Splitting intraday minutes for symbol LEJ09
Splitting intraday minutes for symbol LEJ10
Splitting intraday minutes for symbol LEJ11
Splitting intraday minutes for symbol LEJ12
Splitting intraday minutes for symbol LEJ13
Splitting intraday minutes for symbol LEJ14
Splitting intraday minutes for symbol LEJ15
Splitting intraday minutes for symbol LEJ16
Splitting intraday minut

In [14]:
# Average the intraday price change per minute offset for each of the three close bar splits
(
  true_open_twelve_fifty_nine_intraday_average_changes_df,
  true_open_thirteen_oh_four_intraday_average_changes_df,
  true_open_last_bar_intraday_average_changes_df,
) = (
  calculate_average_intraday_price_change_grouped_by_open_minutes_offset(intraday_minute_bars_split)
  for intraday_minute_bars_split in (
    true_open_twelve_fifty_nine_intraday_minute_bars_split,
    true_open_thirteen_oh_four_intraday_minute_bars_split,
    true_open_last_bar_intraday_minute_bars_split,
  )
)

In [16]:
# One figure per close bar, built in the order 12:59, 13:04, last bar
fig1, fig2, fig3 = (
  generate_figure(intraday_price_changes_split_df=average_changes_df, fig_title=title)
  for average_changes_df, title in (
    (true_open_twelve_fifty_nine_intraday_average_changes_df, 'True Open 12:59 Change'),
    (true_open_thirteen_oh_four_intraday_average_changes_df, 'True Open 13:04 Change'),
    (true_open_last_bar_intraday_average_changes_df, 'True Open Last Bar Change'),
  )
)
figures = [fig1, fig2, fig3]
for fig in figures:
  fig.show()
