In [58]:
import pandas as pd
from typing import Tuple, NamedTuple, List
from collections import namedtuple
import plotly.express as px
import plotly.graph_objects as go
from tqdm.notebook import trange, tqdm
import operator

Analyze the relationship between the overnight price change from the prior day (Price at Open - Price at yesterday's close) and the price dynamics of the current day during the open window, typically 60 minutes. We have three different close bars (12:59, 13:04, and the last bar of the day). We also use both a true open and a sliding open dataset.

In [2]:
# Relative paths to the input CSV files: one intraday file and one overnight-changes file
# for each of the two open definitions (sliding open and true open).
CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_sliding_open.csv'
CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched_true_open.csv'
CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_sliding_open.csv'
CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH = '../data/processed/futures_contracts/overnight_changes_by_contract_true_open.csv'

In [3]:
# These parameters allow us to filter out trading activity on days where the contract DTE tends to have missing open bars.
# When the flag is True, only intraday rows whose 'DTE' value lies between the lower and upper
# boundary (both ends inclusive) are kept; when it is False, no DTE filtering is applied.
FILTER_OUT_DTE_WITH_FREQUENTLY_MISSING_OPEN = True
DTE_FILTER_UPPER_BOUNDARY = 140
DTE_FILTER_LOWER_BOUNDARY = 25

In [4]:
def intraday_open_csv_to_df(filename) -> pd.DataFrame:
    '''
    Read an intraday open CSV into a dataframe.

    Only the columns listed in columns_to_load are read, and the 'DateTime' column is parsed into datetimes.
    '''
    columns_to_load = [
        'Symbol', 'DateTime', 'Open Minutes Offset', 'Open', 'High', 'Low', 'Close',
        'Volume', 'Price Change From Intraday Open', 'Expiration Date', 'DTE'
    ]
    return pd.read_csv(filename, usecols=columns_to_load, parse_dates=['DateTime'])

In [5]:
def overnight_changes_csv_to_df(filename) -> pd.DataFrame:
  '''
  Read an overnight changes CSV into a dataframe.

  Only 'Symbol', 'Date' and the three close-bar change columns are read; 'Date' is parsed into datetimes.
  '''
  columns_to_load = ['Symbol', 'Date', '12:59 Change', '13:04 Change', 'Last Bar Change']
  return pd.read_csv(filename, usecols=columns_to_load, parse_dates=['Date'])

In [6]:
def filter_and_split_overnight_changes(
  overnight_changes_df: pd.DataFrame,
  close_bar_column_name: str # For example '12:59 Change'
) -> NamedTuple:
  '''
  Keep only the rows where the given close bar column has a value, then split them by sign.

  Returns a namedtuple (positive_change_df, negative_change_df). Rows whose value is >= 0 go to
  positive_change_df (so a change of exactly zero counts as positive) and rows whose value is < 0
  go to negative_change_df.
  '''
  overnight_changes = namedtuple('overnight_changes', ['positive_change_df', 'negative_change_df'])
  rows_with_change_df = overnight_changes_df[overnight_changes_df[close_bar_column_name].notna()]
  change_values = rows_with_change_df[close_bar_column_name]
  return overnight_changes(
    positive_change_df=rows_with_change_df[change_values >= 0],
    negative_change_df=rows_with_change_df[change_values < 0]
  )

In [7]:
def split_intraday_activity_by_overnight_change_for_symbol(
  contract_symbol: str,
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
  ) -> NamedTuple:
    '''
    Split one contract's intraday minute rows by the sign of that day's overnight change.

    The calendar date of each intraday 'DateTime' is matched against the 'Date' values that the
    positive and negative overnight dataframes hold for this symbol. Returns a namedtuple
    (positive_change_df, negative_change_df) of the matching intraday rows.
    '''
    def change_dates_for_symbol(overnight_df: pd.DataFrame) -> pd.Series:
        # Calendar dates on which this symbol appears in the given overnight dataframe
        symbol_rows_df = overnight_df[overnight_df['Symbol'] == contract_symbol]
        return symbol_rows_df['Date'].dt.date

    positive_change_dates = change_dates_for_symbol(overnight_positive_change_from_close_df)
    negative_change_dates = change_dates_for_symbol(overnight_negative_change_from_close_df)

    # Restrict the intraday data to this symbol, then compare on calendar date only
    symbol_minutes_df = intraday_df[intraday_df['Symbol'] == contract_symbol]
    minute_dates = symbol_minutes_df['DateTime'].dt.date

    minutes_on_negative_days_df = symbol_minutes_df[minute_dates.isin(negative_change_dates)]
    minutes_on_positive_days_df = symbol_minutes_df[minute_dates.isin(positive_change_dates)]

    intraday_minute_changes = namedtuple('intraday_minute_changes', ['positive_change_df', 'negative_change_df'])
    return intraday_minute_changes(
        positive_change_df=minutes_on_positive_days_df,
        negative_change_df=minutes_on_negative_days_df
    )

In [8]:
def split_intraday_activity_by_overnight_change_all_symbols(
  symbols: List[str],
  overnight_positive_change_from_close_df: pd.DataFrame,
  overnight_negative_change_from_close_df: pd.DataFrame,
  intraday_df: pd.DataFrame
) -> NamedTuple:
  '''
  Split the intraday minutes for a list of symbols into those days minutes which correspond to a positive overnight close change and those days minutes that correspond to a negative overnight close change.

  Each symbol is split with split_intraday_activity_by_overnight_change_for_symbol and the per-symbol
  results are stacked in the order of `symbols` with a fresh 0..n-1 index.
  Returns a namedtuple (positive_change_df, negative_change_df). Both are empty dataframes when `symbols` is empty.
  '''
  positive_frames = []
  negative_frames = []
  for symbol in tqdm(symbols, desc="Splitting intraday activity by overnight change for each contract"):
    symbol_split = split_intraday_activity_by_overnight_change_for_symbol(
      contract_symbol=symbol,
      overnight_positive_change_from_close_df=overnight_positive_change_from_close_df,
      overnight_negative_change_from_close_df=overnight_negative_change_from_close_df,
      intraday_df=intraday_df
    )
    positive_frames.append(symbol_split.positive_change_df)
    negative_frames.append(symbol_split.negative_change_df)

  def stack_frames(frames: List[pd.DataFrame]) -> pd.DataFrame:
    # Concatenate once at the end instead of re-copying the accumulated frame on every iteration.
    # pd.concat raises on an empty list, so fall back to an empty dataframe in that case.
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

  intraday_minute_bars_split = namedtuple('intraday_minute_bars_split', ['positive_change_df', 'negative_change_df'])
  return intraday_minute_bars_split(stack_frames(positive_frames), stack_frames(negative_frames))

In [9]:
def calculate_average_intraday_price_change_grouped_by_open_minutes_offset(intraday_minute_bars_df: NamedTuple) -> pd.DataFrame:
  '''
  Group the intraday minute bars by their Open Minutes Offset and calculate the mean for each minute. Return all that as a single dataframe

  intraday_minute_bars_df is a namedtuple with positive_change_df and negative_change_df attributes
  (not a single dataframe, despite the parameter name). The two per-minute averages are joined on
  'Open Minutes Offset', so a minute that is present on only one side gets NaN for the other side
  instead of being paired with the value of a different minute.

  Returns a dataframe sorted by 'Open Minutes Offset' with the columns
  'Open Minutes Offset', 'Avg Intraday Price Change When Overnight Change >= 0' and
  'Avg Intraday Price Change When Overnight Change < 0'.
  '''
  offset_column = 'Open Minutes Offset'
  change_column = 'Price Change From Intraday Open'

  def average_by_offset(minute_bars_df: pd.DataFrame, average_column_name: str) -> pd.DataFrame:
    # Mean price change per minute offset, with the mean column renamed for the final output
    return (
      minute_bars_df
      .groupby(offset_column, as_index=False)[change_column]
      .mean()
      .rename(columns={change_column: average_column_name})
    )

  positive_average_df = average_by_offset(
    intraday_minute_bars_df.positive_change_df,
    'Avg Intraday Price Change When Overnight Change >= 0'
  )
  negative_average_df = average_by_offset(
    intraday_minute_bars_df.negative_change_df,
    'Avg Intraday Price Change When Overnight Change < 0'
  )
  # Align on the minute offset itself rather than on row position
  to_return_df = (
    positive_average_df
    .merge(negative_average_df, on=offset_column, how='outer')
    .sort_values(offset_column)
    .reset_index(drop=True)
  )
  return to_return_df

In [10]:
def generate_figure(intraday_price_changes_split_df: pd.DataFrame, fig_title: str) -> go.Figure:
  '''
  Build a line+marker figure with one trace per overnight-change sign.

  The x axis is 'Open Minutes Offset'; the two traces plot the average intraday price change
  columns for overnight change >= 0 and overnight change < 0. The figure is returned, not shown.
  '''
  minutes_after_open = intraday_price_changes_split_df['Open Minutes Offset']
  # (dataframe column, legend label) for each trace, in the order they are added
  trace_specs = (
    ('Avg Intraday Price Change When Overnight Change >= 0', 'When Overnight Change >= 0'),
    ('Avg Intraday Price Change When Overnight Change < 0', 'When Overnight Change < 0'),
  )
  fig = go.Figure()
  for average_column_name, trace_name in trace_specs:
    trace = go.Scatter(
      x=minutes_after_open,
      y=intraday_price_changes_split_df[average_column_name],
      mode='lines+markers',
      name=trace_name
    )
    fig.add_trace(trace)
  fig.update_xaxes(title_text='Minutes After Open')
  fig.update_yaxes(title_text='Avg Price Change From Open')
  fig.update_layout(title_text=fig_title)
  return fig

In [11]:
def filter_bars_for_dte_with_frequently_missing_open(
  intraday_open_df: pd.DataFrame,
  dte_filter_lower_boundary: int,
  dte_filter_upper_boundary: int
  ) -> pd.DataFrame:
  '''Keep only the rows whose 'DTE' lies between the lower and upper boundary, both ends inclusive'''
  dte_is_within_boundaries = intraday_open_df['DTE'].between(dte_filter_lower_boundary, dte_filter_upper_boundary)
  return intraday_open_df[dte_is_within_boundaries]

In [49]:
def quantile_values(a_series: pd.Series) -> Tuple:
  '''Return the 0.25, 0.5, 0.75 and 1.0 quantiles of a series, in that order, as a tuple'''
  quantile_levels = (.25, .5, .75, 1)
  return tuple(a_series.quantile(level) for level in quantile_levels)

In [70]:
def rows_between_values(a_df: pd.DataFrame, column_name: str, lower_bound: float, upper_bound: float, lower_bound_is_inclusive: bool, upper_bound_is_inclusive: bool) -> pd.DataFrame:
  '''
  Return the rows of a_df whose value in column_name lies between lower_bound and upper_bound.

  lower_bound_is_inclusive / upper_bound_is_inclusive choose whether a value equal to the
  corresponding bound is kept (True) or dropped (any other value).
  '''
  lower_boundary_compare_operator = operator.ge if lower_bound_is_inclusive is True else operator.gt
  # An exclusive upper bound must use a strict less-than comparison
  upper_boundary_compare_operator = operator.le if upper_bound_is_inclusive is True else operator.lt
  column_values = a_df[column_name]
  between_values_df = a_df.loc[
    (lower_boundary_compare_operator(column_values, lower_bound)) & (upper_boundary_compare_operator(column_values, upper_bound))
  ]
  return between_values_df

In [71]:
def dataframe_split_by_quantile(a_df: pd.DataFrame, column_name: str, quantile_boundary_values: Tuple) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
  '''
  Split a_df into four dataframes using the first four entries of quantile_boundary_values.

  The buckets are consecutive (lower, upper) ranges over column_name: the first one starts at a
  hard-coded 0.0 and each later one starts at the previous boundary. Every lower bound is requested
  as inclusive; only the last bucket requests an inclusive upper bound.

  Because each pair is passed to rows_between_values as (lower, upper), boundaries given in
  descending order (for example negative values ordered away from zero) produce empty dataframes.
  '''
  lower_bounds = (
    0.0,
    quantile_boundary_values[0],
    quantile_boundary_values[1],
    quantile_boundary_values[2],
  )
  upper_bounds = (
    quantile_boundary_values[0],
    quantile_boundary_values[1],
    quantile_boundary_values[2],
    quantile_boundary_values[3],
  )
  last_bucket_position = len(upper_bounds) - 1
  quantile_dataframes = tuple(
    rows_between_values(
      a_df=a_df,
      column_name=column_name,
      lower_bound=bucket_lower_bound,
      upper_bound=bucket_upper_bound,
      lower_bound_is_inclusive=True,
      upper_bound_is_inclusive=(bucket_position == last_bucket_position)
    )
    for bucket_position, (bucket_lower_bound, bucket_upper_bound) in enumerate(zip(lower_bounds, upper_bounds))
  )
  return quantile_dataframes

In [78]:
def calc_quantile_boundary_values(a_series: pd.Series, column_name: str) -> Tuple:
  '''
  Return the quantile boundary values of the distinct values in a_series.

  The series is sorted and de-duplicated before quantile_values is applied, so each distinct
  value counts once regardless of how often it occurs. column_name is accepted but not used.
  '''
  distinct_sorted_values = a_series.sort_values().drop_duplicates()
  return quantile_values(a_series=distinct_sorted_values)

In [90]:
def reverse_tuple(tuples):
    '''Return a copy of the given sequence with its elements in reverse order'''
    return tuples[::-1]

In [12]:
# Load the intraday minute bars for both open definitions (sliding open first, then true open)
intraday_sliding_open_df, intraday_true_open_df = (
  intraday_open_csv_to_df(intraday_csv_path)
  for intraday_csv_path in (
    CONTRACT_INTRADAY_SLIDING_OPEN_FILE_PATH,
    CONTRACT_INTRADAY_TRUE_OPEN_FILE_PATH,
  )
)
# Load the overnight changes for both open definitions (sliding open first, then true open)
overnight_sliding_open_df, overnight_true_open_df = (
  overnight_changes_csv_to_df(overnight_csv_path)
  for overnight_csv_path in (
    CONTRACT_OVERNIGHT_CHANGES_SLIDING_OPEN_FILE_PATH,
    CONTRACT_OVERNIGHT_CHANGES_TRUE_OPEN_FILE_PATH,
  )
)

Segment the overnight true open dataset into 3 datasets, one for each close bar. Each of those datasets is further divided into two dataframes: one containing the positive overnight changes for that bar and the other containing the negative overnight changes for that bar.

In [13]:
# Distinct contract symbols present in the true open intraday data, in order of first appearance
unique_symbols = list(intraday_true_open_df['Symbol'].unique())

In [98]:
# All non-missing '12:59 Change' values. Despite the name, the series is not sorted here;
# calc_quantile_boundary_values sorts its input itself.
sorted_overnight_series = overnight_true_open_df.loc[
  overnight_true_open_df['12:59 Change'].notna(),
  '12:59 Change'
].copy()
# Quantile boundaries of the non-negative changes
positive_quantile_values = calc_quantile_boundary_values(
  a_series=sorted_overnight_series[sorted_overnight_series >= 0],
  column_name='12:59 Change'
)
# Quantile boundaries of the negative changes, reversed so they run from closest-to-zero to most negative
negative_quantile_values = reverse_tuple(
  calc_quantile_boundary_values(
    a_series=sorted_overnight_series[sorted_overnight_series < 0],
    column_name='12:59 Change'
  )
)

(-0.025, -0.9, -1.775, -2.775)

In [100]:
# NOTE(review): negative_quantile_values is in descending order, while dataframe_split_by_quantile
# starts its first bucket at 0.0 and treats each consecutive pair as (lower, upper). Every bucket
# therefore has lower > upper and comes back empty.
dataframes_by_quantile = dataframe_split_by_quantile(
  a_df=overnight_true_open_df,
  column_name='12:59 Change',
  quantile_boundary_values=negative_quantile_values
)
dataframes_by_quantile

(Empty DataFrame
 Columns: [Symbol, Date, 12:59 Change, 13:04 Change, Last Bar Change]
 Index: [],
 Empty DataFrame
 Columns: [Symbol, Date, 12:59 Change, 13:04 Change, Last Bar Change]
 Index: [],
 Empty DataFrame
 Columns: [Symbol, Date, 12:59 Change, 13:04 Change, Last Bar Change]
 Index: [],
 Empty DataFrame
 Columns: [Symbol, Date, 12:59 Change, 13:04 Change, Last Bar Change]
 Index: [])

Get all the overnight changes for the true open approach

In [14]:
# One positive/negative split of the true open overnight changes per close bar column,
# in the order 12:59, 13:04, last bar
(
  overnight_twelve_fifty_nine_true_open_changes,
  overnight_thirteen_oh_four_true_open_changes,
  overnight_last_bar_true_open_changes,
) = (
  filter_and_split_overnight_changes(
    overnight_changes_df=overnight_true_open_df,
    close_bar_column_name=close_bar_column
  )
  for close_bar_column in ('12:59 Change', '13:04 Change', 'Last Bar Change')
)

Get all the overnight changes for the sliding open approach

In [15]:
# One positive/negative split of the sliding open overnight changes per close bar column,
# in the order 12:59, 13:04, last bar
(
  overnight_twelve_fifty_nine_sliding_open_changes,
  overnight_thirteen_oh_four_sliding_open_changes,
  overnight_last_bar_sliding_open_changes,
) = (
  filter_and_split_overnight_changes(
    overnight_changes_df=overnight_sliding_open_df,
    close_bar_column_name=close_bar_column
  )
  for close_bar_column in ('12:59 Change', '13:04 Change', 'Last Bar Change')
)

In [16]:
# Optionally restrict both intraday datasets to the configured DTE range.
# Note that this rebinds intraday_true_open_df and intraday_sliding_open_df to the filtered frames.
if FILTER_OUT_DTE_WITH_FREQUENTLY_MISSING_OPEN:
  intraday_true_open_df = filter_bars_for_dte_with_frequently_missing_open(
    intraday_open_df=intraday_true_open_df,
    dte_filter_lower_boundary=DTE_FILTER_LOWER_BOUNDARY,
    dte_filter_upper_boundary=DTE_FILTER_UPPER_BOUNDARY
  )
  intraday_sliding_open_df = filter_bars_for_dte_with_frequently_missing_open(
    intraday_open_df=intraday_sliding_open_df,
    dte_filter_lower_boundary=DTE_FILTER_LOWER_BOUNDARY,
    dte_filter_upper_boundary=DTE_FILTER_UPPER_BOUNDARY
  )

Segment the intraday true open dataset into 3 datasets, one associated with each type of close. Each of those datasets is further split into two dataframes: one containing the intraday changes associated with a positive overnight change and the other containing the intraday changes associated with a negative overnight change.

In [17]:
# For each close bar, split the true open intraday minutes of every symbol by the sign of that
# day's overnight change. Each print announces the split that follows it.

# Close bar: 12:59
print("Gathering Intraday split data for true open @ 12:59")
true_open_twelve_fifty_nine_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_twelve_fifty_nine_true_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_twelve_fifty_nine_true_open_changes.negative_change_df,
  intraday_df=intraday_true_open_df
)
# Close bar: 13:04
print("Gathering Intraday split data for true open @ 13:04")
true_open_thirteen_oh_four_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_thirteen_oh_four_true_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_thirteen_oh_four_true_open_changes.negative_change_df,
  intraday_df=intraday_true_open_df
)
# Close bar: last bar of the day
print("Gathering Intraday split data for true open @ last bar of day")
true_open_last_bar_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_last_bar_true_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_last_bar_true_open_changes.negative_change_df,
  intraday_df=intraday_true_open_df
)

Gathering Intraday split data for true open @ 12:59


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Gathering Intraday split data for true open @ 13:04


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Gathering Intraday split data for true open @ last bar of day


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Do the same for the sliding open datasets

In [18]:
# For each close bar, split the sliding open intraday minutes of every symbol by the sign of that
# day's overnight change. Each print announces the split that follows it.
# NOTE(review): unique_symbols was derived from the true open intraday data; confirm it also
# covers every symbol in the sliding open data.

# Close bar: 12:59
print("Gathering Intraday split data for sliding open @ 12:59")
sliding_open_twelve_fifty_nine_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_twelve_fifty_nine_sliding_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_twelve_fifty_nine_sliding_open_changes.negative_change_df,
  intraday_df=intraday_sliding_open_df
)
# Close bar: 13:04
print("Gathering Intraday split data for sliding open @ 13:04")
sliding_open_thirteen_oh_four_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_thirteen_oh_four_sliding_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_thirteen_oh_four_sliding_open_changes.negative_change_df,
  intraday_df=intraday_sliding_open_df
)
# Close bar: last bar of the day
print("Gathering Intraday split data for sliding open @ last bar of day")
sliding_open_last_bar_intraday_minute_bars_split = split_intraday_activity_by_overnight_change_all_symbols(
  symbols=unique_symbols,
  overnight_positive_change_from_close_df=overnight_last_bar_sliding_open_changes.positive_change_df,
  overnight_negative_change_from_close_df=overnight_last_bar_sliding_open_changes.negative_change_df,
  intraday_df=intraday_sliding_open_df
)

Gathering Intraday split data for sliding open @ 12:59


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Gathering Intraday split data for sliding open @ 13:04


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Gathering Intraday split data for sliding open @ last bar of day


Splitting intraday activity by overnight change for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

Calculate intraday average price changes by minute, split by overnight price change >= 0 vs overnight price change < 0, for every dataset

In [19]:
# True Open
true_open_twelve_fifty_nine_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(true_open_twelve_fifty_nine_intraday_minute_bars_split)
true_open_thirteen_oh_four_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(true_open_thirteen_oh_four_intraday_minute_bars_split)
true_open_last_bar_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(true_open_last_bar_intraday_minute_bars_split)
# Sliding Open
sliding_open_twelve_fifty_nine_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(sliding_open_twelve_fifty_nine_intraday_minute_bars_split)
sliding_open_thirteen_oh_four_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(sliding_open_thirteen_oh_four_intraday_minute_bars_split)
sliding_open_last_bar_intraday_average_changes_df = calculate_average_intraday_price_change_grouped_by_open_minutes_offset(sliding_open_last_bar_intraday_minute_bars_split)

Generate the figures

In [20]:
# One figure per (open definition, close bar) combination: fig1-fig3 are true open,
# fig4-fig6 are sliding open, each in the order 12:59, 13:04, last bar
fig1, fig2, fig3, fig4, fig5, fig6 = (
  generate_figure(intraday_price_changes_split_df=average_changes_df, fig_title=title)
  for average_changes_df, title in (
    (true_open_twelve_fifty_nine_intraday_average_changes_df, 'True Open 12:59 Change'),
    (true_open_thirteen_oh_four_intraday_average_changes_df, 'True Open 13:04 Change'),
    (true_open_last_bar_intraday_average_changes_df, 'True Open Last Bar Change'),
    (sliding_open_twelve_fifty_nine_intraday_average_changes_df, 'Sliding Open 12:59 Change'),
    (sliding_open_thirteen_oh_four_intraday_average_changes_df, 'Sliding Open 13:04 Change'),
    (sliding_open_last_bar_intraday_average_changes_df, 'Sliding Open Last Bar Change'),
  )
)


Show the figures

In [21]:
# Render every figure in creation order (true open figures first, then sliding open)
figures = [
  fig1, fig2, fig3,
  fig4, fig5, fig6,
]
for fig in figures:
  fig.show()