In [67]:
import os
import pandas as pd
import datetime as dt
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from IPython.display import display
from tqdm.notebook import trange, tqdm

In [68]:
CONTRACT_OPEN_ENRICHED_FILE_PATH = '../data/processed/futures_contracts/contract_open_enriched.csv'

In [69]:
def calculate_num_unique_dates_for_contract(contract_df: pd.DataFrame, symbol: str):
    missing_open_for_contract_df = contract_df[contract_df['Symbol'] == symbol]
    missing_open_dates_series_unique = missing_open_for_contract_df['Date'].unique()
    num_missing_open_dates_series_unique = len(missing_open_dates_series_unique)
    return num_missing_open_dates_series_unique

In [70]:
contract_open_enriched_df = pd.read_csv(CONTRACT_OPEN_ENRICHED_FILE_PATH, parse_dates=['DateTime'])

In [71]:
contract_open_enriched_df['Date'] = contract_open_enriched_df['DateTime'].apply(
        lambda x: x.strftime("%Y-%m-%d")
    )

In [73]:
rows_with_missing_open_df = contract_open_enriched_df[contract_open_enriched_df['Intraday Open Bar Price Delta'].isnull()].loc[:, contract_open_enriched_df.columns.isin(['Symbol', 'DateTime', 'DTE', 'Date'])]
rows_with_populated_open_df = contract_open_enriched_df[contract_open_enriched_df['Intraday Open Bar Price Delta'].notnull()].loc[:, contract_open_enriched_df.columns.isin(['Symbol', 'DateTime', 'DTE', 'Date'])]

In [75]:
unique_symbols = rows_with_missing_open_df.Symbol.unique()

In [76]:
open_bar_by_contract_count_df = pd.DataFrame(columns=['Symbol', 'Days Without Open Bar', 'Days With Open Bar'])

Iterate over our contract symbol. For each one calculate the number of unique dates that have an open bar and that do not. Add this information to the open_bar_by_contract_count_df dataframe

In [77]:
for symbol in unique_symbols:
    num_unique_dates_with_missing_open = calculate_num_unique_dates_for_contract(
        contract_df=rows_with_missing_open_df, symbol=symbol
    )
    num_unique_dates_with_open = calculate_num_unique_dates_for_contract(
        contract_df=rows_with_populated_open_df, symbol=symbol
    )
    open_bar_by_contract_count_df = open_bar_by_contract_count_df.append({
        'Symbol': symbol,
        'Days Without Open Bar': num_unique_dates_with_missing_open,
        'Days With Open Bar': num_unique_dates_with_open
    }, ignore_index=True)
    # print(num_unique_dates_with_missing_open)
    
    

Initialize a dataframe that will be used to count the number of times we encounter trading days with missing and populated open bars respectively

In [78]:
days_to_expiration_open_bar_count_df = pd.DataFrame(data={
  'DTE': pd.Series(range(0,601)),
  'Days Without Open Bar': 0,
  'Days With Open Bar': 0
})

In [79]:
for i in trange(len(unique_symbols), desc="Calculating missing open by DTE for each contract"):
  symbol = unique_symbols[i]
  missing_open_for_contract_df = rows_with_missing_open_df[rows_with_missing_open_df['Symbol'] == symbol]
  populated_open_for_contract_df = rows_with_populated_open_df[rows_with_populated_open_df['Symbol'] == symbol]
  dte_values_with_missing_open_bar = missing_open_for_contract_df['DTE'].unique()
  dte_values_with_populated_open_bar = populated_open_for_contract_df['DTE'].unique()
  # Iterate over each unique dte value thats associated with a missing open bar and increment the count in the dataframe we will be using to plot
  for missing_dte_value in dte_values_with_missing_open_bar:
    index_of_row_to_increment = days_to_expiration_open_bar_count_df.index[days_to_expiration_open_bar_count_df['DTE'] == missing_dte_value]
    current_value = days_to_expiration_open_bar_count_df.iloc[index_of_row_to_increment]['Days Without Open Bar']
    days_to_expiration_open_bar_count_df.at[index_of_row_to_increment, 'Days Without Open Bar'] = current_value + 1
  # Iterate over each unique dte value thats associated with a populated open bar and increment the count in the dataframe we will be using to plot
  for populated_dte_value in dte_values_with_populated_open_bar:
    index_of_row_to_increment = days_to_expiration_open_bar_count_df.index[days_to_expiration_open_bar_count_df['DTE'] == populated_dte_value]
    current_value = days_to_expiration_open_bar_count_df.iloc[index_of_row_to_increment]['Days With Open Bar']
    days_to_expiration_open_bar_count_df.at[index_of_row_to_increment, 'Days With Open Bar'] = current_value + 1

Calculating missing open by DTE for each contract:   0%|          | 0/77 [00:00<?, ?it/s]

In [81]:
fig = go.Figure(data=[
    go.Bar(name='Days Without Open Bar', x=unique_symbols, y=open_bar_by_contract_count_df['Days Without Open Bar']),
    go.Bar(name='Days With Open Bar', x=unique_symbols, y=open_bar_by_contract_count_df['Days With Open Bar'])
])
# Change the bar mode
fig.update_layout(barmode='group', title_text='Number of days With and Without An Open Bar By Contract - LE Only')
fig.show()

In [95]:
unique_dte = list(days_to_expiration_open_bar_count_df['DTE'])
open_bar_by_contract_count_df['Days Without Open Bar']

0     107
1      92
2      94
3     105
4     106
     ... 
72    113
73     27
74     63
75     57
76     47
Name: Days Without Open Bar, Length: 77, dtype: object

In [106]:
days_to_expiration_open_bar_count_df['Total Days'] = days_to_expiration_open_bar_count_df['Days With Open Bar'] + days_to_expiration_open_bar_count_df['Days Without Open Bar']
days_to_expiration_open_bar_count_df['Percentage Missing Open Bar'] = (days_to_expiration_open_bar_count_df['Days Without Open Bar'] / days_to_expiration_open_bar_count_df['Total Days']) * 100

In [104]:
dte_without_open_fig = go.Figure(data=[
    go.Bar(name='Days Without Open Bar', x=unique_dte, y=days_to_expiration_open_bar_count_df['Days Without Open Bar'])
])
dte_without_open_fig.update_layout(title_text='Number of trading days without An Open Bar By DTE - LE Only')
dte_without_open_fig.show()