In [1]:
'''
Analyze trading volume for all cattle commodities, split into the periods before and after an arbitrary contract month and year.
For each contract we plot the following:
- Average Daily Nominal Trading Volume By Minute
- Average Daily Normalized Trading Volume By Minute
'''
import pandas as pd
import os
import ipywidgets as widgets
from IPython.display import display
from tqdm.notebook import trange, tqdm

In [2]:
# Collect, in sorted order, the name of every file in the raw futures directory that starts with "LEM0"
csv_files = sorted(
    name
    for name in os.listdir("../data/raw/firstratedata_futures")
    if name.startswith("LEM0")
)
csv_files

['LEM08.csv', 'LEM09.csv']

In [3]:
# Date picker used to choose the date at which the analysis is split.
# 'initial' description width keeps the full label visible instead of truncating it.
analysis_split_date_widget = widgets.DatePicker(
    description='Analysis Split Date',
    style={'description_width': 'initial'},
    disabled=False,
)
display(analysis_split_date_widget)

DatePicker(value=None, description='Analysis Split Date', style=DescriptionStyle(description_width='initial'))

In [4]:
# Inspect the date currently selected in the picker (None, and therefore no output, until a date is chosen)
analysis_split_date_widget.value

In [5]:
def convert_csv_to_df(filename, data_dir="../data/raw/firstratedata_futures"):
    '''Load the DateTime and Volume columns of one contract csv file into a dataframe.

    Parameters
    ----------
    filename : str
        Name of the csv file (e.g. "LEM08.csv") inside ``data_dir``.
    data_dir : str, optional
        Directory that contains the csv file. Defaults to the raw futures data directory
        used throughout this notebook, so existing one-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        A single "Volume" column indexed by the parsed "DateTime" column.
    '''
    df_volume = pd.read_csv(
        os.path.join(data_dir, filename),
        parse_dates=['DateTime'], usecols=['DateTime', 'Volume'], index_col=['DateTime']
    )
    return df_volume

In [6]:
def initialize_df_grouped_by_minute():
    '''Initialize an empty dataframe with no data columns and an index with a row for each minute of the day.

    Returns
    -------
    pandas.DataFrame
        A column-less dataframe whose index holds the 1440 ``datetime.time`` values
        00:00:00 through 23:59:00.
    '''
    # One timestamp per minute of a single (arbitrary) day; only the time-of-day part is kept below.
    # 'min' is used instead of the 'T' alias, which recent pandas versions deprecate.
    date_range = pd.date_range(start='2021-01-01', periods=24 * 60, freq='min')
    new_df = pd.DataFrame(data={'DateTime':date_range}).set_index('DateTime')
    # Grouping by time-of-day turns the DatetimeIndex into an index of datetime.time objects
    new_df = new_df.groupby(lambda x: x.time()).sum()
    return new_df

In [11]:
def combine_data_frames(df1,df2):
    '''Return a new dataframe made of the rows of df1 followed by the rows of df2'''
    return pd.concat(objs=[df1, df2])

In [14]:
def get_master_ungrouped_data_frame(files_to_process):
    '''Load every contract csv file and stack them into one ungrouped dataframe.

    Each file is read with convert_csv_to_df and displayed as it is loaded. The frames
    are collected in a list and concatenated once at the end, which avoids re-copying
    the accumulated data on every iteration and avoids seeding the result with an
    empty object-dtype frame.

    Parameters
    ----------
    files_to_process : list of str
        Names of the csv files to load, in the order they should be stacked.

    Returns
    -------
    pandas.DataFrame
        The rows of all loaded files in file order, indexed by "DateTime". When
        ``files_to_process`` is empty, an empty frame with a "Volume" column and a
        "DateTime" index is returned.
    '''
    contract_frames = []
    for file in tqdm(files_to_process, desc="Overall Analysis"):
        a_contract_df = convert_csv_to_df(file)
        display(a_contract_df)
        contract_frames.append(a_contract_df)
    if not contract_frames:
        # Nothing to concatenate (pd.concat raises on an empty list): return the empty skeleton
        return pd.DataFrame(columns = ["DateTime", "Volume"]).set_index('DateTime')
    return pd.concat(contract_frames)

In [25]:
def resample_volume_by_minute(df):
    '''Resample the data-set by minute filling in the gaps and summing the trading volume within each minute.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index and a "Volume" column.

    Returns
    -------
    pandas.DataFrame
        A single "Volume" column with one row per minute between the first and last
        timestamp of ``df``; minutes without any rows sum to 0.
    '''
    # Select Volume before aggregating so unrelated columns are never summed, and use
    # '1min' instead of the '1T' alias, which recent pandas versions deprecate.
    df_volume_by_minute = df[["Volume"]].resample('1min').sum()
    return df_volume_by_minute

In [17]:
# Load and stack the raw rows of every selected contract file
master_ungrouped_df = get_master_ungrouped_data_frame(csv_files)

# Empty per-minute-of-day skeletons for the nominal and normalized volume results
master_avg_daily_normalized_df = initialize_df_grouped_by_minute()
master_avg_daily_nominal_df = initialize_df_grouped_by_minute()

Overall Analysis:   0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0_level_0,Volume
DateTime,Unnamed: 1_level_1
2007-06-05 13:07:00,3
2007-06-07 18:06:00,1
2007-06-08 10:02:00,1
2007-07-03 11:34:00,2
2007-07-03 12:17:00,4
...,...
2008-06-27 12:52:00,1
2008-06-27 13:26:00,2
2008-06-27 13:33:00,25
2008-06-27 13:36:00,12


Unnamed: 0_level_0,Volume
DateTime,Unnamed: 1_level_1
2008-01-14 19:51:00,1
2008-02-11 10:44:00,2
2008-02-27 10:22:00,3
2008-03-07 12:54:00,2
2008-03-07 12:55:00,9
...,...
2009-06-30 12:13:00,4
2009-06-30 12:19:00,1
2009-06-30 12:40:00,1
2009-06-30 12:45:00,1


In [28]:
# Total volume per minute of the day across all loaded contracts:
# sort chronologically, sum the volume within each calendar minute, then sum
# those per-minute totals by time of day.
# (The previous bare `master_ungrouped_df` line and the discarded `sort_values(...)`
# result had no effect; the sort is now actually applied to the data being resampled.)
resample_volume_by_minute(master_ungrouped_df.sort_index()).groupby(lambda x: x.time()).sum()

Unnamed: 0_level_0,Volume
DateTime,Unnamed: 1_level_1
00:00:00,1
00:01:00,31
00:02:00,8
00:03:00,6
00:04:00,4
...,...
23:55:00,7
23:56:00,1
23:57:00,4
23:58:00,5


In [20]:
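# NOTE: scratch version of the loop now implemented in get_master_ungrouped_data_frame(); kept for reference only.
# initial_df = pd.DataFrame(columns = ["DateTime", "Volume"]).set_index('DateTime')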
# for i in trange(len(csv_files), desc=f"Overall Analysis"):
#     file = csv_files[i]
#     contract_symbol = file[:len(file) - 4]
#     a_contract_df = convert_csv_to_df(file)
#     initial_df = combine_data_frames(initial_df, a_contract_df)
#     # display(initial_df)