# Merge Data Files
Created by Mitas Ray on 2024-12-06.  
Last edited by Mitas Ray on 2024-12-06.

This notebook merges two pickle files, each of which stores a pandas DataFrame.

To run the notebook, use Python 3.10 (Python 3.12 does not work) and install the requirements file for your platform:
- on Linux: use `ficc_python/requirements_py310_linux_jupyter.txt`
- on macOS: use `ficc_python/requirements_py310_mac_jupyter.txt`

In [None]:
import warnings

import pandas as pd

In [None]:
# Locations of the two input pickle files and of the pickle the merged DataFrame is written to.
# NOTE(review): these are absolute, user-specific paths; edit them before running on another machine.
file1_path = '/Users/mitas/ficc/ficc_python/notebooks/merge_data_files/files/processed_data_yield_spread_with_similar_trades.pkl'
file2_path = '/Users/mitas/ficc/ficc_python/notebooks/compare_datasets/files/new_data.pkl'
output_file_path = '/Users/mitas/ficc/ficc_python/notebooks/merge_data_files/files/processed_data_yield_spread_with_similar_trades_v2.pkl'

In [None]:
# Default name of the DataFrame column that `get_df_between_datetimes` filters on.
DATETIME_COLUMN_NAME = 'trade_datetime'

In [None]:
# Time-of-day suffixes appended to a `YYYY-MM-DD` date string to build the datetime bounds used for filtering.
# NOTE(review): `END_OF_DAY_TIME` has one-second resolution, so if the datetime column holds sub-second
# timestamps, values after 23:59:59 on a given day fall past an end bound built from it -- confirm this is acceptable.
START_OF_DAY_TIME = '00:00:00'
END_OF_DAY_TIME = '23:59:59'

In [None]:
def get_df_between_datetimes(file_path: str,
                             start_datetime: str | None = None,
                             end_datetime: str | None = None,
                             datetime_column_name: str = DATETIME_COLUMN_NAME) -> pd.DataFrame:
    """Load a pickled DataFrame and keep only the rows inside a datetime window.

    Both bounds are inclusive. A bound left as `None` is not applied, so passing
    neither bound returns every row (and emits a warning, since that is usually
    unintentional).

    Args:
        file_path: Path of the pickle file holding the DataFrame.
        start_datetime: Inclusive lower bound compared against the datetime column,
            or `None` for no lower bound.
        end_datetime: Inclusive upper bound compared against the datetime column,
            or `None` for no upper bound.
        datetime_column_name: Name of the column the bounds are compared against.

    Returns:
        The rows of the loaded DataFrame whose `datetime_column_name` value lies
        within the requested bounds, with their original index.

    Raises:
        ValueError: If `datetime_column_name` is not a column of the loaded DataFrame.
    """
    if start_datetime is None and end_datetime is None:
        # `stacklevel=2` attributes the warning to the caller's line rather than to this helper.
        warnings.warn('Both `start_datetime` and `end_datetime` are `None`, and so all rows returned',
                      stacklevel=2)

    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    df = pd.read_pickle(file_path)
    if datetime_column_name not in df.columns:
        raise ValueError(f'Column `{datetime_column_name}` not found in the DataFrame from {file_path}')

    if start_datetime is not None:
        df = df[df[datetime_column_name] >= start_datetime]
    if end_datetime is not None:
        df = df[df[datetime_column_name] <= end_datetime]
    return df

In [None]:
# Take rows up to and including 2024-06-30 23:59:59 from file 1.
df1_to_june_30 = get_df_between_datetimes(file1_path, end_datetime=f'2024-06-30 {END_OF_DAY_TIME}')
# Take rows from 2024-07-01 00:00:00 onward from file 2. This slice must be read from `file2_path`;
# reading `file1_path` here would only re-slice file 1 and file 2 would never contribute to the merge.
df2_from_july_1 = get_df_between_datetimes(file2_path, start_datetime=f'2024-07-01 {START_OF_DAY_TIME}')
# NOTE(review): rows stamped strictly between 2024-06-30 23:59:59 and 2024-07-01 00:00:00 (sub-second
# timestamps) land in neither slice -- confirm that no such rows exist or that dropping them is intended.
# The concatenation keeps each slice's original index, so index labels may repeat across the two halves.
combined_df = pd.concat([df1_to_june_30, df2_from_july_1])

In [None]:
# Persist the merged DataFrame as a pickle at `output_file_path`.
combined_df.to_pickle(output_file_path)