# In [1]:
import os
import pandas as pd

# Input directory (raw JSON snapshots) and output directory (resampled data)
data_dir, data5min_dir = 'data', 'data5min'

# Make sure the output directory is present before anything is written to it
os.makedirs(name=data5min_dir, exist_ok=True)


def compute_average_ask_bid(price_data):
    """Return the mid price (average of 'Ask' and 'Bid') of a quote.

    Args:
        price_data: A mapping-like object with 'Ask' and 'Bid' entries.

    Returns:
        ``(Ask + Bid) / 2``, or ``None`` when the quote cannot be used:
        either side is ``None``, a key is absent, or ``price_data`` itself
        is not subscriptable (e.g. ``None`` or a float NaN placeholder).
    """
    try:
        ask = price_data['Ask']
        bid = price_data['Bid']
    except (TypeError, KeyError, IndexError):
        # Missing quote or missing side: report "no price" instead of
        # raising, so one bad record does not abort the caller's loop.
        return None

    if ask is None or bid is None:
        return None
    return (ask + bid) / 2
    

# List all JSON files found directly inside the data directory
files = [f for f in os.listdir(data_dir) if f.endswith('.json')]

# Derived mid-price column -> source column holding the Ask/Bid quote.
# NOTE: the derived names (including the lowercase 'closedprices') are kept
# exactly as they are because they end up as keys in the written JSON.
price_columns = {
    'openPrices': 'OpenPrice',
    'highPrices': 'HighPrice',
    'lowPrices': 'LowPrice',
    'closedprices': 'ClosePrice',
}

# How each derived column is folded into one candle per 5-minute bin
candle_aggregation = {
    'openPrices': 'first',
    'highPrices': 'max',
    'lowPrices': 'min',
    'closedprices': 'last',
}

# Process each file
for file in files:
    # Read the JSON file into a DataFrame
    file_path = os.path.join(data_dir, file)
    df = pd.read_json(file_path)

    # A file without any records produces an empty frame that has no
    # 'SnapshotTime' column; skip it rather than crash the whole batch.
    if df.empty:
        print(f"Skipping {file}: no rows to resample.")
        continue

    # Ensure the 'SnapshotTime' column is in datetime format, then use it as
    # the index so the frame can be resampled by time
    df['SnapshotTime'] = pd.to_datetime(df['SnapshotTime'])
    df.set_index('SnapshotTime', inplace=True)

    # Collapse every Ask/Bid quote into a single mid price per row
    for target_column, source_column in price_columns.items():
        df[target_column] = [
            compute_average_ask_bid(quote) for quote in df[source_column]
        ]

    # Resample to 5-minute candlestick data
    df_5min = df.resample('5min').agg(candle_aggregation)

    # Turn the timestamps back into a plain column of formatted strings
    df_5min.reset_index(inplace=True)
    df_5min['SnapshotTime'] = df_5min['SnapshotTime'].dt.strftime('%Y-%m-%d %H:%M:%S')

    # Drop rows with NaN values (e.g. 5-minute bins that contained no data)
    df_5min.dropna(inplace=True)

    # Save the resampled DataFrame to the data5min directory under the same
    # file name. 'records' orientation never writes the index, so no 'index'
    # argument is passed: an explicit index=False is rejected for this
    # orientation by pandas releases older than 2.1.
    output_file_path = os.path.join(data5min_dir, file)
    df_5min.to_json(output_file_path, orient='records', date_format='iso')


print("Conversion to 5-minute candlestick data completed.")

# Output: Conversion to 5-minute candlestick data completed.
