In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [4]:
import pandas as pd
import numpy as np

def calculate_realized_volatility_100ms(price_data, annualization_factor=None):
    """
    Calculates the annualized realized volatility from 100ms frequency price data.

    Args:
        price_data (pd.Series or pd.DataFrame): A pandas Series or DataFrame
            containing the 100ms frequency price data. If it's a DataFrame, it
            should have a column named 'Price' or a single column of prices. The
            index should be datetime.
        annualization_factor (int, optional): The number of 100ms periods in a year
            based on the typical trading hours and days. If None, a default
            will be estimated (assuming 24 hours/day and 252 trading days/year).

    Returns:
        float: The annualized realized volatility based on the entire input data.
    """
    if isinstance(price_data, pd.DataFrame):
        if 'Price' in price_data.columns:
            prices = price_data['Price']
        elif len(price_data.columns) == 1:
            prices = price_data.iloc[:, 0]
        else:
            raise ValueError("DataFrame must have a 'Price' column or a single column of prices.")
    elif isinstance(price_data, pd.Series):
        prices = price_data
    else:
        raise ValueError("price_data must be a pandas Series or DataFrame.")

    # Calculate high-frequency logarithmic returns
    log_returns = np.log(prices / prices.shift(1)).dropna()

    # Estimate annualization factor if not provided
    if annualization_factor is None:
        # 1 second = 10 * 100ms
        seconds_in_minute = 60
        minutes_in_hour = 60
        hours_in_day = 24
        trading_days_in_year = 252

        periods_per_second = 10  # 100ms frequency
        periods_per_minute = periods_per_second * seconds_in_minute
        periods_per_hour = periods_per_minute * minutes_in_hour
        periods_per_day = periods_per_hour * hours_in_day
        annualization_factor = periods_per_day * trading_days_in_year

    # Calculate realized variance and then realized volatility
    realized_variance = np.sum(log_returns**2) * annualization_factor
    realized_volatility = np.sqrt(realized_variance)

    return realized_volatility

# Example usage:
# Assuming you have your 100ms price data in a pandas DataFrame called 'hf_100ms_df'
# with a column named 'Price' and the index is datetime.

# Create a sample 100ms DataFrame (replace with your actual data)
# time_index_100ms = pd.to_datetime(['2025-04-09 09:00:00.000', '2025-04-09 09:00:00.100',
#                                    '2025-04-09 09:00:00.200', '2025-04-09 09:00:00.300',
#                                    '2025-04-11 16:00:00.000', '2025-04-11 16:00:00.100',
#                                    '2025-04-11 16:00:00.200'])
# data_100ms = {'Price': [100.0000, 100.0001, 100.0002, 100.0001, 100.0005, 100.0004, 100.0006]}
# hf_100ms_df = pd.DataFrame(data_100ms, index=time_index_100ms)

# Calculate the annualized realized volatility for 100ms data
# realized_vol_100ms = calculate_realized_volatility_100ms(hf_100ms_df['Price'])
# print(f"Annualized Realized Volatility (100ms): {realized_vol_100ms:.4f}")

# You can also provide a custom annualization factor if your trading schedule differs
# Example: If you only trade for 8 hours a day
# periods_per_second = 10
# seconds_in_minute = 60
# minutes_in_hour = 60
# trading_hours_per_year = 252 * 8
# custom_annualization_factor = periods_per_second * seconds_in_minute * minutes_in_hour * trading_hours_per_year
# realized_vol_custom = calculate_realized_volatility_100ms(hf_100ms_df['Price'], annualization_factor=custom_annualization_factor)
# print(f"Annualized Realized Volatility (100ms, custom hours): {realized_vol_custom:.4f}")

In [5]:
day_m1 = pd.read_csv('data/prices_round_2_day_-1.csv', delimiter=";")
day_0 = pd.read_csv('data/prices_round_2_day_0.csv', delimiter=";")
day_p1 = pd.read_csv('data/prices_round_2_day_1.csv', delimiter=";")

df = pd.concat([day_m1, day_0, day_p1]).reset_index()
print(df.head())

   index  day  timestamp         product  bid_price_1  bid_volume_1  \
0      0   -1          0      CROISSANTS         4304           112   
1      1   -1          0            JAMS         6670            66   
2      2   -1          0       SQUID_INK         2005             1   
3      3   -1          0  PICNIC_BASKET1        59284            20   
4      4   -1          0  PICNIC_BASKET2        30606            20   

   bid_price_2  bid_volume_2  bid_price_3  bid_volume_3  ask_price_1  \
0          NaN           NaN          NaN           NaN         4305   
1       6669.0         137.0          NaN           NaN         6671   
2       2002.0          31.0          NaN           NaN         2006   
3      59283.0          18.0          NaN           NaN        59294   
4      30605.0          18.0          NaN           NaN        30612   

   ask_volume_1  ask_price_2  ask_volume_2  ask_price_3  ask_volume_3  \
0           112          NaN           NaN          NaN           N

In [None]:
jams = df[df['product'] == 'JAMS']
djembes = df[df['product'] == 'DJEMBES']

realized_vol_100ms = calculate_realized_volatility_100ms(jams['mid_price'])
print(f"Annualized Realized Volatility (100ms): {realized_vol_100ms:.4f}")

Annualized Realized Volatility (100ms): 201.6605
