In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def calculate_realized_volatility_per_second_100ms(price_data, periods_per_second=10):
    """
    Calculates the realized volatility per second from evenly sampled price data.

    The estimate is sqrt(mean(r_t ** 2) * periods_per_second), where r_t is the
    log return between consecutive observations.

    Args:
        price_data (pd.Series or pd.DataFrame): Prices in chronological order.
            If it's a DataFrame, the 'mid_price' column is used when present;
            otherwise the DataFrame must consist of a single price column.
            Only the row order is used; the index values are never read.
        periods_per_second (int or float): Number of observations per second.
            Defaults to 10, i.e. one observation every 100ms.

    Returns:
        float: The realized volatility per second, or NaN when fewer than two
        usable prices are supplied (no return can be formed).

    Raises:
        ValueError: If price_data is neither a Series nor a DataFrame, or if a
            multi-column DataFrame has no 'mid_price' column.
    """
    if isinstance(price_data, pd.DataFrame):
        if 'mid_price' in price_data.columns:
            prices = price_data['mid_price']
        elif len(price_data.columns) == 1:
            prices = price_data.iloc[:, 0]
        else:
            raise ValueError("DataFrame must have a 'mid_price' column or a single column of prices.")
    elif isinstance(price_data, pd.Series):
        prices = price_data
    else:
        raise ValueError("price_data must be a pandas Series or DataFrame.")

    # Log returns between consecutive rows. The first row has no predecessor and
    # yields NaN, which dropna() removes together with returns touching NaN prices.
    # NOTE(review): non-positive prices are not validated here; a zero price
    # produces an infinite return that propagates into the result.
    log_returns = np.log(prices / prices.shift(1)).dropna()

    # Without at least one return the mean below would be 0 / 0; report NaN
    # explicitly instead of relying on a division-by-zero side effect.
    if len(log_returns) == 0:
        return np.nan

    # Mean squared return per observation, scaled up to a one-second horizon.
    realized_variance = np.sum(log_returns**2) / len(log_returns) * periods_per_second

    return np.sqrt(realized_variance)

In [3]:
# Round-1 price files, one per day (-2, -1, 0); all are semicolon-delimited.
price_files = [
    'data/prices_round_1_day_-2.csv',
    'data/prices_round_1_day_-1.csv',
    'data/prices_round_1_day_0.csv',
]
day_m2, day_m1, day_0 = (pd.read_csv(path, delimiter=";") for path in price_files)

# Stack the days in chronological order. reset_index() (without drop=True)
# keeps each file's original row labels as an extra 'index' column.
daily_frames = [day_m2, day_m1, day_0]
df = pd.concat(daily_frames).reset_index()
print(df.head())

   index  day  timestamp           product  bid_price_1  bid_volume_1  \
0      0   -2          0  RAINFOREST_RESIN         9996             1   
1      1   -2          0              KELP         1998            26   
2      2   -2          0         SQUID_INK         1998            26   
3      3   -2        100         SQUID_INK         1999             5   
4      4   -2        100  RAINFOREST_RESIN        10000             5   

   bid_price_2  bid_volume_2  bid_price_3  bid_volume_3  ask_price_1  \
0       9995.0          25.0          NaN           NaN        10004   
1          NaN           NaN          NaN           NaN         2002   
2          NaN           NaN          NaN           NaN         2002   
3       1998.0          26.0          NaN           NaN         2001   
4       9995.0          20.0          NaN           NaN        10005   

   ask_volume_1  ask_price_2  ask_volume_2  ask_price_3  ask_volume_3  \
0             1      10005.0          25.0          NaN

In [4]:
# Keep only the KELP rows of the combined multi-day frame.
is_kelp = df['product'] == 'KELP'
kelp = df.loc[is_kelp]

# Estimate per-second realized volatility from the KELP mid prices.
# NOTE(review): df stacks several days, so the series includes the return
# across each day boundary, and the "per second" scaling assumes one row per
# 100ms — confirm both against the data source.
kelp_mid_prices = kelp['mid_price']
realized_vol_100ms = calculate_realized_volatility_per_second_100ms(kelp_mid_prices)
print(f"Realized Volatility per Second: {realized_vol_100ms:.4f}")

Realized Volatility per Second: 0.0012
