In [9]:
import pandas as pd
import numpy as np

import glob

In [11]:
# TODO: download the daily open and close prices for each index and load them here.
# Placeholder loaders (file names still to be filled in):
#jpn_opens = pd.read_csv("datasets_stocks/....csv")
#jpn_closes = pd.read_csv("datasets_stocks/....csv")

# Folder that holds the intraday data.
intraday_root = "datasets_stocks/intraday"
# Sub-folder of the intraday root for JPNIDX.
jpn_path = intraday_root + "/JPNIDX"

In [12]:
def process_intraday_data(path):
    """Load every ``*.json`` file directly under ``path`` into one DataFrame.

    Files are read with ``pd.read_json`` in sorted filename order, their rows
    are stacked in that order, and the ``timestamp`` column becomes the index.

    Parameters
    ----------
    path : str
        Directory that contains the JSON files.

    Returns
    -------
    pandas.DataFrame
        All rows from all files, indexed by ``timestamp``.

    Raises
    ------
    KeyError
        If no ``*.json`` file is found under ``path``, or if the loaded data
        has no ``timestamp`` column.
    """
    # Read everything first and concatenate once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration, and seeding
    # with an empty DataFrame triggers pandas' empty-entry concat deprecation.
    monthly_frames = [
        pd.read_json(file) for file in sorted(glob.glob(f"{path}/*.json"))
    ]

    if not monthly_frames:
        # Same exception type the missing "timestamp" column used to produce
        # in this situation, but with a message that names the real cause.
        raise KeyError(
            f"no '*.json' files found in {path!r}, "
            "so there is no 'timestamp' column to index by"
        )

    return pd.concat(monthly_frames).set_index("timestamp")

In [8]:
def calculate_annualised_rv(opens, closes, intraday, trading_days_per_year=252):
    """Compute the annualised daily realised variance (RV).

    For every day ``t`` that appears both in the overnight returns and in the
    intraday data:

        j_t    = 100 * (ln(open_t) - ln(close_{t-1}))     overnight return
        r_{i,t} = 100 * (ln(P_{i,t}) - ln(P_{i-1,t}))     intraday return
        R_t    = sum_i r_{i,t} ** 2
        RV_t   = trading_days_per_year * (j_t ** 2 + R_t)

    Intraday returns never span two days: the first observation of each day
    has no predecessor and contributes nothing to ``R_t``.

    Parameters
    ----------
    opens, closes : pandas.Series or single-column pandas.DataFrame
        Daily opening and closing prices. They are divided label-by-label, so
        they need a common index; that index must also match the (midnight,
        timezone-naive) dates derived from ``intraday`` for rows to survive
        the inner join.
    intraday : pandas.DataFrame
        Intraday observations with a ``close`` column and an index that
        ``pd.DatetimeIndex`` can parse. The frame is not modified.
    trading_days_per_year : int or float, default 252
        Annualisation factor.

    Returns
    -------
    pandas.DataFrame
        Columns ``j``, ``R`` and ``RV`` (all float), one row per common day.

    Raises
    ------
    ValueError
        If ``opens`` or ``closes`` is a DataFrame with more than one column.
    """
    opens = _as_price_series(opens, "opens")
    closes = _as_price_series(closes, "closes")

    # 1. Overnight return j_t: previous close -> today's open. The first day
    #    has no previous close and is dropped. The result is named explicitly
    #    because dividing two differently named Series yields an unnamed
    #    Series, which pd.merge refuses to accept.
    j = (100 * np.log(opens.iloc[1:] / closes.shift(1).iloc[1:])).rename("j")

    # 2. Sum of squared intraday returns per calendar day. Work on a derived
    #    DatetimeIndex instead of overwriting intraday.index, so the caller's
    #    frame is left untouched.
    stamps = pd.DatetimeIndex(intraday.index)
    if stamps.tz is not None:
        # Keep the local wall-clock date, matching what `.date` would return.
        stamps = stamps.tz_localize(None)
    days = stamps.normalize()

    # One pass over the data (groupby) rather than one full boolean mask per
    # day; values are stored directly instead of via chained `.loc[...][...]`
    # assignment, which does not write through under pandas Copy-on-Write.
    realised = {
        day: _sum_squared_log_returns(day_prices)
        for day, day_prices in intraday["close"].groupby(days)
    }
    R = pd.Series(realised, dtype=float, name="R")
    R.index = pd.DatetimeIndex(R.index)

    # 3. Annualised daily variance: RV_t = trading_days_per_year * (j_t^2 + R_t)
    merged = pd.merge(j, R, how="inner", left_index=True, right_index=True)
    merged["RV"] = trading_days_per_year * (merged["j"] ** 2 + merged["R"])

    return merged


def _as_price_series(prices, label):
    """Return ``prices`` as a Series, unwrapping a single-column DataFrame."""
    if isinstance(prices, pd.DataFrame):
        if prices.shape[1] != 1:
            raise ValueError(
                f"{label} must be a Series or a single-column DataFrame, "
                f"got {prices.shape[1]} columns"
            )
        return prices.iloc[:, 0]
    return prices


def _sum_squared_log_returns(prices):
    """Sum of squared 100*log returns between consecutive entries of ``prices``."""
    returns = 100 * np.log(prices.iloc[1:] / prices.shift(1).iloc[1:])
    # skipna=False: a missing price makes the day's total NaN rather than
    # silently understating it. A single-observation day sums to 0.
    return float((returns ** 2).sum(skipna=False))