In [25]:
import numpy as np
import pandas as pd
from datetime import datetime

Read hourly stock price data

In [26]:
# Load the hourly price table: fields are semicolon-separated and the first
# column (presumably the timestamp strings) becomes the row index.
data = pd.read_csv("hour_data.csv", sep=";", index_col=0)

Check for missing data

In [27]:
# Total number of missing cells across all columns.
data.isna().sum().sum()

271

Fill missing values with previous observations

In [28]:
# Forward-fill: each missing value takes the most recent earlier observation
# in its column. DataFrame.ffill() replaces fillna(method='ffill'), whose
# `method` argument is deprecated in recent pandas releases.
data = data.ffill()

Check that no missing values remain

In [29]:
# Re-count the missing cells; the expected result after the fill is 0.
data.isna().sum().sum()

0

Take natural logarithm of observations

In [30]:
# Natural log of every value at once (np.log works element-wise on the frame).
# The 1e-16 offset keeps an exact zero from evaluating to -inf.
data = np.log(data + 1e-16)

Calculate log returns: take the first (one-lag) difference

In [31]:
# One-step difference of the log values; the first row has no predecessor
# and therefore becomes NaN.
data = data - data.shift(1)

Exclude overnight returns

In [32]:
# Every 7th row (positions 0, 7, 14, ...) is treated as the return spanning
# the previous day's last observation to the current day's first one; this
# assumes exactly 7 observations per trading day -- TODO confirm against the data.
mask = list(range(0, len(data), 7))
# Select by position instead of dropping by label, so that a repeated index
# label cannot remove rows other than the intended ones.
keep = np.ones(len(data), dtype=bool)
keep[mask] = False
data = data.iloc[keep]

Square returns

In [33]:
# Element-wise square of each return.
data = data ** 2

Create list of trading days

In [34]:
# Strip the trailing 6 characters from each index label to obtain the date
# part (presumably a " HH:MM" suffix -- verify), de-duplicate, and order the
# resulting days chronologically.
day_index = sorted(
    {stamp[:-6] for stamp in data.index},
    key=lambda date: datetime.strptime(date, "%m/%d/%Y"),
)

Create an empty dataframe for realized volatility, with column names taken from data and an index of trading days

In [35]:
# Empty (all-NaN) frame: one row per trading day, one column per series in `data`.
rv = pd.DataFrame(index=day_index, columns=data.columns.tolist())

Calculate daily realized volatilities

$\mathrm{RV}_{i, t}^{(h)}:=  \displaystyle\sum_{s=t-h+1}^t r_{i, s}^2 $, for period 
$[t-h, t]$

In [36]:
# Sum each day's squared intraday returns, one column at a time.
# Rows of `data` are consumed in consecutive blocks of 6 (the observations
# left per day once the overnight return is dropped), and block k is assigned
# to the k-th entry of rv's index -- this assumes `data` is in chronological
# order with exactly 6 rows per day (TODO confirm).
obs_per_day = 6
for ind in data.columns:
    squared = data[ind].to_numpy()
    # Assign the whole column in one step: element-wise chained assignment
    # (rv[ind][i] = ...) writes to a temporary object under pandas
    # Copy-on-Write and would leave rv unchanged.
    rv[ind] = [
        squared[day * obs_per_day:(day + 1) * obs_per_day].sum()
        for day in range(len(rv))
    ]

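# TODO: annualize the estimate (assuming 252 trading days in a year)? RV as computed here is a sum of squared returns (a variance measure), so it scales by 252; the np.sqrt(252) factor applies only after taking the square root of RV.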

In [37]:
# Persist the daily values; the day labels are written as the first CSV column
# (to_csv includes the index by default).
rv.to_csv("daily_rv_hour_data.csv")