In [1]:
#NN library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import torch
from typing import Optional, Union, Tuple


import torch.nn.functional as F
from torch.distributions import Distribution
from torch.distributions import Bernoulli, Normal, StudentT, Poisson, NegativeBinomial

from torch.distributions import constraints

from ray import tune

from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATSx, NHITS
from neuralforecast.auto import AutoNHITS, AutoLSTM
from neuralforecast.tsdataset import TimeSeriesDataset
from neuralforecast.utils import AirPassengers, AirPassengersPanel, AirPassengersStatic
from neuralforecast.losses.numpy import rmse, mape
from pandas.tseries.holiday import USFederalHolidayCalendar
from pandas.tseries.offsets import CustomBusinessDay
from neuralforecast.losses.pytorch import MSE

In [2]:
from numpy.random import seed
from random import randrange

In [3]:
#Realized volatility 
def Yang_Zhang_RV_yahoo(tickers, start=None, end=None, period=None, interval=None):
    """Download OHLC bars with yfinance and return a weekly Yang & Zhang RV proxy.

    The estimator computed per weekly bucket is

        RV^2 = Vo + k*Vc + (1-k)*Vrs

    with, for each bar i:
        oi = ln(O_i / C_{i-1})   (open relative to the previous close)
        ci = ln(C_i / O_i)
        ui = ln(H_i / O_i)
        di = ln(L_i / O_i)
        Vo  = sample variance of oi inside the bucket
        Vc  = sample variance of ci inside the bucket
        Vrs = mean of ui*(ui-ci) + di*(di-ci)   (Rogers & Satchell term)
        k   = 0.34 / (1.34 + (n+1)/(n-1))

    Parameters
    ----------
    tickers : passed through to ``yf.download(tickers=...)``.
    start, end : passed to ``yf.download`` when ``period`` is None.
    period : when not None, used instead of ``start``/``end``.
    interval : passed through to ``yf.download(interval=...)``.

    Returns
    -------
    pandas.DataFrame
        One column, "Yang & Zhang RV proxy", indexed by weekly bucket.

    Raises
    ------
    ZeroDivisionError
        If no weekly bucket survives, or if the truncated average number of
        bars per bucket is 1 (the ``k`` formula divides by ``n - 1``).

    NOTE(review): assumes the frame returned by ``yf.download`` has flat
    "Open"/"High"/"Low"/"Close" columns and a DatetimeIndex - confirm against
    the installed yfinance version.
    """
    # yfinance is imported lazily so the notebook only needs it when this
    # function is actually called.
    import yfinance as yf
    import warnings
    # Kept from the original implementation: this installs an "ignore" filter
    # for every warning, and the filter stays active after the function returns.
    warnings.filterwarnings("ignore")

    # Data extraction: an explicit period takes precedence over start/end.
    if period is None:
        data = yf.download(tickers=tickers, start=start, end=end, interval=interval)
    else:
        data = yf.download(tickers=tickers, period=period, interval=interval)

    # Dropping N/A values
    if data.isnull().values.any():
        data = data.dropna()
        print("Rows with missing values were removed")

    bars = data.copy()
    open_px = bars["Open"]
    bars["ui"] = np.log(bars["High"] / open_px)
    bars["ci"] = np.log(bars["Close"] / open_px)
    bars["di"] = np.log(bars["Low"] / open_px)
    # shift(1) pairs each open with the PREVIOUS bar's close. Dividing two
    # offset slices (Open[1:] / Close[:-1]) does not do that: pandas aligns
    # both operands on their index labels, which yields ln(O_i / C_i).
    bars["oi"] = np.log(open_px / bars["Close"].shift(1))

    # The first bar has no previous close, so it cannot contribute.
    bars = bars.iloc[1:].copy()
    bars["RS"] = bars["ui"] * (bars["ui"] - bars["ci"]) + bars["di"] * (bars["di"] - bars["ci"])

    weekly = pd.Grouper(freq='W')
    RS_var = bars["RS"].groupby(weekly).mean().dropna()
    Vc_and_Vo = bars[["oi", "ci"]].groupby(weekly).var().dropna()

    # n is the (truncated) average number of bars per weekly bucket.
    n = int(len(bars) / len(RS_var))
    k = 0.34 / (1.34 + (n + 1) / (n - 1))

    Yang_Zhang_RV = np.sqrt((1 - k) * RS_var + Vc_and_Vo["oi"] + Vc_and_Vo["ci"] * k)
    return Yang_Zhang_RV.to_frame(name="Yang & Zhang RV proxy")

def Yang_Zhang_RV_own_data(data):
    """Return a daily Yang & Zhang realized-volatility proxy for an OHLC frame.

    The estimator computed per calendar day is

        RV^2 = Vo + k*Vc + (1-k)*Vrs

    with, for each bar i:
        oi = ln(O_i / C_{i-1})   (open relative to the previous close)
        ci = ln(C_i / O_i)
        ui = ln(H_i / O_i)
        di = ln(L_i / O_i)
        Vo  = sample variance of oi inside the day
        Vc  = sample variance of ci inside the day
        Vrs = mean of ui*(ui-ci) + di*(di-ci)   (Rogers & Satchell term)
        k   = 0.34 / (1.34 + (n+1)/(n-1))

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide "Open", "High", "Low" and "Close" columns and an index
        that ``pd.Grouper(freq='D')`` can group on. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One column, "Yang & Zhang RV proxy", indexed by day.

    Raises
    ------
    ZeroDivisionError
        If no daily bucket survives, or if the truncated average number of
        bars per day is 1 (the ``k`` formula divides by ``n - 1``).
    """
    import warnings
    # Kept from the original implementation: this installs an "ignore" filter
    # for every warning, and the filter stays active after the function returns.
    warnings.filterwarnings("ignore")

    # Work on a copy so the helper columns never leak into the caller's frame.
    bars = data.copy()
    open_px = bars["Open"]
    bars["ui"] = np.log(bars["High"] / open_px)
    bars["ci"] = np.log(bars["Close"] / open_px)
    bars["di"] = np.log(bars["Low"] / open_px)
    # shift(1) pairs each open with the PREVIOUS bar's close. Dividing two
    # offset slices (Open[1:] / Close[:-1]) does not do that: pandas aligns
    # both operands on their index labels, which yields ln(O_i / C_i).
    bars["oi"] = np.log(open_px / bars["Close"].shift(1))

    # The first bar has no previous close, so it cannot contribute.
    bars = bars.iloc[1:].copy()
    bars["RS"] = bars["ui"] * (bars["ui"] - bars["ci"]) + bars["di"] * (bars["di"] - bars["ci"])

    daily = pd.Grouper(freq='D')
    RS_var = bars["RS"].groupby(daily).mean().dropna()
    Vc_and_Vo = bars[["oi", "ci"]].groupby(daily).var().dropna()

    # n is the (truncated) average number of bars per daily bucket.
    n = int(len(bars) / len(RS_var))
    k = 0.34 / (1.34 + (n + 1) / (n - 1))

    Yang_Zhang_RV = np.sqrt((1 - k) * RS_var + Vc_and_Vo["oi"] + Vc_and_Vo["ci"] * k)
    return Yang_Zhang_RV.to_frame(name="Yang & Zhang RV proxy")
    
    
    
def Multivariate_Yang_Zhang_RV_own_data(data_list):
    """Apply ``Yang_Zhang_RV_own_data`` to every frame in ``data_list``.

    Returns a list with one result per input frame, in the same order.
    """
    return [Yang_Zhang_RV_own_data(data=frame) for frame in data_list]
    
    
def Multivariate_Yang_Zhang_RV_yahoo(tickers, start=None, end=None, period=None, interval=None):
    """Download OHLC bars for several tickers and return one daily RV proxy each.

    The estimator computed per ticker and per calendar day is

        RV^2 = Vo + k*Vc + (1-k)*Vrs

    with, for each bar i:
        oi = ln(O_i / C_{i-1})   (open relative to the previous close)
        ci = ln(C_i / O_i)
        ui = ln(H_i / O_i)
        di = ln(L_i / O_i)
        Vo  = sample variance of oi inside the day
        Vc  = sample variance of ci inside the day
        Vrs = mean of ui*(ui-ci) + di*(di-ci)   (Rogers & Satchell term)
        k   = 0.34 / (1.34 + (n+1)/(n-1))

    Parameters
    ----------
    tickers : sequence of ticker symbols; passed to ``yf.download`` and then
        used, one by one, to select each ticker's columns.
    start, end : passed to ``yf.download`` when ``period`` is None.
    period : when not None, used instead of ``start``/``end``.
    interval : passed through to ``yf.download(interval=...)``.

    Returns
    -------
    list of pandas.DataFrame
        One single-column frame ("Yang & Zhang RV proxy") per ticker, in the
        order of ``tickers``.

    Raises
    ------
    ZeroDivisionError
        If, for some ticker, no daily bucket survives or the truncated average
        number of bars per day is 1 (the ``k`` formula divides by ``n - 1``).

    NOTE(review): the reshape below relies on the downloaded frame having an
    index named "Datetime" and two unnamed column levels (price field, then
    ticker) - confirm against the installed yfinance version and interval.
    """
    # yfinance is imported lazily so the notebook only needs it when this
    # function is actually called.
    import yfinance as yf
    import warnings
    # Kept from the original implementation: this installs an "ignore" filter
    # for every warning, and the filter stays active after the function returns.
    warnings.filterwarnings("ignore")

    # Data extraction: an explicit period takes precedence over start/end.
    if period is None:
        data = yf.download(tickers=tickers, start=start, end=end, interval=interval)
    else:
        data = yf.download(tickers=tickers, period=period, interval=interval)

    # Dropping N/A values
    if data.isnull().values.any():
        data = data.dropna()
        print("Rows with missing values were removed")

    # Reorder the columns from (field, ticker) to (ticker, field) so that
    # data[ticker] yields that ticker's OHLC frame.
    data=data.unstack().reset_index(name="Actuals").rename(columns={"level_1":"Stocks"}).set_index("Datetime").pivot(columns=['Stocks','level_0'])
    data=data['Actuals']

    daily = pd.Grouper(freq='D')
    Multivariate_Yang_Zhang_RV = []
    for ticker in tickers:
        bars = data[ticker].copy()
        open_px = bars["Open"]
        bars["ui"] = np.log(bars["High"] / open_px)
        bars["ci"] = np.log(bars["Close"] / open_px)
        bars["di"] = np.log(bars["Low"] / open_px)
        # shift(1) pairs each open with the PREVIOUS bar's close. Dividing two
        # offset slices (Open[1:] / Close[:-1]) does not do that: pandas aligns
        # both operands on their index labels, which yields ln(O_i / C_i).
        bars["oi"] = np.log(open_px / bars["Close"].shift(1))

        # The first bar has no previous close, so it cannot contribute.
        bars = bars.iloc[1:].copy()
        bars["RS"] = bars["ui"] * (bars["ui"] - bars["ci"]) + bars["di"] * (bars["di"] - bars["ci"])

        RS_var = bars["RS"].groupby(daily).mean().dropna()
        Vc_and_Vo = bars[["oi", "ci"]].groupby(daily).var().dropna()

        # n is the (truncated) average number of bars per daily bucket.
        n = int(len(bars) / len(RS_var))
        k = 0.34 / (1.34 + (n + 1) / (n - 1))

        Yang_Zhang_RV = np.sqrt((1 - k) * RS_var + Vc_and_Vo["oi"] + Vc_and_Vo["ci"] * k)
        Multivariate_Yang_Zhang_RV.append(Yang_Zhang_RV.to_frame(name="Yang & Zhang RV proxy"))

    return Multivariate_Yang_Zhang_RV

In [4]:
import pandas as pd
from binance import Client
from dotenv import dotenv_values
from datetime import datetime

# Binance API credentials are read from a local .env file (keys KEY and
# SECRET_KEY) so they never appear in the notebook itself.
config = dotenv_values('.env')
client = Client(config.get('KEY'), config.get('SECRET_KEY'))
TICKER = 'BTCUSDT'
start_date = datetime(2018, 1, 1)
end_date = datetime(2024, 3, 1)

# get_historical_klines is given the date range as formatted strings.
start_date_str = start_date.strftime('%d %b, %Y')
end_date_str = end_date.strftime('%d %b, %Y')

# Hourly klines for the whole range; this is a network call.
klines = client.get_historical_klines(TICKER, client.KLINE_INTERVAL_1HOUR, start_date_str, end_date_str)

# Fields 1..6 of each kline are kept and labelled Open/High/Low/Close/Volume/Date.
# The field used as 'Date' is a millisecond timestamp; in the displayed frame
# every index value ends in :59:59.999, i.e. it marks the end of the hourly bar.
dataBTC = pd.DataFrame(
    data=[row[1:7] for row in klines],
    columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Date'],
).set_index('Date')
dataBTC.index = pd.to_datetime(dataBTC.index, unit='ms')
dataBTC = dataBTC.sort_index()
# Convert column by column: same numeric result as a row-wise apply(axis=1),
# but one vectorised conversion per column instead of one per row.
dataBTC = dataBTC.apply(pd.to_numeric)
dataBTC

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-01 00:59:59.999,13715.65,13715.65,13400.01,13529.01,443.356199
2018-01-01 01:59:59.999,13528.99,13595.89,13155.38,13203.06,383.697006
2018-01-01 02:59:59.999,13203.00,13418.43,13200.00,13330.18,429.064572
2018-01-01 03:59:59.999,13330.26,13611.27,13290.00,13410.03,420.087030
2018-01-01 04:59:59.999,13434.98,13623.29,13322.15,13601.01,340.807329
...,...,...,...,...,...
2024-02-29 20:59:59.999,61599.99,62285.47,61521.73,61934.73,3755.220100
2024-02-29 21:59:59.999,61934.73,61999.75,60584.07,61374.94,4040.139080
2024-02-29 22:59:59.999,61374.95,61474.81,60672.82,61224.02,1906.566300
2024-02-29 23:59:59.999,61224.02,61536.94,60998.51,61130.98,1694.180000
