# Bar data

In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import os

In [2]:
def winddown(time):
    """Map a time of day to a fixed weight between 0.7 and 0.

    The weight steps down through the day: 0.7 before 09:30, then 0.6, 0.5,
    0.4, 0.3, 0.25, 0.15 and 0.05 as each cutoff below is passed, and 0 at or
    after 16:00.  Cutoffs are exclusive upper bounds (strict ``<``).

    NOTE(review): presumably the share of a trading day still remaining at
    ``time`` -- confirm with the author.

    Args:
        time: a ``datetime.time`` to look up.

    Returns:
        The weight of the first cutoff that ``time`` precedes, or 0.
    """
    # (exclusive cutoff, weight) pairs in ascending time order.
    schedule = (
        (dt.time(9, 30), 0.7),
        (dt.time(9, 45), 0.6),
        (dt.time(10, 30), 0.5),
        (dt.time(11, 30), 0.4),
        (dt.time(13), 0.3),
        (dt.time(14, 30), 0.25),
        (dt.time(15, 30), 0.15),
        (dt.time(16), 0.05),
    )
    for cutoff, weight in schedule:
        if time < cutoff:
            return weight
    return 0

def tte(start_time, end_time):
    """Return the weighted span between two timestamps, in 252-day years.

    The span in days is the number of business days between the two calendar
    dates (``np.busday_count``) plus the ``winddown`` weight at the start
    time minus the ``winddown`` weight at the end time.

    Args:
        start_time: object exposing ``.date()`` and ``.time()`` (e.g. a
            ``datetime`` or pandas ``Timestamp``).
        end_time: same kind of object as ``start_time``.

    Returns:
        The span in days divided by 252.
    """
    whole_days = np.busday_count(start_time.date(), end_time.date())
    weight_at_start = winddown(start_time.time())
    weight_at_end = winddown(end_time.time())
    # The +1 / -1 cancel; the evaluation order is kept as-is so the floating
    # point result is unchanged.
    span_in_days = weight_at_start + 1 - weight_at_end + whole_days - 1
    return span_in_days / 252



def _annualized_vol(frame):
    """Return sqrt(sum of 'variance' / sum of 'tte_from_prev_entry') for frame."""
    total_variance = frame['variance'].sum()
    total_time = frame['tte_from_prev_entry'].sum()
    # NOTE(review): total_time is not guarded against zero (e.g. an empty
    # frame after filtering); the division is left exactly as it was.
    annualized_variance = total_variance / total_time
    return annualized_variance ** 0.5


def UpdateRealizedData(symbol, data_path, symbol_kind, interval_time_list, realized_days_list):
    """Compute realized volatilities for one symbol and append them to its file.

    For every bar interval, reads
    ``<data_path>/<symbol_kind>/<symbol>/<interval with spaces as underscores>.csv``
    (columns used: ``date``, ``open``, ``close``), derives a per-bar squared
    move and the ``tte`` time elapsed since the previous bar, and for every
    look-back in ``realized_days_list`` stores two values:

    * ``"<N>day_interval_<interval>"`` -- volatility over the bars starting at
      the one whose look-back distance is closest to N days;
    * ``"<N>day_interval_without_earnings_<interval>"`` -- the same, after
      dropping bars whose date or previous bar's date is an earnings date.

    A ``'timestamp'`` entry (``dt.datetime.now()``) is added and the result is
    handed to ``AddToRealizedFile``.

    Args:
        symbol: symbol name, used as a directory name.
        data_path: root directory of the historical data.
        symbol_kind: sub-directory under ``data_path`` (e.g. ``"stocks"``).
        interval_time_list: bar interval labels such as ``'1 day'``.
        realized_days_list: look-back lengths in days.

    Returns:
        The dictionary of computed values, including ``'timestamp'``.
    """
    realized_dict = {}
    earnings_dates_list = GetEarningsDatesList(symbol, data_path, symbol_kind)
    for interval_time in interval_time_list:
        interval_time_string = interval_time.replace(" ", "_")
        interval_label = interval_time.replace(" ", "")
        file_path = data_path + "/" + symbol_kind + "/" + symbol + "/" + interval_time_string + ".csv"
        df = pd.read_csv(file_path)
        df['datetime'] = pd.to_datetime(df['date'])
        df['date'] = pd.to_datetime(df['datetime']).dt.date
        df['change'] = df['close'] - df['close'].shift(1)
        df['prev_datetime'] = df['datetime'].shift(1)
        df['prev_date'] = pd.to_datetime(df['prev_datetime']).dt.date

        # The first bar has no predecessor.  Take an explicit copy so the
        # column assignments below write to an independent frame instead of a
        # slice of the original (avoids pandas' SettingWithCopyWarning).
        df = df[1:].copy()
        df['tte_from_prev_entry'] = df.apply(lambda x: tte(x['prev_datetime'], x['datetime']), axis=1)
        df['percent_change'] = abs(df['change'] / df['open'])
        df['variance'] = df['percent_change'] * df['percent_change']

        # Loop-invariant: the most recent bar's timestamp.
        last_datetime = df['datetime'].iloc[-1]
        df['time_in_years_till_now'] = df.apply(lambda x: tte(x['datetime'], last_datetime), axis=1)
        # After the shift each row carries the previous bar's distance to the
        # last bar; the first row becomes NaN and is dropped just below.
        df['time_in_years_till_now'] = df['time_in_years_till_now'].shift(1)
        df['time_in_days_till_now'] = df['time_in_years_till_now'] * 252.0
        df = df[1:]
        # Fresh 0..n-1 index so the label from idxmin() can be used to slice.
        df = df.reset_index()

        for num_day in realized_days_list:
            idx = df['time_in_days_till_now'].sub(num_day).abs().idxmin()
            subset_df = df[idx:]
            dict_key = str(num_day) + 'day_interval_' + interval_label
            realized_dict[dict_key] = _annualized_vol(subset_df)

            # Exclude bars that start or end on an earnings date.
            touches_earnings = (
                subset_df['date'].isin(earnings_dates_list)
                | subset_df['prev_date'].isin(earnings_dates_list)
            )
            dict_key = str(num_day) + 'day_interval_without_earnings_' + interval_label
            realized_dict[dict_key] = _annualized_vol(subset_df[~touches_earnings])

    realized_dict['timestamp'] = dt.datetime.now()
    AddToRealizedFile(realized_dict, symbol, data_path, symbol_kind)
    return realized_dict

def AddToRealizedFile(realized_dict, symbol, data_path, symbol_kind):
    """Write ``realized_dict`` as one CSV row to the symbol's realized.csv.

    The target is ``<data_path>/<symbol_kind>/<symbol>/realized.csv``.  When
    the file already exists the row is appended without a header; otherwise
    the file is created with a header line followed by the row.

    Args:
        realized_dict: mapping of column name to scalar value.
        symbol: symbol name, used as a directory name.
        data_path: root directory of the historical data.
        symbol_kind: sub-directory under ``data_path``.
    """
    symbol_dir = data_path + "/" + symbol_kind + "/" + symbol
    target = symbol_dir + "/realized.csv"
    appending = os.path.exists(target)

    row = pd.DataFrame(realized_dict, index=[0])
    # Only a brand-new file gets the header line.
    row.to_csv(
        target,
        mode='a' if appending else 'w',
        index=False,
        header=not appending,
    )


def GetEarningsDatesList(symbol, data_path, symbol_kind):
    """Load the symbol's earnings dates as a list of ``datetime.date``.

    Reads ``<data_path>/<symbol_kind>/<symbol>/earnings.csv`` and parses its
    ``date`` column.

    Args:
        symbol: symbol name, used as a directory name.
        data_path: root directory of the historical data.
        symbol_kind: sub-directory under ``data_path``.

    Returns:
        One ``datetime.date`` per row of the file, in file order.
    """
    symbol_dir = data_path + "/" + symbol_kind + "/" + symbol
    earnings = pd.read_csv(symbol_dir + "/earnings.csv")
    parsed = pd.to_datetime(earnings['date'])
    return parsed.dt.date.tolist()

def GetInputs(inputs_path):
    """Read a header-less two-column CSV into a settings dictionary.

    The first column supplies the keys and the second the values.  The value
    stored under ``'symbols'`` is split on ``"|"`` into a list.

    Args:
        inputs_path: path of the CSV file to read.

    Returns:
        The dictionary of settings, with ``'symbols'`` as a list of strings.
    """
    table = pd.read_csv(inputs_path, header=None)
    names = table[0]
    values = table[1]
    settings = dict(zip(names, values))

    packed_symbols = settings['symbols']
    settings['symbols'] = packed_symbols.split("|")
    return settings

In [3]:
# Settings file and root directory of the per-symbol CSV data.
inputs_path = "./../../Inputs/historical_data_inputs.csv"
data_path = "./../../HistoricalData"

# Bar intervals to process and look-back windows (in days) to report.
interval_time_list = ['1 day', '1 hour', '10 mins']
realized_days_list = [1,3,10,50,256]

inputs_dict = GetInputs(inputs_path)

# Refresh realized.csv for every configured symbol.
for symbol in inputs_dict['symbols']:
    UpdateRealizedData(
        symbol=symbol,
        data_path=data_path,
        symbol_kind="stocks",
        interval_time_list=interval_time_list,
        realized_days_list=realized_days_list,
    )
