# Imports

In [None]:
import warnings
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.functions import CorrClass as cs
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram, linkage
# Suppress every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')
# Default figure size as (width, height); the heatmaps below are tall.
plt.rcParams['figure.figsize'] = (12, 40)

# Rebind the imported class alias to an instance of that class; from here on
# `cs` is the object whose methods are called, not the class itself.
cs = cs()

# Define time-step constants

In [None]:
# Time steps used for window arithmetic on timestamps: each constant is
# built from the previous one (minute -> hour -> day).
MIN_ADD = datetime.timedelta(minutes=1)
HOUR_ADD = MIN_ADD * 60
DAY_ADD = 24 * HOUR_ADD

# Get data

In [None]:
# Load the tab-separated market bars. `sep` is passed by keyword because
# pandas 2.0 made every read_csv argument after the path keyword-only.
dfMarketData = pd.read_csv('WDODATA.csv', sep='\t')
dfMarketData.columns = ['Date','Time','Open','High','Low','Close','TickVol','Vol','Spread']

# Build one timestamp per bar from the separate Date and Time columns.
# regex=False makes the '.' -> '-' substitution literal on every pandas
# version (as a regex, '.' would match any character).
dfMarketData['Datetime'] = pd.to_datetime(
    dfMarketData['Date'].str.replace('.', '-', regex=False) + ' ' + dfMarketData['Time']
)

# Keep only the OHLC columns, indexed and ordered by timestamp. Chaining on
# the selection avoids assigning into a slice of the original frame.
dfMarketData = (
    dfMarketData[['Datetime','Open','High','Low','Close']]
    .set_index('Datetime')
    .sort_index()
)

In [None]:
# Load the event releases and the EventID lookup table.
dfEventsData = pd.read_csv('EventsData.csv')
# Transposed so that each EventID becomes a column and the remaining fields
# (e.g. 'EventName') become the row labels.
dfEventsFromTo = pd.read_csv('EventsFromTo.csv').set_index('EventID').T

# Parse the release timestamp once; it is reused for the filter and the index.
dfEventsData['ReleaseTime'] = pd.to_datetime(dfEventsData['ReleaseTime'])
# Relative change of the actual value versus the previous one.
# NOTE(review): a PreviousValue of 0 produces inf/NaN here — confirm that
# downstream code tolerates it.
dfEventsData['PercentChng'] = dfEventsData['ActualValue'] / dfEventsData['PreviousValue'] - 1
# Midnight of the release day, computed vectorised instead of a per-row
# string round-trip through strptime.
dfEventsData['Date'] = dfEventsData['ReleaseTime'].dt.normalize()

# Keep only releases later than the first available market bar, then index
# and order them by release time.
dfEventsData = (
    dfEventsData.loc[dfEventsData['ReleaseTime'] > dfMarketData.index.min()]
    .set_index('ReleaseTime')
    .sort_index()
)

# Split train/test

In [None]:
# Chronological 70/30 split of the events. The cutoff is midnight of the
# 'Date' of the row sitting at the 70% position of the time-sorted events.
_split_row = dfEventsData.iloc[int(.7 * len(dfEventsData)), :]
_split_cutoff = datetime.datetime.combine(_split_row['Date'], datetime.time.min)

dfEventsDataTrain = dfEventsData[dfEventsData.index <= _split_cutoff]
dfEventsDataTest = dfEventsData[dfEventsData.index > _split_cutoff]

# Training market data runs up to (not including) the midnight that follows
# the day of the last training event.
_train_market_end = datetime.datetime.combine(
    max(dfEventsDataTrain.index).date(), datetime.time.min
) + DAY_ADD
dfMarketDataTrain = dfMarketData.loc[dfMarketData.index < _train_market_end]

# Test market data starts at midnight of the last day present in the
# training market data.
# NOTE(review): that last day therefore appears in both the train and the
# test market slices — confirm the overlap is intended.
_test_market_start = datetime.datetime.combine(
    max(dfMarketDataTrain.index).date(), datetime.time.min
)
dfMarketDataTest = dfMarketData[dfMarketData.index >= _test_market_start]

# Calculate variation after releases

In [None]:
def variationRatio(
    dfEventsData,
    dfMarketData
) -> pd.DataFrame:
    """Compare the price excursion after each release with the one before it.

    For every row of ``dfEventsData`` the market bars within ``5 * MIN_ADD``
    after and before the release timestamp are selected. The excursion of a
    window is the larger of the absolute distances from the release price to
    the window's highest ``High`` and lowest ``Low``. The value stored for
    the release is ``excursion_after / excursion_before - 1``.

    The release price is the ``Open`` of the bar stamped exactly at the
    release time; when that bar is missing, the ``Close`` of the bar one
    ``MIN_ADD`` earlier is used; when neither exists the release is skipped.

    Relies on the module-level ``MIN_ADD`` and ``dfEventsFromTo``.

    Args:
        dfEventsData: Releases indexed by release timestamp, with an
            ``EventID`` column.
        dfMarketData: Bars indexed by timestamp, with ``Open``, ``High``,
            ``Low`` and ``Close`` columns.

    Returns:
        A DataFrame with one column per event name. Each column holds that
        event's ratios in the order the releases were visited; shorter
        columns are padded with missing values. Empty when no release could
        be evaluated.

    Raises:
        KeyError: If a release's ``EventID`` is not a column of
            ``dfEventsFromTo``.
    """
    window = MIN_ADD * 5
    bar_times = dfMarketData.index  # loop-invariant, hoisted
    ratios_by_event = {}

    for release_time, release in dfEventsData.iterrows():
        bars_after = dfMarketData[
            (bar_times >= release_time) & (bar_times <= release_time + window)
        ]
        bars_before = dfMarketData[
            (bar_times <= release_time) & (bar_times >= release_time - window)
        ]
        if bars_after.empty or bars_before.empty:
            continue

        try:
            release_price = bars_after['Open'][release_time]
        except KeyError:
            # No bar at the release instant: fall back to the previous bar's
            # close. That bar lies before the release, so it must be read
            # from the pre-release window; the post-release window can never
            # contain it, which made the fallback always fail.
            try:
                release_price = bars_before['Close'][release_time - MIN_ADD]
            except KeyError:
                continue

        excursion_after = max(
            abs(release_price - bars_after['High'].max()),
            abs(release_price - bars_after['Low'].min()),
        )
        excursion_before = max(
            abs(release_price - bars_before['High'].max()),
            abs(release_price - bars_before['Low'].min()),
        )
        # NOTE(review): a completely flat pre-release window gives a zero
        # denominator; with NumPy scalars this presumably yields inf/NaN
        # rather than raising — confirm that is acceptable downstream.
        ratio = excursion_after / excursion_before - 1

        event_name = dfEventsFromTo[int(release['EventID'])]['EventName']
        # Group directly instead of DataFrame.append (removed in pandas 2.0)
        # followed by a second regrouping pass.
        ratios_by_event.setdefault(event_name, []).append(ratio)

    # orient='index' accepts lists of unequal length; transposing turns each
    # event into a column.
    return pd.DataFrame.from_dict(ratios_by_event, orient='index').transpose()

In [None]:
# Per-event post/pre-release variation ratios for the training period
# (one column per event name).
dfTrain = variationRatio(dfEventsData=dfEventsDataTrain,dfMarketData=dfMarketDataTrain)
# Plot via the project helper; presumably a heatmap with a dendrogram, going
# by the method name — confirm in src.functions.CorrClass.
cs.plot_heatmap_densogram(dfTrain)

In [None]:
# Pass the training ratios through the project's cleanMatrix and convert the
# result with cov2corr before plotting.
# NOTE(review): dfTrain is the raw ratio table returned by variationRatio;
# whether cleanMatrix expects observations or an already-computed covariance
# matrix cannot be told from here — confirm in src.functions.CorrClass.
dfTrainDenoised = cs.cov2corr(cs.cleanMatrix(dfTrain))
cs.plot_heatmap_densogram(dfTrainDenoised)

In [None]:
# Same pipeline as the denoised variant, but using the project's detonMatrix
# (presumably a detoning step, per the variable name — confirm in
# src.functions.CorrClass) before cov2corr and plotting.
dfTrainDetoned = cs.cov2corr(cs.detonMatrix(dfTrain))
cs.plot_heatmap_densogram(dfTrainDetoned)