In [1]:
import pandas as pd
import dask
# Make dask's multiprocessing ("processes") scheduler the default for this session.
dask.config.set(scheduler="processes")
import matplotlib.pyplot as plt
import glob
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from helpers import config
%load_ext autoreload
%autoreload 2

# Loading the data

The analysis works on a daily basis: one trading day is loaded at a time.

In [None]:
# Trading day (taken from the shared config) and location code used below.
date = config["test_date"]
location = "NL"


In [None]:
# List every entry directly under the configured data directory.
# (The original line was not valid Python: the path was never put in a string.)
glob.glob(f"{config['dir']['data']}/*")

In [None]:
# Build the glob pattern for the configured stock/signal at `location` on `date`,
# show it, then list the files that match it.
mkt_suffix = config["markets"][location]
path_expr = f"{config['dir']['data']}/{location}/{config['signal']}/{config['stock']}.{mkt_suffix}/{date}*"
# A bare expression is only echoed when it is the last line of a cell,
# so the pattern has to be printed explicitly to be visible.
print(path_expr)
# glob returns matches in arbitrary order; sort them for a stable listing.
sorted(glob.glob(path_expr))


In [None]:
from helpers import config
from helpers.loading import load_daily_data
# Load the data for `date` at `location`. The result is used below as a mapping
# from market name to that market's data.
# NOTE(review): to_returns=True presumably converts prices to returns -- confirm in helpers.loading.
daily_data = load_daily_data(date, location, to_returns=True)


In [None]:
# Echo the loaded object (the last expression of a cell is displayed).
daily_data

# simple data exploration

In [None]:
from helpers.plots import save_plot
# Bar chart of how many rows each entry of daily_data holds, smallest first.
sizes = [len(entries) for entries in daily_data.values()]
size_df = pd.DataFrame({'markets': list(daily_data.keys()), 'size': sizes})
size_sorted = size_df.sort_values('size')
fig = px.bar(
    size_sorted,
    x='markets',
    y='size',
    title=f"Number of transaction per exchange ({config['test_date']})",
)
# Persist the figure through the project helper, then render it inline.
save_plot(fig, "nb_transaction_per_exchange")
fig.show()



In [None]:
from helpers.delay import generate_delayed_data
# Size of the zero-delay joined data for every ordered pair of markets, once
# with the helper's default join (stored as "outer") and once with
# join_type="inner".
# The matrices are indexed by position in daily_data, so they are sized from
# daily_data itself: sizing them from config['markets'] raises IndexError or
# leaves zero-padded rows whenever the two disagree.
N = len(daily_data)
joined_len_outer = np.zeros((N,N))
joined_len_inner = np.zeros((N,N))
for i,k1 in enumerate(daily_data):
    for j,k2 in enumerate(daily_data):
        # Hand copies to the helper so the loaded data cannot be modified by it.
        s1,s2 = daily_data[k1].copy(),daily_data[k2].copy()
        joined_len_outer[i,j] = len(generate_delayed_data(s1,s2,0))
        joined_len_inner[i,j] = len(generate_delayed_data(s1,s2,0,join_type="inner"))


In [None]:
# Side-by-side heatmaps of the joined-data sizes for every market pair.
# Rows and columns of both matrices follow the iteration order of daily_data,
# so the axis labels are taken from those same keys (not from config['markets'],
# whose order and content are not guaranteed to match).
market_labels = list(daily_data)
fig = make_subplots(rows=1, cols=2, shared_yaxes=True,
                    subplot_titles=["inner", "outer"])
fig.add_trace(
    go.Heatmap(z=joined_len_inner, x=market_labels, y=market_labels),
    row=1, col=1
)

fig.add_trace(
    go.Heatmap(z=joined_len_outer, x=market_labels, y=market_labels),
    row=1, col=2
)
fig.update_layout(height=600, width=800, title_text="Number of transaction in the joined data for all market pairs")
# Persist the figure through the project helper, then render it inline.
save_plot(fig, 'nb_transaction_join_market_pairs')
fig.show()


# Signal visualization

In [None]:
from helpers.stats import compute_correlation
from helpers.delay import generate_delayed_data

def visualize_delay(n1,n2,delay,n_samples=1000):
    """Plot the start of the delayed join of two markets.

    The entries `n1` and `n2` of `daily_data` are joined with
    `generate_delayed_data` using `delay`; the first `n_samples` rows are kept
    and their two trade-price columns are drawn on a single figure. The title
    reports the delay, both market names and the first value returned by
    `compute_correlation` on those rows.
    """
    joined = generate_delayed_data(daily_data[n1], daily_data[n2], delay)
    pair_data = joined[:n_samples]
    corr, _, _ = compute_correlation(pair_data)

    # One line per side of the join.
    for price_column in ("trade-price_1", "trade-price_2"):
        plt.plot(pair_data[price_column])
    plt.title(f"delay:{delay}ms, market1:{n1}, market2:{n2}, corr={corr:0.5f}")
    plt.show()

In [None]:
# Sweep a handful of delays around zero for one market pair.
step = 100 #ms
n_steps = 2
span = n_steps*step
# range() stops before +span, so the sweep is -200, -100, 0, 100.
delays = range(-span, span, step)
for delay in delays:
    visualize_delay("DF", "B", delay)

# Demo

In [None]:
from helpers.delay import compute_delays
from helpers.algorithm import  increasing_function_check
def find_best_delay_demo(n1,n2):
    """Iteratively search for the delay with the highest correlation, plotting each step.

    Starting from a window centred on 0 with a step of 1000 ms, every iteration
    calls `compute_delays` for markets `n1` and `n2`, picks the delay with the
    highest correlation and re-centres the next window on it. The step is
    multiplied by 1.5 when `increasing_function_check` accepts the correlations,
    and halved otherwise.

    The search stops after 15 iterations, when the best delay is the same as in
    the previous iteration, or when the step is no longer greater than 1. An
    iteration that stops the search is not plotted.

    Each plotted iteration shows correlation against delay with asymmetric error
    bars, is written to ./Figures/<title>.html and displayed; a summary line is
    printed as well.

    Args:
        n1: name of the first market, forwarded to `compute_delays`.
        n2: name of the second market, forwarded to `compute_delays`.

    Returns:
        None. The function is used for its plots and printed trace.
    """
    import os

    n_iteration = 15
    center      = 0
    step_size   = 1000 #ms
    last_best_delay = None

    # write_html does not create missing directories, so make sure the target exists.
    os.makedirs("./Figures", exist_ok=True)

    for it in range(n_iteration):
        delays,correlations,los,his = compute_delays(daily_data,n1,n2,center=center,step_size=step_size)
        best_idx = np.argmax(correlations)
        best_delay = delays[best_idx]
        if (last_best_delay is not None and last_best_delay==best_delay) or step_size <= 1:
            break

        last_best_delay = best_delay
        center          = best_delay

        # Plotly draws `array` upwards and `arrayminus` downwards. `los`/`his` are
        # used as lower/upper error lengths, as in the matplotlib cells of this
        # notebook (yerr=[los, his]), so `his` is the upward bar and `los` the
        # downward one.
        fig = go.Figure(
            data=go.Scatter(
                x=list(delays),
                y=correlations,
                error_y=dict(
                    type='data',
                    symmetric=False,
                    array=his,
                    arrayminus=los,
                ),
            ),
            layout=go.Layout(autosize=False,width=800,height=500),
        )
        title = f"Correlation_vs_delay_window_iteration({it})_market({n1}_{n2})"
        fig.update_layout(
            title=title,
            xaxis_title="delay",
            yaxis_title="correlation",
        )
        fig.write_html(f"./Figures/{title}.html",full_html=False, include_plotlyjs=False)
        fig.show()

        print(f"idx:{best_idx}, step_size:{step_size}, center:{center}")
        # Widen the window when the check accepts the correlations, otherwise refine it.
        if increasing_function_check(correlations):
            step_size = int(step_size*1.5)
        else:
            step_size = step_size//2
        


In [None]:
# Run the demo search for the DF / O pair.
find_best_delay_demo(n1="DF", n2="O")

In [None]:
# Run the demo search for the B / II pair.
find_best_delay_demo(n1="B", n2="II")

# Visualization

In [None]:
# Fixed-window sweep (center 0, step 2000) for every ordered pair of markets:
# one error-bar plot of correlation against delay per pair, with the delay of
# the highest correlation shown in the title.
for k1 in daily_data:
    for k2 in daily_data:
        delays,correlations,los,his = compute_delays(daily_data,n1=k1,n2=k2,center=0,step_size=2000)
        # matplotlib reads a two-row yerr as [lower errors, upper errors].
        plt.errorbar(list(delays), correlations, yerr=[los,his], fmt='o')
        plt.title(f"{k1},{k2} ({delays[np.argmax(correlations)]})")
        plt.xlabel("delay")
        plt.ylabel("corr")
        plt.show()

In [None]:
# Same grid of plots, but driven by find_best_delay: one error-bar plot per
# ordered pair of markets, titled with the best delay it reports.
# NOTE(review): find_best_delay is neither defined nor imported in the visible
# cells -- confirm where it comes from (helpers.algorithm?) before a fresh run.
for k1 in daily_data:
    for k2 in daily_data:
        best_delay,delays,correlations,los,his = find_best_delay(n1=k1,n2=k2)
        # matplotlib reads a two-row yerr as [lower errors, upper errors].
        plt.errorbar(list(delays), correlations, yerr=[los,his], fmt='o')
        plt.title(f"{k1},{k2},{best_delay}")
        plt.xlabel("delay")
        plt.ylabel("corr")
        plt.show()




# Checks

* Sort the market pairs by delay and validate the ordering against geographic distance
* Moore's law