In [None]:
# Shell escape: list the contents of the kernel's current working directory.
!ls

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [None]:
# Font sizes used by the plotting cells below.
FONT_SIZE_TITLE_PLOT = 48  # figure-level title (fig.suptitle)
FONT_SIZE_TITLE_AX = 36  # NOTE(review): not referenced in the cells visible here
FONT_SIZE_LABEL = 30  # x/y axis labels
FONT_SIZE_TICKS = 24  # tick labels
FONT_SIZE_LEGEND = 32  # legend entries

In [None]:
# Root of the project. This is a relative path, so it resolves against the
# kernel's current working directory.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"
# CSV with the training data that the first two sections analyse.
train_data_path = f"{PROJECT_FOLDER}/data/midprice_volume__KO_PEP_NVDA_KSU__train.csv"
# Tickers whose "mid_price_<ticker>" columns are selected from the CSV.
stock_names = ["KO", "PEP"]

In [None]:
# Load the training CSV and expose the column pandas labelled "Unnamed: 0"
# under the name "date" (it is parsed as a timestamp below).
df = pd.read_csv(train_data_path).rename(columns={"Unnamed: 0": "date"})

# Keep only the mid-price columns of the selected tickers plus the timestamp.
# .copy() makes this an independent frame, so the dtype conversion below does
# not write into a slice of `df` (which raises SettingWithCopyWarning).
midprices_KO_PEP = df[[f"mid_price_{stock_name}" for stock_name in stock_names] + ["date"]].copy()
midprices_KO_PEP["date"] = pd.to_datetime(midprices_KO_PEP["date"])
midprices_KO_PEP

In [None]:
# One scaler instance shared by all plotting cells. They call fit_transform on
# every series, so it is refit each time and carries no state between series.
scaler = StandardScaler()

## Tau = 1Day, Delta = 1Minute

In [None]:
# Largest shift, in rows, applied to the KO window in either direction
# (one row = one minute according to the section heading).
DELTA_MAX = 60

# Row count a day must have to be analysed; days of any other length are skipped.
rows_per_day = 390

# The PEP window is fixed and trimmed by DELTA_MAX rows at both ends, so the KO
# window shifted by any delta in [-DELTA_MAX, DELTA_MAX] stays inside the day.
# Deriving the bounds keeps them in sync with DELTA_MAX (60 and 330 here).
window_start, window_stop = DELTA_MAX, rows_per_day - DELTA_MAX
assert window_start < window_stop, "DELTA_MAX leaves no rows for the correlation window"

grouped = midprices_KO_PEP.groupby(midprices_KO_PEP.date.dt.date)

# Maps str(date) -> {delta: Pearson correlation rounded to 2 decimals}.
date2delta2corr = dict()

for date, midprices_KO_PEP_day in grouped:
    if midprices_KO_PEP_day.shape[0] != rows_per_day:
        continue

    mid_price_KO_day = midprices_KO_PEP_day["mid_price_KO"].values
    mid_price_PEP_day = midprices_KO_PEP_day["mid_price_PEP"].values
    # The PEP window does not depend on delta, so slice it once per day.
    pep_window = mid_price_PEP_day[window_start:window_stop]

    delta2corr = dict()
    for delta in range(-DELTA_MAX, DELTA_MAX + 1):
        ko_window = mid_price_KO_day[window_start + delta : window_stop + delta]
        # corrcoef returns the 2x2 correlation matrix; [0, 1] is rho(KO, PEP).
        delta2corr[delta] = np.round(np.corrcoef(ko_window, pep_window)[0, 1], 2)

    date2delta2corr[str(date)] = delta2corr

In [None]:
# Folder that receives one PDF per plotted day. savefig does not create missing
# directories, so make sure it exists before the loop starts.
output_dir = f"{PROJECT_FOLDER}/plot_finali/multistock/shifted_correlations/tau=1day_delta=1minute"
os.makedirs(output_dir, exist_ok=True)

for date, midprices_KO_PEP_day in grouped:
    # Same completeness filter as the cell that filled date2delta2corr, so the
    # lookup below only happens for days that have an entry.
    if midprices_KO_PEP_day.shape[0] != 390:
        continue

    # Top panel: the two price series; bottom panel: correlation per shift.
    fig, (ax_price, ax_corr) = plt.subplots(2, 1, figsize=(16, 9))

    mid_price_KO_day = midprices_KO_PEP_day["mid_price_KO"].values
    mid_price_PEP_day = midprices_KO_PEP_day["mid_price_PEP"].values
    # Each series is standardized on its own (the scaler is refit per call), so
    # the two curves share a scale even though the raw prices may not.
    mid_price_KO_day_scaled = scaler.fit_transform(mid_price_KO_day.reshape(-1, 1))
    mid_price_PEP_day_scaled = scaler.fit_transform(mid_price_PEP_day.reshape(-1, 1))

    ax_price.plot(range(390), mid_price_KO_day_scaled, color="C4", label="KO")
    ax_price.plot(range(390), mid_price_PEP_day_scaled, color="C5", label="PEP")

    ax_price.set_xlabel("Steps", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.set_ylabel("Price", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.legend(ncol=2, loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    # Sort by delta so the curve is drawn left to right.
    delta2corr = dict(sorted(date2delta2corr[str(date)].items()))
    ax_corr.plot(delta2corr.keys(), delta2corr.values(), color="C6", label=r"$\rho(KO[\delta:], PEP[:-\delta])$")

    ax_corr.set_ylim((-1, 1))
    ax_corr.set_xlabel(r"$\delta$", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.set_ylabel("Correlation\ncoefficient", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.legend(loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    fig.suptitle(f"{date}", fontsize=FONT_SIZE_TITLE_PLOT)
    fig.tight_layout()
    fig.savefig(f"{output_dir}/{date}.pdf")
    # Close explicitly: one figure is created per day and would otherwise pile
    # up in memory.
    plt.close(fig)

## Tau = 1Week, Delta = 1Hour

In [None]:
# Largest shift applied to the KO window, counted in multiples of lag_step rows
# (one hour each according to the section heading).
DELTA_MAX = 6

rows_per_day = 390  # a week is analysed only if it has exactly 5 * 390 rows
lag_step = 60  # rows per unit of delta

# Fixed PEP window: rows 390..1559 of the week, i.e. everything except the
# first and the last 390-row block.
window_start, window_stop = rows_per_day, 4 * rows_per_day
max_shift = DELTA_MAX * lag_step
assert max_shift <= window_start and window_stop + max_shift <= 5 * rows_per_day, (
    "DELTA_MAX shifts the KO window outside the week"
)

# Pass the Grouper itself rather than a one-element list: with a length-1 list,
# pandas >= 2.0 yields 1-tuple keys when iterating, so str(date) would become
# "(Timestamp('...'),)" in dictionary keys, figure titles and file names.
# NOTE(review): `origin` is kept from the original call; it appears to apply
# only to fixed frequencies, not to "W" - confirm against the pandas docs.
grouped = midprices_KO_PEP.groupby(pd.Grouper(key="date", freq="W", origin=pd.Timestamp(year=2018, month=1, day=1)))

# Maps str(week label) -> {delta: Pearson correlation rounded to 2 decimals}.
date2delta2corr = dict()

for date, midprices_KO_PEP_week in grouped:
    if midprices_KO_PEP_week.shape[0] != 5 * rows_per_day:
        continue

    mid_price_KO_week = midprices_KO_PEP_week["mid_price_KO"].values
    mid_price_PEP_week = midprices_KO_PEP_week["mid_price_PEP"].values
    # The PEP window does not depend on delta, so slice it once per week.
    pep_window = mid_price_PEP_week[window_start:window_stop]

    delta2corr = dict()
    for delta in range(-DELTA_MAX, DELTA_MAX + 1):
        shift = lag_step * delta
        ko_window = mid_price_KO_week[window_start + shift : window_stop + shift]
        # corrcoef returns the 2x2 correlation matrix; [0, 1] is rho(KO, PEP).
        delta2corr[delta] = np.round(np.corrcoef(ko_window, pep_window)[0, 1], 2)

    date2delta2corr[str(date)] = delta2corr

In [None]:
# Folder that receives one PDF per plotted week. savefig does not create missing
# directories, so make sure it exists before the loop starts.
output_dir = f"{PROJECT_FOLDER}/plot_finali/multistock/shifted_correlations/tau=1week_delta=1hour"
os.makedirs(output_dir, exist_ok=True)

for date, midprices_KO_PEP_week in grouped:
    # Same completeness filter as the cell that filled date2delta2corr, so the
    # lookup below only happens for weeks that have an entry.
    if midprices_KO_PEP_week.shape[0] != 390 * 5:
        continue

    # Top panel: the two price series; bottom panel: correlation per shift.
    fig, (ax_price, ax_corr) = plt.subplots(2, 1, figsize=(16, 9))

    mid_price_KO_week = midprices_KO_PEP_week["mid_price_KO"].values
    mid_price_PEP_week = midprices_KO_PEP_week["mid_price_PEP"].values
    # Each series is standardized on its own (the scaler is refit per call), so
    # the two curves share a scale even though the raw prices may not.
    mid_price_KO_week_scaled = scaler.fit_transform(mid_price_KO_week.reshape(-1, 1))
    mid_price_PEP_week_scaled = scaler.fit_transform(mid_price_PEP_week.reshape(-1, 1))

    ax_price.plot(range(390 * 5), mid_price_KO_week_scaled, color="C4", label="KO")
    ax_price.plot(range(390 * 5), mid_price_PEP_week_scaled, color="C5", label="PEP")

    ax_price.set_xlabel("Steps", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.set_ylabel("Price", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.legend(ncol=2, loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    # Sort by delta so the curve is drawn left to right.
    delta2corr = dict(sorted(date2delta2corr[str(date)].items()))
    ax_corr.plot(delta2corr.keys(), delta2corr.values(), color="C6", label=r"$\rho(KO[\delta:], PEP[:-\delta])$")

    ax_corr.set_ylim((-1, 1))
    ax_corr.set_xlabel(r"$\delta$", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.set_ylabel("Correlation\ncoefficient", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.legend(loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    fig.suptitle(f"{date}", fontsize=FONT_SIZE_TITLE_PLOT)
    fig.tight_layout()
    fig.savefig(f"{output_dir}/{date}.pdf")
    # Close explicitly: one figure is created per week and would otherwise pile
    # up in memory.
    plt.close(fig)

## HFT: Tau = 1Day, Delta = 1Second

In [None]:
# Load the high-frequency training CSV from the "hft/Sec1" folder
# (presumably 1-second sampling - confirm against the data).
raw_df = pd.read_csv(f"{PROJECT_FOLDER}/data/hft/Sec1/midprice_volume_KO_PEP_NVDA_KSU_train.csv")

In [None]:
# Keep the KO/PEP mid prices and the timestamp column. .copy() detaches the
# selection from raw_df, so the assignment below modifies an independent frame
# rather than a slice (which raises SettingWithCopyWarning).
hft_df = raw_df[["mid_price_KO", "mid_price_PEP", "date"]].copy()
hft_df["date"] = pd.to_datetime(hft_df["date"])

In [None]:
# Largest shift, in rows, applied to the KO window in either direction.
DELTA_MAX = 60 * 5

# Row count a day must have to be analysed; days of any other length are skipped.
rows_per_day = 390 * 60

# The PEP window is fixed and trimmed by DELTA_MAX rows at both ends, so the KO
# window shifted by any delta in [-DELTA_MAX, DELTA_MAX] stays inside the day.
# Deriving the bounds keeps them in sync with DELTA_MAX (300 and 23100 here).
window_start, window_stop = DELTA_MAX, rows_per_day - DELTA_MAX
assert window_start < window_stop, "DELTA_MAX leaves no rows for the correlation window"

grouped = hft_df.groupby(hft_df.date.dt.date)

# Maps str(date) -> {delta: Pearson correlation rounded to 2 decimals}.
date2delta2corr = dict()

for date, midprices_KO_PEP_day in tqdm(grouped):
    if midprices_KO_PEP_day.shape[0] != rows_per_day:
        continue

    mid_price_KO_day = midprices_KO_PEP_day["mid_price_KO"].values
    mid_price_PEP_day = midprices_KO_PEP_day["mid_price_PEP"].values
    # The PEP window does not depend on delta, so slice it once per day.
    pep_window = mid_price_PEP_day[window_start:window_stop]

    delta2corr = dict()
    for delta in range(-DELTA_MAX, DELTA_MAX + 1):
        ko_window = mid_price_KO_day[window_start + delta : window_stop + delta]
        # corrcoef returns the 2x2 correlation matrix; [0, 1] is rho(KO, PEP).
        delta2corr[delta] = np.round(np.corrcoef(ko_window, pep_window)[0, 1], 2)

    date2delta2corr[str(date)] = delta2corr

In [None]:
# Folder that receives one PDF per plotted day. savefig does not create missing
# directories, so make sure it exists before the loop starts.
output_dir = f"{PROJECT_FOLDER}/plot_finali/multistock/shifted_correlations/tau=1day_delta=1second"
os.makedirs(output_dir, exist_ok=True)

for date, midprices_KO_PEP_day in tqdm(grouped):
    # Same completeness filter as the cell that filled date2delta2corr, so the
    # lookup below only happens for days that have an entry.
    if midprices_KO_PEP_day.shape[0] != 390 * 60:
        continue

    # Top panel: the two price series; bottom panel: correlation per shift.
    fig, (ax_price, ax_corr) = plt.subplots(2, 1, figsize=(16, 9))

    mid_price_KO_day = midprices_KO_PEP_day["mid_price_KO"].values
    mid_price_PEP_day = midprices_KO_PEP_day["mid_price_PEP"].values
    # Each series is standardized on its own (the scaler is refit per call), so
    # the two curves share a scale even though the raw prices may not.
    mid_price_KO_day_scaled = scaler.fit_transform(mid_price_KO_day.reshape(-1, 1))
    mid_price_PEP_day_scaled = scaler.fit_transform(mid_price_PEP_day.reshape(-1, 1))

    ax_price.plot(range(390 * 60), mid_price_KO_day_scaled, color="C4", label="KO")
    ax_price.plot(range(390 * 60), mid_price_PEP_day_scaled, color="C5", label="PEP")

    ax_price.set_xlabel("Steps", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.set_ylabel("Price", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_price.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_price.legend(ncol=2, loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    # Sort by delta so the curve is drawn left to right.
    delta2corr = dict(sorted(date2delta2corr[str(date)].items()))
    ax_corr.plot(delta2corr.keys(), delta2corr.values(), color="C6", label=r"$\rho(KO[\delta:], PEP[:-\delta])$")

    ax_corr.set_ylim((-1, 1))
    ax_corr.set_xlabel(r"$\delta$", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.set_ylabel("Correlation\ncoefficient", fontdict={"fontsize": FONT_SIZE_LABEL})
    ax_corr.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax_corr.legend(loc="lower center", fontsize=FONT_SIZE_LEGEND, frameon=False)

    fig.suptitle(f"{date}", fontsize=FONT_SIZE_TITLE_PLOT)
    fig.tight_layout()
    fig.savefig(f"{output_dir}/{date}.pdf")
    # Close explicitly: one figure is created per day and would otherwise pile
    # up in memory.
    plt.close(fig)

## HFT (mSec100 data): Tau = TODO


In [None]:
# Load the high-frequency training CSV from the "hft/mSec100" folder
# (presumably 100 ms sampling - confirm against the data).
# This rebinds raw_df, replacing the frame loaded from "hft/Sec1" earlier.
raw_df = pd.read_csv(f"{PROJECT_FOLDER}/data/hft/mSec100/midprice_volume_KO_PEP_NVDA_KSU_train.csv")

In [None]:
# Keep the KO/PEP mid prices and the timestamp column. .copy() detaches the
# selection from raw_df, so the assignment below modifies an independent frame
# rather than a slice (which raises SettingWithCopyWarning).
hft_df = raw_df[["mid_price_KO", "mid_price_PEP", "date"]].copy()
hft_df["date"] = pd.to_datetime(hft_df["date"])

In [None]:
# Sanity check: (rows, columns) of the selected frame.
hft_df.shape

In [None]:
# Preview the first 20 rows of the selected frame.
hft_df.head(20)