In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
from itertools import combinations

import tqdm

In [None]:
# Notebook-wide matplotlib defaults, applied in one call.
# Optional high-resolution output (left disabled):
# plt.rcParams["figure.dpi"] = 300
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Font sizes passed explicitly to the plotting calls in the cells below.
# NOTE(review): the trailing numbers look like previously used values — confirm before relying on them.
FONT_SIZE_TITLE_PLOT = 48  # 40
FONT_SIZE_TITLE_AX = 36  # 30
FONT_SIZE_LABEL = 30  # 24
FONT_SIZE_TICKS = 24  # 20
FONT_SIZE_LEGEND = 32  # 28

In [None]:
# Root folder used to build the pickle paths and the figure output paths.
# The path is relative, so it resolves against the kernel's current working directory.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Ticker labels, used as panel titles and as column names of the correlation frames.
# NOTE(review): the order presumably matches the first axis of the pickled arrays — confirm.
stock_names = ["PEP", "KO", "NVDA", "KSU"]
n_stocks = len(stock_names)

In [None]:
# Run identifier and checkpoint epoch selecting the price experiment.
RUN_ID_PRICE = "13v3dpxg"
EPOCH_PRICE = 121

# Both price pickles live in the same per-run storage folder.
_PRICE_RUN_FOLDER = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_PRICE}"
PATH_PICKLE_REAL_PRICE = f"{_PRICE_RUN_FOLDER}/reals.pickle"
PATH_PICKLE_PRED_PRICE = (
    f"{_PRICE_RUN_FOLDER}/preds_epoch={EPOCH_PRICE}-seed=42"
    "-target_price=mid_price-target_volume=None-sampling_seed=599121577.pickle"
)

In [None]:
# Run identifier and checkpoint epoch selecting the volume experiment.
RUN_ID_VOLUME = "136a5wci"
EPOCH_VOLUME = 108

# Both volume pickles live in the same per-run storage folder.
_VOLUME_RUN_FOLDER = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_VOLUME}"
PATH_PICKLE_REAL_VOLUME = f"{_VOLUME_RUN_FOLDER}/reals.pickle"
PATH_PICKLE_PRED_VOLUME = (
    f"{_VOLUME_RUN_FOLDER}/preds_epoch={EPOCH_VOLUME}-seed=42"
    "-target_price=None-target_volume=volume-sampling_seed=599121577.pickle"
)

In [None]:
# Load the real and the predicted price dictionaries.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only open pickles
# produced by a trusted run.
with open(PATH_PICKLE_REAL_PRICE, "rb") as handle:
    real_price_dict = pickle.load(handle)
with open(PATH_PICKLE_PRED_PRICE, "rb") as handle:
    pred_price_dict = pickle.load(handle)

In [None]:
# Load the real and the predicted volume dictionaries.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only open pickles
# produced by a trusted run.
with open(PATH_PICKLE_REAL_VOLUME, "rb") as handle:
    real_volume_dict = pickle.load(handle)
with open(PATH_PICKLE_PRED_VOLUME, "rb") as handle:
    pred_volume_dict = pickle.load(handle)

In [None]:
# Show which entries each price dictionary provides.
real_price_dict.keys(), pred_price_dict.keys()

In [None]:
# Show which entries each volume dictionary provides.
real_volume_dict.keys(), pred_volume_dict.keys()

In [None]:
# Pull the "x" / "x_hat" entries of the price dictionaries and display their shapes.
# NOTE(review): x and x_hat are not referenced again in the visible part of this notebook.
x = real_price_dict["x"]
x_hat = pred_price_dict["x_hat"]
x.shape, x_hat.shape

In [None]:
# Real and predicted price series; the cells below iterate their first axis alongside
# stock_names and treat the second axis as time steps.
price_real = real_price_dict["prices"]
price_pred = pred_price_dict["pred_prices"]
price_real.shape, price_pred.shape

In [None]:
# Real and predicted volume series; the cells below iterate their first axis alongside
# stock_names.
volume_real = real_volume_dict["volumes"]
volume_pred = pred_volume_dict["pred_volumes"]
volume_real.shape, volume_pred.shape

In [None]:
# Step indexes for the two plot segments: the first 390 steps and everything after them.
# 390 equals the minutes_in_a_day value computed later in this notebook (6 * 60 + 30).
history_indexes = np.arange(390)
continuation_indexes = np.arange(390, price_real.shape[1])
history_indexes.shape, continuation_indexes.shape

In [None]:
# Shape check of the price arrays before rescaling.
price_real.shape, price_pred.shape

In [None]:
# Rescaled copies of the prices, used only by the plotting cells.
# NOTE(review): presumably the pickled prices are stored in 1e-4 dollar units, since these
# arrays are plotted under a "Price ($)" axis label — confirm.
price_real_ = price_real / 10000
price_pred_ = price_pred / 10000
# Spot-check one value (first row, step 1000) of each rescaled array.
price_real_[0, 1000], price_pred_[0, 1000]

# PRICES

In [None]:
# One panel per stock: the first 390 steps, then the real and the synthetic continuation.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

for panel, (ax, stock_name, real, synthetic) in enumerate(zip(axes, stock_names, price_real_, price_pred_)):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    curves = (
        (history_indexes, real[:390], "C0", "Observed"),
        (continuation_indexes, real[390:], "C1", "Real"),
        (continuation_indexes, synthetic[390:], "C2", "Synthetic"),
    )
    for steps, values, color, label in curves:
        ax.plot(steps, values, color=color, label=label if add_label else None)
    # Red marker at the step where the continuation starts.
    ax.axvline(x=390, color="r")

    ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Price ($)", fontsize=FONT_SIZE_LABEL, rotation=90)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)
    # Keep a subset of the automatic ticks and add an explicit tick at the split step.
    ax.set_xticks([*ax.get_xticks()[2:-1], 390])

fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/prices.pdf")
plt.show()
plt.close(fig)

# PAIRWISE CORRELATIONS

In [None]:
# Correlation matrix between the stocks' price series (one column per stock), rounded to
# two decimals for display in the axis labels.
# These lines expect the 2-D price arrays: the stylised-facts section later rebinds
# price_real / price_pred to 3-D arrays, so run this cell before that one.
df_corr_real = pd.DataFrame(price_real.T, columns=stock_names).corr().round(2)
df_corr_synthetic = pd.DataFrame(price_pred.T, columns=stock_names).corr().round(2)

In [None]:
# Length of one day in steps, expressed as 6 hours and 30 minutes.
_DAY_HOURS, _DAY_EXTRA_MINUTES = 6, 30
minutes_in_a_day = _DAY_HOURS * 60 + _DAY_EXTRA_MINUTES
minutes_in_a_day

In [None]:
# Real (top row) versus synthetic (bottom row) price paths, one column per stock.
# Every stock except the first is annotated with its price correlation against PEP.
fig, axes = plt.subplots(2, 4, figsize=(16, 9))

add_label = True
for i, (ax_real, ax_synthetic) in enumerate(axes.T):
    stock_name, real, synthetic = stock_names[i], price_real_[i], price_pred_[i]

    # Share the y-range between the two rows so the paths are visually comparable.
    ylim = (min(real.min(), synthetic.min()), max(real.max(), synthetic.max()))
    for ax in (ax_real, ax_synthetic):
        ax.set_ylim(ylim)
        ax.set_xticklabels([])
        ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

    # Labels are attached in the first column only, so the figure legend has no duplicates.
    ax_real.plot(history_indexes, real[:minutes_in_a_day], color="C0", label="Observed" if add_label else None)
    ax_real.plot(continuation_indexes, real[minutes_in_a_day:], color="C1", label="Real" if add_label else None)

    ax_synthetic.plot(history_indexes, synthetic[:minutes_in_a_day], color="C0")
    ax_synthetic.plot(
        continuation_indexes, synthetic[minutes_in_a_day:], color="C2", label="Synthetic" if add_label else None
    )

    ax_real.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)

    add_label = False

    if i > 0:
        # Same matrix entry as the chained lookup df[stock_name]["PEP"], read in one step.
        corr_real = df_corr_real.loc["PEP", stock_name]
        corr_synthetic = df_corr_synthetic.loc["PEP", stock_name]
        ax_real.set_xlabel(rf"$\rho({stock_name}, PEP)={corr_real}$", fontsize=FONT_SIZE_LABEL - 6)
        # Every fragment containing a backslash is a raw string: "\w" is not a valid escape
        # sequence, and a non-raw literal triggers a SyntaxWarning on recent Python versions.
        ax_synthetic.set_xlabel(
            r"$\rho(\widehat{" + stock_name + r"}, \widehat{PEP})=" + str(corr_synthetic) + "$",
            fontsize=FONT_SIZE_LABEL - 6,
        )

fig.suptitle("Price - Correlation Dynamics", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(
    loc="upper center",
    ncol=3,
    fontsize=FONT_SIZE_LEGEND,
    frameon=False,
    bbox_to_anchor=(0.5, 0.97),
)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/price_correlations_wrt_PEP.pdf")
plt.show()
plt.close(fig)

# VOLUMES

In [None]:
# One panel per stock: the first 390 steps, then the real and the synthetic continuation.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

for panel, (ax, stock_name, real, synthetic) in enumerate(zip(axes, stock_names, volume_real, volume_pred)):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    curves = (
        (history_indexes, real[:390], dict(color="C0"), "Observed"),
        (continuation_indexes, real[390:], dict(color="C1"), "Real"),
        # The synthetic curve is semi-transparent so the real one stays visible underneath.
        (continuation_indexes, synthetic[390:], dict(color="C2", alpha=0.7), "Synthetic"),
    )
    for steps, values, style, label in curves:
        ax.plot(steps, values, label=label if add_label else None, **style)
    # Red marker at the step where the continuation starts.
    ax.axvline(x=390, color="r")

    ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Shares", fontsize=FONT_SIZE_LABEL, rotation=90)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)
    # Keep a subset of the automatic ticks and add an explicit tick at the split step.
    ax.set_xticks([*ax.get_xticks()[2:-1], 390])

fig.suptitle("Volumes", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volumes.pdf")
plt.show()
plt.close(fig)

# STYLISED FACTS

In [None]:
# Split each price series into consecutive chunks of minutes_in_a_day steps, giving the
# layout (stock, day, minute).
# The target shape is passed positionally: the `newshape` keyword of np.reshape is
# deprecated in recent NumPy releases, while the positional form works on every version.
# Note that this rebinds price_real / price_pred, so cells that expect the 2-D arrays must
# run before this one.
price_real = np.reshape(price_real, (n_stocks, -1, minutes_in_a_day))
price_pred = np.reshape(price_pred, (n_stocks, -1, minutes_in_a_day))
price_real.shape, price_pred.shape

In [None]:
# Size of the second axis of price_real (the day axis once the series has been reshaped
# into chunks of minutes_in_a_day steps).
n_days = price_real.shape[1]
n_days

In [None]:
def _flattened_log_returns(prices):
    """Difference of log-prices along the last axis, flattened to one row per stock."""
    log_prices = np.log(prices)
    return np.diff(log_prices, axis=-1).reshape(n_stocks, -1)


# With the (stock, day, minute) layout the differences are taken inside each day.
minutely_returns_real = _flattened_log_returns(price_real)
minutely_returns_synthetic = _flattened_log_returns(price_pred)
minutely_returns_real.shape, minutely_returns_synthetic.shape

In [None]:
# Log-returns over coarser intervals: keep every n_minutes-th price along the last axis,
# then difference the log-prices and flatten to one row per stock.
n_minutes = 15
_coarse_price_real = price_real[..., ::n_minutes]
_coarse_price_pred = price_pred[..., ::n_minutes]
n_minutely_returns_real = np.diff(np.log(_coarse_price_real), axis=-1).reshape(n_stocks, -1)
n_minutely_returns_synthetic = np.diff(np.log(_coarse_price_pred), axis=-1).reshape(n_stocks, -1)
n_minutely_returns_real.shape, n_minutely_returns_synthetic.shape

In [None]:
# Common x-range for the return histograms: the overall minimum and maximum across the
# four return arrays.
_all_returns = (
    minutely_returns_real,
    minutely_returns_synthetic,
    n_minutely_returns_real,
    n_minutely_returns_synthetic,
)
xlim = (
    min(returns.min() for returns in _all_returns),
    max(returns.max() for returns in _all_returns),
)

In [None]:
# 75 evenly spaced bin edges spanning the common return range.
# `bins` is rebound further down for the correlation histograms, so re-run this cell
# before re-running the two return-histogram cells that follow.
bins = np.linspace(xlim[0], xlim[1], 75)

## RETURNS DISTRIBUTION

In [None]:
# Log-scale density histograms of the minutely log-returns, synthetic versus real,
# one panel per stock.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], bins=bins, density=True, log=True, histtype="step", linewidth=3)
panels = zip(axes, stock_names, minutely_returns_real, minutely_returns_synthetic)
for panel, (ax, stock_name, real, synthetic) in enumerate(panels):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    ax.set_xlim(xlim)
    ax.hist(x=[synthetic, real], label=["Synthetic", "Real"] if add_label else None, **hist_style)

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Minutely Log-Returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle("Returns distribution", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# The figure is written to disk here and not displayed inline.
plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/return_distribution.pdf")
# plt.show()
plt.close(fig)

## AGGREGATIONAL GAUSSIANITY

In [None]:
# Log-scale density histograms of the n_minutes log-returns, synthetic versus real,
# one panel per stock, on the same x-range and bins as the minutely histograms.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], bins=bins, density=True, log=True, histtype="step", linewidth=3)
panels = zip(axes, stock_names, n_minutely_returns_real, n_minutely_returns_synthetic)
for panel, (ax, stock_name, real, synthetic) in enumerate(panels):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    ax.set_xlim(xlim)
    ax.hist(x=[synthetic, real], label=["Synthetic", "Real"] if add_label else None, **hist_style)

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel(f"{n_minutes} Minute Log-Returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle("Aggregational Gaussianity", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# The figure is written to disk here and not displayed inline.
plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/aggregational_gaussianity.pdf")
# plt.show()
plt.close(fig)

## ABSENCE OF AUTOCORRELATION

In [None]:
from numpy.lib.stride_tricks import sliding_window_view

In [None]:
# Shape check of the flattened minutely returns before windowing.
minutely_returns_real.shape, minutely_returns_synthetic.shape

In [None]:
# Every run of `window_shape` consecutive returns per stock, advancing one step at a time.
# The views add a trailing window axis to the flattened (stock, step) return arrays.
window_shape = minutes_in_a_day
rolled_minutely_returns_real = sliding_window_view(minutely_returns_real, window_shape=window_shape, axis=1)
rolled_minutely_returns_synthetic = sliding_window_view(minutely_returns_synthetic, window_shape=window_shape, axis=1)
rolled_minutely_returns_real.shape, rolled_minutely_returns_synthetic.shape

In [None]:
def _lagged_corr(series, lag):
    """Pearson correlation between `series` and itself shifted by `lag` steps."""
    return np.corrcoef(series[:-lag], series[lag:])[0, 1]


# For each stock and lag: one autocorrelation value per rolling window, stored as a
# (real, synthetic) pair of arrays.
lags = [1, 10, 20, 30]
stock_name2lag2autocorr = dict()
for stock_name, real, synthetic in zip(stock_names, rolled_minutely_returns_real, rolled_minutely_returns_synthetic):
    lag2autocorr = dict()

    for lag in lags:
        # Windows are consumed pairwise, so both result arrays always have the same length.
        paired = [
            (_lagged_corr(window_real, lag), _lagged_corr(window_synthetic, lag))
            for window_real, window_synthetic in zip(real, synthetic)
        ]
        lag2autocorr[lag] = (
            np.asarray([pair[0] for pair in paired]),
            np.asarray([pair[1] for pair in paired]),
        )

    stock_name2lag2autocorr[stock_name] = lag2autocorr

In [None]:
# 50 evenly spaced bin edges covering the full range of a correlation coefficient.
bins = np.linspace(start=-1, stop=1, num=50)

In [None]:
# One figure per stock, one panel per lag: log-scale density histograms of the
# per-window return autocorrelations, synthetic versus real.
hist_style = dict(color=["C2", "C1"], bins=bins, density=True, log=True, histtype="step", linewidth=3)

for stock_name, lag2autocorr in stock_name2lag2autocorr.items():
    fig, axes = plt.subplots(2, 2, figsize=(16, 9))
    axes = axes.ravel()

    for panel, (ax, (lag, (real, synthetic))) in enumerate(zip(axes, lag2autocorr.items())):
        # Only the first panel registers legend entries, so the figure legend has no duplicates.
        add_label = panel == 0

        ax.set_xlim((-1, 1))
        ax.hist(x=[synthetic, real], label=["Synthetic", "Real"] if add_label else None, **hist_style)

        ax.set_title(f"Lag={lag}", fontsize=FONT_SIZE_TITLE_AX)
        ax.set_xlabel("Correlation Coefficient", fontsize=FONT_SIZE_LABEL)
        ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
        ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

    fig.suptitle(f"Returns Autocorrelations {stock_name} - Window={window_shape}", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
    fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
    fig.tight_layout()
    plt.savefig(
        f"{PROJECT_FOLDER}/plot_finali/multistock/absence_autocorrelation/absence_autocorrelation_{stock_name}.pdf"
    )
    plt.show()
    # Release the figure before the next stock's figure is created.
    plt.close(fig)

## VOLATILITY CLUSTERING

In [None]:
# Shape check of the price arrays before computing the rolling volatility.
price_real.shape, price_pred.shape

In [None]:
# Rolling dispersion of the price: standard deviation of each window of `window_shape`
# consecutive prices along the last axis, divided by sqrt(window_shape), then flattened
# to one row per stock.
window_shape = 20
_window_scale = np.sqrt(window_shape)

windowed_price_real = sliding_window_view(price_real, window_shape, axis=-1)
windowed_price_synthetic = sliding_window_view(price_pred, window_shape, axis=-1)

rolled_volatility_real = (np.std(windowed_price_real, axis=-1) / _window_scale).reshape(n_stocks, -1)
rolled_volatility_synthetic = (np.std(windowed_price_synthetic, axis=-1) / _window_scale).reshape(n_stocks, -1)

rolled_volatility_real.shape, rolled_volatility_synthetic.shape

In [None]:
# Autocorrelation of the rolling volatility at lags of 1 .. max_days - 1 days, per stock.
#
# np.corrcoef stacks the shifted rows on top of the unshifted rows, giving a
# (2 * n, 2 * n) matrix; entry [i, n + i] pairs stock i's shifted series with its own
# unshifted series.
_n_series = len(stock_names)
x_indices = list(range(_n_series))
y_indices = list(range(_n_series, 2 * _n_series))

max_days = 11
# The flattened rolling-volatility series holds fewer samples per day than there are
# minutes in a day, because each window shortens the day by window - 1 samples. A one-day
# shift is therefore the per-day sample count; using minutes_in_a_day would drift further
# out of alignment with every extra day of lag.
# NOTE(review): assumes the real and the synthetic series cover the same number of days.
steps_per_day = rolled_volatility_real.shape[1] // n_days
lags = np.arange(1, max_days) * steps_per_day

l_real, l_synthetic = list(), list()
for lag in lags:
    real = np.corrcoef(rolled_volatility_real[:, lag:], rolled_volatility_real[:, :-lag])[x_indices, y_indices]
    l_real.append(real)
    synthetic = np.corrcoef(rolled_volatility_synthetic[:, lag:], rolled_volatility_synthetic[:, :-lag])[
        x_indices, y_indices
    ]
    l_synthetic.append(synthetic)

# Transpose so that rows are stocks and columns are lags.
corrs_real = np.asarray(l_real).T
corrs_synthetic = np.asarray(l_synthetic).T

corrs_real.shape, corrs_synthetic.shape

In [None]:
# Rolling-volatility autocorrelation as a function of the lag in days, real versus
# synthetic, one panel per stock.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

day_lags = range(1, max_days)
for panel, (ax, stock_name, real, synthetic) in enumerate(zip(axes, stock_names, corrs_real, corrs_synthetic)):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    for values, color, label in ((real, "C1", "Real"), (synthetic, "C2", "Synthetic")):
        ax.plot(day_lags, values, color=color, linewidth=3, label=label if add_label else None)

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Lag (Days)", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Correlation\nCoefficient", fontsize=FONT_SIZE_LABEL)
    ax.set_xticks(day_lags)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)


fig.suptitle(f"Volatility clustering Window={window_shape}", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volatility_clustering.pdf")
plt.show()
plt.close(fig)

## VOLUME VOLATILITY CORRELATION

In [None]:
# Split each volume series into n_days chunks, giving the layout (stock, day, step).
_volume_day_shape = (n_stocks, n_days, -1)
volume_real = volume_real.reshape(*_volume_day_shape)
volume_synthetic = volume_pred.reshape(*_volume_day_shape)

In [None]:
# Shape check of all four inputs of the volume-volatility analysis. Each array is listed
# exactly once (price_real was previously repeated and the synthetic volume was missing).
price_real.shape, price_pred.shape, volume_real.shape, volume_synthetic.shape

In [None]:
# Window length for the rolling volatility and the rolling mean volume computed next.
# `window_shape` is a shared name that other cells rebind, so the value depends on run order.
window_shape = 30

In [None]:
# Rolling dispersion of the price with the current window_shape: standard deviation of
# each window along the last axis, divided by sqrt(window_shape), one flat row per stock.
windowed_price_real = sliding_window_view(price_real, window_shape, axis=-1)
windowed_price_synthetic = sliding_window_view(price_pred, window_shape, axis=-1)
print(windowed_price_real.shape, windowed_price_synthetic.shape)

_window_scale = np.sqrt(window_shape)
rolled_volatility_real = (np.std(windowed_price_real, axis=-1) / _window_scale).reshape(n_stocks, -1)
rolled_volatility_synthetic = (np.std(windowed_price_synthetic, axis=-1) / _window_scale).reshape(n_stocks, -1)

print(rolled_volatility_real.shape, rolled_volatility_synthetic.shape)

In [None]:
# Rolling mean of the volume over the same window length, one flat row per stock.
windowed_volume_real = sliding_window_view(volume_real, window_shape, axis=-1)
windowed_volume_synthetic = sliding_window_view(volume_synthetic, window_shape, axis=-1)
print(windowed_volume_real.shape, windowed_volume_synthetic.shape)

rolled_mean_volume_real = np.mean(windowed_volume_real, axis=-1).reshape(n_stocks, -1)
rolled_mean_volume_synthetic = np.mean(windowed_volume_synthetic, axis=-1).reshape(n_stocks, -1)
print(rolled_mean_volume_real.shape, rolled_mean_volume_synthetic.shape)

In [None]:
# Second-level windows: every run of 390 consecutive values of the rolling volatility and
# of the rolling mean volume, per stock. The correlation cell below pairs them up.
# This rebinds window_shape, so the later plot title reports 390.
window_shape = 390

windowed_rolled_volatility_real = sliding_window_view(rolled_volatility_real, window_shape=window_shape, axis=-1)
windowed_rolled_volatility_synthetic = sliding_window_view(
    rolled_volatility_synthetic, window_shape=window_shape, axis=-1
)
print(windowed_rolled_volatility_real.shape, windowed_rolled_volatility_synthetic.shape)

windowed_rolled_mean_volume_real = sliding_window_view(rolled_mean_volume_real, window_shape=window_shape, axis=-1)
windowed_rolled_mean_volume_synthetic = sliding_window_view(
    rolled_mean_volume_synthetic, window_shape=window_shape, axis=-1
)
print(windowed_rolled_mean_volume_real.shape, windowed_rolled_mean_volume_synthetic.shape)

In [None]:
def _windowwise_corr(volume_windows, volatility_windows):
    """Pearson correlation of each volume window with the volatility window at the same position."""
    return np.asarray(
        [
            np.corrcoef(window_volume, window_volatility)[0, 1]
            for window_volume, window_volatility in zip(volume_windows, volatility_windows)
        ]
    )


# Per stock: (real, synthetic) arrays of window-by-window volume/volatility correlations.
d = {
    stock_name: (
        _windowwise_corr(real_volume, real_volatility),
        _windowwise_corr(synthetic_volume, synthetic_volatility),
    )
    for stock_name, real_volume, real_volatility, synthetic_volume, synthetic_volatility in zip(
        stock_names,
        windowed_rolled_mean_volume_real,
        windowed_rolled_volatility_real,
        windowed_rolled_mean_volume_synthetic,
        windowed_rolled_volatility_synthetic,
    )
}

In [None]:
# 50 evenly spaced bin edges covering the full range of a correlation coefficient.
bins = np.linspace(start=-1, stop=1, num=50)

In [None]:
# Log-scale density histograms of the window-by-window volume/volatility correlations,
# synthetic versus real, one panel per stock.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], bins=bins, density=True, log=True, histtype="step", linewidth=3)
for panel, (ax, (stock_name, (corrs_real, corrs_synthetic))) in enumerate(zip(axes, d.items())):
    # Only the first panel registers legend entries, so the figure legend has no duplicates.
    add_label = panel == 0

    ax.set_xlim((-1, 1))
    ax.hist(x=[corrs_synthetic, corrs_real], label=["Synthetic", "Real"] if add_label else None, **hist_style)

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Correlation Coefficient", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle(f"Volume-Volatility Correlation Window={window_shape}", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volume_volatility_correlation.pdf")
plt.show()
plt.close(fig)