In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import pickle
from matplotlib.lines import Line2D
import scipy.stats as scs
from itertools import combinations

In [None]:
# Matplotlib defaults applied to every figure created after this cell.
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        # "figure.dpi": 300,
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Base folder for every input pickle and output figure path built below.
# It is a relative path, so it resolves against the kernel's current working directory.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Tickers analysed in this notebook. The plotting cells zip this list with the first axis of
# the price/volume arrays, so its order is assumed to match the row order stored in the
# pickles — TODO confirm against the run configuration.
stock_names = ["PEP", "KO", "NVDA", "KSU"]

In [None]:
# Storage sub-folder name and epoch number used to locate the price pickles.
RUN_ID_PRICE = "iksf94ne"
EPOCH_PRICE = 260
# reals.pickle and the epoch-specific predictions file both live in the run's storage folder.
PATH_PICKLE_REAL_PRICE = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_PRICE}/reals.pickle"
PATH_PICKLE_PRED_PRICE = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_PRICE}/preds_epoch={EPOCH_PRICE}-target_price=mid_price-target_volume=None.pickle"

In [None]:
# Storage sub-folder name and epoch number used to locate the volume pickles.
RUN_ID_VOLUME = "3ogpusxf"
EPOCH_VOLUME = 71
# reals.pickle and the epoch-specific predictions file both live in the run's storage folder.
PATH_PICKLE_REAL_VOLUME = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_VOLUME}/reals.pickle"
PATH_PICKLE_PRED_VOLUME = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_VOLUME}/preds_epoch={EPOCH_VOLUME}-target_price=None-target_volume=volume.pickle"

In [None]:
# Load the real and predicted price dictionaries.
# NOTE: pickle.load can execute arbitrary code embedded in the file — only open pickles
# written by trusted runs.
with open(PATH_PICKLE_REAL_PRICE, "rb") as real_file:
    real_price_dict = pickle.load(real_file)

with open(PATH_PICKLE_PRED_PRICE, "rb") as pred_file:
    pred_price_dict = pickle.load(pred_file)

In [None]:
# Load the real and predicted volume dictionaries.
# NOTE: pickle.load can execute arbitrary code embedded in the file — only open pickles
# written by trusted runs.
with open(PATH_PICKLE_REAL_VOLUME, "rb") as real_file:
    real_volume_dict = pickle.load(real_file)

with open(PATH_PICKLE_PRED_VOLUME, "rb") as pred_file:
    pred_volume_dict = pickle.load(pred_file)

In [None]:
# Show which entries each price pickle provides.
real_price_dict.keys(), pred_price_dict.keys()

In [None]:
# Show which entries each volume pickle provides.
real_volume_dict.keys(), pred_volume_dict.keys()

In [None]:
# "x" / "x_hat" entries of the price pickles; this cell only displays their shapes.
x = real_price_dict["x"]
x_hat = pred_price_dict["x_hat"]
x.shape, x_hat.shape

In [None]:
# Real and predicted price arrays. Later cells iterate their first axis alongside stock_names
# and split the second axis at index 390, i.e. they are used as [n_stocks, n_steps].
price_real = real_price_dict["prices"]
price_pred = pred_price_dict["pred_prices"]
price_real.shape, price_pred.shape

In [None]:
# Real and predicted volume arrays, used by later cells with the same [n_stocks, n_steps]
# layout as the price arrays.
volume_real = real_volume_dict["volumes"]
volume_pred = pred_volume_dict["pred_volumes"]
volume_real.shape, volume_pred.shape

In [None]:
# x positions for plotting: the first 390 steps are drawn as observed history, the rest as
# continuation. 390 equals 6 * 60 + 30, the value assigned to minutes_in_a_day further down.
history_indexes = np.arange(390)
continuation_indexes = np.arange(390, price_real.shape[1])
history_indexes.shape, continuation_indexes.shape

In [None]:
# Shape check (repeats the output of the cell that created the price arrays).
price_real.shape, price_pred.shape

In [None]:
# Rescaled copies used by the "Price ($)" figure.
# NOTE(review): presumably the stored prices are in units of 1e-4 dollars — confirm with the data source.
price_real_ = price_real / 10000
price_pred_ = price_pred / 10000
# Spot-check one value (first row, step 1000) from each array.
price_real_[0, 1000], price_pred_[0, 1000]

In [None]:
# Font sizes passed explicitly to every figure below (they override the rcParams defaults).
# The trailing comments look like previously used, smaller values — kept for reference.
FONT_SIZE_TITLE_PLOT = 48  # 40
FONT_SIZE_TITLE_AX = 36  # 30
FONT_SIZE_LABEL = 30  # 24
FONT_SIZE_TICKS = 24  # 20
FONT_SIZE_LEGEND = 32  # 28

# PRICES

In [None]:
# One panel per ticker: observed first 390 steps, then the real and the synthetic continuation.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

for panel_idx, (ax, stock_name, real, synthetic) in enumerate(zip(axes, stock_names, price_real_, price_pred_)):
    # Only the first panel carries labels, so the figure-level legend lists each curve once.
    first_panel = panel_idx == 0
    ax.plot(history_indexes, real[:390], color="C0", label="Observed" if first_panel else None)
    ax.plot(continuation_indexes, real[390:], color="C1", label="Real continuation" if first_panel else None)
    ax.plot(
        continuation_indexes,
        synthetic[390:],
        color="C2",
        label="Synthetic continuation" if first_panel else None,
    )
    # Red vertical line at the history/continuation boundary.
    ax.axvline(x=390, color="r")

    ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Price ($)", fontsize=FONT_SIZE_LABEL, rotation=90)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)
    # Keep the automatic ticks except the two outermost ones and add a tick at the boundary.
    ax.set_xticks([*ax.get_xticks()[1:-1], 390])

fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/prices.pdf")
plt.show()
plt.close(fig)

# Couples

In [None]:
# Pairwise Pearson correlation between the rows of each price array
# (np.corrcoef treats every row as one variable).
# NOTE(review): this is computed on price levels over the whole sequence, including the first
# 390 steps that the figures draw from the real series in both rows — confirm this is the
# intended measure.
corr_real = np.corrcoef(price_real)
corr_pred = np.corrcoef(price_pred)

In [None]:
# Loop-invariant pieces: proxy handles for the figure legend and the x positions of the
# observed / continuation segments.
legend_elements = [
    Line2D([0], [0], color=color, lw=2, label=label)
    for color, label in (
        ("C0", "Observed"),
        ("C1", "Real continuation"),
        ("C2", "Synthetic continuation"),
    )
]
history_indexes = np.arange(390)
continuation_indexes = np.arange(390, price_real.shape[1])

# One figure per unordered pair of tickers: top row real, bottom row synthetic,
# left column first ticker, right column second ticker.
for stock_name_1, stock_name_2 in combinations(stock_names, 2):
    fig, axes = plt.subplots(2, 2)

    stock_index_1 = stock_names.index(stock_name_1)
    stock_index_2 = stock_names.index(stock_name_2)
    corr_real_12 = corr_real[stock_index_1][stock_index_2]
    corr_pred_12 = corr_pred[stock_index_1][stock_index_2]

    columns = ((stock_name_1, stock_index_1), (stock_name_2, stock_index_2))
    for col, (name, stock_index) in enumerate(columns):
        ax_real, ax_synth = axes[0, col], axes[1, col]
        full_real = price_real[stock_index]
        continuation_pred = price_pred[stock_index, 390:]

        ax_real.set_title(f"{name} - Real", fontsize=FONT_SIZE_TITLE_AX)
        ax_synth.set_title(f"{name} - Synthetic", fontsize=FONT_SIZE_TITLE_AX)

        # Both rows start from the same observed history.
        ax_real.plot(history_indexes, full_real[:390], color="C0")
        ax_synth.plot(history_indexes, full_real[:390], color="C0")

        # Continuations: real on top, synthetic below.
        ax_real.plot(continuation_indexes, full_real[390:], color="C1")
        ax_synth.plot(continuation_indexes, continuation_pred, color="C2")

        # Same y-range for the real and synthetic panel of a ticker so they compare directly.
        y_range = [
            min(full_real.min(), continuation_pred.min()),
            max(full_real.max(), continuation_pred.max()),
        ]
        for ax in (ax_real, ax_synth):
            ax.set_ylim(y_range)
            ax.set_yticklabels([])
            ax.set_xticklabels([])

    # The y-label slot of the right-hand panels is used to print the pair's correlation
    # (real on the top row, synthetic on the bottom row).
    axes[0, 1].set_ylabel(
        round(corr_real_12, 2), rotation="horizontal", horizontalalignment="right", fontsize=FONT_SIZE_LABEL
    )
    axes[1, 1].set_ylabel(
        round(corr_pred_12, 2), rotation="horizontal", horizontalalignment="right", fontsize=FONT_SIZE_LABEL
    )

    fig.legend(
        handles=legend_elements,
        loc="upper center",
        ncol=3,
        fontsize=FONT_SIZE_LEGEND,
        bbox_to_anchor=(0.5, 0.97),
        frameon=False,
    )
    fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
    fig.tight_layout()
    plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/couples/{stock_name_1}-{stock_name_2}.pdf")
    # plt.show()
    plt.close(fig)

# VOLUMES

In [None]:
# One panel per ticker: observed first 390 steps, then the real and the synthetic continuation.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

for panel_idx, (ax, stock_name, real, synthetic) in enumerate(zip(axes, stock_names, volume_real, volume_pred)):
    # Only the first panel carries labels, so the figure-level legend lists each curve once.
    first_panel = panel_idx == 0
    ax.plot(history_indexes, real[:390], color="C0", label="Observed" if first_panel else None)
    ax.plot(continuation_indexes, real[390:], color="C1", label="Real continuation" if first_panel else None)
    # Semi-transparent so the real continuation underneath stays visible.
    ax.plot(
        continuation_indexes,
        synthetic[390:],
        color="C2",
        alpha=0.7,
        label="Synthetic continuation" if first_panel else None,
    )
    # Red vertical line at the history/continuation boundary.
    ax.axvline(x=390, color="r")

    ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Shares", fontsize=FONT_SIZE_LABEL, rotation=90)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)
    # Keep the automatic ticks except the two outermost ones and add a tick at the boundary.
    ax.set_xticks([*ax.get_xticks()[1:-1], 390])

fig.suptitle("Volumes", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volumes.pdf")
plt.show()
plt.close(fig)

# STYLISED FACTS

In [None]:
# Number of one-minute steps per day used to slice the sequences: 6 h 30 min = 390.
minutes_in_a_day = 6 * 60 + 30

In [None]:
# Zero-based index of the day whose prices feed the stylised-fact figures below.
day = 3

# Number of complete days available in both arrays. Deriving the bound from the data
# guarantees the slice below is never empty or partial.
n_days = min(price_real.shape[1], price_pred.shape[1]) // minutes_in_a_day

assert day > 0, "The first day is the same for real and pred"
assert day < n_days, f"The sequence is {n_days}-days long"

day_slice = slice(minutes_in_a_day * day, minutes_in_a_day * (day + 1))
one_day_price_real = price_real[:, day_slice]
one_day_price_pred = price_pred[:, day_slice]
one_day_price_real.shape, one_day_price_pred.shape

In [None]:
# One-step log-returns within the selected day (np.diff runs along the last axis).
minutely_returns_real = np.diff(np.log(one_day_price_real))
minutely_returns_synthetic = np.diff(np.log(one_day_price_pred))

# Log-returns between prices sampled every n_minutes steps of the same day.
n_minutes = 15
n_minutely_returns_real = np.diff(np.log(one_day_price_real[:, ::n_minutes]))
n_minutely_returns_synthetic = np.diff(np.log(one_day_price_pred[:, ::n_minutes]))

# Fixed, symmetric x-range shared by the return histograms (instead of the data's own
# min/max) so that all panels and both figures are directly comparable.
xlim = (-0.02, 0.02)

## RETURNS DISTRIBUTION

In [None]:
# Per-ticker histogram (log-scaled density) of one-step log-returns, real vs synthetic.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], density=True, log=True, histtype="step", linewidth=3)

panels = zip(axes, stock_names, minutely_returns_real, minutely_returns_synthetic)
for panel_idx, (ax, stock_name, real, synthetic) in enumerate(panels):
    ax.set_xlim(xlim)

    # Common bin edges spanning both samples so the two outlines are comparable.
    left = min(min(real), min(synthetic))
    right = max(max(real), max(synthetic))

    ax.hist(
        x=[synthetic, real],
        # Label the first panel only; the figure-level legend picks the entries up from there.
        label=["Synthetic", "Real"] if panel_idx == 0 else None,
        bins=np.linspace(left, right, 15),
        **hist_style,
    )

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Minutely Log-Returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle("Returns distribution", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/return_distribution.pdf")
plt.show()
plt.close(fig)

## AGGREGATIONAL GAUSSIANITY

In [None]:
# Per-ticker histogram (log-scaled density) of n_minutes-step log-returns, real vs synthetic.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], density=True, log=True, histtype="step", linewidth=3)

panels = zip(axes, stock_names, n_minutely_returns_real, n_minutely_returns_synthetic)
for panel_idx, (ax, stock_name, real, synthetic) in enumerate(panels):
    ax.set_xlim(xlim)

    # Common bin edges spanning both samples so the two outlines are comparable.
    left = min(min(real), min(synthetic))
    right = max(max(real), max(synthetic))

    ax.hist(
        x=[synthetic, real],
        # Label the first panel only; the figure-level legend picks the entries up from there.
        label=["Synthetic", "Real"] if panel_idx == 0 else None,
        bins=np.linspace(left, right, 15),
        **hist_style,
    )

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel(f"{n_minutes} Minute Log-Returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle("Aggregational Gaussianity", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/agg_gauss.pdf")
plt.show()
plt.close(fig)

## ABSENCE OF AUTOCORRELATION

In [None]:
from numpy.lib.stride_tricks import sliding_window_view

In [None]:
# Every length-`window_shape` window of consecutive returns, taken along the time axis.
window_shape = 30
rolled_minutely_returns_real, rolled_minutely_returns_synthetic = (
    sliding_window_view(returns, window_shape=window_shape, axis=1)
    for returns in (minutely_returns_real, minutely_returns_synthetic)
)
rolled_minutely_returns_real.shape, rolled_minutely_returns_synthetic.shape

In [None]:
# Lag-`lag` autocorrelation of the returns inside every rolling window, per ticker.
# d maps ticker -> (real autocorrelations, synthetic autocorrelations), one value per window.
lag = 1
d = {}
for ticker, windows_real, windows_synthetic in zip(
    stock_names, rolled_minutely_returns_real, rolled_minutely_returns_synthetic
):
    # windows_* are [n_windows, window_size]; correlate each window with itself shifted by `lag`.
    per_window = [
        (
            np.corrcoef(w_real[:-lag], w_real[lag:])[0, 1],
            np.corrcoef(w_synthetic[:-lag], w_synthetic[lag:])[0, 1],
        )
        for w_real, w_synthetic in zip(windows_real, windows_synthetic)
    ]
    d[ticker] = (
        np.asarray([pair[0] for pair in per_window]),
        np.asarray([pair[1] for pair in per_window]),
    )

In [None]:
# Per-ticker histogram (log-scaled density) of the rolling-window autocorrelations.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

hist_style = dict(color=["C2", "C1"], density=True, log=True, histtype="step", linewidth=3)

for panel_idx, (ax, stock_name) in enumerate(zip(axes, stock_names)):
    # Correlation coefficients live in [-1, 1].
    ax.set_xlim((-1, 1))
    real, synthetic = d[stock_name]

    # Common bin edges spanning both samples so the two outlines are comparable.
    left = min(min(real), min(synthetic))
    right = max(max(real), max(synthetic))

    ax.hist(
        x=[synthetic, real],
        # Label the first panel only; the figure-level legend picks the entries up from there.
        label=["Synthetic", "Real"] if panel_idx == 0 else None,
        bins=np.linspace(left, right, 15),
        **hist_style,
    )

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Correlation Coefficient", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

fig.suptitle(f"Returns Autocorrelations Window={window_shape}, Lag={lag}", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/absence_autocorrelation.pdf")
plt.show()
plt.close(fig)

## VOLATILITY CLUSTERING

In [None]:
# Display the number of steps per day; the lags used below are multiples of it.
minutes_in_a_day

In [None]:
# One-step log-returns over the whole sequence (not just one day).
# NOTE: this rebinds minutely_returns_real / minutely_returns_synthetic, which earlier cells
# computed from one_day_price_*. Re-running the earlier histogram or sliding-window cells
# after this one would silently use these full-length arrays instead.
minutely_returns_real = np.diff(np.log(price_real))
minutely_returns_synthetic = np.diff(np.log(price_pred))

In [None]:
# Transpose so that rows are time steps and columns are the series; pandas rolling
# operations then run along time.
df_minutely_returns_real = pd.DataFrame(minutely_returns_real.T)
df_minutely_returns_synthetic = pd.DataFrame(minutely_returns_synthetic.T)
df_minutely_returns_real.shape, df_minutely_returns_synthetic.shape

In [None]:
# Rolling standard deviation of the returns over `window_size` steps, divided by
# sqrt(window_size). Incomplete leading windows are dropped, every `stride`-th row is
# kept, and the result is transposed back to one row per series.
window_size, stride = 30, 1

real_rolling_volatility, synthetic_rolling_volatility = (
    (returns.rolling(window=window_size).std().dropna() / np.sqrt(window_size)).values[::stride].T
    for returns in (df_minutely_returns_real, df_minutely_returns_synthetic)
)

real_rolling_volatility.shape, synthetic_rolling_volatility.shape

In [None]:
# Correlation between each series' rolling volatility and its own copy shifted by 1..5 days.
#
# np.corrcoef(a, b) stacks the rows of a and b into one set of variables, so with n series
# the result is a (2n x 2n) matrix; entry (k, n + k) pairs series k of the first argument
# with series k of the second one.
n_series = len(stock_names)
x_indices = list(range(n_series))
y_indices = list(range(n_series, 2 * n_series))

# Lags of 1..5 days expressed in steps.
lags = np.arange(1, 6) * minutes_in_a_day

l_real, l_synthetic = [], []
# The loop variable is deliberately not called `lag`: that name holds the autocorrelation lag
# shown in an earlier figure title and must not be overwritten here.
for lag_steps in lags:
    l_real.append(
        np.corrcoef(real_rolling_volatility[:, lag_steps:], real_rolling_volatility[:, :-lag_steps])[
            x_indices, y_indices
        ]
    )
    l_synthetic.append(
        np.corrcoef(synthetic_rolling_volatility[:, lag_steps:], synthetic_rolling_volatility[:, :-lag_steps])[
            x_indices, y_indices
        ]
    )

# Transpose to one row per series and one column per lag.
corrs_real = np.asarray(l_real).T
corrs_synthetic = np.asarray(l_synthetic).T

corrs_real.shape, corrs_synthetic.shape

In [None]:
# Per-ticker line plot of the lagged volatility correlations, real vs synthetic.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

# x positions: lag expressed in days.
lag_days = range(1, 6)

panels = zip(axes, stock_names, corrs_real, corrs_synthetic)
for panel_idx, (ax, stock_name, real, synthetic) in enumerate(panels):
    # Label the first panel only; the figure-level legend picks the entries up from there.
    first_panel = panel_idx == 0
    ax.plot(lag_days, real, color="C1", label="Real" if first_panel else None)
    ax.plot(lag_days, synthetic, color="C2", label="Synthetic" if first_panel else None)

    ax.set_title(f"{stock_name}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Lag (Days)", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Correlation\nCoefficient", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)


fig.suptitle(f"Volatility clustering Window={window_size}", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.97), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volatility_clustering.pdf")
plt.show()
plt.close(fig)

## VOLUME VOLATILITY CORRELATION

In [None]:
# Shape check of the four arrays combined in this section.
price_real.shape, price_pred.shape, volume_real.shape, volume_pred.shape

In [None]:
def compute_avg_log_returns(x, delta):
    """Average one-step log-returns over consecutive blocks of ``delta`` steps.

    Args:
        x: prices laid out as [sequence_length, n_stocks].
        delta: number of steps averaged per block.

    Returns:
        Array of block averages, transposed so that the time axis comes last.
    """
    prices = pd.DataFrame(x)
    # log(1 + pct_change) is the log-return; the first row has no predecessor and is dropped.
    log_returns = np.log1p(prices.pct_change()[1:])
    rolling_mean = log_returns.rolling(delta).mean().to_numpy().squeeze()
    # Keep every delta-th rolling mean, starting at position delta (position 0 is an
    # incomplete window), which yields non-overlapping blocks.
    return rolling_mean[delta::delta].T


def compute_avg_volumes(x, delta):
    """Average log(1 + volume) over consecutive blocks of ``delta`` steps.

    Args:
        x: volumes laid out as [sequence_length, n_stocks].
        delta: number of steps averaged per block.

    Returns:
        Array of block averages, transposed so that the time axis comes last.
    """
    log_volumes = np.log1p(pd.DataFrame(x))
    rolling_mean = log_volumes.rolling(delta).mean().to_numpy().squeeze()
    # Keep every delta-th rolling mean, starting at position delta (position 0 is an
    # incomplete window).
    return rolling_mean[delta::delta].T

In [None]:
# The helpers expect [sequence_length, n_stocks], hence the transposes; 15 is the block
# length `delta` in steps.
real_avg_log_returns = compute_avg_log_returns(price_real.T, 15)
real_avg_volumes = compute_avg_volumes(volume_real.T, 15)

pred_avg_log_returns = compute_avg_log_returns(price_pred.T, 15)
pred_avg_volumes = compute_avg_volumes(volume_pred.T, 15)

In [None]:
# Shape check: the returns and volumes arrays are scattered against each other per ticker below.
real_avg_log_returns.shape, real_avg_volumes.shape, pred_avg_log_returns.shape, pred_avg_volumes.shape

In [None]:
# Per-ticker scatter of block-averaged log-volumes against block-averaged log-returns.
# NOTE(review): the figure title says "Volatility" but the x-axis holds signed average
# log-returns — confirm whether absolute returns were intended.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

# (legend label, colour, x data, y data, extra scatter kwargs) for each point cloud.
clouds = (
    ("Real", "orange", real_avg_log_returns, real_avg_volumes, {}),
    ("Synthetic", "green", pred_avg_log_returns, pred_avg_volumes, {"alpha": 0.5}),
)

for target_idx, ax in enumerate(axes):
    ax.set_title(stock_names[target_idx], fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Avg log-returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Avg log-volumes", fontsize=FONT_SIZE_LABEL)
    ax.tick_params(axis="both", labelsize=FONT_SIZE_TICKS)

    for cloud_label, cloud_color, returns, volumes, extra in clouds:
        ax.scatter(
            returns[target_idx],
            volumes[target_idx],
            color=cloud_color,
            # Label the first panel only; the figure-level legend picks the entries up from there.
            label=cloud_label if target_idx == 0 else None,
            **extra,
        )

fig.suptitle("Volume-Volatility Correlation", fontsize=FONT_SIZE_TITLE_PLOT, y=1.00)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.96), frameon=False)
fig.tight_layout()
plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volume_volatility_correlation.pdf")
# plt.show()
plt.close(fig)