In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view
import pandas as pd
import pickle

In [None]:
# Notebook-wide matplotlib defaults: 16:9 canvas, serif typeface and base font sizes.
# Individual plotting calls further down may override these per call.
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        # "figure.dpi": 300,
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Font sizes passed explicitly to the plotting calls (figure title, axes titles, axis labels,
# tick labels, legend). The trailing numbers look like previously used values — TODO confirm.
FONT_SIZE_TITLE_PLOT = 48  # 40
FONT_SIZE_TITLE_AX = 36  # 30
FONT_SIZE_LABEL = 30  # 24
FONT_SIZE_TICKS = 24  # 20
FONT_SIZE_LEGEND = 32  # 28

In [None]:
# Base folder from which the run pickles are read and under which figures are saved.
# The path is relative, so it is resolved against the kernel's current working directory.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Tickers analysed in this notebook. Positions in this list are used to index the rows of
# the price arrays and to name the columns of the correlation frames, so order matters.
stock_names = (
    "AAPL AMGN AXP BA CAT CRM CSCO CVX DIS GE "
    "GS HD HON IBM INTC JNJ JPM KO MCD MMM "
    "MRK MSFT NKE PG TRV UNH V VZ WBA WMT"
).split()
n_stocks = len(stock_names)

In [None]:
# Load the real price matrix stored for the chosen run and compute its row-wise correlation.
RUN_ID = "1sf7oq6a"
PATH_PICKLE_REAL = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID}/reals.pickle"
# NOTE: unpickling can execute arbitrary code; only load files produced by trusted runs.
with open(PATH_PICKLE_REAL, "rb") as reals_file:
    reals_payload = pickle.load(reals_file)
real = reals_payload["prices"]
corr_real = np.corrcoef(real)
real.shape, corr_real.shape

In [None]:
# Load the synthetic prices generated at the chosen epoch of the same run.
CHOSEN_EPOCH = 255
PATH_PICKLE_PRED = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID}/preds_epoch={CHOSEN_EPOCH}-target_price=mid_price-target_volume=None.pickle"
# NOTE: unpickling can execute arbitrary code; only load files produced by trusted runs.
with open(PATH_PICKLE_PRED, "rb") as preds_file:
    preds_payload = pickle.load(preds_file)
synthetic = preds_payload["pred_prices"]
synthetic.shape

In [None]:
# Number of steps treated as one day: 6 h 30 min = 390 one-minute steps
# (presumably the length of a regular trading session — TODO confirm).
MINUTES1DAY = 6 * 60 + 30

In [None]:
# x-axis positions: the first MINUTES1DAY steps are the observed history, the remaining
# steps (up to the length of the price series) are the continuation being compared.
n_steps_total = real.shape[1]
history_indexes = np.arange(0, MINUTES1DAY)
continuation_indexes = np.arange(MINUTES1DAY, n_steps_total)
history_indexes.shape, continuation_indexes.shape

In [None]:
# Disabled overview plot, kept for reference: a 6x5 grid with one panel per ticker showing
# the observed first day, the real continuation and the synthetic continuation.
# If re-enabled, `real` and `synthetic` must still hold the loaded price arrays at this point.
# fig, axes = plt.subplots(6, 5, figsize=(16, 9))
# axes = axes.ravel()
#
# add_label = True
# for ax, stock_name, real1stock, synthetic1stock in zip(axes, stock_names, real, synthetic):
#     ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX-16)
#     ax.set_xticklabels([])
#     ax.set_yticklabels([])
#     ax.tick_params(bottom=False, left=False)
#
#     ax.plot(history_indexes, real1stock[:MINUTES1DAY], color='C0', label='Observed' if add_label else None)
#     ax.plot(continuation_indexes, real1stock[MINUTES1DAY:], color='C1', label='Real' if add_label else None)
#     ax.plot(continuation_indexes, synthetic1stock[MINUTES1DAY:], color='C2', label='Synthetic' if add_label else None)
#
#     add_label = False
#
# fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT-18, y=1)
# fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND-8, frameon=False, bbox_to_anchor=(0.5, 0.97))
# plt.tight_layout()
# # plt.savefig(f"{PROJECT_FOLDER}/plot_finali/dowjones/prices.pdf")
# plt.show()
# plt.close(fig)

In [None]:
# Stock-by-stock correlation matrices of the price series (transposed so that each column
# is one ticker), rounded to two decimals, for the real and the synthetic data.
df_real_corr, df_synthetic_corr = (
    pd.DataFrame(prices.T, columns=stock_names).corr().round(2) for prices in (real, synthetic)
)
# Element-wise squared difference between the two rounded matrices.
df_corr_dist = np.power(df_real_corr - df_synthetic_corr, 2)

In [None]:
# Select the i-th consecutive pair of tickers and show its real correlation, synthetic
# correlation and squared difference (column = first ticker, row = second ticker).
i = 4
stock_name1, stock_name2 = stock_names[2 * i : 2 * i + 2]
(
    df_real_corr.loc[stock_name2, stock_name1],
    df_synthetic_corr.loc[stock_name2, stock_name1],
    df_corr_dist.loc[stock_name2, stock_name1],
)

In [None]:
# Pull the two selected tickers' rows out of the price arrays and stack them into
# two-row arrays (row 0 = first ticker, row 1 = second ticker).
row1 = stock_names.index(stock_name1)
row2 = stock_names.index(stock_name2)
real1, real2 = real[row1], real[row2]
synthetic1, synthetic2 = synthetic[row1], synthetic[row2]
real12 = np.stack((real1, real2))
synthetic12 = np.stack((synthetic1, synthetic2))
real12.shape, synthetic12.shape

In [None]:
# Split each series into whole days: (2, n_steps) -> (2, n_days, MINUTES1DAY).
# The target shape is passed positionally through ndarray.reshape because the `newshape=`
# keyword of np.reshape is deprecated in recent NumPy releases; this form works on every
# version. A ValueError is still raised when n_steps is not a multiple of MINUTES1DAY.
real12 = real12.reshape(2, -1, MINUTES1DAY)
synthetic12 = synthetic12.reshape(2, -1, MINUTES1DAY)
real12.shape, synthetic12.shape

In [None]:
# One-step log returns. The difference is taken along the last axis, i.e. within each day
# when real12/synthetic12 are in their (2, n_days, MINUTES1DAY) layout, so no return spans
# two days. The result is flattened to one row per ticker.
log_prices_real = np.log(real12)
log_prices_synthetic = np.log(synthetic12)
minutely_returns_real = np.diff(log_prices_real, axis=-1).reshape(2, -1)
minutely_returns_synthetic = np.diff(log_prices_synthetic, axis=-1).reshape(2, -1)
minutely_returns_real.shape, minutely_returns_synthetic.shape

In [None]:
# Log returns over n_minutes-step horizons: keep every n_minutes-th price along the last
# axis, difference the logs along that axis, then flatten to one row per ticker.
n_minutes = 15
sampled_real = real12[:, :, ::n_minutes]
sampled_synthetic = synthetic12[:, :, ::n_minutes]
n_minutely_returns_real = np.diff(np.log(sampled_real), axis=-1).reshape(2, -1)
n_minutely_returns_synthetic = np.diff(np.log(sampled_synthetic), axis=-1).reshape(2, -1)
n_minutely_returns_real.shape, n_minutely_returns_synthetic.shape

In [None]:
# Common (min, max) range over all four return arrays, used later to build shared
# histogram bins for the 1-step and n_minutes-step return panels.
_all_returns = (
    minutely_returns_real,
    minutely_returns_synthetic,
    n_minutely_returns_real,
    n_minutely_returns_synthetic,
)
xlim = (
    min(returns.min() for returns in _all_returns),
    max(returns.max() for returns in _all_returns),
)

In [None]:
# Flatten back to one row per ticker and divide prices by 1e4
# (presumably a unit conversion of the stored prices — TODO confirm).
# NOTE: this rebinds real12/synthetic12, so running the cell twice divides twice;
# re-run the cells that build them before re-running this one.
real12, synthetic12 = (prices.reshape(2, -1) / 1e4 for prices in (real12, synthetic12))

In [None]:
# Every contiguous window of MINUTES1DAY consecutive returns, per ticker:
# result shape is (2, n_windows, MINUTES1DAY).
window_shape = MINUTES1DAY
rolled_minutely_returns_real, rolled_minutely_returns_synthetic = (
    sliding_window_view(returns, window_shape, axis=1)
    for returns in (minutely_returns_real, minutely_returns_synthetic)
)

In [None]:
# Rolling autocorrelation of the one-step returns.
# For each selected ticker and each lag, compute the Pearson correlation between every
# return window and its lag-shifted copy. The result maps
#   ticker -> lag -> (autocorrs over real windows, autocorrs over synthetic windows).
#
# The loop variables are deliberately NOT called `real` / `synthetic`: those names hold the
# loaded price arrays, and rebinding them here would break any earlier cell that is re-run.
lags = [1, 10, 20, 30]
stock_name2lag2autocorr = dict()
for stock_name, windows_real, windows_synthetic in zip(
    [stock_name1, stock_name2], rolled_minutely_returns_real, rolled_minutely_returns_synthetic
):
    lag2autocorr = dict()

    for lag in lags:
        l_real, l_synthetic = list(), list()

        # Windows are paired up, so iteration stops at the shorter of the two collections.
        for window_real, window_synthetic in zip(windows_real, windows_synthetic):
            # corrcoef of two 1-D inputs is a 2x2 matrix; [0, 1] is the cross term.
            autocorr_real = np.corrcoef(window_real[:-lag], window_real[lag:])[0, 1]
            autocorr_synthetic = np.corrcoef(window_synthetic[:-lag], window_synthetic[lag:])[0, 1]
            l_real.append(autocorr_real)
            l_synthetic.append(autocorr_synthetic)

        lag2autocorr[lag] = (np.asarray(l_real), np.asarray(l_synthetic))

    stock_name2lag2autocorr[stock_name] = lag2autocorr

In [None]:
# Rolling dispersion of the (rescaled) prices: standard deviation over every window of
# `window_shape` consecutive prices, divided by sqrt(window_shape), one row per ticker.
window_shape = 20
windowed_price_real = sliding_window_view(real12, window_shape, axis=-1)
windowed_price_synthetic = sliding_window_view(synthetic12, window_shape, axis=-1)
std_real = windowed_price_real.std(axis=-1)
std_synthetic = windowed_price_synthetic.std(axis=-1)
rolled_volatility_real = (std_real / np.sqrt(window_shape)).reshape(2, -1)
rolled_volatility_synthetic = (std_synthetic / np.sqrt(window_shape)).reshape(2, -1)

In [None]:
# Volatility clustering: correlation between each ticker's rolling volatility and the same
# series shifted by 1 .. max_days - 1 days.
#
# np.corrcoef(a, b) stacks the rows of a and b, so with two tickers the result is 4x4:
# rows 0-1 are the leading slices, rows 2-3 the lagged slices. Picking the entries
# (0, 2) and (1, 3) yields each ticker's correlation with its own lagged volatility.
n_series = len([stock_name1, stock_name2])
x_indices = list(range(n_series))
y_indices = list(range(n_series, 2 * n_series))

max_days = 11
lags = np.asarray(range(1, max_days)) * MINUTES1DAY  # lags expressed in steps

# The per-lag results use their own names instead of `real` / `synthetic`, which hold the
# loaded price arrays and must not be overwritten by this cell.
l_real, l_synthetic = list(), list()
for lag in lags:
    lagged_corr_real = np.corrcoef(rolled_volatility_real[:, lag:], rolled_volatility_real[:, :-lag])[
        x_indices, y_indices
    ]
    l_real.append(lagged_corr_real)
    lagged_corr_synthetic = np.corrcoef(
        rolled_volatility_synthetic[:, lag:], rolled_volatility_synthetic[:, :-lag]
    )[x_indices, y_indices]
    l_synthetic.append(lagged_corr_synthetic)

# Transpose to (ticker, lag) so that row k is the curve plotted for ticker k.
corrs_real = np.asarray(l_real).T
corrs_synthetic = np.asarray(l_synthetic).T

In [None]:
# Summary figure for the selected pair: one column per ticker and five rows
# (price paths, one-step returns, n_minutes-step returns, return autocorrelation,
# volatility clustering). The figure is saved as a PDF named after the two tickers.
fig, axes = plt.subplots(5, 2, figsize=(16, 24))

for ax in axes.ravel():
    ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

# Shared styling of every synthetic-vs-real histogram. Data is always passed in the order
# [synthetic, real], hence the colour order C2, C1 (the same colours the "Synthetic" and
# "Real" price lines get from the default colour cycle).
hist_style = dict(color=["C2", "C1"], density=True, log=True, histtype="step", linewidth=3)

# PRICES
ax_price_s1, ax_price_s2 = axes[0]
for col, ax in enumerate((ax_price_s1, ax_price_s2)):
    # Only the left panel carries labels so that fig.legend lists each series once.
    labelled = col == 0
    ax.set_title("Price", fontsize=FONT_SIZE_TITLE_AX)
    ax.plot(history_indexes, real12[col, :MINUTES1DAY], label="Observed" if labelled else None)
    ax.plot(continuation_indexes, real12[col, MINUTES1DAY:], label="Real" if labelled else None)
    ax.plot(continuation_indexes, synthetic12[col, MINUTES1DAY:], label="Synthetic" if labelled else None)
    # Red line marks where the observed history ends and the continuation begins.
    ax.axvline(MINUTES1DAY, color="red")
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)


# Common bins for both return rows so their distributions are directly comparable.
bins = np.linspace(xlim[0], xlim[1], 75)

# RETURN DISTRIBUTION
ax_ret_s1, ax_ret_s2 = axes[1]
for col, ax in enumerate((ax_ret_s1, ax_ret_s2)):
    ax.set_title("Return 1m", fontsize=FONT_SIZE_TITLE_AX)
    ax.hist(x=[minutely_returns_synthetic[col], minutely_returns_real[col]], bins=bins, **hist_style)
    # The x axis shows log returns (the label used to read "Corr coef" by copy-paste).
    ax.set_xlabel("Log return", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)

# AGGREGATIONAL GAUSSIANITY
ax_agg_s1, ax_agg_s2 = axes[2]
for col, ax in enumerate((ax_agg_s1, ax_agg_s2)):
    ax.set_title(f"Return {n_minutes}m", fontsize=FONT_SIZE_TITLE_AX)
    ax.hist(x=[n_minutely_returns_synthetic[col], n_minutely_returns_real[col]], bins=bins, **hist_style)
    ax.set_xlabel("Log return", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)

# AUTOCORRELATION
bins = np.linspace(-1, 1, 50)
lag = 20  # must be one of the lags for which the rolling autocorrelation was computed
ax_auto_s1, ax_auto_s2 = axes[3]
for ax, pair_member in zip((ax_auto_s1, ax_auto_s2), (stock_name1, stock_name2)):
    # Each entry is stored as (real, synthetic); the histogram wants [synthetic, real].
    hist_real, hist_synthetic = stock_name2lag2autocorr[pair_member][lag]
    ax.set_xlim((-1, 1))
    ax.set_title("Return autocorr", fontsize=FONT_SIZE_TITLE_AX)
    ax.hist(x=[hist_synthetic, hist_real], bins=bins, **hist_style)
    ax.set_xlabel("Corr coef", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Density", fontsize=FONT_SIZE_LABEL)

# VOLATILITY CLUSTERING
ax_voclu_s1, ax_voclu_s2 = axes[4]
day_lags = range(1, max_days)
for col, ax in enumerate((ax_voclu_s1, ax_voclu_s2)):
    ax.set_title("Volatility clustering", fontsize=FONT_SIZE_TITLE_AX)
    ax.plot(day_lags, corrs_real[col], color="C1", linewidth=3)
    ax.plot(day_lags, corrs_synthetic[col], color="C2", linewidth=3)
    ax.set_xlabel("Lag (days)", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Corr coef", fontsize=FONT_SIZE_LABEL)

fig.suptitle(f"{stock_name1} - {stock_name2}", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.99), frameon=False)
fig.tight_layout()
# The target directory is not created here; it must already exist.
fig.savefig(f"{PROJECT_FOLDER}/plot_finali/dowjones/{stock_name1}_{stock_name2}.pdf")
plt.show()
plt.close(fig)