In [None]:
import os
import pickle
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as scs
import seaborn as sb
from matplotlib.lines import Line2D

In [None]:
# Show the working directory: PROJECT_FOLDER is a relative path, so every pickle
# and figure path built from it is resolved against this directory.
!pwd

In [None]:
# Matplotlib defaults applied to every figure created after this cell.
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        # "figure.dpi": 300,
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Project root, relative to the working directory; the pickle paths and the
# figure output paths are all built from it.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Tickers. The plots label row i of the price/volume arrays with stock_names[i],
# so this order must match the row order of the pickled arrays.
stock_names = ["KO", "PEP", "NVDA", "KSU"]

In [None]:
# Run identifier and epoch that select the price pickles: the real data lives in
# the run folder, the predictions file name additionally encodes the epoch.
RUN_ID_PRICE = "iksf94ne"
EPOCH_PRICE = 260
PATH_PICKLE_REAL_PRICE = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_PRICE}/reals.pickle"
PATH_PICKLE_PRED_PRICE = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_PRICE}/preds_epoch={EPOCH_PRICE}-target_price=mid_price-target_volume=None.pickle"

In [None]:
# Run identifier and epoch that select the volume pickles (a different run from
# the price one).
RUN_ID_VOLUME = "3ogpusxf"
EPOCH_VOLUME = 71
PATH_PICKLE_REAL_VOLUME = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_VOLUME}/reals.pickle"
PATH_PICKLE_PRED_VOLUME = f"{PROJECT_FOLDER}/storage/thesis-gan/{RUN_ID_VOLUME}/preds_epoch={EPOCH_VOLUME}-target_price=None-target_volume=volume.pickle"

In [None]:
# Load the real and predicted dictionaries of the price run.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(PATH_PICKLE_REAL_PRICE, "rb") as handle:
    real_price_dict = pickle.load(handle)
with open(PATH_PICKLE_PRED_PRICE, "rb") as handle:
    pred_price_dict = pickle.load(handle)

In [None]:
# Load the real and predicted dictionaries of the volume run.
# NOTE: pickle.load can execute arbitrary code; only open files you trust.
with open(PATH_PICKLE_REAL_VOLUME, "rb") as handle:
    real_volume_dict = pickle.load(handle)
with open(PATH_PICKLE_PRED_VOLUME, "rb") as handle:
    pred_volume_dict = pickle.load(handle)

In [None]:
# Inspect which entries the two price dictionaries contain.
real_price_dict.keys(), pred_price_dict.keys()

In [None]:
# Inspect which entries the two volume dictionaries contain.
real_volume_dict.keys(), pred_volume_dict.keys()

In [None]:
# "x" / "x_hat" entries of the price dictionaries; only their shapes are
# inspected in this cell.
x = real_price_dict["x"]
x_hat = pred_price_dict["x_hat"]
x.shape, x_hat.shape

In [None]:
# Real and predicted price series, indexed [stock, step] by the plots below.
price_real = real_price_dict["prices"]
price_pred = pred_price_dict["pred_prices"]
price_real.shape, price_pred.shape

In [None]:
# Real and predicted volume series, indexed [stock, step] by the plots below.
volume_real = real_volume_dict["volumes"]
volume_pred = pred_volume_dict["pred_volumes"]
volume_real.shape, volume_pred.shape

In [None]:
# X positions for the plots: steps 0..389 are drawn as the observed history and
# the remaining steps as the continuation.
# NOTE(review): 390 is presumably the number of minutes in one trading session — TODO confirm.
history_indexes = np.arange(390)
continuation_indexes = np.arange(390, price_real.shape[1])
history_indexes.shape, continuation_indexes.shape

In [None]:
# Shape check of the price arrays before rescaling.
price_real.shape, price_pred.shape

In [None]:
# Rescaled copies used by the price figure, followed by a spot check of stock 0
# at step 1000.
# NOTE(review): presumably prices are stored in 1/10000 of a currency unit — TODO confirm.
price_real_ = price_real / 10000
price_pred_ = price_pred / 10000
price_real_[0, 1000], price_pred_[0, 1000]

In [None]:
# Font sizes shared by every figure below.
# NOTE(review): the trailing numbers look like previously used values — TODO confirm or drop.
FONT_SIZE_TITLE_PLOT = 48  # 40
FONT_SIZE_TITLE_AX = 36  # 30
FONT_SIZE_LABEL = 30  # 24
FONT_SIZE_TICKS = 24  # 20
FONT_SIZE_LEGEND = 32  # 28

# PRICES

In [None]:
# 2x2 grid with one panel per stock: observed history, real continuation and
# synthetic continuation of the rescaled prices, split by a line at step 390.
fig, axes = plt.subplots(2, 2)
legend_elements = [
    Line2D([0], [0], color=series_color, lw=2, label=series_label)
    for series_color, series_label in (
        ("C0", "Observed"),
        ("C1", "Real continuation"),
        ("C2", "Synthetic continuation"),
    )
]

# axes.ravel() walks the grid row by row, i.e. in the order of stock_names.
for linear_index, ax in enumerate(axes.ravel()):
    ax.set_title(f"{stock_names[linear_index]}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

    ax.plot(history_indexes, price_real_[linear_index, :390], color="C0")
    ax.plot(continuation_indexes, price_real_[linear_index, 390:], color="C1")
    ax.plot(continuation_indexes, price_pred_[linear_index, 390:], color="C2")

    # Mark the history/continuation boundary and give it its own tick, keeping
    # only the interior automatic ticks.
    ax.axvline(x=390, color="r")
    ax.set_xticks(list(ax.get_xticks()[1:-1]) + [390])

fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(
    handles=legend_elements,
    loc="upper center",
    ncol=3,
    fontsize=FONT_SIZE_LEGEND,
    frameon=False,
    bbox_to_anchor=(0.5, 0.97),
)
fig.tight_layout(rect=[0, -0.03, 1, 1])
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/prices.pdf")
plt.show()
plt.close(fig)

# Couples

In [None]:
# Pairwise correlation matrices between the stocks' price series (one row per
# stock), for the real and the predicted data.
# NOTE(review): the full series is used, so the first 390 steps are included on
# both sides — confirm whether only the continuation should be compared.
corr_real = np.corrcoef(price_real)
corr_pred = np.corrcoef(price_pred)
corr_real, corr_pred

In [None]:
# One figure per pair of stocks. Left column = first stock, right column =
# second stock; top row = observed history + real continuation, bottom row =
# observed history + synthetic continuation. The y-labels of the right column
# show the pairwise price correlation (real on top, synthetic below).

# Loop-invariant pieces, built once instead of on every iteration.
legend_elements = [
    Line2D([0], [0], color="C0", lw=2, label="Observed"),
    Line2D([0], [0], color="C1", lw=2, label="Real continuation"),
    Line2D([0], [0], color="C2", lw=2, label="Synthetic continuation"),
]
history_indexes = np.arange(390)
continuation_indexes = np.arange(390, price_real.shape[1])

# savefig does not create missing folders, so make sure the target exists.
couples_dir = f"{PROJECT_FOLDER}/plot_finali/multistock/couples"
os.makedirs(couples_dir, exist_ok=True)

# Carry the row index along with the name instead of looking it up with
# list.index, which would pick the wrong row if a ticker appeared twice.
for (stock_index_1, stock_name_1), (stock_index_2, stock_name_2) in combinations(enumerate(stock_names), 2):
    fig, axes = plt.subplots(2, 2)

    corr_real_12 = corr_real[stock_index_1][stock_index_2]
    corr_pred_12 = corr_pred[stock_index_1][stock_index_2]

    for column, (stock_index, stock_name) in enumerate(
        [(stock_index_1, stock_name_1), (stock_index_2, stock_name_2)]
    ):
        real_series = price_real[stock_index]
        continuation_pred = price_pred[stock_index, 390:]
        real_ax, synthetic_ax = axes[0, column], axes[1, column]

        real_ax.set_title(f"{stock_name} - Real", fontsize=FONT_SIZE_TITLE_AX)
        synthetic_ax.set_title(f"{stock_name} - Synthetic", fontsize=FONT_SIZE_TITLE_AX)

        # Both rows start from the same observed history.
        real_ax.plot(history_indexes, real_series[:390], color="C0")
        synthetic_ax.plot(history_indexes, real_series[:390], color="C0")

        real_ax.plot(continuation_indexes, real_series[390:], color="C1")
        synthetic_ax.plot(continuation_indexes, continuation_pred, color="C2")

        # Same y-range in both rows of a column, so the real and synthetic
        # continuations of a stock are visually comparable.
        y_limits = [
            min(real_series.min(), continuation_pred.min()),
            max(real_series.max(), continuation_pred.max()),
        ]
        for ax in (real_ax, synthetic_ax):
            ax.set_ylim(y_limits)
            ax.set_yticklabels([])
            ax.set_xticklabels([])

    axes[0, 1].set_ylabel(
        round(corr_real_12, 2), rotation="horizontal", horizontalalignment="right", fontsize=FONT_SIZE_LABEL
    )
    axes[1, 1].set_ylabel(
        round(corr_pred_12, 2), rotation="horizontal", horizontalalignment="right", fontsize=FONT_SIZE_LABEL
    )

    fig.legend(
        handles=legend_elements,
        loc="upper center",
        ncol=3,
        fontsize=FONT_SIZE_LEGEND,
        bbox_to_anchor=(0.5, 0.97),
        frameon=False,
    )
    fig.suptitle("Prices", fontsize=FONT_SIZE_TITLE_PLOT, y=1.0)
    fig.tight_layout()
    # Save through the figure itself rather than pyplot's "current figure".
    fig.savefig(f"{couples_dir}/{stock_name_1}-{stock_name_2}.pdf")
    # plt.show()
    plt.close(fig)

# VOLUMES

In [None]:
# 2x2 grid with one panel per stock: observed history, real continuation and
# synthetic continuation of the traded volumes, split by a line at step 390.
fig, axes = plt.subplots(2, 2)
legend_elements = [
    Line2D([0], [0], color=series_color, lw=2, label=series_label)
    for series_color, series_label in (
        ("C0", "Observed"),
        ("C1", "Real continuation"),
        ("C2", "Synthetic continuation"),
    )
]

# axes.ravel() walks the grid row by row, i.e. in the order of stock_names.
for linear_index, ax in enumerate(axes.ravel()):
    ax.set_title(f"{stock_names[linear_index]}", fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Steps", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Shares", fontsize=FONT_SIZE_LABEL)
    ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

    ax.plot(history_indexes, volume_real[linear_index, :390], color="C0")
    ax.plot(continuation_indexes, volume_real[linear_index, 390:], color="C1")
    # Slightly transparent so the real continuation stays visible underneath.
    ax.plot(continuation_indexes, volume_pred[linear_index, 390:], color="C2", alpha=0.7)

    # Mark the history/continuation boundary and give it its own tick, keeping
    # only the interior automatic ticks.
    ax.axvline(x=390, color="r")
    ax.set_xticks(list(ax.get_xticks()[1:-1]) + [390])

fig.suptitle("Volumes", fontsize=FONT_SIZE_TITLE_PLOT, y=1.00)
fig.legend(
    handles=legend_elements,
    loc="upper center",
    ncol=3,
    fontsize=FONT_SIZE_LEGEND,
    bbox_to_anchor=(0.5, 0.96),
    frameon=False,
)
fig.tight_layout(rect=[0, -0.03, 1, 1])
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volumes.pdf")
plt.show()
plt.close(fig)

# STYLISED FACTS

In [None]:
# Scratch arithmetic (about 71.7).
# NOTE(review): what 4300 and 60 stand for is not stated — TODO document or remove.
4300 / 60

In [None]:
class StylisedFacts:
    """Plot stylised facts of real versus synthetic price/volume series.

    All arrays are indexed ``[stock, step]`` and row ``i`` is labelled with
    ``stock_names[i]``. The histogram and line plots draw a fixed 2x2 grid, so
    they need at least four stocks. Font sizes are read from the module-level
    ``FONT_SIZE_*`` constants.
    """

    def __init__(self, stock_names, price_real=None, price_synthetic=None, volume_real=None, volume_pred=None):
        """Store the series; each plot asserts that the inputs it needs are not ``None``."""
        self.stock_names = stock_names
        self.price_real = price_real
        self.price_synthetic = price_synthetic
        self.volume_real = volume_real
        self.volume_pred = volume_pred

    @staticmethod
    def __compute_log_returns(data):
        """Return the log-returns along the last axis, ``diff(log(data))``."""
        return np.diff(np.log(data))

    @staticmethod
    def __compute_rolling_autocorrelation(data, window, lag):
        """Return, per series, the lag-``lag`` autocorrelation over centred rolling windows.

        The result is a list with one 1-D array per series rather than a 2-D
        array: windows whose autocorrelation is NaN are dropped, so the series
        may end up with different lengths.
        """
        return [
            pd.Series(series)
            .rolling(window, center=True)
            .apply(lambda chunk: chunk.autocorr(lag=lag), raw=False)
            .dropna()
            .to_numpy()
            for series in data
        ]

    @staticmethod
    def __compute_autocorrelation(data, lags):
        """Return an array ``[series, lag - 1]`` with the autocorrelation at lags ``1..lags``."""
        rows = []
        for series in data:
            as_series = pd.Series(series)
            rows.append([as_series.autocorr(lag) for lag in range(1, lags + 1)])
        return np.asarray(rows)

    def fat_tailed_distribution(self, log=False, n_bins=75):
        """Histogram of the per-step log-returns, real versus synthetic."""
        assert self.price_real is not None and self.price_synthetic is not None
        minutely_log_returns_real = self.__compute_log_returns(self.price_real)
        minutely_log_returns_pred = self.__compute_log_returns(self.price_synthetic)
        self.__hist(
            minutely_log_returns_real,
            minutely_log_returns_pred,
            title="Minutely log-returns",
            xlabel="Returns",
            log=log,
            n_bins=n_bins,
        )

    def aggregation_normality(self, n_minutes=10, log=False, n_bins=75):
        """Histogram of log-returns of the prices subsampled every ``n_minutes`` steps."""
        assert self.price_real is not None and self.price_synthetic is not None
        log_returns_real = self.__compute_log_returns(self.price_real[:, ::n_minutes])
        log_returns_pred = self.__compute_log_returns(self.price_synthetic[:, ::n_minutes])
        self.__hist(
            log_returns_real,
            log_returns_pred,
            title=f"{n_minutes} Minutes log-returns",
            xlabel="Returns",
            log=log,
            n_bins=n_bins,
        )

    def absence_autocorrelation(self, lag, window):
        """Histogram of the rolling lag-``lag`` autocorrelation of the per-step log-returns."""
        assert self.price_real is not None and self.price_synthetic is not None
        minutely_log_returns_real = self.__compute_log_returns(self.price_real)
        minutely_log_returns_pred = self.__compute_log_returns(self.price_synthetic)

        autocorrelation_real = self.__compute_rolling_autocorrelation(minutely_log_returns_real, window, lag)
        autocorrelation_pred = self.__compute_rolling_autocorrelation(minutely_log_returns_pred, window, lag)

        self.__hist(
            autocorrelation_real,
            autocorrelation_pred,
            title="Autocorrelation (lag={}, window={})".format(lag, window),
            xlabel="Correlation coefficient",
            log=False,
        )

    def _volatility_autocorrelations(self, lags, absolute=True):
        """Return ``(real, synthetic)`` autocorrelation arrays of shape ``(n_stocks, lags)``.

        With ``absolute=True`` the autocorrelation is taken on the absolute
        log-returns (a volatility proxy); with ``False`` on the signed returns.
        """
        assert self.price_real is not None and self.price_synthetic is not None
        returns_real = self.__compute_log_returns(self.price_real)
        returns_pred = self.__compute_log_returns(self.price_synthetic)
        if absolute:
            returns_real, returns_pred = np.abs(returns_real), np.abs(returns_pred)
        return (
            self.__compute_autocorrelation(returns_real, lags),
            self.__compute_autocorrelation(returns_pred, lags),
        )

    def volatility_clustering(self, lags, absolute=True):
        """Line plot of the autocorrelation of the return magnitude at lags ``1..lags``.

        Volatility clustering concerns the size of the returns, so the absolute
        log-returns are used by default. ``absolute=False`` reproduces the
        previous behaviour (signed returns), which measures the same quantity
        as ``absence_autocorrelation`` instead.
        """
        autocorrelations_real, autocorrelations_pred = self._volatility_autocorrelations(lags, absolute)
        self.__line(
            autocorrelations_real,
            autocorrelations_pred,
            "Volatility Clustering/Long Range Dependence",
            "Lag",
            "Correlation\ncoefficient",
        )

    def _volume_volatility_coefficients(self):
        """Return ``(real, synthetic)`` arrays with one volume/|log-return| correlation per stock."""
        assert (
            self.price_real is not None
            and self.price_synthetic is not None
            and self.volume_real is not None
            and self.volume_pred is not None
        )
        volatility_real = np.abs(self.__compute_log_returns(self.price_real))
        volatility_pred = np.abs(self.__compute_log_returns(self.price_synthetic))

        # The first log-return belongs to the second step, so the first volume
        # sample is dropped to align the two series.
        volume_real = self.volume_real[:, 1:]
        volume_pred = self.volume_pred[:, 1:]

        corr_real = np.array(
            [np.corrcoef(volume, volatility)[0, 1] for volume, volatility in zip(volume_real, volatility_real)]
        )
        corr_pred = np.array(
            [np.corrcoef(volume, volatility)[0, 1] for volume, volatility in zip(volume_pred, volatility_pred)]
        )
        return corr_real, corr_pred

    def volume_volatility_correlation(
        self,
    ):
        """Grouped bar chart of the per-stock volume/volatility correlation, real versus synthetic.

        There is a single coefficient per stock, so a histogram per stock (what
        this method used to request) cannot be drawn; the coefficients are
        shown side by side instead.
        """
        corr_real, corr_pred = self._volume_volatility_coefficients()

        positions = np.arange(len(corr_real))
        bar_width = 0.4

        fig, ax = plt.subplots(figsize=(16, 9))
        ax.bar(positions - bar_width / 2, corr_real, bar_width, color="C1", label="Real")
        ax.bar(positions + bar_width / 2, corr_pred, bar_width, color="C2", label="Synthetic")
        ax.axhline(0, color="k", linewidth=1)
        ax.set_xticks(positions)
        ax.set_xticklabels(self.stock_names, fontsize=FONT_SIZE_TICKS)
        ax.set_ylabel("Correlation\ncoefficient", fontsize=FONT_SIZE_LABEL)
        ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

        fig.suptitle("Volume/Volatility Correlation", y=1, fontsize=FONT_SIZE_TITLE_PLOT)
        fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.96))
        plt.tight_layout()
        plt.show()
        plt.close(fig)

    def __hist(self, data_real, data_synthetic, title="Simulation data histogram", xlabel="Values", log=False, n_bins=75):
        """Draw, per stock, step histograms of the real and synthetic samples on a 2x2 grid.

        Both histograms of a panel share ``n_bins`` bins spanning the range of
        the real samples, so synthetic values outside that range are not drawn.
        """
        fig, axes = plt.subplots(2, 2, figsize=(16, 9))
        for i, ax in enumerate(axes.ravel()):
            real = np.asarray(data_real[i])
            synthetic = np.asarray(data_synthetic[i])
            # n_bins bins need n_bins + 1 edges.
            bins = np.linspace(real.min(), real.max(), n_bins + 1)
            # Label only the first panel so the figure legend gets one entry per series.
            label_real, label_synthetic = ("Real", "Synthetic") if i == 0 else (None, None)
            ax.hist(
                real,
                bins=bins,
                color="C1",
                log=log,
                alpha=1,
                histtype="step",
                linewidth=3,
                label=label_real,
            )
            ax.hist(
                synthetic,
                bins=bins,
                color="C2",
                log=log,
                alpha=1,
                histtype="step",
                linewidth=3,
                label=label_synthetic,
            )
            ax.set_title(self.stock_names[i], fontsize=FONT_SIZE_TITLE_AX)
            ax.set_xlabel(xlabel, fontsize=FONT_SIZE_LABEL)
            ax.set_ylabel(("Log\n" if log else "") + "Frequency", fontsize=FONT_SIZE_LABEL)
            ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
            ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

        fig.suptitle(title, y=1, fontsize=FONT_SIZE_TITLE_PLOT)
        fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.96))
        plt.tight_layout()
        plt.show()
        plt.close(fig)

    def __line(self, data_real, data_synthetic, title="Simulation data line", xlabel="X", ylabel="Y"):
        """Draw, per stock, the real and synthetic rows as lines over x = 1..n on a 2x2 grid."""
        fig, axes = plt.subplots(2, 2, figsize=(16, 9))
        x = np.arange(data_real.shape[1]) + 1

        for i, ax in enumerate(axes.ravel()):
            # Label only the first panel so the figure legend gets one entry per series.
            label_real, label_synthetic = ("Real", "Synthetic") if i == 0 else (None, None)

            ax.plot(x, data_real[i], color="C1", linewidth=4, label=label_real)
            ax.plot(x, data_synthetic[i], color="C2", linewidth=4, label=label_synthetic)
            ax.set_xticks(x)

            ax.set_title(self.stock_names[i], fontsize=FONT_SIZE_TITLE_AX)
            ax.set_xlabel(xlabel, fontsize=FONT_SIZE_LABEL)
            ax.set_ylabel(ylabel, fontsize=FONT_SIZE_LABEL)
            ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
            ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

        fig.suptitle(title, y=1, fontsize=FONT_SIZE_TITLE_PLOT)
        fig.legend(loc="upper center", ncol=3, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.96))
        plt.tight_layout()
        plt.show()
        plt.close(fig)

In [None]:
# Plot helper fed with the raw (unscaled) price and volume arrays.
sfs = StylisedFacts(
    stock_names, price_real=price_real, price_synthetic=price_pred, volume_real=volume_real, volume_pred=volume_pred
)

In [None]:
# Volatility-clustering line plot over lags 1..10.
sfs.volatility_clustering(lags=10)

In [None]:
# Histogram of the lag-1 autocorrelation of the log-returns over rolling 30-step windows.
sfs.absence_autocorrelation(1, 30)

In [None]:
# Log-scale histogram of the per-step log-returns.
sfs.fat_tailed_distribution(log=True, n_bins=75)

In [None]:
# Log-scale histogram of the log-returns of prices subsampled every 30 steps.
sfs.aggregation_normality(30, log=True, n_bins=75)

## VOLUME VOLATILITY CORRELATION

In [None]:
# Shape check of the four arrays that feed the block averages below.
price_real.shape, price_pred.shape, volume_real.shape, volume_pred.shape

In [None]:
def compute_avg_log_returns(x, delta):
    """Average the per-step log-returns over consecutive windows of ``delta`` steps.

    ``x`` is laid out as ``[sequence_length, n_stocks]``. The log-returns are
    computed as ``log1p(pct_change)``, their rolling mean over ``delta`` rows is
    taken, and every ``delta``-th row from row ``delta`` onwards is kept. The
    result is transposed, so rows index stocks (size-1 axes are squeezed away
    first).
    """
    prices = pd.DataFrame(x)
    # pct_change leaves an undefined first row, which is dropped.
    log_returns = np.log1p(prices.pct_change().iloc[1:])
    rolling_means = log_returns.rolling(delta).mean().to_numpy().squeeze()
    # Rows delta, 2*delta, ...: row 0 of the stride is skipped.
    return rolling_means[delta::delta].T


def compute_avg_volumes(x, delta):
    """Average ``log1p(volume)`` over consecutive windows of ``delta`` steps.

    ``x`` is laid out as ``[sequence_length, n_stocks]``. The rolling mean of
    the log-volumes over ``delta`` rows is taken and every ``delta``-th row
    from row ``delta`` onwards is kept. The result is transposed, so rows
    index stocks (size-1 axes are squeezed away first).
    """
    log_volumes = np.log1p(pd.DataFrame(x))
    rolling_means = log_volumes.rolling(delta).mean().to_numpy().squeeze()
    # Rows delta, 2*delta, ...: row 0 of the stride is skipped.
    return rolling_means[delta::delta].T

In [None]:
# 15-step block averages per stock. The helpers expect [step, stock] input,
# hence the transposes.
real_avg_log_returns, pred_avg_log_returns = (
    compute_avg_log_returns(prices.T, 15) for prices in (price_real, price_pred)
)
real_avg_volumes, pred_avg_volumes = (
    compute_avg_volumes(volumes.T, 15) for volumes in (volume_real, volume_pred)
)

In [None]:
# Shape check of the block averages (rows index stocks after the transpose in the helpers).
real_avg_log_returns.shape, real_avg_volumes.shape, pred_avg_log_returns.shape, pred_avg_volumes.shape

In [None]:
# Scatter of block-averaged log-volumes against block-averaged log-returns,
# one panel per stock, real versus synthetic.
fig, axes = plt.subplots(2, 2, figsize=(16, 9))
axes = axes.ravel()

for target_idx, ax in enumerate(axes):
    stock_name = stock_names[target_idx]

    ax.set_title(stock_name, fontsize=FONT_SIZE_TITLE_AX)
    ax.set_xlabel("Avg log-returns", fontsize=FONT_SIZE_LABEL)
    ax.set_ylabel("Avg\nlog-volumes", fontsize=FONT_SIZE_LABEL)
    ax.xaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)
    ax.yaxis.set_tick_params(labelsize=FONT_SIZE_TICKS)

    # (x values, y values, colour, legend label, extra scatter options)
    for avg_returns, avg_volumes, point_color, series_label, extra_options in (
        (real_avg_log_returns, real_avg_volumes, "orange", "Real", {}),
        (pred_avg_log_returns, pred_avg_volumes, "green", "Synthetic", {"alpha": 0.5}),
    ):
        ax.scatter(
            avg_returns[target_idx],
            avg_volumes[target_idx],
            color=point_color,
            # Label only the first panel so the figure legend gets one entry per series.
            label=series_label if target_idx == 0 else None,
            **extra_options,
        )

fig.suptitle("Volume-Volatility Correlation", fontsize=FONT_SIZE_TITLE_PLOT, y=1.00)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, bbox_to_anchor=(0.5, 0.96), frameon=False)
fig.tight_layout()
# plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/volume_volatility_correlation.pdf")
plt.show()
plt.close(fig)