In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pickle
import tqdm
from sklearn.metrics import mean_squared_error

In [None]:
# Notebook-wide matplotlib defaults; individual figures override some of
# these with explicit `fontsize=` / `figsize=` arguments.
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        # "figure.dpi": 300,  # left disabled, as in the original cell
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Font sizes passed explicitly as `fontsize=` to plotting calls in this notebook.
# The number after "alt:" is the alternative value that was noted next to each
# constant (presumably an earlier setting - confirm before relying on it).
FONT_SIZE_TITLE_PLOT = 48  # figure-level title (alt: 40)
FONT_SIZE_TITLE_AX = 36  # presumably per-axes titles; not referenced in the cells shown here (alt: 30)
FONT_SIZE_LABEL = 30  # axis labels (alt: 24)
FONT_SIZE_TICKS = 24  # tick labels (alt: 20)
FONT_SIZE_LEGEND = 32  # legend entries (alt: 28)

In [None]:
# Root of the project checkout. This is a relative path, so it is resolved
# against the kernel's current working directory; every data, storage and
# output path in this notebook is built from it.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Real volume series stored for run 136a5wci.
# NOTE(review): pickle.load can execute arbitrary code - only load files
# produced by this project.
with open(PROJECT_FOLDER + "/storage/thesis-gan/136a5wci/reals.pickle", mode="rb") as f:
    volume_real = pickle.load(f)["volumes"]

# Reference correlation matrix; np.corrcoef treats each row of its input as
# one variable (rowvar=True is the default).
corr_real = np.corrcoef(volume_real)
corr_real

In [None]:
# For each of the 300 saved prediction files (epoch index 0..299) compare the
# correlation matrix of the synthetic volumes with the real one.
corr_dist = []
for i in range(300):
    with open(
        PROJECT_FOLDER
        + f"/storage/thesis-gan/136a5wci/preds_epoch={i}-seed=42-target_price=None-target_volume=volume-sampling_seed=599121577.pickle",
        mode="rb",
    ) as f:
        synthetic = pickle.load(f)["pred_volumes"]

    corr_synthetic = np.corrcoef(synthetic)
    # Mean of the squared element-wise differences between the two matrices.
    dist = mean_squared_error(corr_real, corr_synthetic)
    corr_dist.append(dist)

# One distance per epoch, as a 1-D array.
corr_dist = np.asarray(corr_dist)
corr_dist.shape

In [None]:
# Epochs whose correlation distance lies in the half-open band [0.005, 0.01).
indexes = np.where((0.005 <= corr_dist) & (corr_dist < 0.01))
print(len(indexes[0]))  # how many epochs fall inside the band
indexes
251

In [None]:
# Cross-correlation distances between stock pairs (mid prices), one column
# per pair; rows are presumably training epochs (they are plotted against an
# "Epoch" axis further down) - confirm against the export.
df_cross_corr_prices = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist_train_prices.csv")

# Keep exactly one column per unordered stock pair (4 stocks -> 6 pairs).
# The KO-NVDA column was previously listed twice; selecting a label twice
# duplicates the column, which gave that pair double weight in the row-wise
# mean below. Fixing this slightly changes the averaged values, so any
# threshold tuned on the old numbers should be re-checked.
df_cross_corr_prices = df_cross_corr_prices[
    [
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-NVDA_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-PEP_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/NVDA_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/PEP_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/PEP_mid_price-NVDA_mid_price",
    ]
]

# Unweighted average over the six pairs, one value per row, as a NumPy array.
cross_corr_distance_price = df_cross_corr_prices.mean(axis=1).values

In [None]:
# Cross-correlation distances between stock pairs (volumes), one column per
# pair; rows are presumably training epochs (they are plotted against an
# "Epoch" axis further down) - confirm against the export.
df_cross_corr_volumes = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist_train_volumes.csv")

# Keep exactly one column per unordered stock pair (4 stocks -> 6 pairs).
# The KO-NVDA column was previously listed twice; selecting a label twice
# duplicates the column, which gave that pair double weight in the row-wise
# mean below.
df_cross_corr_volumes = df_cross_corr_volumes[
    [
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-NVDA_volume",
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-PEP_volume",
        "New data, multistock, volumes, conv - train_corr_dist/NVDA_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/PEP_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/PEP_volume-NVDA_volume",
    ]
]

# Unweighted average over the six pairs, one value per row, as a NumPy array.
cross_corr_distance_volume = df_cross_corr_volumes.mean(axis=1).values

In [None]:
# Line plot of the averaged cross-correlation distance for prices and volumes,
# written to a PDF file instead of being displayed.
# Figure number 1 is requested explicitly, so an already open figure 1 would be reused.
fig = plt.figure(1, figsize=(16, 9))
# Only the first 300 entries of each series are drawn.
plt.plot(cross_corr_distance_price[:300], color="C8", label="Price")
plt.plot(cross_corr_distance_volume[:300], color="C9", label="Volume")

plt.xlabel("Epoch", fontsize=FONT_SIZE_LABEL)
plt.ylabel(r"$MSE(\rho(\cdot, \cdot), \rho(\cdot, \cdot))$", fontsize=FONT_SIZE_LABEL, rotation=90)
plt.xticks(fontsize=FONT_SIZE_TICKS)
plt.yticks(fontsize=FONT_SIZE_TICKS)

# Title at the very top of the figure, with the legend anchored just below it.
fig.suptitle("Average Cross-Correlation Distance", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
plt.tight_layout()
# The target directory must already exist; savefig does not create it.
plt.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/cross_corr_dist_train.pdf")
# plt.show()
# Release the figure so it is not rendered again or kept in memory.
plt.close(fig)

In [None]:
# Positions whose averaged price distance lies strictly above 0.003 and
# strictly below 0.01. Note that this overwrites the earlier `indexes`.
indexes = np.where((0.003 < cross_corr_distance_price) & (cross_corr_distance_price < 0.01))
indexes

In [None]:
# Show the averaged price distances at the positions currently stored in `indexes`.
cross_corr_distance_price[indexes]

In [None]:
# Tickers of the four stocks; the order fixes which stock goes into each of
# the first four panels of the split-overview figure below.
stock_names = ["KO", "PEP", "NVDA", "KSU"]

In [None]:
# Training split: read the CSV (first column is the index) and keep only the
# four mid-price columns, in a fixed order.
df_train = pd.read_csv(
    f"{PROJECT_FOLDER}/data/midprice_volume__KO_PEP_NVDA_KSU__train.csv",
    index_col=0,
).loc[:, ["mid_price_KO", "mid_price_PEP", "mid_price_NVDA", "mid_price_KSU"]]

# Pairwise correlation of the mid prices (pandas default: Pearson).
df_train_corr = df_train.corr()

In [None]:
# Validation split: read the CSV (first column is the index) and keep only
# the four mid-price columns, in a fixed order.
df_val = pd.read_csv(
    f"{PROJECT_FOLDER}/data/midprice_volume__KO_PEP_NVDA_KSU__val.csv",
    index_col=0,
).loc[:, ["mid_price_KO", "mid_price_PEP", "mid_price_NVDA", "mid_price_KSU"]]

# Pairwise correlation of the mid prices (pandas default: Pearson).
df_val_corr = df_val.corr()

In [None]:
# Test split: read the CSV (first column is the index) and keep only the four
# mid-price columns, in a fixed order.
df_test = pd.read_csv(
    f"{PROJECT_FOLDER}/data/midprice_volume__KO_PEP_NVDA_KSU__test.csv",
    index_col=0,
).loc[:, ["mid_price_KO", "mid_price_PEP", "mid_price_NVDA", "mid_price_KSU"]]

# Pairwise correlation of the mid prices (pandas default: Pearson).
df_test_corr = df_test.corr()

In [None]:
# Sanity check: (rows, columns) of the train, validation and test frames.
df_train.shape, df_val.shape, df_test.shape

In [None]:
# Split-overview figure: a 3x2 grid with one panel per stock, each showing the
# train / val / test mid-price segments back to back. Between neighbouring
# panels the correlation of the two stocks is printed once per split.
# The bottom row repeats PEP and KO so that every one of the six stock pairs
# ends up in adjacent panels.

# Text placement in axes coordinates, keyed by where the numbers go relative
# to the panel: three (x, y) positions (train, val, test) plus the alignment
# keyword used for that placement.
_CORR_TEXT_LAYOUT = {
    "below": ([(0.1, -0.05), (0.5, -0.05), (0.9, -0.05)], {"horizontalalignment": "center"}),
    "top": ([(0.1, 0.9), (0.5, 0.9), (0.9, 0.9)], {"horizontalalignment": "center"}),
    "right": ([(1.05, 0.9), (1.05, 0.5), (1.05, 0.1)], {"verticalalignment": "center"}),
}


def _plot_price_splits(ax, stock_name, splits, with_labels=False):
    """Draw the consecutive train/val/test mid-price segments of one stock.

    Each segment starts on the x axis where the previous one ended. Offsets
    are taken from the actual segment lengths, so the figure no longer
    depends on hard-coded row counts. Legend labels are only attached when
    `with_labels` is true, so the figure legend lists each split once.
    """
    ax.set_title(stock_name)
    ax.set_xticks([])
    ax.set_yticks([])

    offset = 0
    for split_label, df_split, _corr, _color in splits:
        prices = df_split[f"mid_price_{stock_name}"].values
        ax.plot(
            range(offset, offset + len(prices)),
            prices,
            label=split_label if with_labels else None,
        )
        offset += len(prices)


def _annotate_pair_corr(ax, stock_a, stock_b, placement, splits, fontsize=20):
    """Print the per-split correlation of two stocks next to (or inside) `ax`.

    `placement` is a key of `_CORR_TEXT_LAYOUT`. Values are rounded to two
    decimals and coloured like the matching split's line.
    """
    positions, alignment = _CORR_TEXT_LAYOUT[placement]
    for (x, y), (_label, _df, corr, color) in zip(positions, splits):
        ax.text(
            x,
            y,
            round(corr[f"mid_price_{stock_a}"][f"mid_price_{stock_b}"], 2),
            transform=ax.transAxes,
            fontsize=fontsize,
            color=color,
            **alignment,
        )


# (legend label, data, correlation matrix, text colour). The colours follow
# the default colour cycle, i.e. the colours of the 1st/2nd/3rd line per panel.
price_splits = [
    ("train", df_train, df_train_corr, "C0"),
    ("val", df_val, df_val_corr, "C1"),
    ("test", df_test, df_test_corr, "C2"),
]

fig, axes = plt.subplots(3, 2, figsize=(16, 9))
axes = axes.ravel()

# First four panels: one per stock; only the very first panel contributes
# legend entries.
for panel_idx, (ax, stock_name) in enumerate(zip(axes[:4], stock_names)):
    _plot_price_splits(ax, stock_name, price_splits, with_labels=(panel_idx == 0))

# Bottom row: PEP and KO again (no legend labels).
_plot_price_splits(axes[4], "PEP", price_splits)
_plot_price_splits(axes[5], "KO", price_splits)

# (panel index, stock a, stock b, placement of the three numbers)
pair_annotations = [
    (0, "KO", "NVDA", "below"),
    (0, "KO", "PEP", "right"),
    (1, "PEP", "KSU", "below"),
    (2, "NVDA", "KSU", "right"),
    (4, "PEP", "NVDA", "top"),
    (4, "KO", "PEP", "right"),
    (5, "KO", "KSU", "top"),
]
for panel_idx, stock_a, stock_b, placement in pair_annotations:
    _annotate_pair_corr(axes[panel_idx], stock_a, stock_b, placement, price_splits)

fig.legend(ncol=3, frameon=False, loc="upper center")
# The layout must be computed before the figure is shown; calling it after
# plt.show() has no effect on the displayed figure (and, with backends that
# close figures on show, would act on a fresh empty figure instead).
fig.tight_layout()
plt.show()
plt.close(fig)