In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import pickle
import tqdm
from sklearn.metrics import mean_squared_error

In [None]:
# Global matplotlib defaults shared by every figure in this notebook.
# Individual plots below still pass explicit font sizes where they need larger text.
plt.rcParams.update(
    {
        "figure.figsize": [16, 9],
        # "figure.dpi": 300,
        "font.size": 20,
        "axes.labelsize": 20,
        "axes.titlesize": 24,
        "xtick.labelsize": 16,
        "ytick.labelsize": 16,
        "font.family": "serif",
    }
)

In [None]:
# Font sizes passed explicitly to the plotting calls, listed from largest to smallest.
# The number in each trailing comment is kept from the original cell
# (presumably the value used before the sizes were enlarged -- confirm).
FONT_SIZE_TITLE_PLOT = 48  # 40
FONT_SIZE_TITLE_AX = 36  # 30
FONT_SIZE_LEGEND = 32  # 28
FONT_SIZE_LABEL = 30  # 24
FONT_SIZE_TICKS = 24  # 20

In [None]:
# Root folder of the project. It is a relative path, so it is resolved against the
# kernel's current working directory; every CSV input and saved figure path in this
# notebook is built from it.
PROJECT_FOLDER = "PycharmProjects/thesis-gan"

In [None]:
# Cross-correlation distance between mid-prices on the train split:
# one CSV column per stock pair (rows presumably correspond to logged epochs -- confirm against the export).
df_cross_corr_prices_train = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist/cross_corr_dist_price_train.csv")
# Select each of the six unordered pairs of {KO, PEP, NVDA, KSU} exactly once.
# The KO-NVDA column was previously listed twice, which selected it twice and gave
# that pair double weight in the row-wise mean computed below.
df_cross_corr_prices_train = df_cross_corr_prices_train[
    [
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-NVDA_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/KO_mid_price-PEP_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/NVDA_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/PEP_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - train_corr_dist/PEP_mid_price-NVDA_mid_price",
    ]
]
# Average across the pair columns: one value per row, returned as a NumPy array.
cross_corr_distance_price_train = df_cross_corr_prices_train.mean(axis=1).values

In [None]:
# Cross-correlation distance between mid-prices on the validation split:
# one CSV column per stock pair (rows presumably correspond to logged epochs -- confirm against the export).
df_cross_corr_prices_val = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist/cross_corr_dist_price_val.csv")
# Select each of the six unordered pairs of {KO, PEP, NVDA, KSU} exactly once.
# The KO-NVDA column was previously listed twice, which selected it twice and gave
# that pair double weight in the row-wise mean computed below.
df_cross_corr_prices_val = df_cross_corr_prices_val[
    [
        "New data, multistock, prices, conv - val_corr_dist/KO_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - val_corr_dist/KO_mid_price-NVDA_mid_price",
        "New data, multistock, prices, conv - val_corr_dist/KO_mid_price-PEP_mid_price",
        "New data, multistock, prices, conv - val_corr_dist/NVDA_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - val_corr_dist/PEP_mid_price-KSU_mid_price",
        "New data, multistock, prices, conv - val_corr_dist/PEP_mid_price-NVDA_mid_price",
    ]
]
# Average across the pair columns: one value per row, returned as a NumPy array.
cross_corr_distance_price_val = df_cross_corr_prices_val.mean(axis=1).values

In [None]:
# Cross-correlation distance between volumes on the train split:
# one CSV column per stock pair (rows presumably correspond to logged epochs -- confirm against the export).
df_cross_corr_volumes_train = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist/cross_corr_dist_volume_train.csv")
# Select each of the six unordered pairs of {KO, PEP, NVDA, KSU} exactly once.
# The KO-NVDA column was previously listed twice, which selected it twice and gave
# that pair double weight in the row-wise mean computed below.
df_cross_corr_volumes_train = df_cross_corr_volumes_train[
    [
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-NVDA_volume",
        "New data, multistock, volumes, conv - train_corr_dist/KO_volume-PEP_volume",
        "New data, multistock, volumes, conv - train_corr_dist/NVDA_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/PEP_volume-KSU_volume",
        "New data, multistock, volumes, conv - train_corr_dist/PEP_volume-NVDA_volume",
    ]
]
# Average across the pair columns: one value per row, returned as a NumPy array.
cross_corr_distance_volume_train = df_cross_corr_volumes_train.mean(axis=1).values

In [None]:
# Cross-correlation distance between volumes on the validation split:
# one CSV column per stock pair (rows presumably correspond to logged epochs -- confirm against the export).
df_cross_corr_volumes_val = pd.read_csv(f"{PROJECT_FOLDER}/data/cross_corr_dist/cross_corr_dist_volume_val.csv")
# Select each of the six unordered pairs of {KO, PEP, NVDA, KSU} exactly once.
# The KO-NVDA column was previously listed twice, which selected it twice and gave
# that pair double weight in the row-wise mean computed below.
df_cross_corr_volumes_val = df_cross_corr_volumes_val[
    [
        "New data, multistock, volumes, conv - val_corr_dist/KO_volume-KSU_volume",
        "New data, multistock, volumes, conv - val_corr_dist/KO_volume-NVDA_volume",
        "New data, multistock, volumes, conv - val_corr_dist/KO_volume-PEP_volume",
        "New data, multistock, volumes, conv - val_corr_dist/NVDA_volume-KSU_volume",
        "New data, multistock, volumes, conv - val_corr_dist/PEP_volume-KSU_volume",
        "New data, multistock, volumes, conv - val_corr_dist/PEP_volume-NVDA_volume",
    ]
]
# Average across the pair columns: one value per row, returned as a NumPy array.
cross_corr_distance_volume_val = df_cross_corr_volumes_val.mean(axis=1).values

In [None]:
# Upper bound on how many leading points of each series are drawn:
# the plotting cells slice every curve with [:MAX_EPOCHS].
MAX_EPOCHS = 150

In [None]:
# Train-split figure: averaged cross-correlation distance for prices and volumes,
# limited to the first MAX_EPOCHS points of each series.
fig = plt.figure(1, figsize=(16, 9))
ax = fig.gca()
for series, line_color, line_label in (
    (cross_corr_distance_price_train, "C8", "Price"),
    (cross_corr_distance_volume_train, "C9", "Volume"),
):
    ax.plot(series[:MAX_EPOCHS], color=line_color, label=line_label)

ax.set_xlabel("Epoch", fontsize=FONT_SIZE_LABEL)
ax.set_ylabel(r"$MSE(\rho(\cdot, \cdot), \rho(\cdot, \cdot))$", fontsize=FONT_SIZE_LABEL, rotation=90)
plt.xticks(fontsize=FONT_SIZE_TICKS)
plt.yticks(fontsize=FONT_SIZE_TICKS)

# Title and legend are attached to the figure itself, with the legend anchored just below the title.
fig.suptitle("Cross-Correlation Distance - Train", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
fig.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/corr_dist/cross_corr_dist_train.pdf")
# Release the figure once it has been written to disk (figure number 1 is reused by the next plot).
plt.close(fig)

In [None]:
# Validation-split figure: averaged cross-correlation distance for prices and volumes,
# limited to the first MAX_EPOCHS points of each series.
fig = plt.figure(1, figsize=(16, 9))
ax = fig.gca()
for series, line_color, line_label in (
    (cross_corr_distance_price_val, "C8", "Price"),
    (cross_corr_distance_volume_val, "C9", "Volume"),
):
    ax.plot(series[:MAX_EPOCHS], color=line_color, label=line_label)

ax.set_xlabel("Epoch", fontsize=FONT_SIZE_LABEL)
ax.set_ylabel(r"$MSE(\rho(\cdot, \cdot), \rho(\cdot, \cdot))$", fontsize=FONT_SIZE_LABEL, rotation=90)
plt.xticks(fontsize=FONT_SIZE_TICKS)
plt.yticks(fontsize=FONT_SIZE_TICKS)

# Title and legend are attached to the figure itself, with the legend anchored just below the title.
fig.suptitle("Cross-Correlation Distance - Validation", fontsize=FONT_SIZE_TITLE_PLOT, y=1)
fig.legend(loc="upper center", ncol=2, fontsize=FONT_SIZE_LEGEND, frameon=False, bbox_to_anchor=(0.5, 0.97))
fig.tight_layout()
fig.savefig(f"{PROJECT_FOLDER}/plot_finali/multistock/corr_dist/cross_corr_dist_val.pdf")
# Release the figure once it has been written to disk.
plt.close(fig)

In [None]:
# Load the validation CSV for the four tickers named in the file name and show the
# pairwise correlation matrix of its columns as the cell output.
val_csv_path = f"{PROJECT_FOLDER}/data/midprice_volume__KO_PEP_NVDA_KSU__val.csv"
df_val = pd.read_csv(val_csv_path)
df_val.corr()