In [None]:
from research.datasets import Historical
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from research.interfaces import AssetData
import pandas as pd

In [None]:
# Load the samples table; the first CSV column is used as the row index.
df = pd.read_csv(
    "samples_100_100.csv",
    index_col=0,
)

In [None]:
# Average each metric within every (method, benchmark) pair, then turn the
# group keys back into ordinary columns so the result can be plotted directly.
grouped = df.groupby(["method", "benchmark"])[
    ["backlog", "deficit", "sample", "n_assets"]
].agg("mean").reset_index()
grouped

In [None]:
# Backlog per method. Rows of `grouped` that share a method are combined by
# seaborn's default estimator into a single bar.
ax = sns.barplot(data=grouped, x="method", y="backlog", hue="method")
ax.figure.suptitle("Backlog Risk")
ax.set_title("100 Sample Average of 100 Asset Portfolios")
ax.set_xlabel("Method")
ax.set_ylabel("Backlog Risk")
ax.figure.savefig("Experiment1.png")

In [None]:
# Overlay one filled kernel-density estimate of backlog per method on the
# same axes so the distributions can be compared directly.
for method in ("qp_ceil", "qp_floor", "qp_mid", "two_stage_qp"):
    sns.kdeplot(
        data=df.loc[df["method"] == method],
        x="backlog",
        fill=True,
        label=method,
    )

ax = plt.gca()
ax.figure.suptitle("Distribution of Backlog Risk")
ax.set_title("100 Samples of 100 Asset Portfolios")
ax.set_xlabel("Backlog")
ax.legend()
# Fixed x-range of +/-0.15 around zero for the saved figure.
ax.set_xlim(-0.15, 0.15)
ax.figure.savefig("Experiment1b.png")

In [None]:
# 99th-percentile backlog for the two-stage QP method.
# The method is named explicitly rather than read from a loop variable left
# over from another cell, so the result does not depend on execution order.
# A single .loc selection replaces the chained df[...][...] lookup.
df.loc[df["method"] == "two_stage_qp", "backlog"].quantile(0.99)

In [None]:
# Summary statistics of backlog for each method, rounded to three decimals.
(
    df.groupby("method")["backlog"]
    .describe()
    .round(decimals=3)
)

In [None]:
# Read samples_10.csv, samples_20.csv, ..., samples_80.csv (first column as
# index) and stack them into one frame with a fresh 0..n-1 index.
df_list = [
    pd.read_csv(f"samples_{step * 10}.csv", index_col=0)
    for step in range(1, 9)
]

df = pd.concat(df_list, ignore_index=True)

df

In [None]:
# Mean backlog and deficit for every (n_assets, method) combination, with the
# group keys restored as regular columns.
grouped = (
    df.groupby(["n_assets", "method"])[["backlog", "deficit"]]
    .agg("mean")
    .reset_index()
)

grouped

In [None]:
# Leave the plain "qp" method out of the comparison before plotting.
grouped = grouped.loc[grouped["method"] != "qp"]

# Grouped bars: one cluster per portfolio size, one bar per method.
ax = sns.barplot(data=grouped, x="n_assets", y="backlog", hue="method")
ax.figure.suptitle("Backlog Risk as Assets Scale")
ax.set_title("Average Across 30 Samples")
ax.set_ylabel("Backlog")
ax.set_xlabel("Number of Assets")
ax.figure.savefig("Experiment2.png")

In [None]:
# Load the budget table; the first CSV column is used as the row index.
df = pd.read_csv(
    "budget_100.csv",
    index_col=0,
)

df

In [None]:
# Mean backlog ratio per budget level, plus its natural log as an extra column.
# NOTE(review): np.log gives -inf/NaN for non-positive inputs — confirm that
# backlog_ratio is strictly positive in this data.
grouped = (
    df.groupby("budget")["backlog_ratio"]
    .mean()
    .reset_index()
    .assign(log_backlog_ratio=lambda frame: np.log(frame["backlog_ratio"]))
)

grouped

In [None]:
# Single-colour bar chart of the log backlog ratio at each budget level.
ax = sns.barplot(data=grouped, x="budget", y="log_backlog_ratio", color="firebrick")
ax.figure.suptitle("Log Backlog ratio as Budget Scales")
ax.set_title("30 Sample Averages for 100 Asset Portfolios")
ax.set_xlabel("Budget")
ax.set_ylabel("Log Backlog Ratio")
ax.figure.savefig("Experiment3.png")