In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Step one directory up from the current working directory and report both
# locations. Note: re-running this cell moves up one more level each time.
old_working_dir = os.getcwd()
print(f"Old working dir {old_working_dir}")
os.chdir('../')
new_working_dir = os.getcwd()
print(f"New working dir {new_working_dir}")

In [None]:
import numpy as np
from scipy.stats import norm, multivariate_normal
import torch
from torch.utils.data import TensorDataset, DataLoader

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline


In [None]:
from conformal.real_datasets.reproducible_split import get_dataset_split
from conformal.classes.method_desc import ConformalMethodDescription
from conformal.score_calculators import CVQRegressor, CVQRegressorRF, CVQRegressorY, CVQRegressorYRF

In [None]:
# Synthetic 1-D regression problem: y = x**1.6 plus additive Gaussian noise.
alpha = 0.3  # miscoverage level; the intervals below use 1 - alpha
scale = 0.1  # standard deviation of the additive noise
n = 10000    # number of grid points
rng = np.random.default_rng(31337)

x = np.linspace(0, 1, n)
y_true = np.power(x, 1.6)
noise = rng.normal(scale=scale, size=n)
y = y_true + noise

# Central (1 - alpha) interval of the zero-mean noise distribution.
interval_1a = norm.interval(1 - alpha, loc=0, scale=scale)

In [None]:
# Noisy observations, the true curve, and the true (1 - alpha) noise band.
band_low, band_high = (y_true + offset for offset in interval_1a)
coverage_percent = int((1 - alpha) * 100)

plt.plot(x, y, alpha=0.5, label=r"Data $y=f(x)+\epsilon$")
plt.plot(x, y_true, "g", label=rf'$y=f(x) + {coverage_percent}\%$')
plt.fill_between(x, band_low, band_high, color="g", alpha=0.3)
#plt.plot(x, x, "k--", label=r'$y=x$')
plt.legend()

In [None]:
# Hyper-parameters of the regressor used for the 1-D sanity check, collected in
# one mapping and forwarded to the constructor as keyword arguments.
regressor_settings = dict(
    feature_dimension=1,
    response_dimension=1,
    hidden_dimension=8,
    number_of_hidden_layers=4,
    batch_size=512,
    n_epochs=150,
    learning_rate=0.01,
    dtype=torch.float32,
    betas=(0.5, 0.5),
    weight_decay=1e-4,
    warmup_iterations=5,
)
reg = CVQRegressor(**regressor_settings)

# File name of the saved model for this configuration.
fn_model = "cvqregressor_for_1d_check_hpd3.pth"

In [None]:
# Reuse the saved model when the checkpoint file exists; otherwise fit on the
# synthetic data and save the result. The model is switched to eval mode on
# BOTH paths: previously a model restored from disk was never put in eval mode,
# so predictions could differ between a fresh fit and a cached run.
if os.path.isfile(fn_model):
    reg.model.load(fn_model)
    reg.model.eval()
else:
    reg.fit(x.reshape(-1, 1), y.reshape(-1, 1))
    reg.model.eval()
    reg.model.save(fn_model)

In [None]:
#reg.model.eval()

In [None]:
# Mean prediction on the grid, plus lower/upper curves obtained by passing the
# two bounds of the standard-normal (1 - alpha) interval, repeated for every x,
# to reg.predict_inverse_quantile.
features = x.reshape(-1, 1)
y_pred = reg.predict_mean(features)

# NOTE: despite its name, interval_99 is the (1 - alpha) interval of N(0, 1).
interval_99 = norm.interval(1 - alpha, loc=0, scale=1)
u_low, u_high = (np.full((n, 1), bound) for bound in interval_99)
y_pred_low = reg.predict_inverse_quantile(features, u_low)
y_pred_high = reg.predict_inverse_quantile(features, u_high)

In [None]:
# Show the shapes of the lower and upper predicted curves.
y_pred_low.shape, y_pred_high.shape

In [None]:
# Peek at the first five entries of the lower predicted curve.
y_pred_low[:5]

In [None]:
# Overlay of data, true curve with its noise band (green) and the model's mean
# prediction with its predicted band (red).
coverage_percent = int((1 - alpha) * 100)
true_low, true_high = (y_true + offset for offset in interval_1a)

plt.plot(x, y, alpha=0.5, label=r"Data $y=f(x)+\epsilon$")
plt.plot(x, y_true, "g", label=rf'$y=f(x) + {coverage_percent}\%$')
plt.plot(x, y_pred, "r", label=rf'$y=\hat{{f}}(x) + {coverage_percent}\%$')
plt.fill_between(x, true_low, true_high, color="g", alpha=0.3)
plt.fill_between(x, y_pred_low[:, 0], y_pred_high[:, 0], color="r", alpha=0.3)
#plt.plot(x, x, "k--", label=r'$y=x$')
plt.legend()

In [None]:
# Call reg.predict_quantile on every (x, y) pair of the synthetic data, each
# passed as a single-column array.
quantiles = reg.predict_quantile(x.reshape(-1, 1), y.reshape(-1, 1))

In [None]:
# Density histogram (with KDE) of the predicted quantiles, with the
# standard-normal pdf drawn as a dashed reference on the same axes.
t = np.linspace(-4, 4, 1000)
ax = sns.histplot(quantiles, kde=True, stat="density")
ax.plot(t, norm.pdf(t), "k--")

In [None]:
# Look at the log density at the point x = 0.9: pair every response value on
# the grid t with the same feature value. The number of rows follows t instead
# of a hard-coded 1000, so the two arrays cannot get out of sync.
x0 = np.full((t.size, 1), 0.9)
scores_smaples = reg.calculate_scores(x0, t.reshape(-1, 1))

In [None]:
# Estimated conditional density at the fixed feature value (exp of the model's
# "Log Density" score) against the true Gaussian density of the synthetic data.
# The first label used to read "Log Density Estimate" although the plotted
# curve is exponentiated, i.e. a density.
plt.plot(t, np.exp(scores_smaples["Log Density"]), label="Density Estimate")
plt.plot(t, norm.pdf(t, loc=x0[0, 0] ** 1.6, scale=scale), "k--", label="True Density")
plt.legend()
plt.show()

In [None]:
import os

# Placeholder: set this to the account name whose home directory holds the checkout.
username = ""
# The "f" prefix is required here; without it the literal text "{username}"
# ended up in the path instead of the value of `username`.
os.chdir(f'/home/{username}/repos/conditional_quantile_function')

from argparse import Namespace
from pathlib import Path

import numpy as np
from scipy.stats import norm, multivariate_normal
import torch
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from pushforward_operators import AmortizedNeuralQuantileRegression
from conformal.real_datasets.reproducible_split import get_dataset_split
from conformal.score_calculators import CVQRegressorRF, CVQRegressorY, CVQRegressor, CPFlowRegressor
from conformal.classes.conformalizers import QuantileEstimatePredictor, SplitConformalPredictor


In [None]:
# Load the "rf1" split twice with the same seed so the two results can be compared.
ds_sgemm_a, ds_sgemm_b = (get_dataset_split("rf1", seed=1239) for _ in range(2))

In [None]:
# Show the first calibration feature value from each of the two loads side by side.
ds_sgemm_a.X_cal[0, 0], ds_sgemm_b.X_cal[0, 0]

In [None]:
# Multiple dimensions: load a dataset split and create or load the model for it.

seed = 0
dataset = "scm20d"
results_dir = Path(f"./conformal_results_u/{dataset}/{seed}")

ds = get_dataset_split(dataset, seed=seed)
args = Namespace(dataset=dataset, seed=seed, n_cpus=8)

# Alternative models, currently disabled:
#model_cpflow = CPFlowRegressor.create_or_load(path=results_dir, args=args, dataset_split=ds)
#model_cpflow.model.eval()
#model_y = CVQRegressorY.create_or_load(path=results_dir, args=args, dataset_split=ds)
#model_y.model.eval()

model_u = CVQRegressor.create_or_load(path=results_dir, args=args, dataset_split=ds)
model_u.model.eval()


In [None]:
#model_u.model.init_dict, model_y.model.init_dict

In [None]:
# Scores of model_u on the calibration split, then on the test split.
scores_u_cal, scores_u_test = (
    model_u.calculate_scores(features, targets)
    for features, targets in ((ds.X_cal, ds.Y_cal), (ds.X_test, ds.Y_test))
)

# Same computation for the alternative models, currently disabled:
#scores_cpflow_cal = model_cpflow.calculate_scores(ds.X_cal, ds.Y_cal)
#scores_cpflow_test = model_cpflow.calculate_scores(ds.X_test, ds.Y_test)
#scores_y_cal = model_y.calculate_scores(ds.X_cal, ds.Y_cal)
#scores_y_test = model_y.calculate_scores(ds.X_test, ds.Y_test)

In [None]:
#scores_u_cal, scores_y_cal, scores_cpflow_cal

In [None]:
# NOTE(review): `raw_model` was not defined anywhere in this notebook, so this
# cell raised NameError on a fresh kernel. It is bound here to the model wrapped
# by `model_u`, which is presumably the intended object -- please confirm.
raw_model = model_u.model

X_tensor = torch.tensor(ds.X_test)
Y_tensor = torch.tensor(ds.Y_test)
# Passing a tensor to .to() matches dtype/device to that tensor
# (assumes raw_model is a torch.nn.Module -- TODO confirm).
raw_model.to(X_tensor)
U_pullback = raw_model.push_y_given_x(x=X_tensor, y=Y_tensor)

In [None]:
# Density histogram (with KDE) of U_pullback, with the standard-normal pdf
# drawn as a dashed reference on the same axes.
t = np.linspace(-4, 4, 1000)
pullback_values = U_pullback.numpy(force=True)
ax = sns.histplot(pullback_values, kde=True, stat="density")
ax.plot(t, norm.pdf(t), "k--")

In [None]:
# Density histogram (with KDE) of the test "MK Quantile" scores, with the
# standard-normal pdf drawn as a dashed reference on the same axes.
t = np.linspace(-4, 4, 1000)
mk_quantile_test = scores_u_test["MK Quantile"]
ax = sns.histplot(mk_quantile_test, kde=True, stat="density", common_norm=False)
ax.plot(t, norm.pdf(t), "k--")

In [None]:
# Histogram of exp("Log Density") scores on the test split.
sns.histplot(np.exp(scores_u_test["Log Density"]))

In [None]:
# Both predictors share the output dimension, seed and miscoverage level; only
# the split-conformal one additionally sets lower_is_better.
predictor_kwargs = dict(d_y=ds.n_outputs, seed=0, alpha=0.1)
q_method = QuantileEstimatePredictor(**predictor_kwargs)
pb_method = SplitConformalPredictor(**predictor_kwargs, lower_is_better=True)

In [None]:
# Fit each predictor on the calibration split with its own score type:
# "MK Quantile" scores for q_method, "MK Rank" scores for pb_method.
q_method.fit(
    X_cal=ds.X_cal,
    scores_cal=scores_u_cal["MK Quantile"],
    alpha=0.1,
)

pb_method.fit(
    X_cal=ds.X_cal,
    scores_cal=scores_u_cal["MK Rank"],
    alpha=0.1,
)


In [None]:
# Mean of is_covered over the test split for each of the two methods.
q_method.is_covered(ds.X_test, scores_u_test["MK Quantile"]).mean(), pb_method.is_covered(ds.X_test, scores_u_test["MK Rank"]).mean()

In [None]:
# pb_method's threshold next to the min and max of the test "MK Rank" scores.
pb_method.threshold, scores_u_test["MK Rank"].min(), scores_u_test["MK Rank"].max()

In [None]:
# Calculate the prediction-set volume for one test point by sampling: draw
# responses uniformly inside the bounding box of the training responses and
# multiply the fraction that pb_method reports as covered by the box volume.
n_samples = 10_000

rng = np.random.default_rng(args.seed)
ymin = ds.Y_train.min(axis=0)
ymax = ds.Y_train.max(axis=0)

# Stored under its own name: this value used to overwrite the noise `scale`
# defined for the 1-D example, which broke re-running the earlier density plot.
bounding_box_volume = np.prod(ymax - ymin)
print(f"Bounding box volume: {bounding_box_volume}")

i = 101  # index of the test point; the following cell reads it as well
X_samples = np.repeat(ds.X_test[i:i + 1], repeats=n_samples, axis=0)
Y_samples = ymin + rng.random((n_samples, ds.n_outputs)) * (ymax - ymin)

# Separate name so the 1-D `scores_smaples` from the earlier example is not clobbered.
scores_box_samples = model_u.calculate_scores(X_samples, Y_samples)
covered_fraction = np.mean(pb_method.is_covered(X_samples, scores_box_samples["MK Rank"]))
volume_i = covered_fraction * bounding_box_volume
print(f"Volume estimate (sampling): {volume_i}, {np.log(volume_i) / ds.n_outputs}")

In [None]:
# Call the model's get_log_volume 200 times for test point i and print the
# running mean and standard deviation after every call.
log_volumes = []
for _ in range(200):
    estimate = model_u.model.get_log_volume(
        torch.tensor(ds.X_test[i], dtype=torch.float32),
        pb_method.threshold,
        number_of_points_to_estimate_bounding_box=100,
        number_of_points_to_estimate_volume=10000,
    )
    log_volumes.append(estimate)
    estimates_so_far = torch.tensor(log_volumes)
    mean, std = estimates_so_far.mean().item(), estimates_so_far.std().item()
    print(f"{mean=}, {std=}")

# One more estimate using get_log_volume's default sampling settings.
log_v = model_u.model.get_log_volume(
    torch.tensor(ds.X_test[i], dtype=torch.float32),
    pb_method.threshold,
)

In [None]:
from tqdm import trange


def _collect_log_volumes(features, model, threshold):
    """Call ``model.get_log_volume`` once per row of ``features``.

    A progress bar shows the current row index and the running mean / standard
    deviation of the values collected so far.

    Args:
        features: 2-D array; each row is converted to a float32 tensor and
            passed to ``model.get_log_volume``.
        model: object exposing ``get_log_volume(x, threshold)``.
        threshold: value forwarded unchanged as the second argument.

    Returns:
        List with one ``get_log_volume`` result per row, in row order.
    """
    log_volumes = []
    progress_bar = trange(features.shape[0])
    for row_index in progress_bar:
        log_volumes.append(
            model.get_log_volume(
                torch.tensor(features[row_index], dtype=torch.float32),
                threshold,
            )
        )
        running = torch.tensor(log_volumes)
        progress_bar.set_postfix({
            "index": row_index,
            "mean": running.mean().item(),
            "std": running.std().item(),
        })
    return log_volumes


# The calibration pass previously indexed ds.X_test, appended its results to
# test_log_volumes and reported statistics of the (always empty)
# cal_log_volumes. Each split now goes through the same helper and fills its
# own list. The helper also keeps its loop variable local, so the notebook-level
# array `x` is no longer overwritten.
test_log_volumes = _collect_log_volumes(ds.X_test, model_u.model, pb_method.threshold)
cal_log_volumes = _collect_log_volumes(ds.X_cal, model_u.model, pb_method.threshold)

In [None]:
# Display the calibration features.
# NOTE(review): this shows the whole array; a slice such as ds.X_cal[:5] may be enough.
ds.X_cal