# UQ for G and h

In [None]:
import pandas as pd
import numpy as np
from methods import binomial_method, random_effects_dl, random_effects_hksj, birge, binomial_sigmacdf, vniim
import matplotlib.pyplot as plt
from scipy.stats import norm, t

# Render all figure text through LaTeX (matplotlib's usetex mode needs a working TeX installation).
plt.rc("text", usetex=True)

In [None]:
# Gravitational constant: load the individual measurements of G and display the table.
# NOTE(review): a later cell re-reads this same file with comment='#'; confirm whether
# the CSV contains '#' comment lines that should be skipped here as well.
g_df = pd.read_csv("data/G.csv")
g_df

In [None]:
# Planck constant: load the individual measurements of h and display the table.
h_df = pd.read_csv("data/h.csv")
h_df

In [None]:
# Per-constant inputs for the aggregation figures:
#   values / uncertainties : the "value" and "sigma" columns as numpy arrays
#   names                  : the "id" column, used as row labels
#   xlabel                 : axis label (LaTeX markup)
#   codata_value / _sigma  : centre and standard uncertainty used for the CODATA interval
# The CODATA numbers are expressed in the scaled units given in the axis label
# (10^-11 for G, 10^-34 for h), so the label must carry that factor.
datasets = {
    "G": {
        "values": np.array(g_df["value"]),
        "uncertainties": np.array(g_df["sigma"]),
        "names": g_df["id"],
        "xlabel": "Gravitational constant $G$ [10$^{-11}$ m$^3$ kg$^{-1}$ s$^{-2}$]",
        "codata_value": 6.67430,
        "codata_sigma": 0.00015,
    },
    "h": {
        "values": np.array(h_df["value"]),
        "uncertainties": np.array(h_df["sigma"]),
        "names": h_df["id"],
        # 6.62606957 is h in units of 1e-34 J s, so the label states that factor
        "xlabel": "Planck constant $h$ [10$^{-34}$ J s]",
        "codata_value": 6.62606957,
        "codata_sigma": 0.00000029,
    },
}

In [None]:
# Aggregate-interval figures. For every constant, the individual measurements are
# drawn with their error bars (one row each) and the aggregate intervals are
# overlaid as vertical lines. Two figures are saved per constant:
#   i == 0 -> parametric intervals use the nominal tail probability;
#   i == 1 -> parametric intervals use the tail probability returned by
#             binomial_method, so they are drawn at the binomial interval's own level.
for var, d in datasets.items():
    values = d["values"]
    uncertainties = d["uncertainties"]
    names = d["names"]
    n = len(values)
    p = 0.5

    # nominal two-sided coverage, split evenly between the two tails
    target = 0.6827
    tail_prob = (1 - target) / 2

    # The binomial interval depends only on the data, so it is computed once per
    # constant instead of once per figure.
    sorted_values = np.sort(values)
    lower, tail_prob_achieved = binomial_method(
        sorted_values, p=p, target=tail_prob, which="lower"
    )
    upper, _ = binomial_method(
        sorted_values, p=p, target=tail_prob, which="upper"
    )
    interval_binomial = [lower, upper]

    for i, tail in enumerate((tail_prob, tail_prob_achieved)):
        # critical values shared by all parametric intervals in this figure
        z_alpha = norm.ppf(1 - tail)
        t_alpha = t.ppf(1 - tail, n - 1)
        target_cov = 1 - (2 * tail)

        # plot the measurements so that each value and its error bar is in one row
        plt.figure(figsize=(4, 4))
        plt.errorbar(values, -np.arange(n), xerr=uncertainties, fmt="o", color="black", markersize=3, linewidth=1)
        plt.yticks(-np.arange(n), names)
        # point xticks inwards, and add top ticks
        plt.tick_params(axis="x", direction="in", top=True)

        # random-effects model (DL); computed but not drawn
        interval_re_dl, muhat, sigma, _ = random_effects_dl(
            values, uncertainties, zalpha=z_alpha
        )

        # random-effects model (HKSJ); this is the interval drawn as "Random Effects (RE)"
        interval_re_hksj, muhat, sigma, _ = random_effects_hksj(
            values, uncertainties, talpha=t_alpha
        )

        # Birge ratio; the ratio is printed, the interval is only drawn if the
        # commented lines further down are enabled
        interval_birge, muhat, sigma, ratio = birge(values, uncertainties, zalpha=z_alpha)
        print(ratio)

        interval_vniim, muhat = vniim(values, uncertainties, zalpha=z_alpha)

        # CODATA: normal interval around the recommended value at the same level
        muhat, sigma = d["codata_value"], d["codata_sigma"]
        interval_codata = [muhat - sigma * z_alpha, muhat + sigma * z_alpha]

        # binomial sigma-CDF variant; only drawn if the commented lines below are enabled
        interval_bsigma, cvg = binomial_sigmacdf(values, uncertainties, p=p, coverage=target_cov)
        # print(cvg)

        plt.axvline(interval_codata[0], color="green", linestyle=":", label="CODATA")
        plt.axvline(interval_codata[1], color="green", linestyle=":")

        plt.axvline(interval_binomial[0], color="red", linestyle="--", label="Binomial")
        plt.axvline(interval_binomial[1], color="red", linestyle="--")

        # plt.axvline(interval_bsigma[0], color="green", linestyle="-.", label="Binomial $\sigma$CDF")
        # plt.axvline(interval_bsigma[1], color="green", linestyle="-.")

        plt.axvline(
            interval_re_hksj[0],
            color="blue",
            linestyle="-.",
            label="Random Effects (RE)",
        )
        plt.axvline(interval_re_hksj[1], color="blue", linestyle="-.")

        plt.axvline(interval_vniim[0], color='purple', linestyle='--')
        plt.axvline(interval_vniim[1], color='purple', linestyle='--')

        # plt.axvline(
        #     interval_birge[0], color="grey", linestyle="--", label="Birge Ratio (BR)"
        # )
        # plt.axvline(interval_birge[1], color="grey", linestyle="--")

        # plt.legend(frameon=False, loc="lower left")

        plt.xlabel(d["xlabel"])
        # plt.title(
        #     rf"Aggregating estimates of ${var}$ with target coverage {np.round(target_cov*100, 1)}\%"
        # )
        # coverage (in percent) at which this figure's parametric intervals are drawn
        print(np.round(target_cov*100,1))
        plt.savefig(f"figs/{var}{i}.pdf", bbox_inches="tight")
        plt.savefig(f"figs/{var}{i}.png", bbox_inches="tight", dpi=300)
        plt.show()

In [None]:
# Speed of light: load the measurements, skipping '#' comment lines in the CSV.
c_df = pd.read_csv('data/c.csv', comment='#')
# Optional preprocessing (disabled): keep one randomly chosen measurement per author
# and order the rows by year.
# c_df = c_df.groupby('author').apply(lambda x: x.sample(1), include_groups=False).reset_index(drop=True)
# c_df = c_df.sort_values(by='year')
c_df

In [None]:
from statsmodels.tsa.stattools import acf, pacf
# Indicator series: 1 where a measurement lies above 299792.458 (the value this
# notebook treats as the true speed of light), 0 otherwise. The autocorrelation of
# this series is displayed as the cell output.
series = np.array(c_df.value > 299792.458).astype(int)
acf(series)


In [None]:
import statsmodels.api as sm
# Autocorrelation plot of the measured values themselves, up to lag 50.
sm.graphics.tsa.plot_acf(c_df.value, lags=50)

In [None]:
# Display the column of measured speed-of-light values.
c_df.value

In [None]:
# Number of measurements below and above 299792.458 (values exactly equal are counted in neither).
print(np.sum(c_df.value<299792.458), np.sum(c_df.value>299792.458))

In [None]:
# flip_test is needed by get_lowers_uppers(get_flip_covs=True); importing it here keeps
# the function usable on a fresh kernel (it was otherwise only imported further down).
from methods import flip_test, sign_rank


def get_lowers_uppers(df, years, get_flip_covs=False):
    """Compute running sign-rank intervals from all measurements up to each cut-off year.

    Parameters
    ----------
    df : DataFrame
        Must provide ``year`` and ``value`` columns.
    years : iterable
        Cut-off years; for each one, the rows with ``year <= cut-off`` are used.
    get_flip_covs : bool, default False
        If True, also evaluate ``flip_test`` (median mode) at both end points of
        each interval.

    Returns
    -------
    lowers, uppers : numpy.ndarray
        Interval end points returned by ``sign_rank``, one per cut-off year.
    covs : numpy.ndarray
        ``1 - 2 * alpha`` with ``alpha`` being the second value returned by ``sign_rank``.
    flip_covs : numpy.ndarray
        ``1 - flip_test(lower tail at lower) - flip_test(upper tail at upper)`` per
        cut-off year; empty when ``get_flip_covs`` is False.
    """
    lowers, uppers, covs, flip_covs = [], [], [], []
    for year in years:
        # sign_rank receives the sorted values of everything measured so far
        values = np.sort(df[df.year <= year].value)
        (lower, upper), alpha = sign_rank(values)
        lowers.append(lower)
        uppers.append(upper)
        covs.append(1-2*alpha)
        if get_flip_covs:
            below = flip_test(values, h0=lower, tail='lower', mode='median')
            above = flip_test(values, h0=upper, tail='upper', mode='median')
            flip_covs.append(1 - below - above)
    return np.array(lowers), np.array(uppers), np.array(covs), np.array(flip_covs)

In [None]:
# Running sign-rank interval for the speed of light as measurements accumulate.
# Cut-offs run in half-year steps from the year of the third row to the year of the last row.
# NOTE(review): this assumes c_df is ordered by year — confirm (the sort in the loading cell is disabled).
years = np.arange(c_df.year.iloc[2], c_df.year.iloc[-1]+1, 0.5)
lowers, uppers, covs, flip_covs = get_lowers_uppers(c_df, years, get_flip_covs=True)

# value treated as the true speed of light; everything is plotted as a deviation from it
c = 299792.458
# Seeded generator so that the per-author subsampling (and hence the figure) is reproducible.
# A Generator (not an int seed) is passed to sample() so every group and every draw differs.
rng = np.random.default_rng(0)

fig, axs = plt.subplots(1, 2, figsize=(4.5, 4), sharey=True, width_ratios=(3, 1))
axs[0].plot(lowers-c, years, color='red')
axs[0].plot(uppers-c, years, color='red')
# Ten redraws with a single randomly chosen measurement per author, shaded lightly,
# to show how much the interval depends on authors with several measurements.
for draw in range(10):
    unique_author = (
        c_df.groupby('author')
        .apply(lambda x: x.sample(1, random_state=rng), include_groups=False)
        .reset_index(drop=True)
        .sort_values(by='year')
    )
    # separate names so the full-data bounds computed above are not overwritten
    sub_lowers, sub_uppers, _, _ = get_lowers_uppers(unique_author, years)
    axs[0].fill_betweenx(years, sub_lowers-c, sub_uppers-c, color='red', alpha=0.04)
# the first two rows are omitted, matching the first cut-off year above
axs[0].plot(c_df.value[2:]-c, c_df.year[2:], 'o', color='black', markersize=3)
axs[0].set_xscale('symlog')
axs[0].axvline(0, color='black', linestyle='--')
axs[0].set_xlabel('Deviation from true value [km/s]')
axs[0].set_ylabel('Year')
# reverse y axis
axs[0].invert_yaxis()
axs[1].plot(covs, years, color='black', linewidth=1)
axs[1].plot(flip_covs, years, color='grey', linewidth=1)
axs[1].set_xlabel('Nominal coverage')
# point ticks inwards and add top and right ticks
for ax in axs:
    ax.tick_params(axis='both', which='both', direction='in', top=True, right=True)
plt.tight_layout()
# hide every second x tick label of the left panel
for tick_index, label in enumerate(axs[0].xaxis.get_ticklabels()):
    if tick_index % 2 != 0:
        label.set_visible(False)
plt.savefig('figs/c.pdf', bbox_inches='tight')
plt.savefig('figs/c.png', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Quick look at `covs` as left by the previous cell (1 - 2*alpha per cut-off year), plotted against its index.
plt.plot(covs)

In [None]:
from methods import sign_rank_test

# Sign-rank test on all speed-of-light measurements with 299792.458 as the hypothesised median.
sign_rank_test(c_df.value, h0_median=299792.458)

In [None]:
from methods import sign_rank
# Sign-rank interval from all measurements with a requested coverage of 0.70.
# Elsewhere in this notebook the result is unpacked as ((lower, upper), alpha).
sign_rank(np.array(c_df.value), coverage=0.70)

In [None]:
from methods import flip_test, flip_interval
# Two-sided flip test (median mode) at two hand-picked candidate values for the median.
# NOTE(review): the provenance of 299784.3 and 299794.0 is not stated — presumably
# end points of an interval computed earlier; confirm.
print(flip_test(c_df.value, h0=299784.3, tail='both', mode='median'))
print(flip_test(c_df.value, h0=299794.0, tail='both', mode='median'))

In [None]:
from tqdm import tqdm

# Simulation check of the flip-based median interval: draw 200 standard-normal
# samples of size 33 and record, for each one, the flip-test result, whether the
# requested-95% interval contains 0 (the true median), and the second value
# returned by flip_interval.
np.random.seed(0)  # fixed seed so the simulation is reproducible
p_values = []
in_intervals = []
covs = []
for i in tqdm(range(200)):
    values = np.random.standard_normal(33)
    p_values.append(flip_test(values, mode='median'))
    [lower, upper], cov = flip_interval(values, mode='median', coverage=0.95, boot=False)
    in_intervals.append(lower <= 0 <= upper)
    # collect the value so the histogram below is not drawn from an empty list
    # (assumes flip_interval's second return is a finite scalar coverage — TODO confirm)
    covs.append(cov)

# empirical fraction of intervals that contain 0
print(np.nanmean(in_intervals))
plt.hist(covs)
plt.show()

In [None]:
# Histogram of the flip-test results collected in the simulation loop.
plt.hist(p_values)
plt.show()

In [None]:
# Inspect the sample and interval end points left over from the last simulation iteration.
print(values, lower, upper)

In [None]:
# Normal-theory 95% confidence interval for the mean of `values`:
# mean +/- z * std / sqrt(n) with the two-sided 95% normal quantile z = 1.96.
# NOTE(review): np.std defaults to ddof=0 (population formula); confirm that this,
# rather than the sample standard deviation (ddof=1), is intended.
z = 1.96
mean = np.mean(values)
half_width = z * np.std(values) / np.sqrt(len(values))
lower, upper = mean - half_width, mean + half_width
print(lower, upper)

In [None]:
# One-sided flip tests (median mode) evaluated at the two end points currently held in
# `lower` and `upper` (the normal-theory interval when the cells are run in order).
print(flip_test(values, h0=lower, mode='median', tail='lower'))
print(flip_test(values, h0=upper, mode='median', tail='upper'))

In [None]:
from methods import flip_interval
# Flip-based interval for the same sample in 'mean' mode with a requested coverage of 0.95.
flip_interval(values, mode='mean', coverage=0.95)

In [None]:
# Cumulative, normalised histogram (20 bins) of the simulated flip-test results,
# i.e. a binned empirical distribution function.
plt.hist(p_values, bins=20, cumulative=True, density=True)
plt.show()

In [None]:
# rho measurements (labelled $\rho_\bigoplus$ in the plots below — presumably Earth's
# mean density; confirm). '#' comment lines in the CSV are skipped.
rho_df = pd.read_csv("data/rho.csv", comment='#')

# Lookup tables keyed by constant name for the time-series plots.
# NOTE: this rebinds `datasets`. Earlier in the notebook the name held per-constant
# dicts of arrays; from here on it maps each name to a DataFrame, and it keeps
# pointing at the frame objects that exist when this cell runs.
datasets = {
    'rho': rho_df,
    'h': h_df,
    'G': g_df,
    # 'c': c_df,
}
# Reference value drawn as the "truth" line; None means no such line is drawn.
truths = {
    'rho': 5.513,
    'c': 299792.458,
    'G': None,
    'h': None,
}
# y-axis scale per constant; with 'symlog' the data are plotted as deviations from the truth value.
yscales = {
    'rho': 'linear',
    'c': 'symlog',
    'G': 'linear',
    'h': 'linear',
}

In [None]:
# Running binomial interval for rho as measurements accumulate, using one
# randomly chosen measurement per author.

# number of measurements below / above the reference value
print(np.sum(rho_df.value<truths['rho']), np.sum(rho_df.value>truths['rho']))

# Seeded generator so the per-author draw (and hence the figure) is reproducible.
rng = np.random.default_rng(0)
# The de-duplicated frame gets its own name: include_groups=False drops the
# 'author' column, so overwriting rho_df would make this cell fail with a KeyError
# when it is executed a second time.
rho_unique = (
    rho_df.groupby('author')
    .apply(lambda x: x.sample(1, random_state=rng), include_groups=False)
    .reset_index(drop=True)
    .sort_values(by='year')
)

# cut-offs in half-year steps from the year of the third row to the year of the last row
years = np.arange(rho_unique.year.iloc[2], rho_unique.year.iloc[-1]+1, 0.5)
lowers = []
uppers = []
covs = []
for year in years:
    # sorted values of everything measured up to this cut-off
    values = np.sort(rho_unique[rho_unique.year <= year].value)
    lower, alpha = binomial_method(values, which='lower')
    upper, _ = binomial_method(values, which='upper')
    lowers.append(lower)
    uppers.append(upper)
    covs.append(1-2*alpha)

fig, axs = plt.subplots(1, 2, figsize=(4.5, 4), sharey=True, width_ratios=(3, 1))
axs[0].plot(np.array(lowers), years, color='red')
axs[0].plot(np.array(uppers), years, color='red')
# the first two rows are omitted, matching the first cut-off year above
axs[0].plot(rho_unique.value.iloc[2:], rho_unique.year.iloc[2:], 'o', color='black', markersize=3)
# axs[0].set_xscale('symlog')
axs[0].axvline(truths['rho'], color='black', linestyle='--')
axs[0].set_xlabel(r'Estimate of $\rho_\bigoplus$')
axs[0].set_ylabel('Year')
# reverse y axis
axs[0].invert_yaxis()
axs[1].plot(covs, years, color='black', linewidth=1)
axs[1].set_xlabel('Nominal coverage')
# point ticks inwards and add top and right ticks
for ax in axs:
    ax.tick_params(axis='both', which='both', direction='in', top=True, right=True)
plt.tight_layout()
# hide every second x tick label of the left panel
for tick_index, label in enumerate(axs[0].xaxis.get_ticklabels()):
    if tick_index % 2 != 0:
        label.set_visible(False)
plt.savefig('figs/rho.png', bbox_inches='tight', dpi=300)
plt.show()

In [None]:
# Fraction of speed-of-light measurements that fall below 299792.458.
np.mean(c_df.value<299792.458)

In [None]:
# Measured speed-of-light value against year of measurement.
plt.scatter(c_df.year,c_df.value)

In [None]:
# Reload the three tables for the cumulative-interval plot: h and rho are ordered by
# year, and rho is reduced to one randomly chosen measurement per author.
g_df = pd.read_csv("data/G.csv", comment='#')
h_df = pd.read_csv("data/h.csv").sort_values(by='year')

# Seeded generator so the per-author draw is reproducible.
rng = np.random.default_rng(0)
rho_df = (
    pd.read_csv("data/rho.csv", comment='#')
    .groupby('author')
    .apply(lambda x: x.sample(1, random_state=rng), include_groups=False)
    .reset_index(drop=True)
    .sort_values(by='year')
)
# rho_df = rho_df.dropna()

# `datasets` still points at the frames that existed when it was built, so the
# entries are refreshed here; otherwise the reload above would have no effect on
# the plot that reads `datasets[which]`.
datasets = {**datasets, 'rho': rho_df, 'h': h_df, 'G': g_df}
rho_df

In [None]:
# Cumulative-interval plot for one constant: intervals are recomputed from the first
# i rows of the selected table (i = 3 .. all rows) and drawn against the year of row i.
which = 'rho'
use_sigma = False
dataset = datasets[which]
# on a symlog axis the data are shown as deviations from the reference value
offset = truths[which] if yscales[which] == 'symlog' else 0
# uncertainty-based methods are only used when requested and a 'sigma' column exists
show_sigma = 'sigma' in dataset.columns and use_sigma
# LaTeX symbol of the selected constant, so the labels follow `which`
# instead of always naming G
symbol = {'rho': r'\rho_\bigoplus', 'c': 'c', 'G': 'G', 'h': 'h'}.get(which, which)
prefix_sizes = range(3, len(dataset)+1)

lowers_b = [binomial_method(np.sort(dataset.value[:i]), which='lower')[0] for i in prefix_sizes]
uppers_b = [binomial_method(np.sort(dataset.value[:i]), which='upper')[0] for i in prefix_sizes]
if show_sigma:
    intervals_re = [random_effects_hksj(dataset.value[:i], dataset.sigma[:i], coverage=0.6827)[0] for i in prefix_sizes]
    lowers_re = [interval[0] for interval in intervals_re]
    uppers_re = [interval[1] for interval in intervals_re]
    intervals_birge = [birge(dataset.value[:i], dataset.sigma[:i], coverage=0.6827)[0] for i in prefix_sizes]
    lowers_birge = [interval[0] for interval in intervals_birge]
    uppers_birge = [interval[1] for interval in intervals_birge]
years = dataset.year

# binomial interval (red); the first interval uses three rows, hence years[2:]
plt.plot(years[2:], np.array(lowers_b)-offset, color='red')
plt.plot(years[2:], np.array(uppers_b)-offset, color='red')

if show_sigma:
    # random effects / HKSJ (black) and Birge ratio (blue)
    plt.plot(years[2:], np.array(lowers_re)-offset, color='black')
    plt.plot(years[2:], np.array(uppers_re)-offset, color='black')
    plt.plot(years[2:], np.array(lowers_birge)-offset, color='blue')
    plt.plot(years[2:], np.array(uppers_birge)-offset, color='blue')

# individual measurements, with error bars only when uncertainties are in use
plt.errorbar(
    dataset.year,
    dataset.value-offset,
    yerr=dataset.sigma if show_sigma else None,
    fmt='o',
    color='grey',
)

if truths[which] is not None:
    plt.axhline(truths[which]-offset, color='red', linestyle='--')
if which == 'rho':
    # thin reference band at 5.5247 +/- 0.0013
    # NOTE(review): the source of these two numbers is not stated — confirm.
    plt.axhline(5.5247+0.0013,color='grey',linewidth=0.2)
    plt.axhline(5.5247-0.0013,color='grey',linewidth=0.2)
if yscales[which] == 'symlog':
    plt.yscale('symlog')
plt.ylabel(rf'Estimate of ${symbol}$')
plt.xlabel('Year')
plt.title(rf'Intervals for ${symbol}$ over time')
plt.show()