In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render all figure text through LaTeX (text.usetex); the axis labels defined
# in the plotting cells contain math markup such as $[\mathrm{km/s}]$.
plt.rc("text", usetex=True)

In [None]:

# Speed-of-light measurements. Text following a '#' in the CSV is treated as
# a comment. Later cells read the `value` and `year` columns of this table.
c_df = pd.read_csv(filepath_or_buffer='data/c.csv', comment='#')
c_df

In [None]:
# Density-of-Earth measurements. Text following a '#' in the CSV is treated
# as a comment. Later cells read the `value` and `year` columns of this table.
rho_df = pd.read_csv(filepath_or_buffer='data/rho.csv', comment='#')
rho_df

In [None]:
# Astronomical-unit measurements. Text following a '#' in the CSV is treated
# as a comment. Later cells read the `value` and `year` columns of this table.
au_df = pd.read_csv(filepath_or_buffer='data/au.csv', comment='#')
au_df

In [None]:
# Divisor that turns the `proberr` column into the `uncertainty` column.
# NOTE(review): 0.6745 is the probable error of a normal distribution expressed
# in standard deviations, so this assumes `proberr` holds probable errors of
# normally distributed measurements -- confirm against the data source.
PROBABLE_ERROR_PER_SIGMA = 0.6745


def _read_with_uncertainty(path):
    """Load a measurement CSV and append an ``uncertainty`` column.

    Parameters
    ----------
    path : str or file-like
        CSV source; text following a '#' is treated as a comment. The table
        must contain a ``proberr`` column.

    Returns
    -------
    pandas.DataFrame
        The parsed table with an extra ``uncertainty`` column equal to
        ``proberr / PROBABLE_ERROR_PER_SIGMA``.
    """
    table = pd.read_csv(path, comment='#')
    return table.assign(uncertainty=table['proberr'] / PROBABLE_ERROR_PER_SIGMA)


# Both mass-ratio tables share the same layout, so they go through one helper
# instead of two copy-pasted load/convert pairs.
ho_df = _read_with_uncertainty('data/h-o-mass.csv')
agcl_df = _read_with_uncertainty('data/ag-cl-mass.csv')

In [None]:
# Measurement tables keyed by a short identifier. Insertion order matters:
# `names` below is derived from it.
datasets = {
    'rho': rho_df,
    'c': c_df,
    'au': au_df,
    'ho': ho_df,
    'agcl': agcl_df,
}

# Reference ("true") value for each quantity; later cells compare it directly
# against the `value` column of the matching table.
# NOTE(review): 32.866961718 equals 100 * 35.453 / 107.8682, i.e. chlorine mass
# per 100 parts of silver, while `nice_names` calls the quantity
# 'Silver to Chlorine mass ratio' -- confirm which direction the data uses.
truths = {
    'rho': 5.513,
    'c': 299_792.458,
    'au': 149_597_870_700,
    'ho': 15.87202381,
    'agcl': 32.866961718,
}

# Scale of the deviation axis. Despite the name, the historical plot applies
# it to the x axis (set_xscale).
yscales = {
    'rho': 'symlog',
    'c': 'symlog',
    'au': 'symlog',
    'ho': 'linear',
    'agcl': 'linear',
}

# `linthresh` argument handed to the symlog scale for each quantity.
linthresh = {
    'rho': 0.01,
    'c': 0.1,
    'au': 1,
    'ho': 0.001,
    'agcl': 0.001,
}

# Dataset identifiers in the insertion order of `datasets`.
names = list(datasets)

# Human-readable titles used for plot titles and the results table.
nice_names = {
    'c': 'Speed of light',
    'rho': 'Density of Earth',
    'au': 'Astronomical Unit',
    'ho': 'Oxygen to Hydrogen mass ratio',
    'agcl': 'Silver to Chlorine mass ratio',
}

In [None]:
# Imports needed by this cell, grouped at its top instead of being scattered
# between the computations.
from scipy.stats import binomtest
from methods import sign_rank_test

# Sample size per dataset, and how many measurements lie strictly above the
# reference value (a measurement equal to the reference counts as "not over").
counts = {n: len(datasets[n]) for n in names}
num_over = {n: np.sum(datasets[n].value > truths[n]) for n in names}
prop_over = {n: num_over[n] / counts[n] for n in names}

# Two-sided binomial test of the null hypothesis that a measurement is equally
# likely (p=0.5) to fall above the reference value as not.
binom_p_values = {
    n: binomtest(num_over[n], counts[n], p=0.5, alternative='two-sided').pvalue
    for n in names
}

# Project-local test from `methods`; whatever it returns is reported as the
# sign-rank p-value.
sign_rank_p_values = {
    n: sign_rank_test(datasets[n].value, truths[n]) for n in names
}

# Gather every statistic into one table (rows aligned on the dataset key) and
# emit it as LaTeX source. The earlier bare `binom_p_values` /
# `sign_rank_p_values` expressions were dropped: placed mid-cell they displayed
# nothing, and both columns are part of this table anyway.
results = pd.DataFrame({
    'name': nice_names,
    'count': counts,
    'num_over': num_over,
    'prop_over': prop_over,
    'binom_p_value': binom_p_values,
    'sign_rank_p_value': sign_rank_p_values,
})
print(results.to_latex(index=False, float_format='%.4f'))


In [None]:
# One panel per quantity: deviation from the reference value (x) against the
# year of the measurement (y), with time running downwards.
fig, axs = plt.subplots(1, 3, figsize=(10, 5))
xlabels = {
    'rho': r'Difference from true value $[\mathrm{g/cm^3}]$',
    'c': r'Difference from true value $[\mathrm{km/s}]$',
    'au': r'Difference from true value $[\mathrm{km}]$',
    'ho': r'Difference from true ratio',
    'agcl': r'Difference from true ratio',
}
# Iterate over the panel keys directly. The previous `name = names[i]` override
# only produced the right panels because `datasets` happens to list these three
# keys first; reordering that dict would silently have changed the figure.
for ax, name in zip(axs, ['rho', 'c', 'au']):
    values = datasets[name].value - truths[name]
    if name == 'au':
        # Scale down by 1000 to match the km axis label
        # (presumably the table is in metres -- confirm against the data).
        values = values / 1000
    dates = datasets[name].year

    ax.plot(values, dates, '.', color='black')
    # Dashed line marks zero deviation from the reference value.
    ax.axvline(0, color='black', linestyle='--', linewidth=1)

    if yscales[name] == 'symlog':
        ax.set_xscale('symlog', linthresh=linthresh[name])
        # Hide every other tick label to avoid clutter, but never the zero label.
        for n, label in enumerate(ax.xaxis.get_ticklabels()):
            if n % 2 != 0 and label.get_text() != '$\\mathdefault{0}$':
                label.set_visible(False)
        ax.tick_params(axis='both', which='both', direction='in', top=True, right=True)
    ax.set_xlabel(xlabels[name])
    ax.set_ylabel('Year')
    # Descending limits already invert the axis (oldest year at the top), so no
    # separate invert_yaxis() call is needed.
    ax.set_ylim(2000, 1650)
    ax.set_title(nice_names[name])
    # Make the x range symmetric about zero.
    x_low, x_high = ax.get_xlim()
    half_width = max(abs(x_low), abs(x_high))
    ax.set_xlim(-half_width, half_width)

plt.tight_layout()
plt.savefig('figs/historical.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Error-bar panels for the two mass-ratio datasets: measured value with its
# uncertainty on x, one row per measurement, reference value as a dashed line.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

for ax, name in zip(axs, ['ho', 'agcl']):
    table = datasets[name]
    # Row order: largest uncertainty first.
    order = np.argsort(np.array(table.uncertainty))[::-1]
    values = np.array(table.value)[order]
    errs = np.array(table.uncertainty)[order]
    rows = np.arange(len(values))

    ax.errorbar(values, rows, xerr=errs, fmt='.', markersize=2, linewidth=1, color='black')
    ax.axvline(truths[name], color='black', linestyle='--', linewidth=1)
    # Flip the y axis so the first (largest-uncertainty) row is at the top.
    ax.invert_yaxis()
    ax.set_title(nice_names[name])
    # The row index carries no meaning, so drop the y ticks.
    ax.set_yticks([])
plt.tight_layout()