Code to get $chi^2$ results to Claudio

Created by Linnea on October 11, 2023

In [22]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from pathlib import Path
from collections import defaultdict
import h5py
import keras_core as keras
import tensorflow_io as tfio

import sys
sys.path.append('../')
import preprocess.preprocess
import utils
import jax
import jax.numpy as jnp

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
# Load metadata for this interval
def index_mcmc_runs():
    """Make a list of combinations for which we want to run MCMC."""
    experiments = ['AMS02_H-PRL2021']
    dfs = []
    for experiment_name in experiments:
        filename = f'../../data/2023/{experiment_name}_heliosphere.dat'
        df = utils.index_experiment_files(filename) 
        df['experiment_name'] = experiment_name
        df['filename_heliosphere'] = filename
        dfs.append(df)
    df = pd.concat(dfs, axis=0, ignore_index=0)
    return df

# Select experiment parameters
df = index_mcmc_runs()  # List of all ~200 experiments.
data = df.iloc[0]

In [24]:
# Load parameters
polarity = 'neg'
path = '/home/linneamw/sadow_koastore/personal/linneamw/research/gcr/data/2023_07_01'
f_original = f'{path}/{polarity}/model_collection_1AU_90deg_0deg.h5'

# 8 input parameters for the NN: alpha, cmf, vspoles, cpa, pwr1par, pwr2par, pwr1perr, and pwr2perr.
# features = ['alpha', 'cmf', 'cpa', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'vspoles']
with h5py.File(f_original, 'r') as h5:
    print(h5.keys())

    info = h5['info']
    model = h5['model']

    print(info.keys())
    print(model.keys())

    # Get relevant parameters
    alpha = h5['model/alpha'][:]
    cmf = h5['model/cmf'][:]
    cpa = h5['model/cpa'][:]
    pwr1par = h5['model/pwr1par'][:]
    pwr1perr = h5['model/pwr1perr'][:]
    pwr2par = h5['model/pwr2par'][:]
    pwr2perr = h5['model/pwr2perr'][:]
    vspoles = h5['model/vspoles'][:]
    imodel = h5['model/imodel'][:]
    rigidity = h5['info/rigidity'][:]

<KeysViewHDF5 ['info', 'model']>
<KeysViewHDF5 ['LIS', 'rigidity']>
<KeysViewHDF5 ['alpha', 'cmf', 'cpa', 'flux', 'imodel', 'ipar', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'quality', 'vseq', 'vspoles']>


In [25]:
# Find parameters associated with index 1889802
index = 1889802

# Load parameters for NN: ['alpha', 'cmf', 'cpa', 'pwr1par', 'pwr1perr', 'pwr2par', 'pwr2perr', 'vspoles']
parameters = (alpha[index], cmf[index], cpa[index], pwr1par[index], pwr1perr[index], pwr2par[index], pwr2perr[index], vspoles[index])

print(f'imodel: {imodel[index]}')
print(f'(alpha, cmf, cpa, pwr1par, pwr1perr, pwr2par, pwr2perr, vspoles): {parameters}')

imodel: 1889802
(alpha, cmf, cpa, pwr1par, pwr1perr, pwr2par, pwr2perr, vspoles): (55.0, 4.5, 390.0, 1.3, 1.0, 2.3, 0.4, 600.0)


In [36]:
model_version = 'v3.0'
data_path = f'../../data/oct2022/{data.experiment_name}/{data.experiment_name}_{data.interval}.dat'  # This data is the same.
model_path = f'../../models/model_{model_version}_{data.polarity}.keras'
penalty = 1e6

print(f'model_path: {model_path}')
print(f'data_path: {data_path}')

# Load trained NN model that maps 7 parameters to predicted flux at RIGIDITY_VALS.
model = keras.models.load_model(model_path)
model.run_eagerly = True # Settable attribute (in elegy). Required to be true for ppmodel.

# Load observation data from Claudio
bins, observed, uncertainty = utils.load_data_ams(data_path)
bin_midpoints = (bins[:-1] * bins[1:]) ** 0.5  # Geometric mean seemed to work better in exp.

# Transform parameters
parameters_transformed = utils.transform_input(jnp.array(parameters))

model_path: ../../models/model_v3.0_neg.keras
data_path: ../../data/oct2022/AMS02_H-PRL2021/AMS02_H-PRL2021_20110520-20110610.dat


In [37]:
# Include logprior in loglikelihood. This keeps HMC from going off into no-mans land.
nlogprior = 0.
for i in range(5):
    nlogprior += penalty * jnp.abs((jnp.minimum(0., parameters[i]))) # Penalty for being <0
    nlogprior += penalty * jnp.abs((jnp.maximum(1., parameters[i]) - 1.))  # Penalty for being >1

xs = parameters_transformed.reshape((1,-1)) # Reshape to (,8) for keras.
yhat = model(xs)
yhat = yhat.numpy()[0] # Convert to float32

# Interpolate to get predicted flux at midpoint of bin points.
yhat = jnp.interp(bin_midpoints, utils.RIGIDITY_VALS, yhat)
yhat = utils.untransform_output(yhat.reshape((1,-1))).reshape(-1) # Undo scaling and minmax.

# Compute log prob
chi2 = (((yhat - observed)/uncertainty)**2)
log_prob = -chi2/2.  - nlogprior

print(f'chi2: {chi2}')
print(f'log_prob: {log_prob}')

chi2: [1.97798705e+00 1.90102506e+00 9.36039925e-01 1.42470773e-04
 1.29559889e-01 1.43418634e+00 3.91116524e+00 6.69540215e+00
 1.06795349e+01 1.55536318e+01 1.32952309e+01 1.56514778e+01
 1.69665146e+01 1.23801899e+01 1.33109989e+01 1.63942223e+01
 1.00889015e+01 8.23560619e+00 1.37464008e+01 9.14031506e+00
 1.10917306e+00 3.58611512e+00 5.57641077e+00 6.14140593e-02
 3.36890012e-01 1.04261196e+00 3.88310361e+00 2.51810217e+00
 4.38601494e+00 4.71536970e+00]
log_prob: [-4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08
 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08
 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08
 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08
 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08 -4.468e+08]


In [38]:
# For each data bin, print the rigidity (geometric mean + left and right edges); the data value and uncertainty; 
# the model interpolated in the geometric mean and its residual (data - model)/uncertainty; 
# the integral of the model over the bin and its residual; and the cumulative chi2 using the model integral.
# print in this format: [000] rig = 1.077033 [ 1.00,   1.16]; data = 9.542576e+02; unc = 2.811600e+01; mod = 9.192716e+02; res = +1.244347e+00; mod_int = 9.189937e+02; res_int = +1.254230e+00; cum_chi2_int = 1.573092e+00
for i in range(len(bin_midpoints)):
    res = (observed[i] - yhat[i])/uncertainty[i]

    print(f'[0{i}] rig = {bin_midpoints[i]:.6f} [{bins[i]:.2f}, {bins[i+1]:.2f}]; data = {observed[i]:.6e}; unc = {uncertainty[i]:.6e}; mod = {yhat[i]:.6e}; res = {res:+.6e}; chi2 = {chi2[i]:.6e}; cum_chi2 = {np.sum(chi2[:i+1]):.6e}')

[00] rig = 1.077033 [1.00, 1.16]; data = 9.542576e+02; unc = 2.811600e+01; mod = 9.147150e+02; res = +1.406409e+00; chi2 = 1.977987e+00; cum_chi2 = 1.977987e+00
[01] rig = 1.242095 [1.16, 1.33]; data = 9.411921e+02; unc = 2.148905e+01; mod = 9.115635e+02; res = +1.378777e+00; chi2 = 1.901025e+00; cum_chi2 = 3.879012e+00
[02] rig = 1.417145 [1.33, 1.51]; data = 8.769211e+02; unc = 1.643800e+01; mod = 8.610175e+02; res = +9.674916e-01; chi2 = 9.360399e-01; cum_chi2 = 4.815052e+00
[03] rig = 1.606891 [1.51, 1.71]; data = 8.003528e+02; unc = 1.293201e+01; mod = 8.005071e+02; res = -1.193611e-02; chi2 = 1.424708e-04; cum_chi2 = 4.815195e+00
[04] rig = 1.811960 [1.71, 1.92]; data = 7.088688e+02; unc = 1.028839e+01; mod = 7.125720e+02; res = -3.599443e-01; chi2 = 1.295599e-01; cum_chi2 = 4.944755e+00
[05] rig = 2.031748 [1.92, 2.15]; data = 6.185521e+02; unc = 8.293686e+00; mod = 6.284844e+02; res = -1.197575e+00; chi2 = 1.434186e+00; cum_chi2 = 6.378941e+00
[06] rig = 2.271563 [2.15, 2.40]; 