In [4]:
import sys
sys.path.append('../')
import utils
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from pathlib import Path

In [18]:
def index_mcmc_runs():
    """Make a list of combinations for which we want to run MCMC."""
    experiments = ['AMS02_H-PRL2021', 'PAMELA_H-ApJ2013', 'PAMELA_H-ApJL2018']
    dfs = []
    for experiment_name in experiments:
        filename = f'../../data/2023/{experiment_name}_heliosphere.dat'
        df = utils.index_experiment_files(filename) 
        df['experiment_name'] = experiment_name
        df['filename_heliosphere'] = filename
        dfs.append(df)
    df = pd.concat(dfs, axis=0, ignore_index=0)
    return df

df = index_mcmc_runs()
print(df.shape)
df.head()

(133, 12)


Unnamed: 0,interval,alpha,cmf,vspoles,alpha_std,cmf_std,vspoles_std,beginning,ending,polarity,experiment_name,filename_heliosphere
0,20110520-20110610,51.49,4.85,632.52,10.69,1.87,22.41,2011-05-20,2011-05-20,neg,AMS02_H-PRL2021,../../data/2023/AMS02_H-PRL2021_heliosphere.dat
1,20110611-20110707,53.61,4.87,627.62,9.8,1.82,22.7,2011-06-11,2011-06-11,neg,AMS02_H-PRL2021,../../data/2023/AMS02_H-PRL2021_heliosphere.dat
2,20110708-20110803,55.39,4.92,622.22,9.63,1.83,22.7,2011-07-08,2011-07-08,neg,AMS02_H-PRL2021,../../data/2023/AMS02_H-PRL2021_heliosphere.dat
3,20110804-20110830,57.12,4.95,616.76,9.44,1.86,22.8,2011-08-04,2011-08-04,neg,AMS02_H-PRL2021,../../data/2023/AMS02_H-PRL2021_heliosphere.dat
4,20110831-20110926,58.72,5.05,611.21,9.59,1.92,23.02,2011-08-31,2011-08-31,neg,AMS02_H-PRL2021,../../data/2023/AMS02_H-PRL2021_heliosphere.dat


### Compare MCMC logacceptratio and number of samples to HMC

In [30]:
# Compare mcmc and hmc logacceptratio and num generated samples
# Acceptratio values are stored for each experiement in a file like this: "results_dir/logacceptratio_{i}.csv" where indices are 0-132
# Samples are stored for each experiement in file like this: "results_dir/samples_{i}_{experiment_name}_{interval}_{polarity}.csv" where indices are 0-132
results_dir_mcmc = f'../../../results/v21.0/'
results_dir_hmc = f'../../../results/v16.0/'

# Load results from MCMC run
mcmc_acc_ratio = []
mcmc_num_samples = []
num_results = 1_000_000

for i in range(0, 132):
    experiment_name = df.iloc[i]['experiment_name']
    interval = df.iloc[i]['interval']
    polarity = df.iloc[i]['polarity']
    samples = pd.read_csv(f'{results_dir_mcmc}samples_{i}_{experiment_name}_{interval}_{polarity}.csv')
    mcmc_num_samples.append(samples.shape[0])

    mcmc_acc_ratio.append(samples.shape[0]/num_results)

mcmc_acc_ratio = np.array(mcmc_acc_ratio)
mcmc_num_samples = np.array(mcmc_num_samples)
mcmc_avg_acc_ratio = mcmc_acc_ratio.mean()
mcmc_avg_num_samples = mcmc_num_samples.mean()
print(f"MCMC acceptance ratio average: {mcmc_avg_acc_ratio}")
print(f"MCMC average number of samples: {mcmc_avg_num_samples}")

MCMC acceptance ratio average: 0.0005075151515151516
MCMC average number of samples: 507.5151515151515


In [31]:
# Load results from HMC run
hmc_acc_ratio = []
hmc_num_samples = []
num_pos = 0
num_results = 110_000

for i in range(0, 209):
    if i in range(50, 125): 
        num_pos += 1
        continue
    elif i in range(208, 209):
        continue
    else:
        index = i - num_pos

    experiment_name = df.iloc[index]['experiment_name']
    interval = df.iloc[index]['interval']
    polarity = df.iloc[index]['polarity']
    samples = pd.read_csv(f'{results_dir_hmc}samples_{i}_{experiment_name}_{interval}_{polarity}.csv')
    hmc_num_samples.append(samples.shape[0])

    hmc_acc_ratio.append(samples.shape[0]/num_results)

hmc_acc_ratio = np.array(hmc_acc_ratio)
hmc_num_samples = np.array(hmc_num_samples)
hmc_avg_acc_ratio = hmc_acc_ratio.mean()
hmc_avg_num_samples = hmc_num_samples.mean()
print(f"HMC acceptance ratio average: {hmc_avg_acc_ratio}")
print(f"HMC average number of samples: {hmc_avg_num_samples}")

HMC acceptance ratio average: 0.999990909090909
HMC average number of samples: 109999.0


### Create plots

In [None]:
# Select experiment parameters
df = index_mcmc_runs()  # List of all ~200 experiments.

# Model specification
version = 'v21.0' # 'v1.0'
reduce_by = 1 # Reduce the number of MCMC samples by this factor. 9 for v2.0/v5.0, 1 for v1.0/v3.0/v4.0, 30 for v6.0

# Setup  output directory.
results_dir = f'../../../results/{version}/'
figs_dir = f'{results_dir}figs/'
Path(figs_dir).mkdir(parents=True, exist_ok=True)

print(df.head())
print(df.shape)

In [None]:
# Load samples, logprobs, and predictions to each respective index in the dataframe
for i in [21]: #len(df)):
    experiment_name = df["experiment_name"].iloc[i]
    interval = df.interval.iloc[i]
    polarity = df.polarity.iloc[i]

    filename = f'{results_dir}samples_{i}_{experiment_name}_{interval}_{polarity}.csv'
    print(f"Filename: {filename}")

    samples = np.loadtxt(filename, delimiter=',')
    # logprobs = np.loadtxt(f'{results_dir}logprobs_{i}_{experiment_name}_{interval}_{polarity}.csv', delimiter=',')
    # predictions = np.loadtxt(f'{results_dir}predictions_{i}_{experiment_name}_{interval}_{polarity}.csv', delimiter=',')
    
    # Examine only a few samples. Take 1 out of every 9 samples.
    samples = samples[::reduce_by, :]
    samples_small = samples[::50, :]
    print(f"Samples shape: {samples.shape}. Small samples shape: {samples_small.shape}")

    # Make a 5 by 2 plot of the trace and acf of the samples
    fig, axes = plt.subplots(5, 2, figsize=(22, 20))
    axes[0, 0].plot(samples_small[:, 0])
    axes[1, 0].plot(samples_small[:, 1])
    axes[2, 0].plot(samples_small[:, 2])
    axes[3, 0].plot(samples_small[:, 3])
    axes[4, 0].plot(samples_small[:, 4])

    lags = 500
    plot_acf(samples[:, 0], lags=lags, ax=axes[0, 1], title="", markersize=0)
    plot_acf(samples[:, 1], lags=lags, ax=axes[1, 1], title="", markersize=0)
    plot_acf(samples[:, 2], lags=lags, ax=axes[2, 1], title="", markersize=0)
    plot_acf(samples[:, 3], lags=lags, ax=axes[3, 1], title="", markersize=0)
    plot_acf(samples[:, 4], lags=lags, ax=axes[4, 1], title="", markersize=0)

    # Get min and maxc from acf plots
    min_y = -0.05
    max_y = 0.5

    axes[0, 1].set_ylim(min_y, max_y)
    axes[1, 1].set_ylim(min_y, max_y)
    axes[2, 1].set_ylim(min_y, max_y)
    axes[3, 1].set_ylim(min_y, max_y)
    axes[4, 1].set_ylim(min_y, max_y)

    # Make a title for each column and each row
    for ax, col in zip(axes[0], ['Trace', 'ACF']):
        ax.set_title(col)
    for ax, col in zip(axes[4], ['Iteration', 'Lag']):
        ax.set_xlabel(col, size='large')
    for ax, row in zip(axes[:,0], ['cpa', 'pwr1par', 'pwr2par', 'pwr1perr', 'pwr2perr']):
        ax.set_ylabel(row, size='large')

    # Set super title that is the filename
    fig.suptitle(f'samples_{i}_{experiment_name}_{interval}_{polarity}', fontsize=16)

    # Remove whitespace below title and between columns
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])   

    plt.savefig(f'{figs_dir}{i}_{experiment_name}_{interval}_{polarity}.png')
    plt.show()

: 