In [None]:
from baynes.model_utils import get_model, inits_from_priors
from baynes.plotter import FitPlotter
from scipy import stats
import itertools as it
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from matplotlib import gridspec
from matplotlib import rc
import os

import cmdstanpy
import logging
%config IPCompleter.use_jedi=False

# Useful Functions

In [None]:
def add_subplot_axes(ax,rect,axisbg='w'):
    """Add an inset axes to the current figure, positioned relative to `ax`.

    Parameters
    ----------
    ax : matplotlib axes
        Parent axes used as the reference frame.
    rect : sequence of four numbers
        ``[x0, y0, w, h]``: ``(x0, y0)`` is the lower-left corner of the inset
        in the axes coordinates of `ax`; ``w`` and ``h`` scale the width and
        height of `ax` to give the inset size.
    axisbg : color, optional
        Face colour of the inset axes.

    Returns
    -------
    The newly created inset axes. Its tick-label sizes are scaled by
    ``sqrt(w)`` (x axis) and ``sqrt(h)`` (y axis).
    """
    # The inset is attached to the *current* figure, as reported by pyplot.
    figure = plt.gcf()
    parent_box = ax.get_position()

    # Lower-left corner: parent-axes coordinates -> figure coordinates.
    corner = ax.transAxes.transform(rect[0:2])
    corner_in_fig = figure.transFigure.inverted().transform(corner)
    left = corner_in_fig[0]
    bottom = corner_in_fig[1]

    # Inset size as a fraction of the parent axes size.
    inset_width = parent_box.width * rect[2]
    inset_height = parent_box.height * rect[3]
    subax = figure.add_axes([left, bottom, inset_width, inset_height], facecolor=axisbg)

    # Shrink the tick labels along with the inset.
    x_size = subax.get_xticklabels()[0].get_size() * rect[2]**0.5
    y_size = subax.get_yticklabels()[0].get_size() * rect[3]**0.5
    subax.xaxis.set_tick_params(labelsize=x_size)
    subax.yaxis.set_tick_params(labelsize=y_size)
    return subax
    

In [None]:
def plot_fit_results(plotter=None,edges=None,x=None,counts=None,savepath =None, pvalue = None, logscale = True,
                    label_gen_data = "counts_rep"):
    """Plot the binned data with the fitted spectrum, and residuals or p-values below.

    The upper panel shows `counts` as a step histogram together with the mean of
    the generated quantity `label_gen_data` and a band of +/- one standard
    deviation. The lower panel shows ``(counts - mean) / std`` per bin, or
    `pvalue` when it is given.

    Parameters
    ----------
    plotter : object
        Must expose ``get_fit(label)``; the fit returned by ``get_fit(None)`` is
        plotted.  # NOTE(review): presumably the most recent fit - confirm in baynes.
    edges : array-like, optional
        Bin edges. Only ``edges[0]``, ``edges[1]`` and ``edges[-1]`` are used,
        so the binning is assumed uniform. Required when `x` is None.
    x : array-like, optional
        Bin centres. When given, `edges` is ignored.
    counts : array-like
        Observed entries per bin.
    savepath : str, optional
        If not None, the figure is saved there before being shown.
    pvalue : array-like, optional
        Per-bin values drawn in the lower panel instead of the residuals.
    logscale : bool, optional
        Use a logarithmic y axis in the upper panel.
    label_gen_data : str, optional
        Name of the generated-data variable in the fit.

    Raises
    ------
    ValueError
        If `plotter` or `counts` is missing, or neither `x` nor `edges` is given.
    """
    # Fail early with a clear message instead of an AttributeError on None.
    if plotter is None or counts is None:
        raise ValueError("plot_fit_results needs both `plotter` and `counts`")
    if x is None and edges is None:
        raise ValueError("plot_fit_results needs either `x` (bin centres) or `edges`")

    rc('text', usetex=False)
    rc('font', family='serif', size=20)
    rc('figure', figsize=(12,8))
    rc('axes',linewidth=2)

    try:
        counts = np.asarray(counts)
        bins = counts.shape[0]
        if x is None:
            step = edges[1]-edges[0]
            xhist = np.linspace(edges[0]+step/2,edges[-1]-step/2,bins)
        else:
            step = x[1]-x[0]
            xhist = x

        # Extract the generated data once; mean and spread are taken over axis 0.
        generated = plotter.get_fit(None).stan_variables()[label_gen_data]
        yfit = np.mean(generated,axis=0)
        std_fit = np.std(generated,axis=0)

        gs = gridspec.GridSpec(2, 1,height_ratios=[3,1])
        gs.update(wspace = 0.4, hspace=0.1)

        # Upper panel: data histogram and fit band.
        ax = plt.subplot(gs[0, 0])
        ax.hist(xhist,weights=counts,bins=bins,histtype="step",color='k',range=(xhist[0]-step/2,xhist[-1]+step/2))
        ax.plot(xhist,yfit,c='r',lw=0.4)
        ax.fill_between(xhist,yfit,yfit+std_fit,color='r',alpha=0.3)
        ax.fill_between(xhist,yfit,yfit-std_fit,color='r',alpha=0.3)

        ax.set_ylabel("Counts / "+str(np.round(step,2)) + " eV")
        ax.set_xlim(xhist[0],xhist[-1])
        if logscale:
            ax.set_yscale("log")
        ax.set_ylim(0.1)
        # The x tick labels are shown on the lower panel only.
        ax.set_xticklabels([],fontsize=0)

        # Lower panel: residuals in units of the fit spread, or the supplied p-values.
        ax = plt.subplot(gs[1, 0])
        if pvalue is None:
            # Bins with zero spread give inf/nan; silence the numpy warning for them.
            with np.errstate(divide="ignore", invalid="ignore"):
                residuals = (counts-yfit)/std_fit
            ax.plot(xhist,residuals,ls="",marker='o',color='k',ms=2,lw=0.2)
            ax.axhline(0,color="b",ls="--")
            ax.set_xlabel("Energy (eV)")
            ax.set_ylabel(r"r ($\sigma$)")
            ax.set_yticks([-3,-2,-1,0,1,2,3])
            ax.set_yticklabels([-3,-2,-1,0,1,2,3],fontsize=15)
            ax.set_xlim(xhist[0],xhist[-1])
            ax.set_ylim(-4,4)
        else:
            ax.plot(xhist,pvalue,ls="",marker='o',color='k',ms=2)
            ax.set_xlabel("Energy (eV)")
            ax.set_ylabel(r"p-value")
            ax.set_yticks([0,0.25,0.5,0.75,1])
            ax.set_yticklabels([0,0.25,0.5,0.75,1],fontsize=15)
            ax.set_xlim(xhist[0],xhist[-1])

        if savepath is not None:
            plt.savefig(savepath,bbox_inches='tight')
        plt.show()
    finally:
        # Reset the rc settings even when plotting fails half-way.
        plt.rcdefaults()
    return

In [1]:
from decimal import Decimal

def plot_Prior_Post(variables_name=None,variables_name2=None,ncol=4,prior_label = "prior_fit",posterior_label = None,
                   savefig=None,plotter=None):
    """Overlay the prior and posterior distributions of each variable on a grid.

    Each panel shows two step histograms normalised to unit peak height:
    posterior in "firebrick" and prior in "darkblue". A small inset carries the
    display label.

    Parameters
    ----------
    variables_name : sequence of str
        Names of the variables as they appear in the fits.
    variables_name2 : sequence of str, optional
        Display labels, one per variable. Defaults to `variables_name`.
    ncol : int, optional
        Number of panel columns.
    prior_label, posterior_label : optional
        Labels passed to ``plotter.get_fit`` to fetch the two fits.
    savefig : str, optional
        If not None, the figure is saved there before being shown.
    plotter : object, optional
        Must expose ``get_fit(label)``. When omitted, the module-level name
        ``plotter`` is used, so calls that do not pass one keep working.

    Raises
    ------
    ValueError
        If no plotter is available, `variables_name` is empty, or there are
        fewer labels than variables.
    """
    if plotter is None:
        # Fall back to the notebook-level `plotter` for calls that omit it.
        plotter = globals().get("plotter")
        if plotter is None:
            raise ValueError("plot_Prior_Post needs a `plotter` (pass plotter=...)")

    variables_name = list(variables_name) if variables_name is not None else []
    if not variables_name:
        raise ValueError("`variables_name` must contain at least one variable name")
    variables_name2 = list(variables_name2) if variables_name2 else list(variables_name)
    if len(variables_name2) < len(variables_name):
        raise ValueError("`variables_name2` must provide one display label per variable")

    rc('text', usetex=False)
    rc('font', family='serif', size=15)
    rc('figure', figsize=(12,8))
    rc('axes',linewidth=2)

    try:
        # Extract both fits once instead of once per variable.
        prior_vars = plotter.get_fit(prior_label).stan_variables()
        post_vars = plotter.get_fit(posterior_label).stan_variables()

        nrow = -(-len(variables_name) // ncol)  # ceiling division
        gs = gridspec.GridSpec(nrow, ncol)
        gs.update(wspace = 0.4, hspace=0.5)

        for i, (name, label) in enumerate(zip(variables_name, variables_name2)):
            nr, nc = divmod(i, ncol)
            ax = plt.subplot(gs[nr, nc])
            varprior = prior_vars[name]
            varpost = post_vars[name]
            # Common range covering BOTH samples, so neither histogram is clipped.
            xmin = min(np.min(varprior), np.min(varpost))
            xmax = max(np.max(varprior), np.max(varpost))

            # Inset box holding the display label.
            rect = [0.7,0.8,0.5,0.3]
            subax = add_subplot_axes(ax,rect)
            subax.set_yticks([])
            subax.set_xticks([])
            subax.text(0.5,0.4,label,horizontalalignment='center',verticalalignment='center',transform=subax.transAxes)

            for samples, color in ((varpost, "firebrick"), (varprior, "darkblue")):
                counts, edge = np.histogram(samples,bins=100,range=(xmin,xmax))
                xb = (edge[:-1] + edge[1:]) / 2
                # Normalise to unit peak so prior and posterior are comparable by eye.
                ax.hist(xb,weights=counts/np.max(counts),histtype="step",color=color,bins=100,range=(xmin,xmax))
            ax.set_xlim(xmin,xmax)

            if (xmin <= 1e-4) or (xmin >= 1e4) or (xmax >= 1e4):
                # Very small or very large ranges: two ticks in scientific notation.
                ss = xmax-xmin
                ticks = [xmin+0.15*ss, xmax-0.15*ss]
                ax.set_xticks(ticks, ['%.1E' % t for t in ticks])
            else:
                # Three ticks (min, middle, max), rounded according to magnitude.
                decimals = 1 if xmin <= 1e2 else 0
                ax.set_xticks([np.round(xmin,decimals),
                               np.round(xmin+(xmax-xmin)/2,decimals),
                               np.round(xmax,decimals)])
            ax.set_yticks([])

        if savefig is not None:
            plt.savefig(savefig)
        plt.show()
    finally:
        # Reset the rc settings even when plotting fails half-way.
        plt.rcdefaults()

In [None]:
from matplotlib.colors import LogNorm
from matplotlib.pyplot import *

def plot_par1_par2(plotter=None,posterior_label = None,pname1 = "",pname2 = "",bins=(50,50),savefig=None):
    """Plot the joint distribution of two variables with their marginals.

    Layout (2x2 grid without spacing): the lower-left panel is a 2D histogram
    with `pname1` on the x axis and `pname2` on the y axis. The upper-left panel
    is the marginal of `pname1`, and the lower-right panel is the marginal of
    `pname2`, drawn horizontally.

    Parameters
    ----------
    plotter : object
        Must expose ``get_fit(label)``.
    posterior_label : optional
        Label passed to ``plotter.get_fit``.
    pname1, pname2 : str
        Names of the variables in the fit (x and y axis respectively).
    bins : optional
        Binning forwarded to ``hist2d``.
    savefig : str, optional
        If not None, the figure is saved there before being shown.

    Raises
    ------
    ValueError
        If `plotter` is None.
    """
    if plotter is None:
        raise ValueError("plot_par1_par2 needs a `plotter`")

    rc('text', usetex=False)
    rc('font', family='serif', size=20)
    rc('figure', figsize=(12,8))
    rc('axes',linewidth=2)

    try:
        # Extract the fit once; flatten so every panel sees the same 1D samples.
        samples = plotter.get_fit(posterior_label).stan_variables()
        xvals = np.ravel(samples[pname1])
        yvals = np.ravel(samples[pname2])

        gs = gridspec.GridSpec(2, 2)
        gs.update(wspace = 0.0, hspace=0.0)

        # Joint distribution; cmin=1 leaves empty bins blank.
        ax = plt.subplot(gs[1, 0])
        ax.hist2d(xvals,yvals,bins=bins,cmin=1)
        # pname1 is on x and pname2 on y, matching the hist2d argument order.
        ax.set_xlabel(pname1)
        ax.set_ylabel(pname2)
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()

        # Marginal of pname2, sharing the y range of the joint panel.
        ax = plt.subplot(gs[1, 1])
        ax.hist(yvals,bins=100,range=(ylim[0],ylim[1]),histtype="step",color='k',density=True,lw=2,orientation="horizontal")
        ax.set_ylim(ylim)
        ax.set_yticks([])
        ax.set_xticks([])

        # Marginal of pname1, sharing the x range of the joint panel.
        ax = plt.subplot(gs[0, 0])
        ax.hist(xvals,bins=100,range=(xlim[0],xlim[1]),histtype="step",color='k',density=True,lw=2)
        ax.set_xlim(xlim)
        ax.set_yticks([])
        ax.set_xticks([])

        if savefig is not None:
            plt.savefig(savefig)
        plt.show()
    finally:
        # Reset the rc settings even when plotting fails half-way.
        plt.rcdefaults()

In [None]:
from scipy import stats

def plot_PearsonCorr(plotter=None,posterior_label=None,variables_name=None,variables_name2=None,fontsize=10,savefig=None):
    """Show the matrix of Pearson correlation coefficients between variables.

    Parameters
    ----------
    plotter : object
        Must expose ``get_fit(label)``.
    posterior_label : optional
        Label passed to ``plotter.get_fit``.
    variables_name : sequence of str
        Names of the variables in the fit.
    variables_name2 : sequence of str, optional
        Tick labels. Defaults to `variables_name`.
    fontsize : optional
        Font size of the tick labels.
    savefig : str, optional
        If not None, the figure is saved there before being shown.

    Raises
    ------
    ValueError
        If `plotter` is None or `variables_name` is empty.
    """
    if plotter is None:
        raise ValueError("plot_PearsonCorr needs a `plotter`")
    names = list(variables_name) if variables_name is not None else []
    if not names:
        raise ValueError("`variables_name` must contain at least one variable name")
    labels = list(variables_name2) if variables_name2 else names

    # Extract and flatten every sample once, not once per matrix element.
    posterior = plotter.get_fit(posterior_label).stan_variables()
    samples = [np.ravel(posterior[name]) for name in names]

    nv = len(names)
    corr_full = np.zeros((nv, nv))
    for j in range(nv):
        # The matrix is symmetric: compute the upper triangle and mirror it.
        for i in range(j, nv):
            r = stats.pearsonr(samples[j], samples[i])[0]
            corr_full[j, i] = r
            corr_full[i, j] = r

    # Colour scale from the off-diagonal terms only; the diagonal would otherwise
    # dominate and flatten the contrast.
    off_diag = corr_full[~np.eye(nv, dtype=bool)]
    scale = np.max(np.abs(off_diag)) if off_diag.size else 0.0
    if not np.isfinite(scale) or scale == 0:
        # Single variable, or no usable correlation: use the full [-1, 1] range.
        scale = 1.0

    plt.matshow(corr_full,cmap="coolwarm",vmin = -scale,vmax = scale)
    plt.xticks(np.arange(len(labels)),labels,fontsize=fontsize,rotation=90)
    plt.yticks(np.arange(len(labels)),labels,fontsize=fontsize)

    plt.colorbar()
    if savefig is not None:
        plt.savefig(savefig)
    plt.show()

# Load model

In [None]:
from cmdstanpy import CmdStanModel
import baynes

# Optional compiler settings. `dic` is only used if the commented CmdStanModel call
# at the end of this cell is enabled; it then replaces the baynes defaults.
dic ={
    "cpp_options": {
      "STAN_THREADS": True,
      "jN": 4   # NOTE(review): presumably the number of parallel build jobs - confirm in the cmdstanpy docs
    },
    "stanc_options": {
      "include-paths": "/home/installs/work/baynes/stan/include"
    }
}

# Build the model object from the Stan file. The compiler keyword arguments are read
# from the "STAN_COMPILER_KWARGS" entry of the baynes configuration.
# NOTE(review): `stan_file` is an absolute, machine-specific path - point it at your own model.
model = CmdStanModel(stan_file="/home/matteo/Scrivania/lavoracci/Bayes/models/Ho_roi_shakeoff.stan",**baynes.get_config()["STAN_COMPILER_KWARGS"])
# Uncomment the following line (and adapt the path) to compile with the `dic` options instead.
#model = CmdStanModel(stan_file="/home/mborghesi/Desktop/stan/stan_models/Ho_par_roi.stan",**dic) 

# Load data

Load the data block for STAN.

The data block will be a python dictionary named "data"

## Option 1: From json

In [None]:
import json

# Read the data block from a JSON file. The context manager closes the file as soon
# as it has been parsed.
# NOTE(review): absolute, machine-specific path - adapt it to your setup.
with open('/home/matteo/Scrivania/lavoracci/Juno/notebook/FixNuSpectra.json') as f:
    data_2 = json.load(f)  # parsed JSON content, kept untouched

# `data` is the dictionary handed to the sampler. It is a shallow copy, so adding or
# replacing top-level keys in the next cell does not modify `data_2`.
data = data_2.copy()

In [None]:
# Optional: override single entries of the data block by hand before sampling.
par_value1 = 0.5
data.update(par_name1=par_value1)

## Option 2: Custom (from txt)

At the end of this section, the important variables will be:

**COUNTS** array with the bin entries. Size (n)

**EDGES** array with the bin edges. Size (n+1)

In [None]:
# Read the spectrum from a two-column text file:
#   column 1 -> bin centres, column 2 -> counts per bin.
fdata = "MockAsimov.dat"
energy_file, counts_file = np.transpose(np.loadtxt(fdata))

# Bin width, taken from the first two bin centres.
step = energy_file[1] - energy_file[0]

# Quick look at the full spectrum, one histogram bin per data row.
plt.hist(
    energy_file,
    weights=counts_file,
    bins=len(counts_file),
    histtype="step",
    color='k',
    range=(energy_file[0] - step/2, energy_file[-1] + step/2),
)
plt.yscale("log")
plt.ylabel("Counts / " + str(step) + " eV")
plt.xlabel("Energy [eV]")
plt.xlim(energy_file[0], energy_file[-1])
plt.show()

In [None]:
# Restrict the spectrum to the energy window used in the fit.
Emin = 2250
Emax = 3500

# Indices of the bins whose centre lies inside [Emin, Emax].
ii = np.flatnonzero((energy_file >= Emin) & (energy_file <= Emax))
step = energy_file[1] - energy_file[0]
bins = len(ii)
xhist = energy_file[ii]

# Snap the window limits to the first and last selected bin centres.
Emin, Emax = xhist[0], xhist[-1]

# COUNTS: integer bin entries (astype(int) truncates any fractional part).
# EDGES: bin edges, starting half a bin below the first centre.
COUNTS = counts_file[ii].astype(int)
EDGES = np.arange(xhist[0] - step/2, xhist[-1] + step, step)

# Preview of the selected window (drawn from the un-truncated counts).
plt.hist(
    xhist,
    weights=counts_file[ii],
    bins=bins,
    histtype="step",
    color='k',
    range=(xhist[0] - step/2, xhist[-1] + step/2),
)
plt.yscale("log")
plt.ylabel("Counts / " + str(step) + " eV")
plt.xlabel("Energy [eV]")
plt.xlim(Emin, Emax)
plt.show()

In [None]:
# Placeholder values for the remaining entries of the data block:
# a central value and a spread for each, scalar or one entry per component.
p_value1 = 6
p_value1_std = 0.01
p_value2 = [33,66]
p_value2_std = [2,4]

# Data block handed to the sampler; arrays are converted to plain lists.
# The keys must match the names declared in the model's data block.
data = dict(
    counts=COUNTS.tolist(),
    x=EDGES.tolist(),
    p_name1=p_value1,
    p_name1_std=p_value1_std,
    p_name2=p_value2,
    p_name2_std=p_value2_std,
)

# Prior predictive check

In [None]:
# Prior run: sample with the data flag "prior" set to 1.
# NOTE(review): presumably the model skips the likelihood when prior == 1 - confirm in the .stan file.
# NOTE(review): no `seed` is passed here, so the draws may differ from run to run.
data["prior"] = 1
prior_fit = model.sample(data,
                         chains=4,
                         iter_warmup=100,
                         iter_sampling=1000,show_console=False, #set show_console = True for debugging
                         #sig_figs=9  # uncomment this line if your parameters have very small values.
                        )

plotter = FitPlotter(prior_fit, fit_title='prior_fit')  #create the plotter object from the prior run.
                                                        #The prior run is stored under the label "prior_fit"

In [None]:
# Compare the generated data of the prior run with the observed data.
# NOTE(review): `name_y_data` is presumably looked up as a key of `data`; the custom
# data block built above uses the key 'counts', not "y" - adjust it to your model.
name_generated_data = 'counts_rep' #the variable name in the Stan code for the generated data
name_y_data = "y"                  #the variable name in the Stan code for the observed y

plotter.predictive_check(name_generated_data,
                         data=data,
                         data_key=name_y_data)

In [None]:
#plotter.kde_plot(hue='variable')  #plot the prior distribution for all the parameters
                                   #to see all the options, check the help for the functions

# Posterior(s) evaluation

In [None]:
# Number of chains for the posterior run; one set of initial values is requested per chain.
n_chains = 4
# NOTE(review): presumably `inits_from_priors` derives the initial values from the
# prior run and returns the files holding them - confirm in baynes.model_utils.
init_files = inits_from_priors(model, prior_fit, n_chains)
print(init_files)

In [None]:
# NOTE(review): the plotting cells further down fetch the posterior with the label
# None; confirm that `get_fit(None)` returns this fit for the label chosen here.
fit_label = ""  #give a name for the fit. Optional

# Posterior run: the data flag "prior" is switched to 0 and sampling starts from
# the initial values prepared in `init_files`.
data['prior'] = 0
fit = model.sample(data,
                   chains=n_chains,
                   iter_warmup=500,
                   iter_sampling=1000,
                   save_warmup=True,     # keep the warmup draws in the output
                   inits=init_files,
                   show_console=False,   # set to True for debugging
                   #sig_figs=9           # uncomment if your parameters have very small values
                   )
# Register the posterior run in the same plotter that already holds the prior run.
plotter.add_fit(fit, fit_title=fit_label)

# Sampler diagnostics, then the convergence plot.
print(fit.diagnose())
plotter.convergence_plot(initial_steps=100)

In [None]:
# Compare the generated data of the posterior run with the observed data.
# NOTE(review): `name_y_data` is presumably looked up as a key of `data`; the custom
# data block built above uses the key 'counts', not "y" - adjust it to your model.
name_generated_data = 'counts_rep' #the variable name in the Stan code for the generated data
name_y_data = "y"                  #the variable name in the Stan code for the observed y

plotter.predictive_check(name_generated_data,
                         data=data,
                         data_key=name_y_data)

In [None]:
# Data vs. fit with residuals; set `savepath` to a file name to save the figure.
plot_fit_results(plotter,edges=EDGES,counts=COUNTS,savepath=None)

In [None]:
#plotter.kde_plot(hue='variable')  #plot the prior distribution for all the parameters
                                   #to see all the options, check the help for the functions

In [None]:
#plotter.pair_grid() #plot the triangular (pair) plot for all the posteriors. Useful to check for correlations.
                                    #to see all the options, check the help for the function

In [None]:
variables_name = ["p1","p2"] #names of the variables in the Stan code
variables_name2 = [r"$p_1$",r"$p_2$"] #labels for display (mathtext needs the $...$ delimiters)
spath = None #path for saving the figure. None to not save

# Prior (label "prior_fit") vs. posterior (label None) for each listed variable.
plot_Prior_Post(variables_name,variables_name2,ncol=4,prior_label="prior_fit",posterior_label=None,savefig=spath)

In [None]:
variables_name = ["p1","p2"] #names of the variables in the Stan code
variables_name2 = [r"$p_1$",r"$p_2$"] #labels for display (mathtext needs the $...$ delimiters)
spath = None #path for saving the figure. None to not save

# Pearson correlation matrix of the listed variables; `spath` is forwarded so that
# setting it actually saves the figure.
plot_PearsonCorr(plotter=plotter,variables_name=variables_name,variables_name2=variables_name2,savefig=spath)

In [None]:
p1 = ""  #name of the first variable in the Stan code
p2 = ""  #name of the second variable in the Stan code
spath = None #path for saving the figure. None to not save

# Joint distribution of the two variables with their marginals.
plot_par1_par2(plotter=plotter,posterior_label = None,pname1 = p1,pname2 = p2,bins=(50,50),savefig=spath)

# Simple cmd for the plotter object

In [None]:
# Retrieve a fit stored in the plotter by its label
# (the label is the `fit_title` given when the fit was added, e.g. "prior_fit").
label = ""

results = plotter.get_fit(label)

In [None]:
# Get the samples of one variable from the fit retrieved above.
# `stan_variables()` is indexed by the variable name used in the Stan code.
pname = ""
variable = results.stan_variables()[pname]