In [1]:
import numpy as np
import pandas as pd
import glob
import emcee
import corner
import scipy.stats
from scipy.ndimage import gaussian_filter1d

import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

from fit_just_early_lc import prep_light_curve, multifcqfid_lnlike_big_unc, multifcqfid_lnprior_big_unc, multifcqfid_lnposterior_big_unc, lnlike_big_unc

from multiprocessing import Pool
import time

from corner_hack import corner_hack
from light_curve_plot import f_t, plot_both_filt

In [2]:
%matplotlib notebook

In [3]:
# Directory holding the forced-photometry light curves and (under big_unc/) the emcee chains.
info_path = "../../forced_lightcurves/sample_lc_v2/"
# Per-SN summary table; the columns used below are name, t0_g_adopted, z_adopt,
# fratio_gmax_2adam and fratio_rmax_2adam.
salt_df = pd.read_csv(info_path + "../../Nobs_cut_salt2_spec_subtype_pec.csv")

## Measure the Deviance Information Criterion

$$DIC = 2 \bar{D(\theta)} - D(\bar{\theta})$$

where, $D(\theta) = -2 \log P(x|\theta)$.

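Thus, we need two quantities: the deviance evaluated at the posterior-mean parameters, $D(\bar{\theta})$, and the mean of the deviance over the posterior samples, $\overline{D(\theta)}$. Both require evaluating the log-likelihood with the `multifcqfid_lnlike_big_unc` function.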

In [44]:
thin_by = 100
rel_flux_cutoff = 0.4

sn = 'ZTF18abauprj'

# Open the stored emcee chain for the free power-law-index ("varchange") fit.
h5_file = info_path + 'big_unc/{}_emcee_40_varchange.h5'.format(sn)
reader = emcee.backends.HDFBackend(h5_file)
# NOTE(review): presumably the chain was saved thinned by `thin_by` -- confirm.
nsteps = thin_by*np.shape(reader.get_chain())[0]
tau = reader.get_autocorr_time(tol=0)
# Discard 5 autocorrelation times as burn-in and thin by one autocorrelation
# time (never less than 1, which would be an invalid slice step).
burnin = int(5*np.max(tau))
thin = max(int(np.max(tau)), 1)
samples = reader.get_chain(discard=burnin, thin=thin, flat=True)
lnpost = reader.get_log_prob(discard=burnin, thin=thin, flat=True)

# Select the single summary-table row for this SN once, then pull scalars with
# .item(); float(<1-element ndarray>) is deprecated since NumPy 1.25.
sn_row = salt_df.loc[salt_df['name'] == sn]
t_max = float(sn_row['t0_g_adopted'].values.item())
z = float(sn_row['z_adopt'].values.item())
g_max = float(sn_row['fratio_gmax_2adam'].values.item())
r_max = float(sn_row['fratio_rmax_2adam'].values.item())

t_data, f_data, f_unc_data, fcqfid_data = prep_light_curve(info_path+"{}_force_phot.h5".format(sn),
                                                           t_max=t_max,
                                                           z=z,
                                                           g_max=g_max,
                                                           r_max=r_max,
                                                           rel_flux_cutoff=rel_flux_cutoff)


In [50]:
# Log-likelihood of the data at every retained posterior sample.
loglike_samples = np.array(
    [multifcqfid_lnlike_big_unc(sample, f_data, t_data, f_unc_data, fcqfid_data)
     for sample in samples],
    dtype=float,
)

In [53]:
# D(theta_bar): deviance evaluated at the mean of the posterior samples.
dhat = -2 * multifcqfid_lnlike_big_unc(
    samples.mean(axis=0), f_data, t_data, f_unc_data, fcqfid_data
)

# D_bar: mean deviance over the posterior samples.
dbar = -2 * loglike_samples.mean()

In [56]:
# DIC = 2 * D_bar - D(theta_bar), as defined in the markdown cell above.
dic = 2 * dbar - dhat
print(dic)

-359.9673406752178


#### What about for the $t^2$ model?

In [57]:
# Same chain-loading steps as before, now for the "tsquared" fit of the same SN.
h5_file = info_path + 'big_unc/{}_emcee_40_tsquared.h5'.format(sn)
reader = emcee.backends.HDFBackend(h5_file)
nsteps = thin_by * reader.get_chain().shape[0]
tau = reader.get_autocorr_time(tol=0)
# Burn-in of 5 autocorrelation times; thin by one autocorrelation time (>= 1).
burnin = int(5 * tau.max())
samples_tsquared = reader.get_chain(
    discard=burnin,
    thin=max(int(tau.max()), 1),
    flat=True,
)

In [59]:
# Log-likelihood at every retained sample of the tsquared chain.
# The array is built directly from `samples_tsquared`, so its length always
# matches that chain (the two models' chains generally differ in length, and
# sizing the buffer from the other chain either overflows it or leaves
# zero-padded entries that bias the mean).
loglike_samples_tsquared = np.array(
    [multifcqfid_lnlike_big_unc(sample, f_data, t_data, f_unc_data, fcqfid_data,
                                prior='delta2')
     for sample in samples_tsquared],
    dtype=float,
)

In [81]:
# Deviance at the posterior mean and mean deviance for the tsquared model.
# These use model-specific names: the next cell reads `dhat_tsquared` and
# `dbar_tsquared`, and reusing `dhat`/`dbar` would both leave those undefined
# on a fresh kernel and overwrite the values from the first model.
dhat_tsquared = -2*multifcqfid_lnlike_big_unc(np.mean(samples_tsquared, axis=0), f_data, t_data, f_unc_data, fcqfid_data,
                                              prior='delta2')

dbar_tsquared = np.mean(-2*loglike_samples_tsquared)

In [82]:
# DIC for the tsquared model: 2 * D_bar - D(theta_bar).
dic_tsquared = 2 * dbar_tsquared - dhat_tsquared
print(dic_tsquared)

-311.7311584562079


### Loop over all SNe

In [19]:
dic_uniformative_arr = np.zeros(len(salt_df))
dic_tsquared_arr = np.zeros(len(salt_df))


def _load_flat_chain(h5_file):
    """Return the flattened, burned-in and thinned samples stored in `h5_file`.

    Burn-in is 5x the largest autocorrelation time; the chain is thinned by the
    largest autocorrelation time, floored at 1 so the thinning step is valid.
    """
    reader = emcee.backends.HDFBackend(h5_file)
    max_tau = np.max(reader.get_autocorr_time(tol=0))
    burnin = int(5*max_tau)
    thin = max(int(max_tau), 1)
    return reader.get_chain(discard=burnin, thin=thin, flat=True)


def _dic_from_samples(samples, f_data, t_data, f_unc_data, fcqfid_data, **lnlike_kwargs):
    """Return DIC = 2*mean(D) - D(mean(theta)) with D = -2*lnlike, for one chain.

    Extra keyword arguments are forwarded to `multifcqfid_lnlike_big_unc`.
    """
    loglike_samples = np.array(
        [multifcqfid_lnlike_big_unc(sample, f_data, t_data, f_unc_data, fcqfid_data,
                                    **lnlike_kwargs)
         for sample in samples],
        dtype=float)
    dhat = -2*multifcqfid_lnlike_big_unc(np.mean(samples, axis=0),
                                         f_data, t_data, f_unc_data, fcqfid_data,
                                         **lnlike_kwargs)
    dbar = -2*np.mean(loglike_samples)
    return 2*dbar - dhat


def get_dic(sn):
    """Compute the DIC of both fitted models for one supernova.

    Parameters
    ----------
    sn : str
        Name of the supernova; must match exactly one entry of `salt_df.name`.

    Returns
    -------
    (dic, dic_tsquared) : tuple of float
        DIC for the `varchange` chain (default likelihood arguments) and for
        the `tsquared` chain (likelihood called with ``prior='delta2'``).

    Raises
    ------
    ValueError
        If `sn` does not match exactly one row of `salt_df`.
    """
    sn_mask = (salt_df['name'] == sn).values
    sn_num = np.where(sn_mask)[0]
    sn_row = salt_df.loc[sn_mask]
    if len(sn_row) != 1:
        raise ValueError("expected exactly one row in salt_df for {}, found {}".format(sn, len(sn_row)))

    rel_flux_cutoff = 0.4
    # .item() instead of float(<1-element ndarray>), which is deprecated
    # since NumPy 1.25.
    t_max = float(sn_row['t0_g_adopted'].values.item())
    z = float(sn_row['z_adopt'].values.item())
    g_max = float(sn_row['fratio_gmax_2adam'].values.item())
    r_max = float(sn_row['fratio_rmax_2adam'].values.item())

    t_data, f_data, f_unc_data, fcqfid_data = prep_light_curve(info_path+"{}_force_phot.h5".format(sn),
                                                               t_max=t_max,
                                                               z=z,
                                                               g_max=g_max,
                                                               r_max=r_max,
                                                               rel_flux_cutoff=rel_flux_cutoff)

    samples = _load_flat_chain(info_path + 'big_unc/{}_emcee_40_varchange.h5'.format(sn))
    dic = _dic_from_samples(samples, f_data, t_data, f_unc_data, fcqfid_data)

    samples_tsquared = _load_flat_chain(info_path + 'big_unc/{}_emcee_40_tsquared.h5'.format(sn))
    dic_tsquared = _dic_from_samples(samples_tsquared, f_data, t_data, f_unc_data, fcqfid_data,
                                     prior='delta2')

    # Record the results in the module-level arrays. When this function runs
    # inside a multiprocessing worker, only the worker's copy is modified, so
    # callers using a Pool must rely on the return value instead.
    dic_uniformative_arr[sn_num] = dic
    dic_tsquared_arr[sn_num] = dic_tsquared

    return (dic, dic_tsquared)

# Evaluate get_dic for every SN in parallel. The context manager shuts the
# worker processes down once the (blocking) map call has returned, so they do
# not linger for the rest of the kernel session.
with Pool() as pool:
    dic_res = pool.map(get_dic, salt_df.name.values)

  weights=f_zp_unc_tonight[g_tonight]**(-2))
  avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl
  weights=f_zp_unc_tonight[~g_tonight]**(-2))
  cutoff_g = np.where((mean_rf < 0) & (mean_g > 0) &
  (mean_g < rel_flux_cutoff))
  cutoff_r = np.where((mean_rf < 0) & (mean_r > 0) &
  (mean_r < rel_flux_cutoff))


In [20]:
# Display the (dic, dic_tsquared) tuples returned by get_dic, one per SN in salt_df order.
dic_res

[(2391.37094383815, 2385.5704184161373),
 (2282.044274500203, 2284.5527852241644),
 (2096.2180971099406, 2087.6788427537213),
 (336.36228278976444, 334.976637042548),
 (1078.5785920779106, 1080.3530304886694),
 (387.02084208438134, 423.51141844478434),
 (697.6830174330751, 727.245491474233),
 (649.065605342614, 656.1243182525432),
 (311.625077148738, 322.23017372686843),
 (1078.3051159124343, 831.613790969587),
 (-2479.8573355069384, -330.08225263390045),
 (759.2004825329516, 761.4699438968629),
 (568.9416539055276, 568.8121356086867),
 (705.7979210783728, 703.7007665305946),
 (-43.918765940944496, -2.9922319751385444),
 (1092.6726041258212, 1091.6150688359426),
 (415.83446988984724, 417.09988044083303),
 (-24.884973075872608, 427.16673735792006),
 (644.0142240870101, 656.2194238015306),
 (-52.47501489751147, 118.60717679127586),
 (689.7124427026056, 689.0301266393567),
 (900.3281533688225, 904.004077327386),
 (561.6535632403404, 573.8158556999831),
 (587.9837888493776, 596.28141240619

In [21]:
# Split the list of (dic, dic_tsquared) tuples into one array per model.
dic_uniformative_arr, dic_tsquared_arr = np.array(dic_res).T

# Collect the per-SN results in a single table keyed by SN name.
dic_df = pd.DataFrame({
    'ztf_name': salt_df.name.values,
    'dic_uninformative': dic_uniformative_arr,
    'dic_delta2': dic_tsquared_arr,
})

In [41]:
# Ratio exp((DIC_tsquared - DIC_uninformative)/2), computed once. Large DIC
# differences overflow to inf, which still compares as > 100, so the overflow
# warning is suppressed rather than allowed to clutter the output.
with np.errstate(over='ignore'):
    dic_ratio = np.exp((dic_tsquared_arr - dic_uniformative_arr)/2)

# np.select takes the first matching condition, so cumulative upper bounds
# reproduce the half-open bins (..,1], (1,3], (3,10], (10,30], (30,100], (100,..).
# NOTE(review): NaN ratios match no condition and fall through to the default
# label 'very strong' -- confirm that is the intended handling.
dic_evidence = np.select(
    [dic_ratio <= 1,
     dic_ratio <= 3,
     dic_ratio <= 10,
     dic_ratio <= 30,
     dic_ratio <= 100,
     dic_ratio > 100],
    ['negative', 'weak', 'substantial', 'strong', 'very strong', 'decisive'],
    default='very strong')

In [42]:
# Display the evidence label assigned to each SN.
dic_evidence

array(['negative', 'substantial', 'negative', 'negative', 'weak',
       'decisive', 'decisive', 'very strong', 'decisive', 'negative',
       'decisive', 'substantial', 'negative', 'negative', 'decisive',
       'negative', 'weak', 'decisive', 'decisive', 'decisive', 'negative',
       'substantial', 'decisive', 'very strong', 'negative', 'weak',
       'negative', 'negative', 'negative', 'substantial', 'substantial',
       'negative', 'negative', 'substantial', 'negative', 'negative',
       'very strong', 'very strong', 'weak', 'negative', 'very strong',
       'negative', 'negative', 'substantial', 'negative', 'very strong',
       'negative', 'decisive', 'weak', 'negative', 'decisive', 'negative',
       'decisive', 'very strong', 'negative', 'negative', 'decisive',
       'substantial', 'negative', 'negative', 'negative', 'weak',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'negative', 'negative', 'negative', 'weak', 'negative',
       'n

In [43]:
# Count how many SNe fall in each evidence category.
np.unique(dic_evidence, return_counts=True)

(array(['decisive', 'negative', 'strong', 'substantial', 'very strong',
        'weak'], dtype='<U11'), array([19, 75,  1,  8, 10, 14]))

In [44]:
# Attach the evidence labels and write the full table to disk (no index column).
dic_df['dic_evidence'] = dic_evidence
dic_df.to_csv(path_or_buf='dic_results.csv', index=False)