In [1]:
import numpy as np
import pandas as pd
import glob
import emcee
import corner
import scipy.stats
from scipy.ndimage import gaussian_filter1d

import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

from fit_just_early_lc import prep_light_curve, multifcqfid_lnlike_big_unc, multifcqfid_lnprior_big_unc, multifcqfid_lnposterior_big_unc, lnlike_big_unc

from multiprocessing import Pool
import time

from corner_hack import corner_hack
from light_curve_plot import f_t, plot_both_filt

In [2]:
%matplotlib notebook

In [3]:
# Base directory for the per-SN forced-photometry files ("{sn}_force_phot.h5")
# and the emcee chains stored under its "big_unc/" sub-directory.
info_path = "../../forced_lightcurves/sample_lc_v2/"
# Per-SN table; later cells read its name, t0_g_adopted, z_adopt,
# fratio_gmax_2adam and fratio_rmax_2adam columns.
salt_df = pd.read_csv(info_path + "../../Nobs_cut_salt2_spec_subtype_pec.csv")

## Measure the Deviance Information Criterion

$$\mathrm{DIC} = 2\,\overline{D(\theta)} - D(\bar{\theta})$$

where, $D(\theta) = -2 \log P(x|\theta)$.

Thus, we need two quantities: the deviance evaluated at the posterior-mean parameters, $D(\bar{\theta})$, and the mean of the deviance over the posterior samples, $\overline{D(\theta)}$. Both require the `multifcqfid_lnlike_big_unc` function.

In [44]:
# Settings for the single-SN worked example.
thin_by = 100
rel_flux_cutoff = 0.4

sn = 'ZTF18abauprj'

# Open the saved emcee chain for the model stored in "*_varchange.h5".
h5_file = info_path + 'big_unc/{}_emcee_40_varchange.h5'.format(sn)
reader = emcee.backends.HDFBackend(h5_file)
nsteps = thin_by*np.shape(reader.get_chain())[0]
tau = reader.get_autocorr_time(tol=0)
# Discard five (maximum) autocorrelation times as burn-in.
burnin = int(5*np.max(tau))
# Thin by the longest autocorrelation time, but never by less than 1;
# computed once so the chain and log-probability stay aligned.
thin = max(int(np.max(tau)), 1)
samples = reader.get_chain(discard=burnin, thin=thin, flat=True)
lnpost = reader.get_log_prob(discard=burnin, thin=thin, flat=True)

# Pull this SN's row once.  `.values.item()` extracts the single matching
# value explicitly (and raises ValueError unless exactly one row matches);
# calling float() directly on a 1-element array is deprecated in NumPy >= 1.25.
sn_row = salt_df[salt_df['name'] == sn]
t_max = float(sn_row['t0_g_adopted'].values.item())
z = float(sn_row['z_adopt'].values.item())
g_max = float(sn_row['fratio_gmax_2adam'].values.item())
r_max = float(sn_row['fratio_rmax_2adam'].values.item())

t_data, f_data, f_unc_data, fcqfid_data = prep_light_curve(info_path+"{}_force_phot.h5".format(sn),
                                                           t_max=t_max,
                                                           z=z,
                                                           g_max=g_max,
                                                           r_max=r_max,
                                                           rel_flux_cutoff=rel_flux_cutoff)


In [50]:
# Log-likelihood of the data at every retained posterior sample.
loglike_samples = np.fromiter(
    (multifcqfid_lnlike_big_unc(theta, f_data, t_data, f_unc_data, fcqfid_data)
     for theta in samples),
    dtype=float,
    count=len(samples))

In [53]:
# Mean deviance over the posterior samples, D-bar.
dbar = np.mean(-2*loglike_samples)

# Deviance evaluated at the posterior-mean parameter vector, D-hat.
dhat = -2*multifcqfid_lnlike_big_unc(samples.mean(axis=0), f_data, t_data, f_unc_data, fcqfid_data)

In [56]:
# DIC = 2*Dbar - Dhat, i.e. the definition given in the markdown at the top
# of this section.
dic = 2*dbar - dhat
print(dic)

-359.9673406752178


#### What about for the $t^2$ model?

In [57]:
# Same chain-loading recipe as for the first model, applied to the chain
# saved in "*_tsquared.h5".
h5_file = info_path + 'big_unc/{}_emcee_40_tsquared.h5'.format(sn)
reader = emcee.backends.HDFBackend(h5_file)
nsteps = thin_by*reader.get_chain().shape[0]
tau = reader.get_autocorr_time(tol=0)
tau_max = np.max(tau)
# Burn-in of five autocorrelation times; thin by one autocorrelation time
# (at least 1).
burnin = int(5*tau_max)
samples_tsquared = reader.get_chain(discard=burnin, thin=max(int(tau_max), 1), flat=True)

In [59]:
# Log-likelihood of the data at every retained t^2-model posterior sample.
# The output array is sized from samples_tsquared itself: the two chains are
# burned-in/thinned independently, so sizing it from the other model's
# `samples` would either overflow or leave trailing zeros that bias the mean.
loglike_samples_tsquared = np.zeros(len(samples_tsquared))

for samp_num, sample in enumerate(samples_tsquared):
    loglike_samples_tsquared[samp_num] = multifcqfid_lnlike_big_unc(sample, f_data, t_data, f_unc_data, fcqfid_data,
                                                                    prior='delta2')

In [81]:
# Deviance at the posterior-mean parameters of the t^2 chain.  Stored under
# *_tsquared names so that (a) the DIC cell that follows finds the variables
# it reads, and (b) the dhat/dbar of the first model are not overwritten.
dhat_tsquared = -2*multifcqfid_lnlike_big_unc(np.mean(samples_tsquared, axis=0), f_data, t_data, f_unc_data, fcqfid_data,
                                              prior='delta2')

# Mean deviance over the t^2 posterior samples.
dbar_tsquared = np.mean(-2*loglike_samples_tsquared)

In [82]:
# DIC for the t^2 model, 2*Dbar - Dhat.
# Requires dbar_tsquared and dhat_tsquared to exist in the kernel namespace.
dic_tsquared = 2*dbar_tsquared - dhat_tsquared
print(dic_tsquared)

-311.7311584562079


### Loop over all SNe

In [6]:
salt_df.name.values

array(['ZTF18aailmnv', 'ZTF18aansqun', 'ZTF18aaoxryq', 'ZTF18aapqwyv',
       'ZTF18aapsedq', 'ZTF18aaqcozd', 'ZTF18aaqcqkv', 'ZTF18aaqcqvr',
       'ZTF18aaqcugm', 'ZTF18aaqffyp', 'ZTF18aaqnrum', 'ZTF18aaqqoqs',
       'ZTF18aarldnh', 'ZTF18aarqnje', 'ZTF18aasdted', 'ZTF18aasesgl',
       'ZTF18aaslhxt', 'ZTF18aatzygk', 'ZTF18aauhxce', 'ZTF18aaumeys',
       'ZTF18aaumlfl', 'ZTF18aaunfqq', 'ZTF18aauocnw', 'ZTF18aavrwhu',
       'ZTF18aavrzxp', 'ZTF18aawjywv', 'ZTF18aawpcel', 'ZTF18aawurud',
       'ZTF18aaxakhh', 'ZTF18aaxcntm', 'ZTF18aaxdrjn', 'ZTF18aaxqyki',
       'ZTF18aaxrvzj', 'ZTF18aaxsioa', 'ZTF18aaxvpsw', 'ZTF18aaxwjmp',
       'ZTF18aaydmkh', 'ZTF18aayjvve', 'ZTF18aaykjei', 'ZTF18aaytovs',
       'ZTF18aazabmh', 'ZTF18aazblzy', 'ZTF18aazcoob', 'ZTF18aazixbw',
       'ZTF18aazjztm', 'ZTF18aazsabq', 'ZTF18abatffv', 'ZTF18abauprj',
       'ZTF18abaxlpi', 'ZTF18abbpeqo', 'ZTF18abbvsiv', 'ZTF18abcecfi',
       'ZTF18abcflnz', 'ZTF18abckujg', 'ZTF18abckujq', 'ZTF18abclalx',
      

In [8]:
# Zero-initialised per-SN DIC arrays, one slot per row of salt_df; get_dic()
# writes into them at the row index of the SN it processes.
# NOTE(review): "dic_uniformative_arr" is spelled differently from the
# "dic_uninformative_arr" that is later built from dic_res — confirm which
# name is intended.
dic_uniformative_arr = np.zeros(len(salt_df))
dic_tsquared_arr = np.zeros(len(salt_df))
dic_alpha_r_plus_colors_arr = np.zeros(len(salt_df))

def _dic_from_chain(h5_file, t_data, f_data, f_unc_data, fcqfid_data, **lnlike_kwargs):
    """Compute the Deviance Information Criterion for one saved emcee chain.

    Parameters
    ----------
    h5_file : str
        Path to the emcee HDF5 backend file holding the chain.
    t_data, f_data, f_unc_data, fcqfid_data
        Light-curve arrays as returned by ``prep_light_curve``; forwarded
        unchanged to ``multifcqfid_lnlike_big_unc``.
    **lnlike_kwargs
        Extra keyword arguments for ``multifcqfid_lnlike_big_unc``
        (e.g. ``prior='delta2'``).

    Returns
    -------
    float
        ``2*Dbar - Dhat``, where ``Dbar`` is the posterior mean of
        ``-2 ln L`` and ``Dhat`` is ``-2 ln L`` at the posterior-mean
        parameter vector.
    """
    reader = emcee.backends.HDFBackend(h5_file)
    tau_max = np.max(reader.get_autocorr_time(tol=0))
    # Drop five autocorrelation times as burn-in and keep roughly one sample
    # per autocorrelation time.  The thinning factor is clamped to >= 1 so a
    # chain with tau < 1 is not thinned by zero.
    burnin = int(5*tau_max)
    thin = max(int(tau_max), 1)
    samples = reader.get_chain(discard=burnin, thin=thin, flat=True)

    loglike_samples = np.zeros(len(samples))
    for samp_num, sample in enumerate(samples):
        loglike_samples[samp_num] = multifcqfid_lnlike_big_unc(sample, f_data, t_data, f_unc_data, fcqfid_data,
                                                               **lnlike_kwargs)

    dbar = -2*np.mean(loglike_samples)
    dhat = -2*multifcqfid_lnlike_big_unc(np.mean(samples, axis=0), f_data, t_data, f_unc_data, fcqfid_data,
                                         **lnlike_kwargs)
    return 2*dbar - dhat


def get_dic(sn):
    """Compute the DIC of the three saved model fits for one supernova.

    The chains are read from ``info_path + 'big_unc/'``:
    ``{sn}_emcee_40_varchange.h5`` (likelihood called with its default prior),
    ``{sn}_emcee_40_tsquared.h5`` (``prior='delta2'``) and
    ``{sn}_emcee_40_alpha_r_plus_colors.h5`` (``prior='alpha_r_plus_colors'``).

    Parameters
    ----------
    sn : str
        Supernova name; must match exactly one entry of ``salt_df['name']``.

    Returns
    -------
    tuple of float
        ``(dic, dic_tsquared, dic_alpha_r_plus_colors)``.

    Raises
    ------
    ValueError
        If ``sn`` does not match exactly one row of ``salt_df``.

    Notes
    -----
    The module-level arrays ``dic_uniformative_arr``, ``dic_tsquared_arr`` and
    ``dic_alpha_r_plus_colors_arr`` are also filled at this SN's row.  Those
    writes only affect the process that executes this function: when it is
    run through ``multiprocessing.Pool`` the parent's arrays are not updated,
    so callers should use the returned tuple.
    """
    sn_num = np.where(salt_df.name == sn)[0]

    # `.values.item()` requires exactly one matching row and avoids calling
    # float() on a 1-element array (deprecated in NumPy >= 1.25).
    sn_row = salt_df[salt_df['name'] == sn]
    rel_flux_cutoff = 0.4
    t_max = float(sn_row['t0_g_adopted'].values.item())
    z = float(sn_row['z_adopt'].values.item())
    g_max = float(sn_row['fratio_gmax_2adam'].values.item())
    r_max = float(sn_row['fratio_rmax_2adam'].values.item())

    t_data, f_data, f_unc_data, fcqfid_data = prep_light_curve(info_path+"{}_force_phot.h5".format(sn),
                                                               t_max=t_max,
                                                               z=z,
                                                               g_max=g_max,
                                                               r_max=r_max,
                                                               rel_flux_cutoff=rel_flux_cutoff)
    lc_data = (t_data, f_data, f_unc_data, fcqfid_data)

    # The three chains go through the identical burn-in / thinning / DIC
    # recipe; only the chain file and the prior keyword differ.
    dic = _dic_from_chain(info_path + 'big_unc/{}_emcee_40_varchange.h5'.format(sn),
                          *lc_data)
    dic_tsquared = _dic_from_chain(info_path + 'big_unc/{}_emcee_40_tsquared.h5'.format(sn),
                                   *lc_data, prior='delta2')
    dic_alpha_r_plus_colors = _dic_from_chain(info_path + 'big_unc/{}_emcee_40_alpha_r_plus_colors.h5'.format(sn),
                                              *lc_data, prior='alpha_r_plus_colors')

    # Kept for backward compatibility with direct (same-process) calls.
    dic_uniformative_arr[sn_num] = dic
    dic_tsquared_arr[sn_num] = dic_tsquared
    dic_alpha_r_plus_colors_arr[sn_num] = dic_alpha_r_plus_colors

    return (dic, dic_tsquared, dic_alpha_r_plus_colors)

# Evaluate get_dic for every SN in parallel.  The context manager shuts the
# worker processes down once map() has returned, instead of leaving an open
# pool behind in the kernel.
with Pool() as pool:
    dic_res = pool.map(get_dic, salt_df.name.values)

  weights=f_zp_unc_tonight[g_tonight]**(-2))
  avg = np.multiply(a, wgt, dtype=result_dtype).sum(axis)/scl
  weights=f_zp_unc_tonight[~g_tonight]**(-2))
  cutoff_g = np.where((mean_rf < 0) & (mean_g > 0) &
  (mean_g < rel_flux_cutoff))
  cutoff_r = np.where((mean_rf < 0) & (mean_r > 0) &
  (mean_r < rel_flux_cutoff))


In [9]:
dic_res

[(2391.37094383815, 2385.5704184161373, 2384.9620750441736),
 (2282.044274500203, 2284.5527852241644, 2283.3395393737474),
 (2096.2180971099406, 2087.6788427537213, 2087.516524410349),
 (336.36228278976444, 334.976637042548, 333.56727951462807),
 (1078.5785920779106, 1080.3530304886694, 1080.9015452575334),
 (387.02084208438134, 423.51141844478434, 422.5140924639711),
 (697.6830174330751, 727.245491474233, 727.999523091374),
 (649.065605342614, 656.1243182525432, 657.581877853469),
 (311.625077148738, 322.23017372686843, 320.34139503420715),
 (1078.3051159124343, 831.613790969587, 1024.6979184610743),
 (-2479.8573355069384, -330.08225263390045, -353.61364039653324),
 (759.2004825329516, 761.4699438968629, 762.4323403392162),
 (568.9416539055276, 568.8121356086867, 567.3181499805844),
 (705.7979210783728, 703.7007665305946, 702.8231337090077),
 (-43.918765940944496, -2.9922319751385444, -35.197905850376664),
 (1092.6726041258212, 1091.6150688359426, 1092.454842461883),
 (415.83446988984

In [17]:
# dic_res holds one (dic, dic_tsquared, dic_alpha_r_plus_colors) tuple per SN;
# transpose the stacked table to obtain one array per model.
dic_uninformative_arr, dic_tsquared_arr, dic_alpha_r_plus_colors_arr = np.array(dic_res).T

# Collect the per-SN DIC values in a single table keyed by SN name.
dic_df = pd.DataFrame({'ztf_name': salt_df.name.values,
                       'dic_uninformative': dic_uninformative_arr,
                       'dic_delta2': dic_tsquared_arr,
                       'dic_alpha_r_plus': dic_alpha_r_plus_colors_arr})

In [25]:
# Number of SNe for which exp((DIC_t2 - DIC_alpha_r_plus_colors)/2) exceeds 30.
np.count_nonzero(np.exp((dic_tsquared_arr - dic_alpha_r_plus_colors_arr)/2) > 30)

12

In [19]:
# exp(delta DIC / 2) between the t^2 fit and the uninformative-prior fit;
# values above 1 mean the t^2 model has the larger DIC.
evidence_ratio = np.exp((dic_tsquared_arr - dic_uninformative_arr)/2)

# (label, exclusive lower bound, inclusive upper bound) for each category.
evidence_bins = [('negative', -np.inf, 1),
                 ('weak', 1, 3),
                 ('substantial', 3, 10),
                 ('strong', 10, 30),
                 ('very strong', 30, 100),
                 ('decisive', 100, np.inf)]

# Entries that fall in no bin (e.g. a NaN ratio) keep the initial label.
dic_evidence = np.array(['very strong']*len(salt_df))
for label, lower, upper in evidence_bins:
    dic_evidence[(evidence_ratio > lower) & (evidence_ratio <= upper)] = label

In [20]:
dic_evidence

array(['negative', 'substantial', 'negative', 'negative', 'weak',
       'decisive', 'decisive', 'very strong', 'decisive', 'negative',
       'decisive', 'substantial', 'negative', 'negative', 'decisive',
       'negative', 'weak', 'decisive', 'decisive', 'decisive', 'negative',
       'substantial', 'decisive', 'very strong', 'negative', 'weak',
       'negative', 'negative', 'negative', 'substantial', 'substantial',
       'negative', 'negative', 'substantial', 'negative', 'negative',
       'very strong', 'very strong', 'weak', 'negative', 'very strong',
       'negative', 'negative', 'substantial', 'negative', 'very strong',
       'negative', 'decisive', 'weak', 'negative', 'decisive', 'negative',
       'decisive', 'very strong', 'negative', 'negative', 'decisive',
       'substantial', 'negative', 'negative', 'negative', 'weak',
       'negative', 'negative', 'negative', 'negative', 'negative',
       'negative', 'negative', 'negative', 'negative', 'weak', 'negative',
       'n

In [21]:
np.unique(dic_evidence, return_counts=True)

(array(['decisive', 'negative', 'strong', 'substantial', 'very strong',
        'weak'], dtype='<U11'), array([19, 75,  1,  8, 10, 14]))

In [26]:
# Attach the evidence category to the DIC table and write it to disk
# (without the row index).
dic_df = dic_df.assign(dic_evidence=dic_evidence)
dic_df.to_csv('dic_results.csv', index=False)

## Analyze which SNe prefer the $t^2$ model

In [27]:
# Reload the DIC table from dic_results.csv (the file written by the cell
# above) and preview its first rows.
dic_df = pd.read_csv('dic_results.csv')
dic_df.head()

Unnamed: 0,ztf_name,dic_uninformative,dic_delta2,dic_alpha_r_plus,dic_evidence
0,ZTF18aailmnv,2391.370944,2385.570418,2384.962075,negative
1,ZTF18aansqun,2282.044275,2284.552785,2283.339539,substantial
2,ZTF18aaoxryq,2096.218097,2087.678843,2087.516524,negative
3,ZTF18aapqwyv,336.362283,334.976637,333.56728,negative
4,ZTF18aapsedq,1078.578592,1080.35303,1080.901545,weak


In [28]:
# Results table; the cells below use its ztf_name, final_selection,
# t_rise_95, t_rise_05 and n_nights_gr_post columns.
# NOTE(review): it is indexed positionally with masks built from dic_df, so
# its rows are assumed to be in the same order as dic_df — confirm.
res = pd.read_csv('results_40percent.csv')

In [29]:
# Row positions of the SNe in each evidence category.
evidence = dic_df['dic_evidence']
decisive = np.where(evidence == 'decisive')
vstrong = np.where(evidence == 'very strong')
strong = np.where(evidence == 'strong')
substantial = np.where(evidence == 'substantial')
weak = np.where(evidence == 'weak')

# Fit summary for the "decisive" SNe.  Selection is positional, so this
# presumes res and dic_df list the SNe in the same order — TODO confirm.
summary_cols = ['ztf_name','final_selection', 't_rise_95', 't_rise_05', 'n_nights_gr_post']
res[summary_cols].iloc[decisive]

Unnamed: 0,ztf_name,final_selection,t_rise_95,t_rise_05,n_nights_gr_post
5,b'ZTF18aaqcozd',0,16.020238,10.333284,2.0
6,b'ZTF18aaqcqkv',0,15.345109,12.517173,1.0
8,b'ZTF18aaqcugm',0,16.58778,13.317806,3.0
10,b'ZTF18aaqnrum',0,17.217793,11.10746,0.0
14,b'ZTF18aasdted',1,18.655766,18.332722,3.0
17,b'ZTF18aatzygk',0,17.492604,14.062936,1.0
18,b'ZTF18aauhxce',0,18.036325,14.6115,1.0
19,b'ZTF18aaumeys',0,16.897107,12.317115,1.0
22,b'ZTF18aauocnw',0,16.192567,14.91537,3.0
47,b'ZTF18abauprj',1,19.38963,19.059857,7.0


In [30]:
# Results table for the t^2 fits (judging by the file name — TODO confirm);
# used below through its final_selection column.
res_tsquared = pd.read_csv('results_40_tsquared.csv')

In [31]:
# SNe whose DIC evidence is "decisive" or "very strong".
high_evidence = dic_df.dic_evidence.isin(['decisive', 'very strong'])
selected = res.final_selection == 1
not_selected = res.final_selection == 0
selected_tsquared = res_tsquared.final_selection == 1

# High-evidence SNe that pass the final selection in `res`.
colors_sample = np.where(high_evidence & selected)

# SNe that pass the final selection in `res_tsquared` and are either
# high-evidence but rejected in `res`, or not high-evidence at all.
tsquared_sample = np.where((high_evidence & not_selected & selected_tsquared) |
                           (~high_evidence & selected_tsquared))

The upshot here is that the very best models (i.e., low $z$, high $N_\mathrm{det}$, and low $CR_{90}$) and the very worst (those with the opposite properties) are the ones that show significant evidence for a departure from $\alpha = 2$ according to the DIC. These models, therefore, should not be "lumped in" with a uniform $\alpha = 2$ analysis.