# Profile Likelihood for the SARS-CoV-2 variant model

Compute the profile likelihood with data restricted such that $t \leq t_{\max}$.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import re
import scipy.stats as sts
import xml.etree.ElementTree as ET
import scipy.stats as sts
import pickle
import glob
from scipy.interpolate import UnivariateSpline
from scipy.optimize import minimize_scalar, root_scalar


import sys, importlib
sys.path.append("..")
from evpytools import evplot
from evpytools import auxiliary as aux
from evpytools import definitions as defn
for mod in [evplot, aux, defn]:
    importlib.reload(mod)

In [None]:
## use a larger default font for all figures in this notebook
plt.rcParams['font.size'] = 18

In [None]:
## number of repeated LL estimates at the end of each IPF run
dupl = 2

## grid of sigma values; one IPF result file is expected per value
sigmas = np.linspace(0.1, 0.7, 26)

## TODO: add a grid of tmax values (data restricted to t <= tmax).
## The result file names do not encode tmax yet, and the cells below expect
## exactly one file per sigma, so only the sigma grid is built here. When
## tmax is added, build one list of files per tmax and adapt those cells.

## one result file per sigma, in the same order as `sigmas`
filenames = [f"../data/out/ipf_result-sars_model_sigma={sigma:g}.xml"
             for sigma in sigmas]

print(filenames)

In [None]:
## TODO: use glob and re to get sigmas and files

In [None]:
## collect the last `dupl` log-likelihood estimates of every IPF result file
LLs = []
LLvalids = []

for path in filenames:
    ## read the <log_lik> child of each <iterated_filtering_step> element
    steps = ET.parse(path).getroot().findall("iterated_filtering_step")
    attribs = [step.find("log_lik").attrib for step in steps]
    ## full traces: the LL value and whether it is flagged as finite
    trace = [float(a["val"]) for a in attribs]
    finite = [a["finite"] == 'true' for a in attribs]
    ## keep only the trailing `dupl` entries of each trace
    LLs.append(trace[-dupl:])
    LLvalids.append(finite[-dupl:])

In [None]:
## write one tab-separated row (sigma, LL value, validity flag) per estimate
with open("../data/out/prof-lik-UK-N501Y.tsv", 'w') as f:
    f.writelines(
        f"{s}\t{x}\t{b}\n"
        for s, xs, bs in zip(sigmas, LLs, LLvalids)
        for x, b in zip(xs, bs)
    )

In [None]:
## summarise the repeated LL estimates per sigma:
## mean, and 25th / 75th percentiles

fsigmas = sigmas
fLLs = LLs

meanLLs = list(map(np.mean, fLLs))
loLLs = [np.percentile(estimates, 25) for estimates in fLLs]
hiLLs = [np.percentile(estimates, 75) for estimates in fLLs]


In [None]:
## draw the likelihood profile: mean LL per sigma, a smoothing spline,
## the spline maximum and a 95% confidence interval

fig, ax = plt.subplots(1, 1, figsize=(7, 5))
ax.scatter(fsigmas, meanLLs, color='k', marker='o', label="mean")
## NOTE: the quartiles (loLLs, hiLLs) are currently not drawn as error bars

## smoothing spline through the mean LLs; ext='raise' forbids extrapolation
sigma_lo, sigma_hi = fsigmas[0], fsigmas[-1]
spline = UnivariateSpline(fsigmas, meanLLs, s=1e3, ext='raise')
grid = np.linspace(sigma_lo, sigma_hi, 250)
ax.plot(grid, spline(grid), label='spline', color='k', linewidth=2)


## maximise the spline over the sampled sigma range
def neg_spline(x):
    return -spline(x)


opt = minimize_scalar(neg_spline, bounds=(sigma_lo, sigma_hi), method='bounded')
max_LL = -opt.fun
sigma_opt = opt.x

ax.axvline(sigma_opt, color='k', linestyle='--')
print(f"s_opt = {sigma_opt:0.2f}")
print(f"max LL = {max_LL:0.2f}")

## allowed drop in LL: half the 95% quantile of chi-squared with 1 dof
DL = sts.chi2.ppf(0.95, 1) / 2


## zero where the spline lies exactly DL below its maximum
def ll_drop(x):
    return spline(x) - max_LL + DL


## one crossing is searched on each side of the optimum
lCI = root_scalar(ll_drop, bracket=[sigma_lo, sigma_opt]).root
rCI = root_scalar(ll_drop, bracket=[sigma_opt, sigma_hi]).root

print(f"95% CI = [{lCI:0.2f}, {rCI:0.2f}]")

ax.axvspan(lCI, rCI, color='k', alpha=0.2, linewidth=0)

ax.set_xlabel("$s$")
ax.set_ylabel("log-likelihood")

## legend is disabled: ax.legend()

fig.savefig("../data/out/profile-likelihood-s.pdf", bbox_inches='tight')