# Compute profile likelihood and make figure

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import re
import scipy.stats as sts
import xml.etree.ElementTree as ET
import scipy.stats as sts
import pickle
import glob
import csv
from scipy.interpolate import UnivariateSpline
from scipy.optimize import minimize_scalar, root_scalar


# Make the parent directory importable so the project-local `evpytools`
# package can be found from this notebook's location.
import sys, importlib
sys.path.append("..")
from evpytools import evplot
from evpytools import auxiliary as aux
from evpytools import definitions as defn
# Reload the project modules on every execution of this cell; presumably so
# that edits to evpytools are picked up without restarting the kernel.
for mod in [evplot, aux, defn]:
    importlib.reload(mod)

In [None]:
# Set the global matplotlib font size to 18 for all figures drawn afterwards.
plt.rcParams.update({'font.size' : 18})

In [None]:
## import data
def import_ll_data(ll_file):
    """Read a tab-separated log-likelihood table and group the values by sigma.

    Every non-empty line of ``ll_file`` is split on tabs. The first field is
    parsed as the sigma value and the second as a log-likelihood; any further
    fields are ignored.

    Parameters
    ----------
    ll_file : str or path-like
        Path of the tab-separated file to read.

    Returns
    -------
    sigmas
        The distinct sigma values, exactly as returned by ``aux.unique``.
    ll_vals : list of list of float
        For each entry of ``sigmas``, the log-likelihoods of all rows with
        that sigma, in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError, IndexError
        If a non-empty line has fewer than two fields or a field that cannot
        be parsed as a float.
    """
    with open(ll_file) as f:
        # Parse each row exactly once instead of re-converting the same
        # strings for every unique sigma.
        records = [
            (float(fields[0]), float(fields[1]))
            for fields in (line.split('\t')
                           for line in f.read().split('\n') if line != '')
        ]
    sigmas = aux.unique([sigma for sigma, _ in records])
    # Single-pass grouping; dict lookup uses the same float equality the
    # previous per-sigma scans relied on.
    grouped = {}
    for sigma, ll in records:
        grouped.setdefault(sigma, []).append(ll)
    ll_vals = [grouped.get(sigma, []) for sigma in sigmas]
    return sigmas, ll_vals

In [None]:
# Profile-likelihood tables for the two regions (reg1 = UK file, reg2 = NL
# file). The commented-out pair below selects the D614G tables instead of the
# B117 ones; keep the choice in sync with `variant` and the slice bounds used
# in the later cells.
#ll_file_reg1 = "../data/out/prof-lik-UK-D614G-wk.tsv"
#ll_file_reg2 = "../data/out/prof-lik-NL-D614G-wk.tsv"

ll_file_reg1 = "../data/out/prof-lik-UK-B117-wk-long.tsv"
ll_file_reg2 = "../data/out/prof-lik-NL-B117-wk-long.tsv"


# Each call returns the sigma values and, per sigma, the list of
# log-likelihood values read from the file.
sigmas_reg1, LLs_reg1 = import_ll_data(ll_file_reg1)
sigmas_reg2, LLs_reg2 = import_ll_data(ll_file_reg2)

In [None]:
def plot_prof_lik(ax, sigmas, LLs, spline_s, bounds=None, *, show_iqr=False):
    """Plot a profile likelihood with a smoothing spline, its maximum and a 95% CI.

    The mean log-likelihood per sigma is drawn as a scatter plot and smoothed
    with a ``UnivariateSpline``. The spline's maximum is marked with a dashed
    vertical line. The 95% confidence interval is the region where the spline
    lies within ``chi2.ppf(0.95, 1) / 2`` of its maximum; it is shaded when
    both of its end points can be located. The optimum, the maximum
    log-likelihood and the interval are also printed.

    Parameters
    ----------
    ax : matplotlib axes (or any object with ``scatter``, ``plot``,
        ``axvline`` and ``axvspan`` methods)
        Axes to draw on.
    sigmas : sequence of float
        Sigma values; must be accepted as ``x`` by ``UnivariateSpline``
        after slicing.
    LLs : sequence of sequences of float
        For each sigma, the log-likelihood values that are averaged.
    spline_s : float
        Smoothing factor passed as ``s`` to ``UnivariateSpline``.
    bounds : tuple(int or None, int or None), optional
        ``(start, stop)`` slice indices applied to both ``sigmas`` and
        ``LLs`` before plotting and fitting. ``None`` uses all points.
    show_iqr : bool, keyword-only, optional
        If true, draw a vertical bar from the 25th to the 75th percentile of
        the log-likelihoods at every sigma. Off by default.

    Returns
    -------
    None
    """
    if bounds is None:
        fsigmas, fLLs = sigmas, LLs
    else:
        start, stop = bounds
        fsigmas, fLLs = sigmas[start:stop], LLs[start:stop]

    ## compute and plot mean
    meanLLs = [np.mean(lls) for lls in fLLs]
    ax.scatter(fsigmas, meanLLs, color='k', marker='o', label="mean")

    ## optionally plot inter-quartile error bars for LL
    if show_iqr:
        for sigma, lls in zip(fsigmas, fLLs):
            q25, q75 = np.percentile(lls, [25, 75])
            ax.plot([sigma, sigma], [q25, q75], color='k')

    ## fit a spline through the points
    # ext='raise' makes any evaluation outside the fitted range an error
    # instead of a silent extrapolation.
    sigma_range = (fsigmas[0], fsigmas[-1])
    cs = UnivariateSpline(fsigmas, meanLLs, s=spline_s, ext='raise')
    xs = np.linspace(*sigma_range, 250)
    ax.plot(xs, cs(xs), label='spline', color='k', linewidth=2)

    ## find max of spline and CI
    res = minimize_scalar(lambda x: -cs(x), bounds=sigma_range, method='bounded')
    # The spline returns 0-d arrays; convert so the values are plain floats.
    max_LL = float(-res.fun)
    sigma_opt = float(res.x)

    ax.axvline(sigma_opt, color='k', linestyle='--')
    print(f"s_opt = {sigma_opt:0.2f}")
    print(f"max LL = {max_LL:0.2f}")

    # Likelihood-ratio threshold: half the 95% quantile of chi-square(1).
    DL = sts.chi2.ppf(0.95, 1) / 2

    def drop_from_max(x):
        # Zero where the spline sits exactly DL below its maximum.
        return cs(x) - max_LL + DL

    try:
        lres = root_scalar(drop_from_max, bracket=[fsigmas[0], sigma_opt])
        rres = root_scalar(drop_from_max, bracket=[sigma_opt, fsigmas[-1]])
    except (ValueError, RuntimeError):
        # root_scalar raises ValueError when the spline does not cross the
        # threshold inside a bracket (e.g. the optimum sits on the boundary).
        # Anything else is a real bug and is allowed to propagate.
        print("unable to compute CI!")
    else:
        lCI = lres.root
        rCI = rres.root

        print(f"95% CI = [{lCI:0.2f}, {rCI:0.2f}]")

        ax.axvspan(lCI, rCI, color='k', alpha=0.2, linewidth=0)



In [None]:
## import other CIs
# Reads the tab-separated estimates table and keeps, for the selected variant,
# one row per region. The columns used are: variant, region, estimate,
# low95, high95, low90, high90.

#variant = "D614G"
variant = "B.1.1.7"

with open("../data/in/estimates-popgen-NL-UK.tsv") as f:
    reader = csv.DictReader(f, delimiter='\t')
    # keep only the rows belonging to the selected variant
    rows = [row for row in reader if row["variant"] == variant]
    
# Order matters: the figure cell indexes these lists by subplot position.
regions = ["United Kingdom", "Netherlands"]

# First matching row for each region.
# NOTE(review): next() has no default here, so a region that is missing from
# the table surfaces as a bare StopIteration.
snd_estimates = [next(filter(lambda x: x["region"] == region, rows))
                 for region in regions]

# Point estimates and (low, high) interval pairs, in the order of `regions`.
snd_point_estimates = [float(row["estimate"]) for row in snd_estimates]
snd_CI95s = [(float(row["low95"]), float(row["high95"])) for row in snd_estimates]
snd_CI90s = [(float(row["low90"]), float(row["high90"])) for row in snd_estimates]

In [None]:
## make a likelihood profile graph
# Two side-by-side panels: region 1 (UK data) on the left, region 2 (NL data)
# on the right.

fig, axs = plt.subplots(1, 2, figsize=(14,5))

# (start, stop) slice indices that restrict which sigma values are plotted
# and used for the spline fit; one pair per region, chosen per variant.
#D614G
#bounds1 = (7,-7)
#bounds2 = (6,-6)

#B117
bounds1 = (15,-5)
bounds2 = (None,-8)


# 10 is the spline smoothing factor handed to plot_prof_lik.
plot_prof_lik(axs[0], sigmas_reg1, LLs_reg1, 10, bounds=bounds1)
plot_prof_lik(axs[1], sigmas_reg2, LLs_reg2, 10, bounds=bounds2)

## add secondary estimates

# Vertical position of the secondary estimate as a fraction of the current
# y-range, measured from the bottom; 1.1 puts it 10% above the y-limits that
# are in effect when get_ylim() is called.
CIpos = 1.1

for i, ax in enumerate(axs):
    ymin, ymax = ax.get_ylim()
    y = ymin + CIpos * (ymax - ymin)
    xhat = snd_point_estimates[i]
    # thin line: 95% interval
    CI95 = snd_CI95s[i]
    ax.plot(CI95, [y, y], color='tab:red', zorder=2)
    # thick line: 90% interval
    CI90 = snd_CI90s[i]
    ax.plot(CI90, [y, y], color='tab:red', linewidth=4, zorder=2)
    # dot: point estimate
    ax.scatter([xhat], [y], marker='o', color='tab:red', s=50, zorder=2)


for ax in axs:
    ax.set_xlabel("$s$")

# only the left panel carries the y-axis label
axs[0].set_ylabel("log-likelihood")

axs[0].set_title("United Kingdom")
axs[1].set_title("Netherlands")

# bbox_inches='tight' trims the surrounding whitespace of the saved PDF
fig.savefig("../data/out/figures/Fig2RegionsProfLik.pdf", bbox_inches='tight')