# Compute profile likelihood and make figure

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import re
import scipy.stats as sts
import xml.etree.ElementTree as ET
import scipy.stats as sts
import pickle
import glob
import csv
import string
from scipy.interpolate import UnivariateSpline
from scipy.optimize import minimize_scalar, root_scalar


import sys, importlib
sys.path.append("..")
from evpytools import evplot
from evpytools import auxiliary as aux
from evpytools import definitions as defn
for mod in [evplot, aux, defn]:
    importlib.reload(mod)

In [None]:
# Use a larger default font size for every figure drawn below.
plt.rcParams['font.size'] = 16

In [None]:
## import data
def import_ll_data(ll_file):
    """Read a tab-separated profile-likelihood table.

    Every non-empty line of ``ll_file`` must hold a sigma value in its first
    column and a log-likelihood value in its second column; both are parsed
    as floats. Any further columns are ignored.

    Returns a pair ``(sigmas, ll_vals)``. ``sigmas`` is whatever
    ``aux.unique`` returns for the list of first-column values, and
    ``ll_vals[i]`` lists (in file order) the log-likelihoods of all rows
    whose sigma equals ``sigmas[i]``.

    Raises ``ValueError`` if a value cannot be parsed as a float and
    ``IndexError`` if a non-empty line has fewer than two columns.
    """
    with open(ll_file) as f:
        lines = [line for line in f.read().split('\n') if line != '']
    # Parse each row exactly once; only the first two columns are used, so
    # files without extra columns are accepted.
    table = []
    for line in lines:
        fields = line.split('\t')
        table.append((float(fields[0]), float(fields[1])))
    sigmas = aux.unique([row_sigma for row_sigma, _ in table])
    ll_vals = [[ll for row_sigma, ll in table if row_sigma == sigma]
               for sigma in sigmas]
    return sigmas, ll_vals

In [None]:
## place in row-major order!

# One profile-likelihood table per panel; IDs below lists the matching
# panel titles in the same order.
ll_files = [
    "../data/out/prof-lik-NL-D614G-wk.tsv",
    "../data/out/prof-lik_Netherlands-B.1.1.7.tsv",
    "../data/out/prof-lik_Netherlands-B.1.351.tsv",
    "../data/out/prof-lik-UK-D614G-wk.tsv",
    "../data/out/prof-lik_United_Kindom-B.1.1.7.tsv",
]

IDs = ["NL D614G", "NL B.1.1.7", "NL B.1.351", "UK D614G", "UK B.1.1.7"]

# Load every table, then split the (sigmas, LLs) pairs into two parallel lists.
loaded_tables = [import_ll_data(path) for path in ll_files]
sigmass = [table_sigmas for table_sigmas, _ in loaded_tables]
LLss = [table_LLs for _, table_LLs in loaded_tables]


In [None]:
def plot_prof_lik(ax, sigmas, LLs, spline_s, maxDL=10):
    """Draw a profile likelihood, its smoothing spline, the maximum and a 95% CI.

    Parameters:
        ax: axes-like object; its ``scatter``, ``plot``, ``axvline`` and
            ``axvspan`` methods are called.
        sigmas: sequence of parameter values, one per entry of ``LLs``.
        LLs: sequence of sequences; ``LLs[i]`` holds the log-likelihood
            values for ``sigmas[i]`` and is reduced to its mean.
        spline_s: smoothing factor handed to ``UnivariateSpline`` as ``s``.
        maxDL: points whose mean log-likelihood lies more than ``maxDL``
            below the best mean are left out of the plot and the fit.

    The function prints the number of retained points, the location and
    height of the spline maximum and, when it can be found, the confidence
    interval. It returns ``None``.

    If the spline does not drop by the likelihood-ratio threshold on either
    side of the maximum within the retained range, no interval is drawn and
    "unable to compute CI!" is printed. Any other error propagates.
    """
    ## compute mean
    meanLLs = [np.mean(lls) for lls in LLs]
    ## filter
    emL = np.max(meanLLs)
    kept = [(sigmas[i], l) for i, l in enumerate(meanLLs) if l >= emL - maxDL]
    # The spline needs increasing x and the bounds below must be the true
    # extremes, so order the retained points by sigma.
    kept.sort(key=lambda point: point[0])
    fsigmas = [point[0] for point in kept]
    fLLs = [point[1] for point in kept]
    ## plot mean
    print(len(fsigmas), len(fLLs))
    ax.scatter(fsigmas, fLLs, s=10, color='k', marker='o', label="mean")
    ## fit a spline through the points
    bounds = [fsigmas[0], fsigmas[-1]]
    cs = UnivariateSpline(fsigmas, fLLs, s=spline_s, ext='raise')
    xs = np.linspace(*bounds, 250)
    ax.plot(xs, cs(xs), label='spline', color='k', linewidth=2)

    ## find max of spline and CI
    res = minimize_scalar(lambda x: -cs(x), bounds=bounds, method='bounded')
    max_LL = float(-res.fun)
    sigma_opt = float(res.x)

    ax.axvline(sigma_opt, color='k', linestyle='--')
    print(f"s_opt = {sigma_opt:0.2f}")
    print(f"max LL = {max_LL:0.2f}")

    # likelihood-ratio threshold: half the 95% quantile of chi-square (1 df)
    DL = sts.chi2.ppf(0.95,1)/2

    def excess(x):
        # zero where the spline sits exactly DL below its maximum
        return float(cs(x)) - max_LL + DL

    try:
        lres = root_scalar(excess, bracket=[fsigmas[0], sigma_opt])
        rres = root_scalar(excess, bracket=[sigma_opt, fsigmas[-1]])
    except ValueError:
        # root_scalar raises ValueError when the bracket has no sign change,
        # i.e. the profile never drops by DL inside the retained range.
        print("unable to compute CI!")
    else:
        lCI = lres.root
        rCI = rres.root

        print(f"95% CI = [{lCI:0.2f}, {rCI:0.2f}]")

        ax.axvspan(lCI, rCI, color='k', alpha=0.2, linewidth=0)

In [None]:
## import other CIs

# Read the tab-separated table of externally obtained estimates.
with open("../data/in/CrIs.tsv") as f:
    rows = list(csv.DictReader(f, delimiter='\t'))

# Map "<region> <variant>" to the median and the [low, high] interval.
est_dict = {
    rec["region"] + " " + rec["variant"]: {
        "med" : float(rec["median"]),
        "CI" : [float(rec["low"]), float(rec["high"])],
    }
    for rec in rows
}

In [None]:
## make a likelihood profile graph

numcols = 3
# ceiling division: enough rows to give every ID a panel of its own
numrows = (len(IDs) + numcols - 1) // numcols

fig, axss = plt.subplots(numrows, numcols, figsize=(14,5), sharex="col")

# panels are filled row by row, starting from the bottom row of the grid
axs = axss[::-1,:].flatten()

# switch off the panels that have no ID assigned to them
for spare_ax in axs[len(IDs):]:
    spare_ax.axis("off")

for panel, ID, panel_sigmas, panel_LLs in zip(axs, IDs, sigmass, LLss):
    plot_prof_lik(panel, panel_sigmas, panel_LLs, 2e1)
    panel.set_title(ID, fontsize="small")

## add secondary estimates

# vertical position of the marker, as a fraction of the current y-range
CIpos = 1.1

for panel, ID in zip(axs, IDs):
    ymin, ymax = panel.get_ylim()
    y = ymin + CIpos * (ymax - ymin)
    estimate = est_dict[ID]
    xhat = estimate["med"]
    CI95 = estimate["CI"]
    panel.plot(CI95, [y, y], color='tab:red', zorder=2)
    #CI90 = snd_CI90s[i]
    #ax.plot(CI90, [y, y], color='tab:red', linewidth=4, zorder=2)
    panel.scatter([xhat], [y], marker='o', color='tab:red', s=50, zorder=2)


# only the bottom row carries the x-axis label
for ax in axss[-1]:
    ax.set_xlabel("selection ($s$)")

fig.tight_layout()

fig.text(0, 0.5, "log-likelihood", rotation=90, va='center')

## add labels
subplot_labels = string.ascii_uppercase

# letter the panels in reading order; grid index 2 is skipped
labelled_axes = axss.flatten()[[0,1,3,4,5]]
for letter, ax in zip(subplot_labels, labelled_axes):
    ax.text(-0.15, 1.0, letter, fontsize=22, transform=ax.transAxes)

fig.savefig("../data/out/FigProfLik.pdf", bbox_inches='tight')