## Estimate Ne from estimated case prevalence and incidence using equations from [Volz 2012](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3249372/) as a sanity check for our phylogenetic Ne estimates

In [14]:


import baltic as bt
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from datetime import datetime as dt
from datetime import timedelta
import time
#import pymc3
import math
import arviz as az
#from hpd import hpd
import scipy.stats as stats
from io import StringIO
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()
import seaborn as sns
from scipy.stats import pearsonr

from zipfile import ZipFile
import scipy as sp
import re


import sys, subprocess, glob, os, shutil, re, importlib
from subprocess import call
import imp


%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import matplotlib.patheffects as path_effects
import matplotlib.lines as mlines
from matplotlib.font_manager import FontProperties
import matplotlib.colors as clr
from matplotlib import rc
import textwrap as textwrap
from textwrap import wrap


from scipy.special import binom

In [24]:
# Load the per-region case-prevalence estimates (tab-separated); the `date`
# column is parsed into datetimes at read time.
prevalence_path = "../case-rt-analysis/estimates/case-prevalence-estimates_region.tsv"
case_prevalence = pd.read_csv(prevalence_path, sep="\t", parse_dates=["date"])

# Preview only the North America rows as a quick check of the load.
is_north_america = case_prevalence["region"] == "North America"
case_prevalence.loc[is_north_america]

Unnamed: 0,date,region,median_prev,prev_upper_50,prev_lower_50
265,2022-05-10,North America,12.386408,14.179571,10.616728
266,2022-05-11,North America,11.050711,12.542950,9.756121
267,2022-05-12,North America,10.233192,11.425304,9.202360
268,2022-05-13,North America,9.854439,10.885749,8.982046
269,2022-05-14,North America,9.895592,10.821201,9.043947
...,...,...,...,...,...
522,2023-01-22,North America,68.484420,73.466300,62.998490
523,2023-01-23,North America,65.674750,71.132660,60.423622
524,2023-01-24,North America,63.023660,68.774090,57.736840
525,2023-01-25,North America,60.477660,66.589910,55.166687


In [25]:
# Load the per-region smoothed case-incidence estimates (tab-separated); the
# `date` column is parsed into datetimes at read time.
incidence_path = "../case-rt-analysis/estimates/case-incidence-estimates_region.tsv"
case_incidence = pd.read_csv(incidence_path, sep="\t", parse_dates=["date"])

# Display the loaded frame.
case_incidence

Unnamed: 0,date,region,median_I_smooth,I_smooth_upper_50,I_smooth_lower_50
0,2022-05-07,Western Europe,1.395747,1.802982,1.111079
1,2022-05-08,Western Europe,1.478217,1.909468,1.176746
2,2022-05-09,Western Europe,1.618982,2.059157,1.305165
3,2022-05-10,Western Europe,2.106742,2.451840,1.851942
4,2022-05-11,Western Europe,2.728011,3.013691,2.483191
...,...,...,...,...,...
1241,2023-01-22,Central Europe,0.000085,0.004152,0.000003
1242,2023-01-23,Central Europe,0.000079,0.004024,0.000003
1243,2023-01-24,Central Europe,0.000072,0.003904,0.000002
1244,2023-01-25,Central Europe,0.000068,0.003748,0.000002


In [26]:
# Join prevalence and incidence into one frame. No join keys are passed, so
# pandas performs an inner join on every column name the two frames share
# (presumably `date` and `region` -- verify against the input files).
combined_df = case_prevalence.merge(case_incidence)

# Display the merged frame.
combined_df

Unnamed: 0,date,region,median_prev,prev_upper_50,prev_lower_50,median_I_smooth,I_smooth_upper_50,I_smooth_lower_50
0,2022-05-07,Western Europe,7.685365,9.305990,6.569647,1.395747,1.802982,1.111079
1,2022-05-08,Western Europe,9.216010,10.660913,8.152115,1.478217,1.909468,1.176746
2,2022-05-09,Western Europe,10.836695,12.183267,9.772762,1.618982,2.059157,1.305165
3,2022-05-10,Western Europe,12.488087,13.840465,11.351680,2.106742,2.451840,1.851942
4,2022-05-11,Western Europe,14.187191,15.643207,12.904485,2.728011,3.013691,2.483191
...,...,...,...,...,...,...,...,...
1241,2023-01-22,Central Europe,0.000359,0.017725,0.000012,0.000085,0.004152,0.000003
1242,2023-01-23,Central Europe,0.000330,0.017247,0.000011,0.000079,0.004024,0.000003
1243,2023-01-24,Central Europe,0.000309,0.016600,0.000010,0.000072,0.003904,0.000002
1244,2023-01-25,Central Europe,0.000298,0.016127,0.000008,0.000068,0.003748,0.000002


In [27]:
# Case-based effective population size on the natural-log scale:
#     log Ne = log( prevalence**2 / (2 * incidence) )
# evaluated once for the medians and once for each pair of interval bounds.
# NOTE(review): the bound columns are computed from the `*_50` interval
# columns even though their names end in `_95` -- confirm which interval is
# intended before relabelling anything downstream.
ne_inputs = {
    "ne_median": ("median_prev", "median_I_smooth"),
    "ne_median_lower_95": ("prev_lower_50", "I_smooth_lower_50"),
    "ne_median_upper_95": ("prev_upper_50", "I_smooth_upper_50"),
}
for ne_col, (prev_col, incidence_col) in ne_inputs.items():
    prevalence = combined_df[prev_col]
    incidence = combined_df[incidence_col]
    combined_df[ne_col] = np.log((prevalence**2) / (2 * incidence))

In [28]:
# Persist the merged estimates together with the derived log-Ne columns.
# The row index is written as the first (unnamed) CSV column, matching the
# pandas default.
ne_output_path = "../data/case_based_ne.csv"
combined_df.to_csv(ne_output_path, index=True)

In [29]:
# Fixed region -> hex colour mapping. `domain` and `range_` are the parallel
# lists handed to the Altair colour scale so a region keeps the same colour
# in every layer.
region_palette = {
    "Central Europe": "#EEC060",
    "North America": "#2664A5",
    "South America": "#A76BB1",
    "Southern Europe": "#EEA160",
    "Western Europe": "#356D4C",
}
domain = list(region_palette.keys())
range_ = list(region_palette.values())


In [30]:
# Both layers must declare the same size: the layers previously disagreed
# (850x300 vs 800x400), and Altair 5 refuses to layer sub-charts whose
# width/height values are inconsistent (it raises ValueError).
PLOT_WIDTH = 850
PLOT_HEIGHT = 300

# Shaded band spanning the lower and upper log-Ne bounds, coloured by region.
# clip=True trims anything that falls outside the fixed 0-9 y-domain.
# No mark-level colour is set: the `region` colour encoding overrides it.
band3 = alt.Chart(combined_df).mark_area(
    interpolate='monotone', opacity=0.3, clip=True
).encode(
    alt.X('date:T', axis=alt.Axis(title="", tickCount="month", grid=False, format="%B %Y")),
    alt.Y('ne_median_lower_95', axis=alt.Axis(title="Estimated log Ne", grid=False), scale=alt.Scale(domain=(0, 9))),
    alt.Y2('ne_median_upper_95'),
    alt.Color("region:N", legend=alt.Legend(title="Region", symbolSize=150, labelFontSize=20, titleFontSize=20), scale=alt.Scale(domain=domain, range=range_))
).properties(
    width=PLOT_WIDTH,
    height=PLOT_HEIGHT)

# Median log-Ne line per region, drawn on top of the band. Rows whose median
# log Ne is not positive are filtered out of this layer only.
line3 = alt.Chart(combined_df).mark_line(
    opacity=1, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('ne_median', axis=alt.Axis(title="", grid=False)),
    alt.Color("region:N", scale=alt.Scale(domain=domain, range=range_))
).properties(
    width=PLOT_WIDTH,
    height=PLOT_HEIGHT).transform_filter(datum.ne_median > 0)

# Layer the line over the band and display the combined chart.
case_prev_plot = band3 + line3
case_prev_plot