In [1]:
import baltic as bt
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from datetime import datetime as dt
from datetime import timedelta
import time
#import pymc3
import math
import arviz as az
import re
#from hpd import hpd
import scipy.stats as stats
from io import StringIO
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')


### Load Log files

In [2]:
from datetime import date
current_date = str(date.today())

In [3]:
log_file_path = "../../../mpox_rhino/550_glm_region_air_travel.log"


In [4]:
domain = ['CentralEurope', 'NorthAmerica', 'SouthAmerica' ,"SouthernEurope", "WesternEurope"]
range_ = ['#EEC060', '#2664A5', '#A76BB1', "#EEA160", "#356D4C"]

## we can then estimate the transmission rates via the change in Ne's over time plus the uninfectious rate

In [5]:
def read_in_Ne_changes_mascot(log_file_path):
    
    Ne_skyline_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
            
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    Ne_column_indices = []   # list to store column indices
                    Nes_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "Ne." in col:
                            Ne_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in Ne_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        interval = name.split(".")[2]
                       
                        Nes_key[n] = name
                        Ne_skyline_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    Ne_skyline_dict["sample"].append(sample)

                    for index in Ne_column_indices:
                        name = Nes_key[index]
                        Ne_skyline_dict[name].append(line.split("\t")[index])
                    
                
    return(Ne_skyline_dict)

In [6]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            #print(deme)
            interval = i.split(".")[2]
            #print(interval)
            #print(i)
            next_interval = int(interval)+7
            local_series = input_df[i].astype('float').to_numpy()
            #print(local_series)
            mean_log = local_series.mean()
            mean_linear = 10**mean_log
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            lower_hpd_linear_95 = 10**lower_hpd_log_95
            upper_hpd_log_95 = hpd_95[1]
            upper_hpd_linear_95 = 10**upper_hpd_log_95
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            lower_hpd_linear_50 = 10**lower_hpd_log_50
            upper_hpd_log_50 = hpd_50[1]
            upper_hpd_linear_50 = 10**upper_hpd_log_50
            
            try:
                next_local_series = input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float').to_numpy()
                diff_series = np.subtract(local_series, next_local_series)
                #print(local_series)
                #print(next_local_series)
                #print(diff_series)
                diff_mean_log = diff_series.mean()
                diff_hpd_95 = az.hdi(diff_series, 0.95)
                diff_lower_hpd_log_95 = diff_hpd_95[0]
                diff_lower_hpd_linear_95 = math.exp(diff_lower_hpd_log_95)
                diff_upper_hpd_log_95 = diff_hpd_95[1]
                diff_upper_hpd_linear_95 = math.exp(diff_upper_hpd_log_95)
                diff_hpd_50 = az.hdi(diff_series, 0.50)
                diff_lower_hpd_log_50 = diff_hpd_50[0]
                diff_lower_hpd_linear_50 = math.exp(diff_lower_hpd_log_50)
                diff_upper_hpd_log_50 = diff_hpd_50[1]
                diff_upper_hpd_linear_50 = math.exp(diff_upper_hpd_log_50)
            except KeyError:
                pass   
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_Ne_log":mean_log,"mean_Ne_linear":mean_linear, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50,
                                                   "upper_hpd_linear":upper_hpd_linear_95,"lower_hpd_linear":lower_hpd_linear_95,
                                                   "diff_mean_Ne_log":diff_mean_log, 
                                                   "diff_upper_hpd_log_95":diff_upper_hpd_log_95,"diff_lower_hpd_log_95":diff_lower_hpd_log_95, 
                                                   "diff_upper_hpd_log_50":diff_upper_hpd_log_50,"diff_lower_hpd_log_50":diff_lower_hpd_log_50,
                                                   "diff_upper_hpd_linear":diff_upper_hpd_linear_95,"diff_lower_hpd_linear":diff_lower_hpd_linear_95,
                                                   "diff_upper_hpd_linear_50":diff_upper_hpd_linear_50,"diff_lower_hpd_linear_50":diff_lower_hpd_linear_50})
                new_df = new_df.append(local_df)
                #print(new_df)
            except:
                pass
            
    return(new_df)

In [7]:
Ne_skyline = read_in_Ne_changes_mascot(log_file_path)

In [8]:
Ne_df = pd.DataFrame.from_dict(Ne_skyline)
print(len(Ne_df))
burnin_percent = 0.3

rows_to_remove = int(len(Ne_df)* burnin_percent)
Ne_df = Ne_df.iloc[rows_to_remove:]

print(len(Ne_df))
Ne_df = Ne_df.reset_index()
Ne_df

42556
29790


Unnamed: 0,index,sample,Ne.CentralEurope.0,Ne.CentralEurope.1,Ne.CentralEurope.2,Ne.CentralEurope.3,Ne.CentralEurope.4,Ne.CentralEurope.5,Ne.CentralEurope.6,Ne.CentralEurope.7,...,Ne.WesternEurope.29,Ne.WesternEurope.30,Ne.WesternEurope.31,Ne.WesternEurope.32,Ne.WesternEurope.33,Ne.WesternEurope.34,Ne.WesternEurope.35,Ne.WesternEurope.36,Ne.WesternEurope.37,Ne.WesternEurope.38
0,12766,12766000,0.30872008655006156,0.30872008655006156,0.30872008655006156,0.30872008655006156,0.30872008655006156,0.30872008655006156,0.30872008655006156,0.30872008655006156,...,3.3772182312674413,3.041120797027443,2.411673959623484,1.3594823542842498,0.6589614676471273,0.6589614676471273,0.6589614676471273,0.6589614676471273,0.6589614676471273,0.6589614676471273
1,12767,12767000,0.32015164491599063,0.32015164491599063,0.32015164491599063,0.32015164491599063,0.32015164491599063,0.32015164491599063,0.32015164491599063,0.32015164491599063,...,3.4284076270364667,3.087290681957899,2.4484189139707655,1.3803787068539453,0.6692022584942309,0.6692022584942309,0.6692022584942309,0.6692022584942309,0.6692022584942309,0.6692022584942309
2,12768,12768000,0.34953509063803945,0.34953509063803945,0.34953509063803945,0.34953509063803945,0.34953509063803945,0.34953509063803945,0.34953509063803945,0.34953509063803945,...,3.5609247532292714,3.213009859865512,2.559364238872221,1.4587143336666406,0.7169678582734167,0.7169678582734167,0.7169678582734167,0.7169678582734167,0.7169678582734167,0.7169678582734167
3,12769,12769000,0.389541751348119,0.389541751348119,0.389541751348119,0.389541751348119,0.389541751348119,0.389541751348119,0.389541751348119,0.389541751348119,...,3.393750110094218,3.0799406261580087,2.48497678076162,1.4618521066713264,0.7478173193188962,0.7478173193188962,0.7478173193188962,0.7478173193188962,0.7478173193188962,0.7478173193188962
4,12770,12770000,0.3466734189008246,0.3466734189008246,0.3466734189008246,0.3466734189008246,0.3466734189008246,0.3466734189008246,0.3466734189008246,0.3466734189008246,...,3.533953315487492,3.190765040558096,2.545334258611481,1.4559286495108634,0.7188474995791275,0.7188474995791275,0.7188474995791275,0.7188474995791275,0.7188474995791275,0.7188474995791275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,42551,42551000,0.21476944429280612,0.21476944429280612,0.21476944429280612,0.21476944429280612,0.21476944429280612,0.21476944429280612,0.21476944429280612,0.21476944429280612,...,3.890568020458064,3.424635053413683,2.5825979187459907,1.285648414087344,0.5325973873163788,0.5325973873163788,0.5325973873163788,0.5325973873163788,0.5325973873163788,0.5325973873163788
29786,42552,42552000,0.22660529046430206,0.22660529046430206,0.22660529046430206,0.22660529046430206,0.22660529046430206,0.22660529046430206,0.22660529046430206,0.22660529046430206,...,3.7860998681444653,3.343544348430756,2.5396693125331526,1.2869843857648082,0.545276683287044,0.545276683287044,0.545276683287044,0.545276683287044,0.545276683287044,0.545276683287044
29787,42553,42553000,0.2344531617795271,0.2344531617795271,0.2344531617795271,0.2344531617795271,0.2344531617795271,0.2344531617795271,0.2344531617795271,0.2344531617795271,...,4.616199532821874,4.047078418588466,3.0249999698976398,1.4731699420050026,0.5935804065978246,0.5935804065978246,0.5935804065978246,0.5935804065978246,0.5935804065978246,0.5935804065978246
29788,42554,42554000,0.2484790550934474,0.2484790550934474,0.2484790550934474,0.2484790550934474,0.2484790550934474,0.2484790550934474,0.2484790550934474,0.2484790550934474,...,3.547700644334594,3.152255241063952,2.4270278887752945,1.271783211427467,0.562120159842907,0.562120159842907,0.562120159842907,0.562120159842907,0.562120159842907,0.562120159842907


## calculating transmission rate

In [9]:
def generate_summary_diff_df(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            next_interval = int(interval)+2 #averaging over three weeks to reduce noise
            local_series = input_df[i].astype('float').to_numpy()
           
            try:
                new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
            
            
            except KeyError:
                pass 
            
            
    return(new_df)

In [10]:
ne_diff_summary = generate_summary_diff_df(Ne_df)

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

In [11]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.738481,7.295944,8.755013,20.933175,33.732694,18.829050,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.736461,7.294257,8.752989,20.928336,33.724897,18.824697,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.570582,7.155761,8.586796,20.530970,33.084562,18.467273,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.088176,6.752990,8.103478,19.375359,31.222357,17.427820,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.515924,7.110126,8.532035,20.400036,32.873569,18.349500,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.633625,8.878240,10.653743,25.473025,41.048419,22.912570,0.0,0.0,0.0,0.0
29786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.362262,8.651673,10.381866,24.822970,40.000891,22.327857,0.0,0.0,0.0,0.0
29787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.968426,9.157773,10.989178,26.275047,42.340836,23.633977,0.0,0.0,0.0,0.0
29788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.851800,8.225478,9.870439,23.600151,38.030384,21.227951,0.0,0.0,0.0,0.0


In [12]:
uninfectious_rate = 365/4.5

#taken from https://www.medrxiv.org/content/10.1101/2022.08.17.22278897v1.full.pdf


In [13]:
incubation_period = 365/8

In [14]:
seir_growth_rate = ((ne_diff_summary*2 + uninfectious_rate + incubation_period)**2 - (incubation_period- uninfectious_rate)**2)/(4*incubation_period)



seir_growth_rate



Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.058335,102.544324,107.110597,148.863156,199.753196,141.184616,81.111111,81.111111,81.111111,81.111111
1,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.051951,102.539100,107.104199,148.845275,199.720008,141.168934,81.111111,81.111111,81.111111,81.111111
2,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,106.528252,102.110525,106.579390,147.380394,197.003647,139.883944,81.111111,81.111111,81.111111,81.111111
3,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,105.012098,100.868932,105.060034,143.159598,189.206138,136.178793,81.111111,81.111111,81.111111,81.111111
4,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,106.355955,101.969491,106.406728,146.899225,196.112531,139.461760,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,113.127292,107.500519,113.192561,166.091431,232.065406,156.263680,81.111111,81.111111,81.111111,81.111111
29786,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,112.248629,106.784117,112.311999,163.569119,227.294754,154.059691,81.111111,81.111111,81.111111,81.111111
29787,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,114.215814,108.387502,114.283443,169.228928,238.017614,159.003563,81.111111,81.111111,81.111111,81.111111
29788,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,110.604522,105.442587,110.664352,158.874586,218.451013,149.954375,81.111111,81.111111,81.111111,81.111111


In [15]:
#ne_diff_summary += uninfectious_rate

In [583]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.035771,8.379079,10.054758,24.040857,38.740557,21.624359,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.107570,8.439026,10.126693,24.212852,39.017718,21.779066,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.667543,8.071637,9.685833,23.158759,37.319103,20.830926,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.580881,7.164360,8.597115,20.555643,33.124321,18.489465,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.498845,7.930788,9.516816,22.754641,36.667889,20.467428,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11788,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.532291,8.793634,10.552217,25.230278,40.657246,22.694224,0.0,0.0,0.0,0.0
11789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.996835,9.181492,11.017640,26.343101,42.450500,23.695190,0.0,0.0,0.0,0.0
11790,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.026321,8.371189,10.045289,24.018217,38.704075,21.603995,0.0,0.0,0.0,0.0
11791,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.654151,8.060456,9.672415,23.126678,37.267406,20.802070,0.0,0.0,0.0,0.0


In [584]:
seir_growth_rate.filter(regex='Ne.NorthAmerica.diff.2')

Unnamed: 0,Ne.NorthAmerica.diff.2,Ne.NorthAmerica.diff.20,Ne.NorthAmerica.diff.21,Ne.NorthAmerica.diff.22,Ne.NorthAmerica.diff.23,Ne.NorthAmerica.diff.24,Ne.NorthAmerica.diff.25,Ne.NorthAmerica.diff.26,Ne.NorthAmerica.diff.27,Ne.NorthAmerica.diff.28,Ne.NorthAmerica.diff.29
0,53.573107,97.422942,100.192666,113.047965,117.586858,106.818110,106.295682,103.565597,86.300076,115.081863,339.007224
1,53.394654,97.544635,100.335920,113.294226,117.870542,107.013876,106.487261,103.735425,86.337735,115.344829,341.464434
2,54.492473,96.799942,99.459472,111.788955,116.137026,105.816747,105.315690,102.696666,86.107058,113.737657,326.541800
3,57.246005,94.972315,97.310464,108.112277,111.908004,102.887492,102.448522,100.152409,85.538618,109.814256,291.089273
4,54.915982,96.515149,99.124414,111.214380,115.475646,105.359469,104.868146,102.299726,86.018697,113.124320,320.907385
...,...,...,...,...,...,...,...,...,...,...,...
11788,52.344415,98.265950,101.185292,114.756132,119.555259,108.175360,107.623850,104.742697,86.560658,116.906182,356.177786
11789,51.206270,99.057740,102.118139,116.365228,121.410876,109.452493,108.873497,105.849640,86.804788,118.625275,372.619150
11790,53.596617,97.406929,100.173816,113.015569,117.549541,106.792354,106.270476,103.543253,86.295119,115.047270,338.684431
11791,54.526041,96.777319,99.432854,111.743291,116.084457,105.780412,105.280129,102.665129,86.100042,113.688910,326.092756


In [576]:
for i in range(190,252, 1):

    seir_growth_rate = seir_growth_rate.drop(columns="Ne.NorthAmerica.diff."+str(i))

KeyError: "['Ne.NorthAmerica.diff.190'] not found in axis"

In [82]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [83]:
#just checking to make sure the changes in Ne look okay
test_north = generate_summary_df(seir_growth_rate)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

In [84]:
test_north

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,CentralEurope,0,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,1,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,2,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,3,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,4,81.111111,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...
0,WesternEurope,32,148.783623,159.966994,137.233499,153.038567,145.239677
0,WesternEurope,33,81.111111,81.111111,81.111111,81.111111,81.111111
0,WesternEurope,34,81.111111,81.111111,81.111111,81.111111,81.111111
0,WesternEurope,35,81.111111,81.111111,81.111111,81.111111,81.111111


In [85]:
test_north['days'] = test_north.interval.astype(int) *7
test_north['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - test_north.days.map(timedelta)
test_north.date = test_north.date.astype(str)

In [86]:
line = alt.Chart(test_north).mark_area().encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="transmission rate"), scale = alt.Scale(zero= False)),
    alt.Y2('upper_hpd_log_50'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band = alt.Chart(test_north).mark_area(
    opacity=0.3
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95'),
    alt.Y2('upper_hpd_log_95'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band + line

## calculating backward migration rates

In [16]:
def read_in_forward_migration_rates_mascot(log_file_path):
    
    mig_rates_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            #print(line_number)
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
                
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    mig_column_indices = []   # list to store column indices
                    mig_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "mig." in col:
                            mig_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in mig_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        #interval = name.split(".")[2]
                       
                        mig_key[n] = name
                        mig_rates_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    mig_rates_dict["sample"].append(sample)

                    for index in mig_column_indices:
                        name = mig_key[index]
                        mig_rates_dict[name].append(line.split("\t")[index])
                    
                
                
                
    return(mig_rates_dict)

In [17]:
migration_rates_f = read_in_forward_migration_rates_mascot(log_file_path)

In [18]:
mig_df_f = pd.DataFrame.from_dict(migration_rates_f)


In [19]:
burnin_percent = 0.3
print(len(mig_df_f))
rows_to_remove = int(len(mig_df_f)* burnin_percent)
mig_df_f = mig_df_f.iloc[rows_to_remove:]

print(len(mig_df_f))
mig_df_f = mig_df_f.reset_index()
mig_df_f.head()

42556
29790


Unnamed: 0,index,sample,mig.CentralEurope_to_NorthAmerica,mig.CentralEurope_to_SouthAmerica,mig.CentralEurope_to_SouthernEurope,mig.CentralEurope_to_WesternEurope,mig.NorthAmerica_to_CentralEurope,mig.NorthAmerica_to_SouthAmerica,mig.NorthAmerica_to_SouthernEurope,mig.NorthAmerica_to_WesternEurope,...,mig.SouthAmerica_to_SouthernEurope,mig.SouthAmerica_to_WesternEurope,mig.SouthernEurope_to_CentralEurope,mig.SouthernEurope_to_NorthAmerica,mig.SouthernEurope_to_SouthAmerica,mig.SouthernEurope_to_WesternEurope,mig.WesternEurope_to_CentralEurope,mig.WesternEurope_to_NorthAmerica,mig.WesternEurope_to_SouthAmerica,mig.WesternEurope_to_SouthernEurope
0,12766,12766000,0.0651351326459637,0.0193728846675638,0.1571694646659934,0.2875067054083743,0.0698615201606602,0.6417971627970338,0.7030399773110344,1.16771462196812,...,0.4266628225373586,0.2769797015560402,0.1546767254467141,0.6765163211182026,0.4041797525417092,1.7952766601064083,0.2916661606756312,1.1498869022852285,0.2862723890064533,1.827313424610531
1,12767,12767000,0.0663521134895491,0.0196953022633373,0.159785192875611,0.2922916004984549,0.0710242062424596,0.6524784166089517,0.7147404784551302,1.167788385349544,...,0.4337636546441406,0.2815893986138705,0.1572509676825202,0.6877753963410914,0.4109064050929942,1.825154956211112,0.2965202804366891,1.1690241540062878,0.2910367417078519,1.834548742074589
2,12768,12768000,0.0246340398777874,0.0053750606259789,0.0915318257653782,0.205088223049778,0.0269898916571821,0.5327116666213948,0.5075995952693206,0.9462579203483,...,0.2782946147692488,0.1594487987903423,0.0898638011457256,0.4856439586890299,0.2600197262269369,2.1733465581758797,0.2081439132098567,0.9436975947078216,0.1666837193175954,2.196219659933963
3,12769,12769000,0.0417768133864789,0.0101791291961917,0.1170327597832199,0.2320088878060854,0.0453282979345586,0.5987801775162657,0.6502227008552155,1.150537322793801,...,0.3613068975879204,0.2216911510918702,0.1134131057126247,0.6235990319143301,0.3434934052886247,1.945716604399183,0.2348526776437594,1.1492340897552589,0.2306499371786323,1.980816797285208
4,12770,12770000,0.0295751650853831,0.006590670419187,0.0879429586665478,0.1819331616387966,0.0321855277505725,0.4977970203365375,0.5430833935630704,0.9960459255608516,...,0.2913449232869638,0.1732994618864006,0.0858158499400331,0.5200479922690016,0.2759286126768389,1.75688596863055,0.1843094649558234,0.9945695378809478,0.1808499163763917,1.7908420397208216


In [20]:
def calc_backwards_mig_rates(mig_df_f, targetName):    
    mig_rates = {}
    target = targetName
    target_columns = []
    ne_interval= 0
    mig_list = []

    for name in mig_df_f.columns:
        if target in name.split("_")[0]:
            target_columns.append(name)

    for name in Ne_df.columns:
        if target in name:
            ne_interval+=1

    for interval in range(0, ne_interval,1):
        mig_rates[str(interval)+ "." + "bmr" + "." +  str(target)] = []
        for index_1, row_1 in mig_df_f.iterrows():
            mig_list = []
            for direction in target_columns:
                other_loc = direction.split("_")[2]
                mig_rate_b = pd.to_numeric(row_1[direction]) * ((pd.to_numeric(Ne_df.loc[index_1,"Ne."+ str(target) + "." + str(interval)]))/(pd.to_numeric(Ne_df.loc[index_1, "Ne."+ str(other_loc) + "." + str(interval)])))
                #print(mig_rate_b)
                mig_list.append(mig_rate_b)
            combined_mig = np.sum(np.array(mig_list))
            #print(combined_mig)
            mig_rates[str(interval)+ "." + "bmr" + "." + str(target)].append(combined_mig)

    return(mig_rates) 


In [21]:
unique_column= []
unique_target = []
for name in mig_df_f.columns:
    try:

        if name.split("_")[0].split(".")[1] not in unique_target:
            unique_column.append(name)
            unique_target.append(name.split("_")[0].split(".")[1])
    except:
        pass



In [22]:
mig_rates = []
for target in unique_target:
    mig_rates.append(calc_backwards_mig_rates(mig_df_f, target))
    


In [23]:
mr_b_df = pd.DataFrame()
for x in mig_rates:
    x_df = pd.DataFrame(x)
    mr_b_df = pd.concat([mr_b_df, x_df], axis=1)


In [24]:
mr_b_df

Unnamed: 0,0.bmr.CentralEurope,1.bmr.CentralEurope,2.bmr.CentralEurope,3.bmr.CentralEurope,4.bmr.CentralEurope,5.bmr.CentralEurope,6.bmr.CentralEurope,7.bmr.CentralEurope,8.bmr.CentralEurope,9.bmr.CentralEurope,...,29.bmr.WesternEurope,30.bmr.WesternEurope,31.bmr.WesternEurope,32.bmr.WesternEurope,33.bmr.WesternEurope,34.bmr.WesternEurope,35.bmr.WesternEurope,36.bmr.WesternEurope,37.bmr.WesternEurope,38.bmr.WesternEurope
0,0.141677,0.169012,0.143346,0.120394,0.114632,0.105038,0.092698,0.111809,0.096931,0.072207,...,8.000181,8.304060,14.112389,9.213188,7.600430,7.600430,7.600430,7.600430,7.600430,7.600430
1,0.145229,0.173807,0.146543,0.122536,0.116725,0.107120,0.094889,0.114467,0.098909,0.073630,...,7.808612,8.114703,13.799362,9.010776,7.371246,7.371246,7.371246,7.371246,7.371246,7.371246
2,0.096196,0.115906,0.096064,0.079706,0.075759,0.070064,0.063024,0.076469,0.065192,0.048329,...,6.171183,6.384224,10.930898,7.332693,7.121689,7.121689,7.121689,7.121689,7.121689,7.121689
3,0.125459,0.148200,0.126212,0.106409,0.101701,0.094165,0.084483,0.100843,0.087640,0.066462,...,6.532804,6.803917,11.633148,7.850149,6.988877,6.988877,6.988877,6.988877,6.988877,6.988877
4,0.087540,0.104854,0.087813,0.073265,0.069768,0.064397,0.057625,0.069591,0.059796,0.044584,...,5.823270,6.066783,10.805934,7.113971,6.375786,6.375786,6.375786,6.375786,6.375786,6.375786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.106050,0.133921,0.106402,0.086328,0.080665,0.072629,0.062957,0.079975,0.066298,0.045862,...,8.171035,8.372676,13.686211,8.523680,7.388226,7.388226,7.388226,7.388226,7.388226,7.388226
29786,0.089632,0.111564,0.090673,0.074253,0.069863,0.062818,0.054138,0.067602,0.057104,0.040294,...,7.334968,7.511937,13.201964,7.854720,5.744720,5.744720,5.744720,5.744720,5.744720,5.744720
29787,0.064119,0.081624,0.064480,0.051988,0.048635,0.043556,0.037447,0.047626,0.039497,0.027167,...,6.924053,7.082439,13.422848,7.640926,5.599167,5.599167,5.599167,5.599167,5.599167,5.599167
29788,0.103542,0.127534,0.104468,0.085907,0.081114,0.073491,0.064024,0.079239,0.067210,0.048137,...,7.497675,7.724013,13.770137,8.402331,6.518233,6.518233,6.518233,6.518233,6.518233,6.518233


In [25]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    count = 0
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        #if count %7 == 0:
        deme = i.split(".")[2]
        interval = i.split(".")[0]
        local_series = input_df[i].astype('float').to_numpy()
        mean_percent = local_series.mean()
        hpd_95 = az.hdi(local_series, 0.95)
        lower_hpd_log_95 = hpd_95[0]
        upper_hpd_log_95 = hpd_95[1]
        hpd_50 = az.hdi(local_series, 0.50)
        lower_hpd_log_50 = hpd_50[0]
        upper_hpd_log_50 = hpd_50[1]




        try:
            local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                               "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                               "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
            new_df = new_df.append(local_df)
        except:
            pass
        #count +=1  
    return(new_df)

In [26]:
south_mrb_df = generate_summary_df(mr_b_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [27]:
south_mrb_df[south_mrb_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,WesternEurope,0,2.082575,2.983542,1.3304,2.153521,1.65704
0,WesternEurope,1,1.481973,2.093356,0.968127,1.545128,1.193765
0,WesternEurope,2,2.571498,3.718529,1.652841,2.644937,2.027473
0,WesternEurope,3,3.820241,5.746131,2.423916,3.931349,2.982996
0,WesternEurope,4,3.811707,5.60718,2.375068,3.911638,2.979859
0,WesternEurope,5,3.522578,5.241959,2.255519,3.589953,2.739557
0,WesternEurope,6,2.852448,4.255922,1.791617,2.912014,2.218319
0,WesternEurope,7,2.508446,3.742934,1.569673,2.56429,1.953246
0,WesternEurope,8,3.471217,5.220798,2.196429,3.519263,2.673265
0,WesternEurope,9,4.317803,6.548883,2.679685,4.37355,3.293098


In [28]:
south_mrb_df['days'] = south_mrb_df.interval.astype(int)  *7
south_mrb_df['date'] = dt.strptime("2023-01-06",  "%Y-%m-%d") - south_mrb_df.days.map(timedelta)
south_mrb_df.date = south_mrb_df.date.astype(str)



In [29]:
south_mrb_df = south_mrb_df[south_mrb_df.date > "2022-05-01"]

In [30]:
south_mrb_plot = alt.Chart(south_mrb_df).mark_area(interpolate='monotone', opacity = 0.5, color = "orange").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('upper_hpd_log_50',axis=alt.Axis(title="Number of Estimated Introductions", grid=False)),
    alt.Y2('lower_hpd_log_50' ), 
    alt.Color("deme",title = "Phylo Estimates", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_) )
).properties(
    width=800,
    height=400
)

median = alt.Chart(south_mrb_df).mark_line(interpolate = "monotone").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="", grid=False)), 
    alt.Color("deme")
).properties(
    width = 800,
    height = 400
)

In [31]:
 south_mrb_plot

In [32]:
eii_df = pd.read_csv("estimated_importation_intensity_region.csv")

In [33]:
eii_df.columns

Index(['Unnamed: 0', 'index', 'place', 'year-month', 'import_risk'], dtype='object')

In [34]:
eii_df = eii_df[eii_df['year-month'] > "2022-04-01"]

In [35]:
eii_plot = alt.Chart(eii_df).mark_line().encode(
    alt.X("year-month:T",axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y("import_risk", title = "Number of Estimated Introductions",axis = alt.Axis(grid = False)), 
    alt.Color("place:N",title = "EII", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110),  scale=alt.Scale(range=range_) )).properties(
    width = 800,
    height = 400
)
eii_plot

In [36]:
(eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

###  percent of new cases from intros


In [37]:
def generate_percent_intro_df(input_df):
    
    temp_df = pd.DataFrame()
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        
        interval = i.split(".")[0]
        deme = i.split(".")[2]

        try:
            temp_df["total."+ str(interval)] = seir_growth_rate["Ne." + str(deme) + ".diff." + str(interval)].astype("float") +  input_df[i].astype("float")

            new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0) 


        except KeyError:
            pass 
            
            
    return(new_df)

In [38]:
percent_df = generate_percent_intro_df(mr_b_df)

  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_

In [39]:
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35,intro.percent.WesternEurope.36
0,0.001744,0.002079,0.001764,0.001482,0.001411,0.001293,0.001142,0.001377,0.001194,0.000889,...,0.080868,0.087044,0.069500,0.052836,0.065987,0.061259,0.085676,0.085676,0.085676,0.085676
1,0.001787,0.002138,0.001803,0.001508,0.001437,0.001319,0.001168,0.001409,0.001218,0.000907,...,0.079214,0.085245,0.067952,0.051699,0.064628,0.060000,0.083308,0.083308,0.083308,0.083308
2,0.001185,0.001427,0.001183,0.000982,0.000933,0.000863,0.000776,0.000942,0.000803,0.000595,...,0.064242,0.067395,0.054733,0.041519,0.052569,0.049809,0.080715,0.080715,0.080715,0.080715
3,0.001544,0.001824,0.001554,0.001310,0.001252,0.001160,0.001040,0.001242,0.001079,0.000819,...,0.067655,0.071555,0.058541,0.045370,0.057923,0.054504,0.079329,0.079329,0.079329,0.079329
4,0.001078,0.001291,0.001081,0.000902,0.000859,0.000793,0.000710,0.000857,0.000737,0.000549,...,0.060458,0.064093,0.051887,0.039661,0.052223,0.048534,0.072877,0.072877,0.072877,0.072877
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.001306,0.001648,0.001310,0.001063,0.000994,0.000895,0.000776,0.000985,0.000817,0.000565,...,0.082791,0.089704,0.067327,0.047991,0.055691,0.051725,0.083483,0.083483,0.083483,0.083483
29786,0.001104,0.001374,0.001117,0.000915,0.000861,0.000774,0.000667,0.000833,0.000704,0.000497,...,0.072128,0.080991,0.061305,0.043909,0.054895,0.048512,0.066141,0.066141,0.066141,0.066141
29787,0.000790,0.001005,0.000794,0.000641,0.000599,0.000537,0.000461,0.000587,0.000487,0.000335,...,0.067519,0.076471,0.057126,0.040170,0.053384,0.045852,0.064573,0.064573,0.064573,0.064573
29788,0.001275,0.001570,0.001286,0.001058,0.000999,0.000905,0.000789,0.000976,0.000828,0.000593,...,0.074360,0.082171,0.063452,0.046363,0.059298,0.053060,0.074384,0.074384,0.074384,0.074384


In [40]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "percent" in i:
            deme = i.split(".")[2]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [41]:
final_north_df = generate_summary_df(percent_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [42]:
#making sure that any numbers >1 are excluded
percent_df =pd.DataFrame(np.where(percent_df <1, percent_df, 1), columns=percent_df.columns )
percent_df =pd.DataFrame(np.where(percent_df >0, percent_df, 0), columns=percent_df.columns )
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35,intro.percent.WesternEurope.36
0,0.001744,0.002079,0.001764,0.001482,0.001411,0.001293,0.001142,0.001377,0.001194,0.000889,...,0.080868,0.087044,0.069500,0.052836,0.065987,0.061259,0.085676,0.085676,0.085676,0.085676
1,0.001787,0.002138,0.001803,0.001508,0.001437,0.001319,0.001168,0.001409,0.001218,0.000907,...,0.079214,0.085245,0.067952,0.051699,0.064628,0.060000,0.083308,0.083308,0.083308,0.083308
2,0.001185,0.001427,0.001183,0.000982,0.000933,0.000863,0.000776,0.000942,0.000803,0.000595,...,0.064242,0.067395,0.054733,0.041519,0.052569,0.049809,0.080715,0.080715,0.080715,0.080715
3,0.001544,0.001824,0.001554,0.001310,0.001252,0.001160,0.001040,0.001242,0.001079,0.000819,...,0.067655,0.071555,0.058541,0.045370,0.057923,0.054504,0.079329,0.079329,0.079329,0.079329
4,0.001078,0.001291,0.001081,0.000902,0.000859,0.000793,0.000710,0.000857,0.000737,0.000549,...,0.060458,0.064093,0.051887,0.039661,0.052223,0.048534,0.072877,0.072877,0.072877,0.072877
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.001306,0.001648,0.001310,0.001063,0.000994,0.000895,0.000776,0.000985,0.000817,0.000565,...,0.082791,0.089704,0.067327,0.047991,0.055691,0.051725,0.083483,0.083483,0.083483,0.083483
29786,0.001104,0.001374,0.001117,0.000915,0.000861,0.000774,0.000667,0.000833,0.000704,0.000497,...,0.072128,0.080991,0.061305,0.043909,0.054895,0.048512,0.066141,0.066141,0.066141,0.066141
29787,0.000790,0.001005,0.000794,0.000641,0.000599,0.000537,0.000461,0.000587,0.000487,0.000335,...,0.067519,0.076471,0.057126,0.040170,0.053384,0.045852,0.064573,0.064573,0.064573,0.064573
29788,0.001275,0.001570,0.001286,0.001058,0.000999,0.000905,0.000789,0.000976,0.000828,0.000593,...,0.074360,0.082171,0.063452,0.046363,0.059298,0.053060,0.074384,0.074384,0.074384,0.074384


In [43]:
final_north_df['days'] = final_north_df.interval.astype(int) *7
final_north_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - final_north_df.days.map(timedelta)
#final_north_df = final_north_df[final_north_df.date >"2022-06-15"]
final_north_df.date = final_north_df.date.astype(str)

In [44]:
final_north_df[final_north_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date
0,WesternEurope,0,0.026907,0.038257,0.017578,0.027783,0.021556,0,2023-01-03
0,WesternEurope,1,0.040012,0.055136,0.027107,0.041415,0.032689,7,2022-12-27
0,WesternEurope,2,0.045139,0.064331,0.029786,0.046637,0.036231,14,2022-12-20
0,WesternEurope,3,0.047974,0.070973,0.031296,0.04959,0.038084,21,2022-12-13
0,WesternEurope,4,0.046106,0.066991,0.029778,0.047403,0.03652,28,2022-12-06
0,WesternEurope,5,0.03463,0.050438,0.022232,0.035517,0.027371,35,2022-11-29
0,WesternEurope,6,0.035821,0.052626,0.022889,0.036555,0.028077,42,2022-11-22
0,WesternEurope,7,0.058461,0.086084,0.037451,0.059934,0.045828,49,2022-11-15
0,WesternEurope,8,0.064465,0.094571,0.040842,0.065612,0.050077,56,2022-11-08
0,WesternEurope,9,0.053769,0.079626,0.034107,0.054601,0.041604,63,2022-11-01


In [45]:
#final_north_df.to_csv("../data-files/north_percent_intro.csv")

In [46]:
line1 = alt.Chart(final_north_df).mark_area(interpolate='monotone', opacity = 0.9).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,  format="%B %Y")),
    alt.Y('lower_hpd_log_50',title = "Percent of new cases due to introductions", axis=alt.Axis(title="", grid=False, format='%')),
    alt.Y2('upper_hpd_log_50' ), 
    alt.Color("deme",title = "Region",  legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_))
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_50 >0) & (datum.upper_hpd_log_50 < 1)
)

band1 = alt.Chart(final_north_df).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95',title = "Percent of new cases due to introductions", axis=alt.Axis( grid=False)),
    alt.Y2('upper_hpd_log_95'),
    alt.Color("deme")
).properties(
    width=800,
    height=400
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.upper_hpd_log_95 < 1)
)

(band1 + line1).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

## working on doing Rt calculations based on percent intro

In [47]:
one_line = alt.Chart(pd.DataFrame({'y': [1.0]})).mark_rule(strokeDash=[1,1]).encode(y='y').properties(
    width=850,
    height=300
)

In [48]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_rt_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    count = 0
    for i in input_df.columns.tolist():
        #print(i)
        if "rt" in i:
            #if count %7 == 0:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            #print(deme, interval)
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]




            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            #count+=1
            
    return(new_df)

In [49]:
def generate_local_rt(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = (input_df[i].astype("float") * (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float")))/ uninfectious_rate
            except KeyError:
                pass

            
    return(new_df)

In [50]:
def generate_local_and_intro_rt(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (input_df[i].astype("float") )/ uninfectious_rate
            

            
    return(new_df)

In [51]:
# def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
#     new_df = pd.DataFrame()
#     incubation_period = 8
#     #uninfectious_rate = 4.5
    
#     for i in input_df.columns.tolist():
#         if "Ne" in i:
#             interval = i.split(".")[3]
#             deme = i.split(".")[1]
#             #print((input_df[i].astype("float") ))
#             print( (foo_ne_growth[i].astype("float")) / 8)
#             print((input_df[i].astype("float")) / uninfectious_rate)
#             new_df["rt" + "." + str(deme)+ "." + str(interval)] = ((input_df[i].astype("float")) / uninfectious_rate) * ( (foo_ne_growth[i].astype("float")) / incubation_period)
            

            
#     return(new_df)

In [52]:
def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8
    uninfectious_rate = 365/4.5
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            print( (foo_ne_growth[i].astype("float")) / 8)
            print((input_df[i].astype("float")) / uninfectious_rate)
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
            

            
    return(new_df)

In [53]:
# def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
#     new_df = pd.DataFrame()
#     incubation_period = 365/8

#     for i in input_df.columns.tolist():
#         if "Ne" in i:
#             interval = i.split(".")[3]
#             deme = i.split(".")[1]
#             try:
#                 new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((input_df[i].astype("float")) / uninfectious_rate) * ( (foo_ne_growth[i].astype("float")) / incubation_period) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
#             except KeyError:
#                 pass

            
#     return(new_df)

In [54]:
def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8

    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
            except KeyError:
                pass

            
    return(new_df)

In [55]:
rt_local_df = foo_generate_local_rt(seir_growth_rate, ne_diff_summary)
rt_local_and_intro_df = foo_generate_local_and_intro_rt(seir_growth_rate, ne_diff_summary)


  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(de

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29785    0.0
29786    0.0
29787    0.0
29788    0.0
29789    0.0
Name: Ne.CentralEurope.diff.0, Length: 29790, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29785    1.0
29786    1.0
29787    1.0
29788    1.0
29789    1.0
Name: Ne.CentralEurope.diff.0, Length: 29790, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29785    0.0
29786    0.0
29787    0.0
29788    0.0
29789    0.0
Name: Ne.CentralEurope.diff.1, Length: 29790, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29785    1.0
29786    1.0
29787    1.0
29788    1.0
29789    1.0
Name: Ne.CentralEurope.diff.1, Length: 29790, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29785    0.0
29786    0.0
29787    0.0
29788    0.0
29789    0.0
Name: Ne.CentralEurope.diff.2, Length: 29

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_r


0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29785    1.0
29786    1.0
29787    1.0
29788    1.0
29789    1.0
Name: Ne.SouthAmerica.diff.34, Length: 29790, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29785    0.0
29786    0.0
29787    0.0
29788    0.0
29789    0.0
Name: Ne.SouthAmerica.diff.35, Length: 29790, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29785    1.0
29786    1.0
29787    1.0
29788    1.0
29789    1.0
Name: Ne.SouthAmerica.diff.35, Length: 29790, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
29785    0.0
29786    0.0
29787    0.0
29788    0.0
29789    0.0
Name: Ne.SouthAmerica.diff.36, Length: 29790, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
29785    1.0
29786    1.0
29787    1.0
29788    1.0
29789    1.0
Name: Ne.SouthAmerica.diff.36, Length: 2

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))


In [56]:
rt_local_df

Unnamed: 0,rt.CentralEurope.0,rt.CentralEurope.1,rt.CentralEurope.2,rt.CentralEurope.3,rt.CentralEurope.4,rt.CentralEurope.5,rt.CentralEurope.6,rt.CentralEurope.7,rt.CentralEurope.8,rt.CentralEurope.9,...,rt.WesternEurope.27,rt.WesternEurope.28,rt.WesternEurope.29,rt.WesternEurope.30,rt.WesternEurope.31,rt.WesternEurope.32,rt.WesternEurope.33,rt.WesternEurope.34,rt.WesternEurope.35,rt.WesternEurope.36
0,0.998256,0.997921,0.998236,0.998518,0.998589,0.998707,0.998858,0.998623,0.998806,0.999111,...,1.213159,1.154201,1.228764,1.738330,2.300203,1.634003,0.914324,0.914324,0.914324,0.914324
1,0.998213,0.997862,0.998197,0.998492,0.998563,0.998681,0.998832,0.998591,0.998782,0.999093,...,1.215271,1.156416,1.230734,1.740207,2.303167,1.636013,0.916692,0.916692,0.916692,0.916692
2,0.998815,0.998573,0.998817,0.999018,0.999067,0.999137,0.999224,0.999058,0.999197,0.999405,...,1.228989,1.174053,1.242074,1.741577,2.301132,1.638696,0.919285,0.919285,0.919285,0.919285
3,0.998456,0.998176,0.998446,0.998690,0.998748,0.998840,0.998960,0.998758,0.998921,0.999181,...,1.207079,1.154605,1.219434,1.684903,2.197563,1.587409,0.920671,0.920671,0.920671,0.920671
4,0.998922,0.998709,0.998919,0.999098,0.999141,0.999207,0.999290,0.999143,0.999263,0.999451,...,1.231963,1.176583,1.243795,1.739257,2.291559,1.635942,0.927123,0.927123,0.927123,0.927123
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.998694,0.998352,0.998690,0.998937,0.999006,0.999105,0.999224,0.999015,0.999183,0.999435,...,1.279249,1.206459,1.301568,1.949432,2.701743,1.826888,0.916517,0.916517,0.916517,0.916517
29786,0.998896,0.998626,0.998883,0.999085,0.999139,0.999226,0.999333,0.999167,0.999296,0.999503,...,1.284071,1.209891,1.299781,1.928059,2.648435,1.807225,0.933859,0.933859,0.933859,0.933859
29787,0.999210,0.998995,0.999206,0.999359,0.999401,0.999463,0.999539,0.999413,0.999513,0.999665,...,1.313065,1.234097,1.328485,2.002574,2.777811,1.870434,0.935427,0.935427,0.935427,0.935427
29788,0.998725,0.998430,0.998714,0.998942,0.999001,0.999095,0.999211,0.999024,0.999172,0.999407,...,1.262219,1.193156,1.277783,1.867915,2.533530,1.750659,0.925616,0.925616,0.925616,0.925616


In [99]:
summary_rt_local_df = generate_rt_summary_df(rt_local_df)
summary_rt_local_and_intro_df = generate_rt_summary_df(rt_local_and_intro_df)


  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [100]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7 
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)

In [101]:
summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "NorthAmerica"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date
0,NorthAmerica,0,0.962632,0.968259,0.957337,0.964194,0.960459,0,2023-01-03
0,NorthAmerica,1,0.261938,0.343443,0.18613,0.283649,0.230194,7,2022-12-27
0,NorthAmerica,2,0.671045,0.715434,0.627085,0.683563,0.653414,14,2022-12-20
0,NorthAmerica,3,1.187683,1.215711,1.158202,1.199111,1.179423,21,2022-12-13
0,NorthAmerica,4,0.87005,0.889217,0.852108,0.875342,0.86266,28,2022-12-06
0,NorthAmerica,5,0.835741,0.859769,0.813298,0.842362,0.826485,35,2022-11-29
0,NorthAmerica,6,0.86321,0.883353,0.844362,0.868769,0.855445,42,2022-11-22
0,NorthAmerica,7,0.867642,0.887154,0.849381,0.873029,0.86012,49,2022-11-15
0,NorthAmerica,8,0.846293,0.868835,0.825223,0.852508,0.837607,56,2022-11-08
0,NorthAmerica,9,0.933403,0.943369,0.92404,0.936164,0.929556,63,2022-11-01


In [102]:
line2 = alt.Chart(summary_rt_local_and_intro_df, title = "Rt (Local + Intro Phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title=" Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%d %B %Y")),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale(range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot = band2 + line2 +one_line
phlyo_rt_plot

In [103]:
line = alt.Chart(summary_rt_local_df, title = "Rt (local only phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme")
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0.4) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(summary_rt_local_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0.4) #& (datum.upper_hpd_log_95 < 4)
)

local_phlyo_rt_plot = band + line +one_line
local_phlyo_rt_plot

In [104]:
(phlyo_rt_plot & local_phlyo_rt_plot).resolve_scale(y = "shared").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [105]:
case_rt = pd.read_csv("../case-rt-analysis/estimates/case-rt-estimates_region.tsv", sep="\t")

In [106]:
case_rt

Unnamed: 0,date,region,median_R,R_upper_95,R_lower_95
0,2022-05-07,Western Europe,15.350703,16.130447,14.625510
1,2022-05-08,Western Europe,14.309429,15.003319,13.648905
2,2022-05-09,Western Europe,13.353642,13.970683,12.751278
3,2022-05-10,Western Europe,12.476653,13.023672,11.925460
4,2022-05-11,Western Europe,11.672797,12.154329,11.164991
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.771716,0.771716,0.771716
1218,2023-01-23,Central Europe,0.771716,0.771716,0.771715
1219,2023-01-24,Central Europe,0.771715,0.771715,0.771715
1220,2023-01-25,Central Europe,0.771715,0.771715,0.771715


In [107]:
band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.5 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('R_lower_80',axis=alt.Axis(title="Overall Rt (Cases Only)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_80' ),
    alt.Color("location",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12),  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300).transform_filter(
    (datum.R_upper_80 <7) #& (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("location")
).properties(
    width=850,
    height=300).transform_filter(
    (datum.median_R <7) #& (datum.upper_hpd_log_95 < 4)
)

case_rt_plot = band3 + line3 + one_line
case_rt_plot

ValueError: location encoding field is specified without a type; the type cannot be inferred because it does not match any column in the data.

alt.LayerChart(...)

In [66]:
(phlyo_rt_plot & case_rt_plot).resolve_scale(color='independent', x='shared', y = 'shared'
).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [67]:
case_rt

Unnamed: 0,date,location,median_R,R_upper_80,R_lower_80
0,2022-05-07,Western Europe,15.350703,15.970606,14.856801
1,2022-05-08,Western Europe,14.309429,14.859400,13.862485
2,2022-05-09,Western Europe,13.353642,13.841060,12.949375
3,2022-05-10,Western Europe,12.476653,12.906902,12.102924
4,2022-05-11,Western Europe,11.672797,12.049124,11.324210
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.771716,0.771716,0.771716
1218,2023-01-23,Central Europe,0.771716,0.771716,0.771715
1219,2023-01-24,Central Europe,0.771715,0.771715,0.771715
1220,2023-01-25,Central Europe,0.771715,0.771715,0.771715


In [68]:
vac_df =pd.read_csv("../data/us_mpox_vaccinaiton.csv")
vac_df.index = vac_df["Unnamed: 0"]
vac_df = vac_df.transpose()
vac_df = vac_df.drop(index = "Unnamed: 0").reset_index()
vac_df = vac_df.replace(',','', regex=True)
vac_df.columns.name = None
vac_df["First doses"] = pd.to_numeric(vac_df["First doses"])
vac_df = vac_df.rename(columns={"index": "date"} )
long_vac = pd.melt(vac_df, id_vars='date', value_vars=['First doses', 'Second doses'])

vac_plot = alt.Chart(long_vac).mark_bar(interpolate='monotone', opacity = 0.5 , width = 20).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('value',axis=alt.Axis(title="Vaccinaiton Cases", grid=False)),
    alt.Color("variable",legend=alt.Legend(title= "Dosage",offset = -160, labelFontSize = 12, titleFontSize = 12), scale=alt.Scale(scheme='accent'))
         ).properties(
    width=850,
    height=300)

line2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(interpolate='monotone', opacity = 1 ,color = "#2664A5").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Overall Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) & (datum.deme == "NorthAmerica") #(datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone',color = "#2664A5"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.deme == "NorthAmerica") # (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot_NA = band2 + line2 +one_line



band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.3 ,color = "#2664A5", strokeDash=[3,5]).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('R_lower_80',axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_80' ),
    alt.Color("location",legend=alt.Legend(title= "Region",offset = -160, labelFontSize = 12, titleFontSize = 12))
, alt.StrokeDash("location", legend=alt.Legend(title= "Region",offset = -140, labelFontSize = 12, titleFontSize = 12))).properties(
    width=850,
    height=300).transform_filter(
    (datum.location == "North America") # (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#2664A5", strokeDash=[3,5]
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("location"),
).properties(
    width=850,
    height=300).transform_filter(
    (datum.location == "North America") # (datum.upper_hpd_log_95 < 4)
)

case_rt_plot_NA =  line3 + band3 + one_line



(vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent", color = "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


## now we separate out Rt by contribution 

In [108]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
#summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.date >"2022-06-17"]
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
#summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-06-17"]
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)





In [109]:
summary_rt_local_df['Contribution'] = "Local"
summary_rt_local_and_intro_df['Contribution'] = "Local + Intoductions"
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "NorthAmerica"]
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.deme == "NorthAmerica"]


In [110]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date,Contribution
0,NorthAmerica,0,0.923241,0.939636,0.907423,0.92877,0.917498,0,2023-01-03,Local
0,NorthAmerica,1,0.232507,0.312605,0.155915,0.252639,0.198361,7,2022-12-27,Local
0,NorthAmerica,2,0.630024,0.678171,0.583718,0.645822,0.612747,14,2022-12-20,Local
0,NorthAmerica,3,1.118051,1.151907,1.084327,1.129234,1.106718,21,2022-12-13,Local
0,NorthAmerica,4,0.819709,0.847001,0.791454,0.829457,0.80958,28,2022-12-06,Local
0,NorthAmerica,5,0.786979,0.818154,0.756104,0.798458,0.776322,35,2022-11-29,Local
0,NorthAmerica,6,0.814779,0.842784,0.786669,0.824424,0.804429,42,2022-11-22,Local
0,NorthAmerica,7,0.806451,0.838695,0.773046,0.818064,0.794514,49,2022-11-15,Local
0,NorthAmerica,8,0.78895,0.821875,0.755226,0.800723,0.776895,56,2022-11-08,Local
0,NorthAmerica,9,0.883594,0.906425,0.861012,0.892731,0.876865,63,2022-11-01,Local


In [111]:
combined_rt = pd.concat([ summary_rt_local_and_intro_df, summary_rt_local_df], ignore_index=True)


In [116]:
stream_rt = alt.Chart(combined_rt, title = "North American Rt by Contribution").mark_area(interpolate='monotone', opacity = 0.8 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="Estimated Rt", grid=False),stack = False, scale=alt.Scale(domain=(0, 4.5))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N',legend=alt.Legend(title= "Contribution",offset = -130, labelFontSize = 12, titleFontSize = 12))#, scale=alt.Scale(domain = ['Local',  "lo"], range = ["#4c90c0",  "#df4327"]))
).properties(
    width=800,
    height=300
)

#.transform_filter(
#    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
#)

In [117]:
(stream_rt + one_line).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


In [75]:
## Now we calculate the percent of RT that is influenced by introductions

In [76]:
rt_local_intro_northamerica_df = rt_local_and_intro_df.filter(regex='.NorthAmerica.')
rt_local_northamerica_df = rt_local_df.filter(regex='.NorthAmerica.')

In [77]:
rt_local_intro_northamerica_df

Unnamed: 0,rt.NorthAmerica.0,rt.NorthAmerica.1,rt.NorthAmerica.2,rt.NorthAmerica.3,rt.NorthAmerica.4,rt.NorthAmerica.5,rt.NorthAmerica.6,rt.NorthAmerica.7,rt.NorthAmerica.8,rt.NorthAmerica.9,...,rt.NorthAmerica.27,rt.NorthAmerica.28,rt.NorthAmerica.29,rt.NorthAmerica.30,rt.NorthAmerica.31,rt.NorthAmerica.32,rt.NorthAmerica.33,rt.NorthAmerica.34,rt.NorthAmerica.35,rt.NorthAmerica.36
0,0.966299,0.315587,0.700803,1.168405,0.882519,0.851361,0.876312,0.880335,0.860951,0.939894,...,1.055601,1.360840,3.650640,2.991708,1.0,1.0,1.0,1.0,1.0,1.0
1,0.966307,0.315706,0.700867,1.168364,0.882545,0.851394,0.876340,0.880361,0.860982,0.939908,...,1.055588,1.360751,3.649844,2.991130,1.0,1.0,1.0,1.0,1.0,1.0
2,0.966942,0.325589,0.706098,1.165055,0.884713,0.854114,0.878618,0.882568,0.863533,0.941033,...,1.054519,1.353420,3.584747,2.943876,1.0,1.0,1.0,1.0,1.0,1.0
3,0.968789,0.355015,0.721410,1.155454,0.891030,0.862046,0.885259,0.888999,0.870970,0.944308,...,1.051415,1.332206,3.398684,2.808530,1.0,1.0,1.0,1.0,1.0,1.0
4,0.967151,0.328872,0.707826,1.163965,0.885428,0.855011,0.879369,0.883296,0.864374,0.941404,...,1.054167,1.351008,3.563422,2.928385,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29785,0.959061,0.211236,0.642270,1.206529,0.857928,0.820569,0.850475,0.855305,0.832052,0.927085,...,1.067842,1.445920,4.435067,3.557596,1.0,1.0,1.0,1.0,1.0,1.0
29786,0.960096,0.225214,0.650512,1.201035,0.861430,0.824947,0.854153,0.858869,0.836163,0.928914,...,1.066086,1.433588,4.318163,3.473639,1.0,1.0,1.0,1.0,1.0,1.0
29787,0.957785,0.194434,0.632164,1.213324,0.853617,0.815183,0.845947,0.850917,0.826994,0.924830,...,1.070012,1.461203,4.581410,3.662530,1.0,1.0,1.0,1.0,1.0,1.0
29788,0.962044,0.252381,0.666143,1.190732,0.868035,0.833210,0.861091,0.865591,0.843920,0.932359,...,1.062784,1.410526,4.102407,3.318359,1.0,1.0,1.0,1.0,1.0,1.0


In [78]:
rt_diff_df = rt_local_northamerica_df.div(rt_local_intro_northamerica_df.values)

In [79]:
rt_df_summary = generate_rt_summary_df(rt_diff_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

In [80]:
rt_df_summary['days'] = rt_df_summary.interval.astype(int) *7
rt_df_summary['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - rt_df_summary.days.map(timedelta)
rt_df_summary = rt_df_summary[rt_df_summary.date >"2022-06-17"]
rt_df_summary.date = rt_df_summary.date.astype(str)

In [81]:
line = alt.Chart(rt_df_summary, title = "North America" ).mark_area( interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Percent of Rt due to Introductions ", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(rt_df_summary).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

rt_diff_plot = line + band +one_line
(vac_plot + (rt_diff_plot)).resolve_scale(y="independent")