In [1]:
import baltic as bt
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from datetime import datetime as dt
from datetime import timedelta
import time
#import pymc3
import math
import arviz as az
import re
#from hpd import hpd
import scipy.stats as stats
from io import StringIO
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')


### Load Log files

In [2]:
from datetime import date
current_date = str(date.today())

In [3]:
log_file_path = "../../../mpox_rhino/550_glm_region_air_travel.log"


In [4]:
domain = ['CentralEurope', 'NorthAmerica', 'SouthAmerica' ,"SouthernEurope", "WesternEurope"]
range_ = ['#EEC060', '#2664A5', '#A76BB1', "#EEA160", "#356D4C"]

## we can then estimate the transmission rates via the change in Ne's over time plus the uninfectious rate

In [5]:
def read_in_Ne_changes_mascot(log_file_path):
    
    Ne_skyline_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
            
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    Ne_column_indices = []   # list to store column indices
                    Nes_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "Ne." in col:
                            Ne_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in Ne_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        interval = name.split(".")[2]
                       
                        Nes_key[n] = name
                        Ne_skyline_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    Ne_skyline_dict["sample"].append(sample)

                    for index in Ne_column_indices:
                        name = Nes_key[index]
                        Ne_skyline_dict[name].append(line.split("\t")[index])
                    
                
    return(Ne_skyline_dict)

In [6]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            #print(deme)
            interval = i.split(".")[2]
            #print(interval)
            #print(i)
            next_interval = int(interval)+7
            local_series = input_df[i].astype('float').to_numpy()
            #print(local_series)
            mean_log = local_series.mean()
            mean_linear = 10**mean_log
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            lower_hpd_linear_95 = 10**lower_hpd_log_95
            upper_hpd_log_95 = hpd_95[1]
            upper_hpd_linear_95 = 10**upper_hpd_log_95
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            lower_hpd_linear_50 = 10**lower_hpd_log_50
            upper_hpd_log_50 = hpd_50[1]
            upper_hpd_linear_50 = 10**upper_hpd_log_50
            
            try:
                next_local_series = input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float').to_numpy()
                diff_series = np.subtract(local_series, next_local_series)
                #print(local_series)
                #print(next_local_series)
                #print(diff_series)
                diff_mean_log = diff_series.mean()
                diff_hpd_95 = az.hdi(diff_series, 0.95)
                diff_lower_hpd_log_95 = diff_hpd_95[0]
                diff_lower_hpd_linear_95 = math.exp(diff_lower_hpd_log_95)
                diff_upper_hpd_log_95 = diff_hpd_95[1]
                diff_upper_hpd_linear_95 = math.exp(diff_upper_hpd_log_95)
                diff_hpd_50 = az.hdi(diff_series, 0.50)
                diff_lower_hpd_log_50 = diff_hpd_50[0]
                diff_lower_hpd_linear_50 = math.exp(diff_lower_hpd_log_50)
                diff_upper_hpd_log_50 = diff_hpd_50[1]
                diff_upper_hpd_linear_50 = math.exp(diff_upper_hpd_log_50)
            except KeyError:
                pass   
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_Ne_log":mean_log,"mean_Ne_linear":mean_linear, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50,
                                                   "upper_hpd_linear":upper_hpd_linear_95,"lower_hpd_linear":lower_hpd_linear_95,
                                                   "diff_mean_Ne_log":diff_mean_log, 
                                                   "diff_upper_hpd_log_95":diff_upper_hpd_log_95,"diff_lower_hpd_log_95":diff_lower_hpd_log_95, 
                                                   "diff_upper_hpd_log_50":diff_upper_hpd_log_50,"diff_lower_hpd_log_50":diff_lower_hpd_log_50,
                                                   "diff_upper_hpd_linear":diff_upper_hpd_linear_95,"diff_lower_hpd_linear":diff_lower_hpd_linear_95,
                                                   "diff_upper_hpd_linear_50":diff_upper_hpd_linear_50,"diff_lower_hpd_linear_50":diff_lower_hpd_linear_50})
                new_df = new_df.append(local_df)
                #print(new_df)
            except:
                pass
            
    return(new_df)

In [7]:
Ne_skyline = read_in_Ne_changes_mascot(log_file_path)

In [11]:
Ne_df = pd.DataFrame.from_dict(Ne_skyline)
print(len(Ne_df))
burnin_percent = 0.3

rows_to_remove = int(len(Ne_df)* burnin_percent)
Ne_df = Ne_df.iloc[rows_to_remove:]

print(len(Ne_df))
Ne_df = Ne_df.reset_index()
Ne_df

61542
43080


Unnamed: 0,index,sample,Ne.CentralEurope.0,Ne.CentralEurope.1,Ne.CentralEurope.2,Ne.CentralEurope.3,Ne.CentralEurope.4,Ne.CentralEurope.5,Ne.CentralEurope.6,Ne.CentralEurope.7,...,Ne.WesternEurope.29,Ne.WesternEurope.30,Ne.WesternEurope.31,Ne.WesternEurope.32,Ne.WesternEurope.33,Ne.WesternEurope.34,Ne.WesternEurope.35,Ne.WesternEurope.36,Ne.WesternEurope.37,Ne.WesternEurope.38
0,18462,18462000,0.2385159201117813,0.2385159201117813,0.2385159201117813,0.2385159201117813,0.2385159201117813,0.2385159201117813,0.2385159201117813,0.2385159201117813,...,4.95243174611514,4.331712291405307,3.2210377079627,1.5487019554259094,0.614010638209614,0.614010638209614,0.614010638209614,0.614010638209614,0.614010638209614,0.614010638209614
1,18463,18463000,0.2536308348760531,0.2536308348760531,0.2536308348760531,0.2536308348760531,0.2536308348760531,0.2536308348760531,0.2536308348760531,0.2536308348760531,...,4.821611769474034,4.233797030870421,3.175556146331607,1.5598031038865519,0.635330132537403,0.635330132537403,0.635330132537403,0.635330132537403,0.635330132537403,0.635330132537403
2,18464,18464000,0.2852248529714689,0.2852248529714689,0.2852248529714689,0.2852248529714689,0.2852248529714689,0.2852248529714689,0.2852248529714689,0.2852248529714689,...,4.8867760654862895,4.306867444556743,3.256819800262676,1.6322996399198586,0.6820119405081403,0.6820119405081403,0.6820119405081403,0.6820119405081403,0.6820119405081403,0.6820119405081403
3,18465,18465000,0.2607850235753611,0.2607850235753611,0.2607850235753611,0.2607850235753611,0.2607850235753611,0.2607850235753611,0.2607850235753611,0.2607850235753611,...,5.14943376817782,4.514211298043121,3.3735643963851905,1.6422038180788119,0.6613242574138305,0.6613242574138305,0.6613242574138305,0.6613242574138305,0.6613242574138305,0.6613242574138305
4,18466,18466000,0.278130739984564,0.278130739984564,0.278130739984564,0.278130739984564,0.278130739984564,0.278130739984564,0.278130739984564,0.278130739984564,...,4.4858083639699355,3.96702379676836,3.0226113958477314,1.543508256986103,0.6603289780962172,0.6603289780962172,0.6603289780962172,0.6603289780962172,0.6603289780962172,0.6603289780962172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,61537,61537000,0.2817847164905922,0.2817847164905922,0.2817847164905922,0.2817847164905922,0.2817847164905922,0.2817847164905922,0.2817847164905922,0.2817847164905922,...,3.518476641037173,3.148172497027055,2.4615795917998486,1.3400406349814853,0.6215312792391129,0.6215312792391129,0.6215312792391129,0.6215312792391129,0.6215312792391129,0.6215312792391129
43076,61538,61538000,0.3144518092596648,0.3144518092596648,0.3144518092596648,0.3144518092596648,0.3144518092596648,0.3144518092596648,0.3144518092596648,0.3144518092596648,...,4.037557625428705,3.6081455401005034,2.8135071209416505,1.5212733399481835,0.6995709963376596,0.6995709963376596,0.6995709963376596,0.6995709963376596,0.6995709963376596,0.6995709963376596
43077,61539,61539000,0.31214563260265554,0.31214563260265554,0.31214563260265554,0.31214563260265554,0.31214563260265554,0.31214563260265554,0.31214563260265554,0.31214563260265554,...,4.050187526682634,3.6170432643181942,2.8163285766675696,1.5173109056992828,0.6945734196099098,0.6945734196099098,0.6945734196099098,0.6945734196099098,0.6945734196099098,0.6945734196099098
43078,61540,61540000,0.32727980956643576,0.32727980956643576,0.32727980956643576,0.32727980956643576,0.32727980956643576,0.32727980956643576,0.32727980956643576,0.32727980956643576,...,3.690645123124899,3.3147534395233813,2.613641408033353,1.452599500313919,0.6916017450681493,0.6916017450681493,0.6916017450681493,0.6916017450681493,0.6916017450681493,0.6916017450681493


## calculating transmission rate

In [12]:
def generate_summary_diff_df(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            next_interval = int(interval)+3 #averaging over three weeks to reduce noise
            local_series = input_df[i].astype('float').to_numpy()
           
            try:
                new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
            
            
            except KeyError:
                pass 
            
            
    return(new_df)

In [13]:
ne_diff_summary = generate_summary_diff_df(Ne_df)

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))


In [14]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.26,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.251696,...,8.425708,9.763496,11.348887,20.149334,33.864236,28.729074,16.036109,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.186007,...,8.179901,9.478662,11.017802,19.561510,32.876302,27.890951,15.568282,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.124011,...,7.947919,9.209847,10.705337,19.006745,31.943928,27.099961,15.126765,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.213704,...,8.283543,9.598759,11.157400,19.809359,33.292852,28.244335,15.765536,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.066517,...,7.732778,8.960547,10.415556,18.492254,31.079243,26.366397,14.717301,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.869847,...,6.996853,8.107776,9.424312,16.732355,28.121447,23.857120,13.316662,0.0,0.0,0.0
43076,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.890697,...,7.074873,8.198183,9.529400,16.918932,28.435020,24.123143,13.465152,0.0,0.0,0.0
43077,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.901799,...,7.116414,8.246320,9.585353,17.018275,28.601982,24.264786,13.544215,0.0,0.0,0.0
43078,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.806155,...,6.758522,7.831604,9.103295,16.162407,27.163556,23.044483,12.863062,0.0,0.0,0.0


In [15]:
uninfectious_rate = 365/4.5

#taken from https://www.medrxiv.org/content/10.1101/2022.08.17.22278897v1.full.pdf


In [16]:
incubation_period = 365/8

In [17]:
seir_growth_rate = ((ne_diff_summary*2 + uninfectious_rate + incubation_period)**2 - (incubation_period- uninfectious_rate)**2)/(4*incubation_period)



seir_growth_rate



Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.26,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35
0,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,74.967526,...,106.071856,110.321268,115.458752,145.980019,200.313481,179.004172,131.292173,81.111111,81.111111,81.111111
1,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.143608,...,105.299597,109.409935,114.376763,143.835548,196.124064,175.635949,129.668589,81.111111,81.111111,81.111111
2,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.309960,...,104.573199,108.553115,113.360031,141.825571,192.209496,172.485393,128.145113,81.111111,81.111111,81.111111
3,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.069342,...,105.624888,109.793754,114.832384,144.737891,197.885263,177.052364,130.351983,81.111111,81.111111,81.111111
4,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.464387,...,103.901644,107.761331,112.420937,139.973572,188.613177,169.588075,126.739875,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.993723,...,101.619822,105.073501,109.236445,133.726242,176.559190,159.855675,121.988601,81.111111,81.111111,81.111111
43076,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.937524,...,101.860606,105.356943,109.572010,134.382124,177.818930,160.874385,122.488236,81.111111,81.111111,81.111111
43077,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.907609,...,101.988920,105.508007,109.750879,134.731970,178.491435,161.418059,122.754660,81.111111,81.111111,81.111111
43078,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,76.165513,...,100.885937,104.209874,108.214371,131.732131,172.737681,156.762978,120.468323,81.111111,81.111111,81.111111


In [18]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [19]:
#just checking to make sure the changes in Ne look okay
test_north = generate_summary_df(seir_growth_rate)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [20]:
test_north

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,CentralEurope,0,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,1,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,2,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,3,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,4,81.111111,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...
0,WesternEurope,31,163.931450,177.497989,150.027653,168.469891,158.928829
0,WesternEurope,32,123.971458,130.566796,117.138938,126.196439,121.533537
0,WesternEurope,33,81.111111,81.111111,81.111111,81.111111,81.111111
0,WesternEurope,34,81.111111,81.111111,81.111111,81.111111,81.111111


In [21]:
test_north['days'] = test_north.interval.astype(int) *7
test_north['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - test_north.days.map(timedelta)
test_north.date = test_north.date.astype(str)

In [22]:
line = alt.Chart(test_north).mark_area().encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="transmission rate"), scale = alt.Scale(zero= False)),
    alt.Y2('upper_hpd_log_50'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band = alt.Chart(test_north).mark_area(
    opacity=0.3
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95'),
    alt.Y2('upper_hpd_log_95'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band + line

## calculating backward migration rates

In [23]:
def read_in_forward_migration_rates_mascot(log_file_path):
    
    mig_rates_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            #print(line_number)
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
                
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    mig_column_indices = []   # list to store column indices
                    mig_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "mig." in col:
                            mig_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in mig_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        #interval = name.split(".")[2]
                       
                        mig_key[n] = name
                        mig_rates_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    mig_rates_dict["sample"].append(sample)

                    for index in mig_column_indices:
                        name = mig_key[index]
                        mig_rates_dict[name].append(line.split("\t")[index])
                    
                
                
                
    return(mig_rates_dict)

In [24]:
migration_rates_f = read_in_forward_migration_rates_mascot(log_file_path)

In [25]:
mig_df_f = pd.DataFrame.from_dict(migration_rates_f)


In [26]:
burnin_percent = 0.3
print(len(mig_df_f))
rows_to_remove = int(len(mig_df_f)* burnin_percent)
mig_df_f = mig_df_f.iloc[rows_to_remove:]

print(len(mig_df_f))
mig_df_f = mig_df_f.reset_index()
mig_df_f.head()

61542
43080


Unnamed: 0,index,sample,mig.CentralEurope_to_NorthAmerica,mig.CentralEurope_to_SouthAmerica,mig.CentralEurope_to_SouthernEurope,mig.CentralEurope_to_WesternEurope,mig.NorthAmerica_to_CentralEurope,mig.NorthAmerica_to_SouthAmerica,mig.NorthAmerica_to_SouthernEurope,mig.NorthAmerica_to_WesternEurope,...,mig.SouthAmerica_to_SouthernEurope,mig.SouthAmerica_to_WesternEurope,mig.SouthernEurope_to_CentralEurope,mig.SouthernEurope_to_NorthAmerica,mig.SouthernEurope_to_SouthAmerica,mig.SouthernEurope_to_WesternEurope,mig.WesternEurope_to_CentralEurope,mig.WesternEurope_to_NorthAmerica,mig.WesternEurope_to_SouthAmerica,mig.WesternEurope_to_SouthernEurope
0,18462,18462000,0.0222901560686067,0.0045465080032927,0.0707592581099474,0.1575272423530496,0.0248978579243426,0.4614327282141604,0.5015533409129413,0.9610430241669768,...,0.2585957845305963,0.1481498212454375,0.0687943839306149,0.4776559020530993,0.2435570841618468,1.7662968237294805,0.1562089371350409,0.9315766397721864,0.1542869083959815,1.7932891749923738
1,18463,18463000,0.0199767028954481,0.0040459406621737,0.0646554555177642,0.1460223713699703,0.022356453538769,0.4349656269064594,0.4734629119235158,0.9174833147485936,...,0.2414815754083346,0.1370279765464337,0.0628476697931263,0.4504167456357187,0.22717158657757,1.6935184819293412,0.1446567935568828,0.8890775359714894,0.1419256248399739,1.7148728686114427
2,18464,18464000,0.0112315150755312,0.0024249262491073,0.0741061108959602,0.1955738989905565,0.0125905995589218,0.4401691928005701,0.2945909556246779,0.5728158020141305,...,0.1652962276457295,0.0904942786446785,0.0720216006502653,0.2816333569783555,0.1556511305076182,2.597737312729631,0.195008778811396,0.5484246244756364,0.0938056025086783,2.6310786502804326
3,18465,18465000,0.0131934179408307,0.0022291136218711,0.0482473711025757,0.1176325687296244,0.0149441502171316,0.3959709123414355,0.4338651525761761,0.9132703319863134,...,0.2062585210640883,0.1104100809774047,0.0468400368632465,0.413718548718174,0.1929910030333565,1.7346334130328005,0.1174485353866602,0.8729159824794684,0.1150316603778439,1.7963599344104266
4,18466,18466000,0.0285096890809487,0.0060328237710955,0.0884546403518518,0.1929107819188352,0.0317731323199512,0.5573434870435923,0.6032993254569182,1.154261718822669,...,0.3144368520726209,0.1825535142032301,0.0860184566234697,0.5775982727673358,0.2971538044176213,2.018940535697048,0.1922093804633906,1.154975205194661,0.1901681720105456,2.0862931479529445


In [27]:
def calc_backwards_mig_rates(mig_df_f, targetName):    
    mig_rates = {}
    target = targetName
    target_columns = []
    ne_interval= 0
    mig_list = []

    for name in mig_df_f.columns:
        if target in name.split("_")[0]:
            target_columns.append(name)

    for name in Ne_df.columns:
        if target in name:
            ne_interval+=1

    for interval in range(0, ne_interval,1):
        mig_rates[str(interval)+ "." + "bmr" + "." +  str(target)] = []
        for index_1, row_1 in mig_df_f.iterrows():
            mig_list = []
            for direction in target_columns:
                other_loc = direction.split("_")[2]
                mig_rate_b = pd.to_numeric(row_1[direction]) * ((pd.to_numeric(Ne_df.loc[index_1,"Ne."+ str(target) + "." + str(interval)]))/(pd.to_numeric(Ne_df.loc[index_1, "Ne."+ str(other_loc) + "." + str(interval)])))
                #print(mig_rate_b)
                mig_list.append(mig_rate_b)
            combined_mig = np.sum(np.array(mig_list))
            #print(combined_mig)
            mig_rates[str(interval)+ "." + "bmr" + "." + str(target)].append(combined_mig)

    return(mig_rates) 


In [28]:
unique_column= []
unique_target = []
for name in mig_df_f.columns:
    try:

        if name.split("_")[0].split(".")[1] not in unique_target:
            unique_column.append(name)
            unique_target.append(name.split("_")[0].split(".")[1])
    except:
        pass



In [29]:
mig_rates = []
for target in unique_target:
    mig_rates.append(calc_backwards_mig_rates(mig_df_f, target))
    


In [30]:
mr_b_df = pd.DataFrame()
for x in mig_rates:
    x_df = pd.DataFrame(x)
    mr_b_df = pd.concat([mr_b_df, x_df], axis=1)


In [31]:
mr_b_df

Unnamed: 0,0.bmr.CentralEurope,1.bmr.CentralEurope,2.bmr.CentralEurope,3.bmr.CentralEurope,4.bmr.CentralEurope,5.bmr.CentralEurope,6.bmr.CentralEurope,7.bmr.CentralEurope,8.bmr.CentralEurope,9.bmr.CentralEurope,...,29.bmr.WesternEurope,30.bmr.WesternEurope,31.bmr.WesternEurope,32.bmr.WesternEurope,33.bmr.WesternEurope,34.bmr.WesternEurope,35.bmr.WesternEurope,36.bmr.WesternEurope,37.bmr.WesternEurope,38.bmr.WesternEurope
0,0.052092,0.067662,0.051625,0.040664,0.037950,0.034202,0.029884,0.038482,0.031155,0.021079,...,7.551574,7.790675,16.984195,9.433181,7.759385,7.759385,7.759385,7.759385,7.759385,7.759385
1,0.050256,0.064693,0.049827,0.039454,0.036911,0.033391,0.029307,0.037463,0.030502,0.020870,...,6.727425,6.968377,15.064672,8.541065,7.184985,7.184985,7.184985,7.184985,7.184985,7.184985
2,0.067946,0.087356,0.066836,0.052716,0.049286,0.044968,0.040108,0.051241,0.041393,0.028408,...,6.481428,6.542024,10.848764,7.007473,8.414786,8.414786,8.414786,8.414786,8.414786,8.414786
3,0.038890,0.050526,0.038346,0.030046,0.028081,0.025447,0.022458,0.028858,0.023264,0.015794,...,6.280087,6.532370,15.088074,8.428557,7.331105,7.331105,7.331105,7.331105,7.331105,7.331105
4,0.072739,0.091890,0.072364,0.058060,0.054584,0.049629,0.043759,0.055102,0.045511,0.031845,...,7.971666,8.297828,17.249738,10.130486,8.474423,8.474423,8.474423,8.474423,8.474423,8.474423
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,0.149325,0.180621,0.151120,0.125662,0.119352,0.108687,0.095173,0.115844,0.099696,0.073090,...,8.623220,8.939979,15.221481,9.683436,7.509877,7.509877,7.509877,7.509877,7.509877,7.509877
43076,0.073133,0.090394,0.072717,0.059101,0.055907,0.051368,0.045921,0.056756,0.047497,0.034211,...,7.276418,7.607585,15.183169,9.433115,8.498243,8.498243,8.498243,8.498243,8.498243,8.498243
43077,0.055060,0.068414,0.054540,0.044080,0.041649,0.038330,0.034414,0.042688,0.035491,0.025442,...,6.784451,7.086801,14.653258,9.061409,8.629656,8.629656,8.629656,8.629656,8.629656,8.629656
43078,0.046311,0.056985,0.045787,0.037165,0.035250,0.032652,0.029603,0.036329,0.030372,0.022119,...,5.335671,5.574675,11.158113,7.109456,6.952012,6.952012,6.952012,6.952012,6.952012,6.952012


In [32]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    count = 0
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        #if count %7 == 0:
        deme = i.split(".")[2]
        interval = i.split(".")[0]
        local_series = input_df[i].astype('float').to_numpy()
        mean_percent = local_series.mean()
        hpd_95 = az.hdi(local_series, 0.95)
        lower_hpd_log_95 = hpd_95[0]
        upper_hpd_log_95 = hpd_95[1]
        hpd_50 = az.hdi(local_series, 0.50)
        lower_hpd_log_50 = hpd_50[0]
        upper_hpd_log_50 = hpd_50[1]




        try:
            local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                               "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                               "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
            new_df = new_df.append(local_df)
        except:
            pass
        #count +=1  
    return(new_df)

In [33]:
south_mrb_df = generate_summary_df(mr_b_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [34]:
south_mrb_df[south_mrb_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,WesternEurope,0,2.07883,3.032126,1.316215,2.135975,1.6306
0,WesternEurope,1,1.480013,2.105688,0.940068,1.549136,1.18856
0,WesternEurope,2,2.564169,3.766761,1.645267,2.644482,2.019729
0,WesternEurope,3,3.804981,5.721096,2.361197,3.844842,2.895816
0,WesternEurope,4,3.796795,5.655568,2.378086,3.824818,2.891761
0,WesternEurope,5,3.511445,5.242097,2.200402,3.553428,2.6981
0,WesternEurope,6,2.848214,4.299223,1.777074,2.895152,2.19078
0,WesternEurope,7,2.504911,3.808843,1.57356,2.525048,1.904923
0,WesternEurope,8,3.461907,5.245768,2.160488,3.462302,2.611986
0,WesternEurope,9,4.305879,6.627021,2.679685,4.319975,3.243669


In [270]:
south_mrb_df['days'] = (south_mrb_df.interval.astype(int)-1.5)*7
south_mrb_df['date'] = dt.strptime("2023-01-06",  "%Y-%m-%d") - south_mrb_df.days.map(timedelta)
south_mrb_df.date = south_mrb_df.date.astype(str)
south_mrb_df.deme = south_mrb_df.deme.replace({"CentralEurope": "Central Europe", "NorthAmerica": "North America", "SouthAmerica":"South America", "SouthernEurope":"Southern Europe", "WesternEurope": "Western Europe" })



In [271]:
south_mrb_df = south_mrb_df[south_mrb_df.date > "2022-04-30"]

In [272]:
south_mrb_plot = alt.Chart(south_mrb_df).mark_area(interpolate='monotone', opacity = 0.5, color = "orange").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('upper_hpd_log_50',axis=alt.Axis(title="Number of Estimated Introductions", grid=False)),
    alt.Y2('lower_hpd_log_50' ), 
    alt.Color("deme",title = "Phylo Estimates", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_) )
).properties(
    width=800,
    height=400
)

median = alt.Chart(south_mrb_df).mark_line(interpolate = "monotone").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="", grid=False)), 
    alt.Color("deme")
).properties(
    width = 800,
    height = 400
)

In [273]:
 south_mrb_plot

In [274]:
eii_df = pd.read_csv("estimated_importation_intensity_region.csv")

In [275]:
eii_df.columns

Index(['Unnamed: 0', 'index', 'place', 'year-month', 'import_risk'], dtype='object')

In [276]:
eii_df = eii_df[eii_df['year-month'] > "2022-05-01"]

In [277]:
eii_plot = alt.Chart(eii_df).mark_line().encode(
    alt.X("year-month:T",axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y("import_risk", title = "Number of Estimated Introductions",axis = alt.Axis(grid = False)), 
    alt.Color("place:N",title = "EII", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110),  scale=alt.Scale(range=range_) )).properties(
    width = 800,
    height = 400
)
eii_plot

In [278]:
intro_plot = (eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent")
(eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

###  percent of new cases from intros


In [44]:
def generate_percent_intro_df(input_df):
    
    temp_df = pd.DataFrame()
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        
        interval = i.split(".")[0]
        deme = i.split(".")[2]

        try:
            temp_df["total."+ str(interval)] = seir_growth_rate["Ne." + str(deme) + ".diff." + str(interval)].astype("float") +  input_df[i].astype("float")

            new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0) 


        except KeyError:
            pass 
            
            
    return(new_df)

In [45]:
percent_df = generate_percent_intro_df(mr_b_df)

  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_

In [46]:
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.26,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35
0,0.000642,0.000833,0.000636,0.000501,0.000468,0.000421,0.000368,0.000474,0.000384,0.000281,...,0.066782,0.076109,0.077103,0.049186,0.037436,0.086659,0.067033,0.087311,0.087311,0.087311
1,0.000619,0.000797,0.000614,0.000486,0.000455,0.000411,0.000361,0.000462,0.000376,0.000278,...,0.061237,0.068930,0.069250,0.044682,0.034311,0.078996,0.061798,0.081374,0.081374,0.081374
2,0.000837,0.001076,0.000823,0.000650,0.000607,0.000554,0.000494,0.000631,0.000510,0.000377,...,0.067541,0.069459,0.066293,0.043703,0.032916,0.059175,0.051849,0.093993,0.093993,0.093993
3,0.000479,0.000623,0.000473,0.000370,0.000346,0.000314,0.000277,0.000356,0.000287,0.000210,...,0.058233,0.064629,0.064348,0.041585,0.031956,0.078526,0.060733,0.082891,0.082891,0.082891
4,0.000896,0.001132,0.000891,0.000715,0.000672,0.000611,0.000539,0.000679,0.000561,0.000422,...,0.072932,0.081429,0.081425,0.053883,0.042140,0.092325,0.074015,0.094596,0.094596,0.094596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,0.001838,0.002222,0.001860,0.001547,0.001469,0.001338,0.001172,0.001426,0.001228,0.000961,...,0.078637,0.089006,0.089712,0.060578,0.048194,0.086942,0.073542,0.084742,0.084742,0.084742
43076,0.000901,0.001113,0.000896,0.000728,0.000689,0.000633,0.000566,0.000699,0.000585,0.000450,...,0.070331,0.076030,0.074669,0.051366,0.041027,0.086240,0.071506,0.094837,0.094837,0.094837
43077,0.000678,0.000843,0.000672,0.000543,0.000513,0.000472,0.000424,0.000526,0.000437,0.000335,...,0.066786,0.071064,0.069192,0.047941,0.038188,0.083223,0.068743,0.096162,0.096162,0.096162
43078,0.000571,0.000702,0.000564,0.000458,0.000434,0.000402,0.000365,0.000448,0.000374,0.000290,...,0.054235,0.057065,0.055230,0.038927,0.031264,0.066449,0.055726,0.078944,0.078944,0.078944


In [47]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "percent" in i:
            deme = i.split(".")[2]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [48]:
final_north_df = generate_summary_df(percent_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [49]:
#making sure that any numbers >1 are excluded
percent_df =pd.DataFrame(np.where(percent_df <1, percent_df, 1), columns=percent_df.columns )
percent_df =pd.DataFrame(np.where(percent_df >0, percent_df, 0), columns=percent_df.columns )
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.26,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35
0,0.000642,0.000833,0.000636,0.000501,0.000468,0.000421,0.000368,0.000474,0.000384,0.000281,...,0.066782,0.076109,0.077103,0.049186,0.037436,0.086659,0.067033,0.087311,0.087311,0.087311
1,0.000619,0.000797,0.000614,0.000486,0.000455,0.000411,0.000361,0.000462,0.000376,0.000278,...,0.061237,0.068930,0.069250,0.044682,0.034311,0.078996,0.061798,0.081374,0.081374,0.081374
2,0.000837,0.001076,0.000823,0.000650,0.000607,0.000554,0.000494,0.000631,0.000510,0.000377,...,0.067541,0.069459,0.066293,0.043703,0.032916,0.059175,0.051849,0.093993,0.093993,0.093993
3,0.000479,0.000623,0.000473,0.000370,0.000346,0.000314,0.000277,0.000356,0.000287,0.000210,...,0.058233,0.064629,0.064348,0.041585,0.031956,0.078526,0.060733,0.082891,0.082891,0.082891
4,0.000896,0.001132,0.000891,0.000715,0.000672,0.000611,0.000539,0.000679,0.000561,0.000422,...,0.072932,0.081429,0.081425,0.053883,0.042140,0.092325,0.074015,0.094596,0.094596,0.094596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,0.001838,0.002222,0.001860,0.001547,0.001469,0.001338,0.001172,0.001426,0.001228,0.000961,...,0.078637,0.089006,0.089712,0.060578,0.048194,0.086942,0.073542,0.084742,0.084742,0.084742
43076,0.000901,0.001113,0.000896,0.000728,0.000689,0.000633,0.000566,0.000699,0.000585,0.000450,...,0.070331,0.076030,0.074669,0.051366,0.041027,0.086240,0.071506,0.094837,0.094837,0.094837
43077,0.000678,0.000843,0.000672,0.000543,0.000513,0.000472,0.000424,0.000526,0.000437,0.000335,...,0.066786,0.071064,0.069192,0.047941,0.038188,0.083223,0.068743,0.096162,0.096162,0.096162
43078,0.000571,0.000702,0.000564,0.000458,0.000434,0.000402,0.000365,0.000448,0.000374,0.000290,...,0.054235,0.057065,0.055230,0.038927,0.031264,0.066449,0.055726,0.078944,0.078944,0.078944


In [268]:
final_north_df['days'] = (final_north_df.interval.astype(int)-1.5) *7
final_north_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - final_north_df.days.map(timedelta)
#final_north_df = final_north_df[final_north_df.date >"2022-06-15"]
final_north_df.date = final_north_df.date.astype(str)
final_north_df.deme = final_north_df.deme.replace({"CentralEurope": "Central Europe", "NorthAmerica": "North America", "SouthAmerica":"South America", "SouthernEurope":"Southern Europe", "WesternEurope": "Western Europe" })


In [280]:
line1 = alt.Chart(final_north_df).mark_area(interpolate='monotone', opacity = 0.9).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,  format="%B %Y")),
    alt.Y('lower_hpd_log_50',title = "Percent of new cases due to introductions", axis=alt.Axis(title="", grid=False, format='%')),
    alt.Y2('upper_hpd_log_50' ), 
    alt.Color("deme",title = "Region",  legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 16, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_))
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_50 >0) & (datum.upper_hpd_log_50 < 1)
)

band1 = alt.Chart(final_north_df).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95',title = "Percent of new cases due to introductions", axis=alt.Axis( grid=False)),
    alt.Y2('upper_hpd_log_95'),
    alt.Color("deme")
).properties(
    width=800,
    height=400
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.upper_hpd_log_95 < 1)
)
percent_plot= band1 + line1
(percent_plot).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [281]:
#creating a joint figure for manuscript!
# Add labels to each chart
intro_plot = intro_plot.properties(title='A')
percent_plot = percent_plot.properties(title='B')

combined_plot = alt.vconcat(intro_plot, percent_plot,  spacing=50).resolve_scale(x='shared', color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=22,
    titleFontSize=22
)

## working on doing Rt calculations based on percent intro

In [53]:
# a line to show an Rt of 1
one_line = alt.Chart(pd.DataFrame({'y': [1.0]})).mark_rule(strokeDash=[1,1]).encode(y='y').properties(
    width=850,
    height=300
)

In [54]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_rt_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    count = 0
    for i in input_df.columns.tolist():
        #print(i)
        if "rt" in i:
            if count %2 == 0:
                deme = i.split(".")[1]
                interval = i.split(".")[2]
                #print(deme, interval)
                local_series = input_df[i].astype('float').to_numpy()
                mean_percent = local_series.mean()
                hpd_95 = az.hdi(local_series, 0.95)
                lower_hpd_log_95 = hpd_95[0]
                upper_hpd_log_95 = hpd_95[1]
                hpd_50 = az.hdi(local_series, 0.50)
                lower_hpd_log_50 = hpd_50[0]
                upper_hpd_log_50 = hpd_50[1]




                try:
                    local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                       "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                       "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                    new_df = new_df.append(local_df)
                except:
                    pass
            count+=1

    return(new_df)

In [55]:
def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8
    uninfectious_rate = 365/4.5
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            print( (foo_ne_growth[i].astype("float")) / 8)
            print((input_df[i].astype("float")) / uninfectious_rate)
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
            

            
    return(new_df)

In [56]:
def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8

    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
            except KeyError:
                pass

            
    return(new_df)

In [57]:
rt_local_df = foo_generate_local_rt(seir_growth_rate, ne_diff_summary)
rt_local_and_intro_df = foo_generate_local_and_intro_rt(seir_growth_rate, ne_diff_summary)


  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(de

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
43075    0.0
43076    0.0
43077    0.0
43078    0.0
43079    0.0
Name: Ne.CentralEurope.diff.0, Length: 43080, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
43075    1.0
43076    1.0
43077    1.0
43078    1.0
43079    1.0
Name: Ne.CentralEurope.diff.0, Length: 43080, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
43075    0.0
43076    0.0
43077    0.0
43078    0.0
43079    0.0
Name: Ne.CentralEurope.diff.1, Length: 43080, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
43075    1.0
43076    1.0
43077    1.0
43078    1.0
43079    1.0
Name: Ne.CentralEurope.diff.1, Length: 43080, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
43075    0.0
43076    0.0
43077    0.0
43078    0.0
43079    0.0
Name: Ne.CentralEurope.diff.2, Length: 43

Name: Ne.SouthAmerica.diff.1, Length: 43080, dtype: float64
0        0.921684
1        0.923927
2        0.926046
3        0.922981
4        0.928014
           ...   
43075    0.934758
43076    0.934042
43077    0.933661
43078    0.936947
43079    0.935146
Name: Ne.SouthAmerica.diff.1, Length: 43080, dtype: float64
0       -0.843466
1       -0.818859
2       -0.795636
3       -0.829234
4       -0.774099
           ...   
43075   -0.700429
43076   -0.708239
43077   -0.712397
43078   -0.676570
43079   -0.696197
Name: Ne.SouthAmerica.diff.2, Length: 43080, dtype: float64
0        0.781217
1        0.787251
2        0.792965
3        0.784704
4        0.798281
           ...   
43075    0.816586
43076    0.814637
43077    0.813600
43078    0.822555
43079    0.817643
Name: Ne.SouthAmerica.diff.2, Length: 43080, dtype: float64
0       -0.437792
1       -0.425020
2       -0.412967
3       -0.430405
4       -0.401788
           ...   
43075   -0.363550
43076   -0.367604
43077   -0.369763
4307

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_r

0        0.696030
1        0.675725
2        0.656561
3        0.684286
4        0.638789
           ...   
43075    0.577995
43076    0.584440
43077    0.587872
43078    0.558307
43079    0.574504
Name: Ne.SouthernEurope.diff.23, Length: 43080, dtype: float64
0        1.199071
1        1.193027
2        1.187335
3        1.195574
4        1.182067
           ...   
43075    1.164132
43076    1.166028
43077    1.167038
43078    1.158352
43079    1.163106
Name: Ne.SouthernEurope.diff.23, Length: 43080, dtype: float64
0        1.015540
1        0.985913
2        0.957953
3        0.998405
4        0.932022
           ...   
43075    0.843322
43076    0.852725
43077    0.857732
43078    0.814596
43079    0.838227
Name: Ne.SouthernEurope.diff.24, Length: 43080, dtype: float64
0        1.296066
1        1.286923
2        1.278323
3        1.290774
4        1.270371
           ...   
43075    1.243346
43076    1.246199
43077    1.247718
43078    1.234653
43079    1.241803
Name: Ne.SouthernEu

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_r

In [235]:
summary_rt_local_df = generate_rt_summary_df(rt_local_df)
summary_rt_local_and_intro_df = generate_rt_summary_df(rt_local_and_intro_df)


  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [236]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,CentralEurope,0,0.998629,0.999546,0.997404,0.999237,0.998591
0,CentralEurope,2,0.998620,0.999561,0.997381,0.999240,0.998583
0,CentralEurope,4,0.998923,0.999680,0.997925,0.999437,0.998908
0,CentralEurope,6,0.999139,0.999748,0.998344,0.999562,0.999143
0,CentralEurope,8,0.999098,0.999733,0.998256,0.999544,0.999103
...,...,...,...,...,...,...,...
0,WesternEurope,26,1.180152,1.224214,1.136012,1.197281,1.167260
0,WesternEurope,28,1.262931,1.315955,1.209719,1.282780,1.246260
0,WesternEurope,30,2.153474,2.355090,1.948640,2.226387,2.085506
0,WesternEurope,32,1.437771,1.515416,1.360962,1.464238,1.411605


In [237]:
summary_rt_local_df['days'] = (summary_rt_local_df.interval.astype(int) -1.5) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.date >"2022-05-30"]

summary_rt_local_and_intro_df['days'] = (summary_rt_local_and_intro_df.interval.astype(int) -1.5) *7 
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-05-30"]

In [238]:
summary_rt_local_and_intro_df.deme = summary_rt_local_and_intro_df.deme.replace({"CentralEurope": "Central Europe", "NorthAmerica": "North America", "SouthAmerica":"South America", "SouthernEurope":"Southern Europe", "WesternEurope": "Western Europe" })
summary_rt_local_df.deme = summary_rt_local_df.deme.replace({"CentralEurope": "Central Europe", "NorthAmerica": "North America", "SouthAmerica":"South America", "SouthernEurope":"Southern Europe", "WesternEurope": "Western Europe" })

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df.deme = summary_rt_local_df.deme.replace({"CentralEurope": "Central Europe", "NorthAmerica": "North America", "SouthAmerica":"South America", "SouthernEurope":"Southern Europe", "WesternEurope": "Western Europe" })


In [239]:
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-05-30"]

In [240]:
line2 = alt.Chart(summary_rt_local_and_intro_df, ).mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Phylo-estimated Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme",legend=alt.Legend(title= "Region",offset = -170, labelFontSize = 20, titleFontSize = 20, symbolSize = 150))
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="Phylo-estimated Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale(range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot = band2 + line2 +one_line
phlyo_rt_plot

In [241]:
line = alt.Chart(summary_rt_local_df, title = "Rt (local only phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme")
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0.4) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(summary_rt_local_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0.4) #& (datum.upper_hpd_log_95 < 4)
)

local_phlyo_rt_plot = band + line +one_line
local_phlyo_rt_plot

In [242]:


(phlyo_rt_plot & local_phlyo_rt_plot).resolve_scale(y = "shared", color = "shared").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [159]:
case_rt = pd.read_csv("../case-rt-analysis/estimates/case-rt-estimates_region.tsv", sep="\t")

In [160]:
case_rt.date = case_rt.date.apply(pd.to_datetime)

In [161]:
case_rt = case_rt[case_rt.date > "2022-05-30"]

In [162]:
band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.5 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('R_lower_95',axis=alt.Axis(title="Case Based Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_95' ),
    alt.Color("region",legend=None,  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300).transform_filter(
    (datum.R_upper_95 <7) 
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("region")
).properties(
    width=850,
    height=300).transform_filter(
    (datum.median_R <5) 
)

case_rt_plot = band3 + line3 + one_line
case_rt_plot

In [163]:
#creating a joint figure for manuscript!
# Add labels to each chart
phlyo_rt_plot = phlyo_rt_plot.properties(title='A')
case_rt_plot = case_rt_plot.properties(title='B')

combined_plot = alt.vconcat(phlyo_rt_plot, case_rt_plot,  spacing=50).resolve_scale(x='shared', y = "shared", color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=28,
    titleFontSize=28
)





In [243]:
vac_df =pd.read_csv("../data/us_mpox_vaccinaiton.csv")
vac_df.index = vac_df["Unnamed: 0"]
vac_df = vac_df.transpose()
vac_df = vac_df.drop(index = "Unnamed: 0").reset_index()
vac_df = vac_df.replace(',','', regex=True)
vac_df.columns.name = None
vac_df["First doses"] = pd.to_numeric(vac_df["First doses"])
vac_df = vac_df.rename(columns={"index": "date"} )
long_vac = pd.melt(vac_df, id_vars='date', value_vars=['First doses', 'Second doses'])
long_vac.date = long_vac.date.apply(pd.to_datetime)
long_vac = long_vac[long_vac.date >"2022-05-30"]


vac_plot = alt.Chart(long_vac).mark_bar(interpolate='monotone', opacity = 0.5 , width = 20).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('value',axis=alt.Axis(title="Vaccination Cases", grid=False)),
    alt.Color("variable",legend=alt.Legend(title= "Dosage",offset = -220, labelFontSize = 20, titleFontSize = 20), scale=alt.Scale(scheme='accent'))
         ).properties(
    width=850,
    height=300)

summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-05-30"]
case_rt = case_rt[case_rt.date >"2022-05-30"]

line2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(interpolate='monotone', opacity = 1 ,color = "#2664A5").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Rt estimate", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) & (datum.deme == "North America") #(datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone',color = "#2664A5"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.deme == "North America") # (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot_NA = band2 + line2 +one_line



band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.3 ,color = "#2664A5", strokeDash=[3,5]).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('R_lower_95',axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_95' ),
    alt.Color("region",legend=alt.Legend(title= "Region",offset = -220, labelFontSize = 20, titleFontSize = 20))
, alt.StrokeDash("region", legend=alt.Legend(title= "Region",offset = -220, labelFontSize = 20, titleFontSize = 20))).properties(
    width=850,
    height=300).transform_filter(
    (datum.region == "North America") # (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#2664A5", strokeDash=[3,5]
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("region"),
    
).properties(
    width=850,
    height=300).transform_filter(
    (datum.region == "North America") 
)

case_rt_plot_NA =   band3 + line3+  one_line


vac_comb = (vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent", color = "independent", strokeDash = "independent")
(vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent", color = "independent", strokeDash = "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


In [208]:
case_rt

Unnamed: 0,date,region,median_R,R_upper_95,R_lower_95
24,2022-05-31,Western Europe,3.384671,3.702972,3.059855
25,2022-06-01,Western Europe,3.303538,3.598887,2.994588
26,2022-06-02,Western Europe,3.219603,3.498808,2.928121
27,2022-06-03,Western Europe,3.136115,3.397408,2.857217
28,2022-06-04,Western Europe,3.052999,3.298328,2.791844
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.230695,1.406764,0.028778
1218,2023-01-23,Central Europe,0.230555,1.446675,0.027869
1219,2023-01-24,Central Europe,0.228316,1.489627,0.027010
1220,2023-01-25,Central Europe,0.229833,1.534110,0.026198


## now we separate out Rt by contribution 

In [209]:
summary_rt_local_df['days'] = (summary_rt_local_df.interval.astype(int)-1.5) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
#summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.date >"2022-06-17"]
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = (summary_rt_local_and_intro_df.interval.astype(int)-1.5) *7
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
#summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-06-17"]
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['days'] = (summary_rt_local_df.interval.astype(int)-1.5) *7
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_loc

In [210]:
summary_rt_local_df['Contribution'] = "Local"
summary_rt_local_and_intro_df['Contribution'] = "Local + Intoductions"
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "North America"]
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.deme == "NorthAmerica"]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['Contribution'] = "Local"


In [211]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date,Contribution
0,NorthAmerica,0,0.58425,0.635342,0.534114,0.602199,0.566684,-10.5,2023-01-13 12:00:00,Local
0,NorthAmerica,2,0.70317,0.741278,0.665738,0.71661,0.690016,3.5,2022-12-30 12:00:00,Local
0,NorthAmerica,4,0.806907,0.835565,0.779364,0.817229,0.797239,17.5,2022-12-16 12:00:00,Local
0,NorthAmerica,6,0.807171,0.835393,0.779726,0.818185,0.798427,31.5,2022-12-02 12:00:00,Local
0,NorthAmerica,8,0.844319,0.871765,0.817484,0.854127,0.835011,45.5,2022-11-18 12:00:00,Local
0,NorthAmerica,10,0.817509,0.844971,0.788414,0.828137,0.808453,59.5,2022-11-04 12:00:00,Local
0,NorthAmerica,12,0.831177,0.858385,0.803438,0.84107,0.822005,73.5,2022-10-21 12:00:00,Local
0,NorthAmerica,14,0.849454,0.877245,0.821369,0.860692,0.841851,87.5,2022-10-07 12:00:00,Local
0,NorthAmerica,16,0.886735,0.904866,0.868245,0.893171,0.880423,101.5,2022-09-23 12:00:00,Local
0,NorthAmerica,18,1.014339,1.031229,0.996965,1.0207,1.009136,115.5,2022-09-09 12:00:00,Local


In [212]:
combined_rt = pd.concat([ summary_rt_local_and_intro_df, summary_rt_local_df], ignore_index=True)


In [213]:
combined_rt = combined_rt[combined_rt.date > "2022-05-30"]


In [214]:
stream_rt = alt.Chart(combined_rt, title = "North American Rt by Contribution").mark_area(interpolate='monotone', opacity = 0.8 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('mean_percent',axis=alt.Axis(title="Estimated Rt", grid=False),stack = False, scale=alt.Scale(domain=(0, 3.5))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N',legend=alt.Legend(title= "Contribution",offset = -180, labelFontSize = 16, titleFontSize = 20))#, scale=alt.Scale(domain = ['Local',  "lo"], range = ["#4c90c0",  "#df4327"]))
).properties(
    width=800,
    height=300
)

#.transform_filter(
#    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
#)

In [215]:
contribution_plot = stream_rt + one_line
(stream_rt + one_line).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


In [244]:
#creating a joint figure for manuscript!
# Add labels to each chart
contribution_plot = contribution_plot.properties(title='A')
vac_comb = vac_comb.properties(title='B')

combined_plot = alt.vconcat(contribution_plot, vac_comb,  spacing=50).resolve_scale(x='shared', y = "shared", color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=28,
    titleFontSize=28
).transform_filter(
    (datum.date > "2022-05-25") # (datum.upper_hpd_log_95 < 4)
)





In [78]:
## Now we calculate the percent of RT that is influenced by introductions

In [79]:
rt_local_intro_northamerica_df = rt_local_and_intro_df.filter(regex='.NorthAmerica.')
rt_local_northamerica_df = rt_local_df.filter(regex='.NorthAmerica.')

In [80]:
rt_local_intro_northamerica_df

Unnamed: 0,rt.NorthAmerica.0,rt.NorthAmerica.1,rt.NorthAmerica.2,rt.NorthAmerica.3,rt.NorthAmerica.4,rt.NorthAmerica.5,rt.NorthAmerica.6,rt.NorthAmerica.7,rt.NorthAmerica.8,rt.NorthAmerica.9,...,rt.NorthAmerica.26,rt.NorthAmerica.27,rt.NorthAmerica.28,rt.NorthAmerica.29,rt.NorthAmerica.30,rt.NorthAmerica.31,rt.NorthAmerica.32,rt.NorthAmerica.33,rt.NorthAmerica.34,rt.NorthAmerica.35
0,0.571053,0.522095,0.707496,1.076749,0.835410,0.835420,0.833595,0.843515,0.886420,0.856130,...,1.167864,1.394379,3.102385,3.157147,2.632160,1.0,1.0,1.0,1.0,1.0
1,0.582047,0.534087,0.715380,1.074473,0.840020,0.840030,0.838254,0.847908,0.889645,0.860182,...,1.162796,1.382010,3.025388,3.077902,2.574118,1.0,1.0,1.0,1.0,1.0
2,0.592507,0.545511,0.722857,1.072326,0.844381,0.844391,0.842661,0.852063,0.892693,0.864015,...,1.158022,1.370384,2.953584,3.004013,2.519915,1.0,1.0,1.0,1.0,1.0
3,0.577401,0.529016,0.712051,1.075432,0.838075,0.838085,0.836288,0.846054,0.888284,0.858473,...,1.164932,1.387219,3.057738,3.111195,2.598514,1.0,1.0,1.0,1.0,1.0
4,0.602280,0.556199,0.729821,1.070338,0.848435,0.848445,0.846758,0.855925,0.895524,0.867576,...,1.153603,1.359643,2.887743,2.936268,2.470146,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43075,0.636238,0.593439,0.753871,1.063549,0.862369,0.862378,0.860840,0.869195,0.905239,0.879808,...,1.138547,1.323203,2.667973,2.710217,2.303532,1.0,1.0,1.0,1.0,1.0
43076,0.632599,0.589441,0.751305,1.064268,0.860887,0.860896,0.859342,0.867783,0.904207,0.878507,...,1.140139,1.327044,2.690872,2.733765,2.320930,1.0,1.0,1.0,1.0,1.0
43077,0.630665,0.587318,0.749940,1.064650,0.860098,0.860107,0.858545,0.867032,0.903658,0.877815,...,1.140987,1.329092,2.703104,2.746344,2.330219,1.0,1.0,1.0,1.0,1.0
43078,0.647411,0.605725,0.761735,1.061354,0.866903,0.866912,0.865423,0.873512,0.908396,0.883786,...,1.133691,1.311502,2.598609,2.638893,2.250778,1.0,1.0,1.0,1.0,1.0


In [81]:
rt_diff_df = rt_local_northamerica_df.div(rt_local_intro_northamerica_df.values)

In [82]:
rt_df_summary = generate_rt_summary_df(rt_diff_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [83]:
rt_df_summary['days'] = rt_df_summary.interval.astype(int) *7
rt_df_summary['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - rt_df_summary.days.map(timedelta)
rt_df_summary = rt_df_summary[rt_df_summary.date >"2022-06-17"]
rt_df_summary.date = rt_df_summary.date.astype(str)

In [84]:
line = alt.Chart(rt_df_summary, title = "North America" ).mark_area( interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Percent of Rt due to Introductions ", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(rt_df_summary).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

rt_diff_plot = line + band +one_line
(vac_plot + (rt_diff_plot)).resolve_scale(y="independent")