In [2]:
import baltic as bt
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from datetime import datetime as dt
from datetime import timedelta
import time
#import pymc3
import math
import arviz as az
import re
#from hpd import hpd
import scipy.stats as stats
from io import StringIO
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')


### Load Log files

In [3]:
from datetime import date
current_date = str(date.today())

In [4]:
log_file_path = "../../../mpox_rhino/550_glm_region_air_travel.log"


In [5]:
domain = ['CentralEurope', 'NorthAmerica', 'SouthAmerica' ,"SouthernEurope", "WesternEurope"]
range_ = ['#EEC060', '#2664A5', '#A76BB1', "#EEA160", "#356D4C"]

## we can then estimate the transmission rates via the change in Ne's over time plus the uninfectious rate

In [6]:
def read_in_Ne_changes_mascot(log_file_path):
    
    Ne_skyline_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
            
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    Ne_column_indices = []   # list to store column indices
                    Nes_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "Ne." in col:
                            Ne_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in Ne_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        interval = name.split(".")[2]
                       
                        Nes_key[n] = name
                        Ne_skyline_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    Ne_skyline_dict["sample"].append(sample)

                    for index in Ne_column_indices:
                        name = Nes_key[index]
                        Ne_skyline_dict[name].append(line.split("\t")[index])
                    
                
    return(Ne_skyline_dict)

In [7]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            #print(deme)
            interval = i.split(".")[2]
            #print(interval)
            #print(i)
            next_interval = int(interval)+7
            local_series = input_df[i].astype('float').to_numpy()
            #print(local_series)
            mean_log = local_series.mean()
            mean_linear = 10**mean_log
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            lower_hpd_linear_95 = 10**lower_hpd_log_95
            upper_hpd_log_95 = hpd_95[1]
            upper_hpd_linear_95 = 10**upper_hpd_log_95
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            lower_hpd_linear_50 = 10**lower_hpd_log_50
            upper_hpd_log_50 = hpd_50[1]
            upper_hpd_linear_50 = 10**upper_hpd_log_50
            
            try:
                next_local_series = input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float').to_numpy()
                diff_series = np.subtract(local_series, next_local_series)
                #print(local_series)
                #print(next_local_series)
                #print(diff_series)
                diff_mean_log = diff_series.mean()
                diff_hpd_95 = az.hdi(diff_series, 0.95)
                diff_lower_hpd_log_95 = diff_hpd_95[0]
                diff_lower_hpd_linear_95 = math.exp(diff_lower_hpd_log_95)
                diff_upper_hpd_log_95 = diff_hpd_95[1]
                diff_upper_hpd_linear_95 = math.exp(diff_upper_hpd_log_95)
                diff_hpd_50 = az.hdi(diff_series, 0.50)
                diff_lower_hpd_log_50 = diff_hpd_50[0]
                diff_lower_hpd_linear_50 = math.exp(diff_lower_hpd_log_50)
                diff_upper_hpd_log_50 = diff_hpd_50[1]
                diff_upper_hpd_linear_50 = math.exp(diff_upper_hpd_log_50)
            except KeyError:
                pass   
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_Ne_log":mean_log,"mean_Ne_linear":mean_linear, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50,
                                                   "upper_hpd_linear":upper_hpd_linear_95,"lower_hpd_linear":lower_hpd_linear_95,
                                                   "diff_mean_Ne_log":diff_mean_log, 
                                                   "diff_upper_hpd_log_95":diff_upper_hpd_log_95,"diff_lower_hpd_log_95":diff_lower_hpd_log_95, 
                                                   "diff_upper_hpd_log_50":diff_upper_hpd_log_50,"diff_lower_hpd_log_50":diff_lower_hpd_log_50,
                                                   "diff_upper_hpd_linear":diff_upper_hpd_linear_95,"diff_lower_hpd_linear":diff_lower_hpd_linear_95,
                                                   "diff_upper_hpd_linear_50":diff_upper_hpd_linear_50,"diff_lower_hpd_linear_50":diff_lower_hpd_linear_50})
                new_df = new_df.append(local_df)
                #print(new_df)
            except:
                pass
            
    return(new_df)

In [8]:
Ne_skyline = read_in_Ne_changes_mascot(log_file_path)

In [9]:
Ne_df = pd.DataFrame.from_dict(Ne_skyline)
print(len(Ne_df))
burnin_percent = 0.3

rows_to_remove = int(len(Ne_df)* burnin_percent)
Ne_df = Ne_df.iloc[rows_to_remove:]

print(len(Ne_df))
Ne_df = Ne_df.reset_index()
Ne_df

53101
37171


Unnamed: 0,index,sample,Ne.CentralEurope.0,Ne.CentralEurope.1,Ne.CentralEurope.2,Ne.CentralEurope.3,Ne.CentralEurope.4,Ne.CentralEurope.5,Ne.CentralEurope.6,Ne.CentralEurope.7,...,Ne.WesternEurope.29,Ne.WesternEurope.30,Ne.WesternEurope.31,Ne.WesternEurope.32,Ne.WesternEurope.33,Ne.WesternEurope.34,Ne.WesternEurope.35,Ne.WesternEurope.36,Ne.WesternEurope.37,Ne.WesternEurope.38
0,15930,15930000,0.3010748527020914,0.3010748527020914,0.3010748527020914,0.3010748527020914,0.3010748527020914,0.3010748527020914,0.3010748527020914,0.3010748527020914,...,4.447354779996291,3.952599388486265,3.044893112405841,1.5976931342104246,0.7073689481821203,0.7073689481821203,0.7073689481821203,0.7073689481821203,0.7073689481821203,0.7073689481821203
1,15931,15931000,0.3251227039583064,0.3251227039583064,0.3251227039583064,0.3251227039583064,0.3251227039583064,0.3251227039583064,0.3251227039583064,0.3251227039583064,...,4.787084443560588,4.247683381729248,3.260564911659325,1.6958464492007714,0.742511803657621,0.742511803657621,0.742511803657621,0.742511803657621,0.742511803657621,0.742511803657621
2,15932,15932000,0.264337113131706,0.264337113131706,0.264337113131706,0.264337113131706,0.264337113131706,0.264337113131706,0.264337113131706,0.264337113131706,...,4.579360647304079,4.037320158282615,3.0553126149398646,1.534186242537514,0.6425418929362159,0.6425418929362159,0.6425418929362159,0.6425418929362159,0.6425418929362159,0.6425418929362159
3,15933,15933000,0.31171624839131323,0.31171624839131323,0.31171624839131323,0.31171624839131323,0.31171624839131323,0.31171624839131323,0.31171624839131323,0.31171624839131323,...,4.97755853711799,4.402893428353906,3.356385536972268,1.7160612156127646,0.7352909045543207,0.7352909045543207,0.7352909045543207,0.7352909045543207,0.7352909045543207,0.7352909045543207
4,15934,15934000,0.2740198426118082,0.2740198426118082,0.2740198426118082,0.2740198426118082,0.2740198426118082,0.2740198426118082,0.2740198426118082,0.2740198426118082,...,5.31301923421135,4.659396316142017,3.4850073282393375,1.6999991268693353,0.6864073432905621,0.6864073432905621,0.6864073432905621,0.6864073432905621,0.6864073432905621,0.6864073432905621
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,53096,53096000,0.24912477238244993,0.24912477238244993,0.24912477238244993,0.24912477238244993,0.24912477238244993,0.24912477238244993,0.24912477238244993,0.24912477238244993,...,4.933692342038105,4.33042490554338,3.245065792538732,1.5903502130012812,0.6459269693251559,0.6459269693251559,0.6459269693251559,0.6459269693251559,0.6459269693251559,0.6459269693251559
37167,53097,53097000,0.24658794921313162,0.24658794921313162,0.24658794921313162,0.24658794921313162,0.24658794921313162,0.24658794921313162,0.24658794921313162,0.24658794921313162,...,4.771314904928117,4.192054848605725,3.1482714983721887,1.551297645164368,0.6343944846757168,0.6343944846757168,0.6343944846757168,0.6343944846757168,0.6343944846757168,0.6343944846757168
37168,53098,53098000,0.29363894689549414,0.29363894689549414,0.29363894689549414,0.29363894689549414,0.29363894689549414,0.29363894689549414,0.29363894689549414,0.29363894689549414,...,4.355021304001303,3.871630393712394,2.984381453703678,1.568360761081619,0.6957375788334246,0.6957375788334246,0.6957375788334246,0.6957375788334246,0.6957375788334246,0.6957375788334246
37169,53099,53099000,0.2642954113832466,0.2642954113832466,0.2642954113832466,0.2642954113832466,0.2642954113832466,0.2642954113832466,0.2642954113832466,0.2642954113832466,...,4.579374867054967,4.040137084364934,3.0621446708723368,1.5434663226517642,0.6495369670252166,0.6495369670252166,0.6495369670252166,0.6495369670252166,0.6495369670252166,0.6495369670252166


In [393]:
melted_df = pd.melt(
    Ne_df, 
    id_vars=['index', 'sample'], 
    value_vars=[col for col in Ne_df.columns if col.startswith('Ne.')], 
    var_name='deme.intervel', 
    value_name='Ne.'
)

# Split 'deme.intervel' column into separate 'deme' and 'intervel' columns
melted_df[["Ne",'deme', 'interval']] = melted_df['deme.intervel'].str.split('.', expand=True)

# Drop the original 'deme.intervel' column
melted_df.drop(columns=['deme.intervel', "Ne"], inplace=True)

# Print the resulting DataFrame
print(melted_df)

         index    sample                  Ne.           deme interval
0        15930  15930000   0.3010748527020914  CentralEurope        0
1        15931  15931000   0.3251227039583064  CentralEurope        0
2        15932  15932000    0.264337113131706  CentralEurope        0
3        15933  15933000  0.31171624839131323  CentralEurope        0
4        15934  15934000   0.2740198426118082  CentralEurope        0
...        ...       ...                  ...            ...      ...
7248340  53096  53096000   0.6459269693251559  WesternEurope       38
7248341  53097  53097000   0.6343944846757168  WesternEurope       38
7248342  53098  53098000   0.6957375788334246  WesternEurope       38
7248343  53099  53099000   0.6495369670252166  WesternEurope       38
7248344  53100  53100000   0.7088138355996161  WesternEurope       38

[7248345 rows x 5 columns]


In [394]:
melted_df

Unnamed: 0,index,sample,Ne.,deme,interval
0,15930,15930000,0.3010748527020914,CentralEurope,0
1,15931,15931000,0.3251227039583064,CentralEurope,0
2,15932,15932000,0.264337113131706,CentralEurope,0
3,15933,15933000,0.31171624839131323,CentralEurope,0
4,15934,15934000,0.2740198426118082,CentralEurope,0
...,...,...,...,...,...
7248340,53096,53096000,0.6459269693251559,WesternEurope,38
7248341,53097,53097000,0.6343944846757168,WesternEurope,38
7248342,53098,53098000,0.6957375788334246,WesternEurope,38
7248343,53099,53099000,0.6495369670252166,WesternEurope,38


In [396]:
melted_df["Ne."] = melted_df["Ne."].astype("float")

In [403]:
df_test = melted_df.groupby(['deme', pd.Grouper(key='interval')])['Ne.'].rolling(2, min_periods=1).mean()

ValueError: Grouper and axis must be same length

In [404]:
df_test

deme           interval         
CentralEurope  0         0          0.301075
                         1          0.313099
                         2          0.294730
                         3          0.288027
                         4          0.292868
                                      ...   
WesternEurope  9         6170381    2.638374
                         6170382    2.709160
                         6170383    2.619483
                         6170384    2.589838
                         6170385    2.625495
Name: Ne., Length: 7248345, dtype: float64

## calculating transmission rate

In [519]:
def generate_summary_diff_df(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            next_interval = int(interval)+3 #averaging over three weeks to reduce noise
            local_series = input_df[i].astype('float').to_numpy()
           
            try:
                new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
            
            
            except KeyError:
                pass 
            
            
    return(new_df)

In [520]:
ne_diff_summary = generate_summary_diff_df(Ne_df)

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/3)*(np.log(in

In [521]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.26,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.983008,...,7.420294,8.598449,9.994660,17.744977,29.823323,25.300924,14.122571,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.010109,...,7.521702,8.715958,10.131250,17.987486,30.230898,25.646694,14.315574,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.118232,...,7.926293,9.184787,10.676208,18.955028,31.857010,27.026223,15.085605,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.062734,...,7.718623,8.944144,10.396489,18.458403,31.022351,26.318131,14.690360,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.207282,...,8.259514,9.570916,11.125036,19.751897,33.196279,28.162406,15.719804,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.192950,...,8.205884,9.508771,11.052799,19.623646,32.980731,27.979544,15.617733,0.0,0.0,0.0
37167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.176286,...,8.143526,9.436512,10.968807,19.474523,32.730106,27.766923,15.499052,0.0,0.0,0.0
37168,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.978262,...,7.402535,8.577870,9.970740,17.702508,29.751946,25.240371,14.088771,0.0,0.0,0.0
37169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.106557,...,7.882604,9.134162,10.617362,18.850550,31.681419,26.877259,15.002455,0.0,0.0,0.0


In [522]:
uninfectious_rate = 365/4.5

#taken from https://www.medrxiv.org/content/10.1101/2022.08.17.22278897v1.full.pdf


In [523]:
incubation_period = 365/8

In [524]:
seir_growth_rate = ((ne_diff_summary*2 + uninfectious_rate + incubation_period)**2 - (incubation_period- uninfectious_rate)**2)/(4*incubation_period)



seir_growth_rate



Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.26,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35
0,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.688943,...,102.929851,106.616148,111.063497,137.304287,183.448045,165.421850,124.711915,81.111111,81.111111,81.111111
1,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.616036,...,103.244751,106.987156,111.503166,138.167848,185.116670,166.768431,125.368335,81.111111,81.111111,81.111111
2,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.325477,...,104.505602,108.473402,113.265466,141.638882,191.846510,172.193087,128.003525,81.111111,81.111111,81.111111
3,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.474552,...,103.857530,107.709330,112.359278,139.852126,188.377704,169.398271,126.647674,81.111111,81.111111,81.111111
4,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.086557,...,105.549430,109.704712,114.726676,144.528450,197.476266,176.723492,130.193392,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.124986,...,105.381103,109.506099,114.490907,144.061512,196.564881,175.990530,129.839756,81.111111,81.111111,81.111111
37167,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.169681,...,105.185542,109.275376,114.217056,143.519491,195.507739,175.140128,129.429142,81.111111,81.111111,81.111111
37168,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.701714,...,102.874751,106.551237,110.986584,137.153320,183.156575,165.186569,124.597127,81.111111,81.111111,81.111111
37169,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,75.356827,...,104.369107,108.312450,113.074542,141.262096,191.114225,171.603303,127.717720,81.111111,81.111111,81.111111


In [371]:
#ne_diff_summary += uninfectious_rate

In [372]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.25,Ne.WesternEurope.diff.26,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.487256,-9.407094,...,5.160275,7.098389,9.840636,15.879760,23.900661,22.367492,18.975693,10.591928,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.507582,-9.535654,...,5.230797,7.195398,9.975121,16.096778,24.227295,22.673173,19.235021,10.736681,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.588674,-10.048574,...,5.512160,7.582437,10.511680,16.962619,25.530474,23.892757,20.269667,11.314204,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.547050,-9.785300,...,5.367741,7.383776,10.236272,16.518196,24.861572,23.266763,19.738599,11.017770,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.655462,-10.471016,...,5.743892,7.901203,10.953591,17.675728,26.603776,24.897209,21.121804,11.789853,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.644713,-10.403027,...,5.706596,7.849899,10.882468,17.560957,26.431034,24.735548,20.984658,11.713300,0.0,0.0
37167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.632214,-10.323973,...,5.663230,7.790247,10.799771,17.427509,26.230181,24.547579,20.825192,11.624289,0.0,0.0
37168,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.483697,-9.384579,...,5.147925,7.081401,9.817084,15.841755,23.843459,22.313960,18.930278,10.566578,0.0,0.0
37169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.579917,-9.993188,...,5.481778,7.540644,10.453741,16.869124,25.389754,23.761064,20.157944,11.251842,0.0,0.0


In [373]:
seir_growth_rate.filter(regex='Ne.NorthAmerica.diff.2')

Unnamed: 0,Ne.NorthAmerica.diff.2,Ne.NorthAmerica.diff.20,Ne.NorthAmerica.diff.21,Ne.NorthAmerica.diff.22,Ne.NorthAmerica.diff.23,Ne.NorthAmerica.diff.24,Ne.NorthAmerica.diff.25,Ne.NorthAmerica.diff.26,Ne.NorthAmerica.diff.27,Ne.NorthAmerica.diff.28,Ne.NorthAmerica.diff.29
0,62.013392,104.581749,108.126142,109.284962,111.201599,104.666527,95.739055,108.640222,189.930910,183.685793,186.202234
1,61.768555,104.921700,108.520346,109.697050,111.643417,105.007767,95.946750,109.042351,191.717383,185.358722,187.920802
2,60.796061,106.283160,110.099825,111.348427,113.414359,106.374408,96.777488,110.653688,198.925258,192.106017,194.853204
3,61.294362,105.583313,109.287761,110.499349,112.503720,105.671895,96.350665,109.825224,195.209516,188.628195,191.279766
4,60.000321,107.410647,111.408748,112.717224,114.882770,107.506205,97.464195,111.989140,204.958153,197.750488,200.653754
...,...,...,...,...,...,...,...,...,...,...,...
37166,60.128073,107.228808,111.197593,112.496393,114.645837,107.323669,97.353521,111.773698,203.981310,196.836714,199.714640
37167,60.276768,107.017557,110.952312,112.239879,114.370635,107.111611,97.224909,111.523440,202.848335,195.776804,198.625371
37168,62.056314,104.522269,108.057176,109.212870,111.124311,104.606821,95.702704,108.569871,189.618882,183.393573,185.902052
37169,60.900739,106.135750,109.928752,111.169548,113.222496,106.226436,96.687622,110.479157,198.140749,191.371825,194.098791


In [374]:
# for i in range(190,252, 1):

#     seir_growth_rate = seir_growth_rate.drop(columns="Ne.NorthAmerica.diff."+str(i))

In [375]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [376]:
#just checking to make sure the changes in Ne look okay
test_north = generate_summary_df(seir_growth_rate)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

In [377]:
test_north

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,CentralEurope,0,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,1,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,2,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,3,81.111111,81.111111,81.111111,81.111111,81.111111
0,CentralEurope,4,81.111111,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...
0,WesternEurope,30,152.974646,164.664660,140.885643,156.831104,148.643931
0,WesternEurope,31,140.702286,150.217404,130.830600,143.849258,137.174721
0,WesternEurope,32,112.476860,117.233093,107.496649,114.061408,110.709877
0,WesternEurope,33,81.111111,81.111111,81.111111,81.111111,81.111111


In [378]:
test_north['days'] = test_north.interval.astype(int) *7
test_north['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - test_north.days.map(timedelta)
test_north.date = test_north.date.astype(str)

In [537]:
line = alt.Chart(test_north).mark_area().encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="transmission rate"), scale = alt.Scale(zero= False)),
    alt.Y2('upper_hpd_log_50'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band = alt.Chart(test_north).mark_area(
    opacity=0.3
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95'),
    alt.Y2('upper_hpd_log_95'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band + line

## calculating backward migration rates

In [538]:
def read_in_forward_migration_rates_mascot(log_file_path):
    
    mig_rates_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            #print(line_number)
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
                
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    mig_column_indices = []   # list to store column indices
                    mig_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "mig." in col:
                            mig_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in mig_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        #interval = name.split(".")[2]
                       
                        mig_key[n] = name
                        mig_rates_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    mig_rates_dict["sample"].append(sample)

                    for index in mig_column_indices:
                        name = mig_key[index]
                        mig_rates_dict[name].append(line.split("\t")[index])
                    
                
                
                
    return(mig_rates_dict)

In [539]:
migration_rates_f = read_in_forward_migration_rates_mascot(log_file_path)

In [540]:
mig_df_f = pd.DataFrame.from_dict(migration_rates_f)


In [541]:
burnin_percent = 0.3
print(len(mig_df_f))
rows_to_remove = int(len(mig_df_f)* burnin_percent)
mig_df_f = mig_df_f.iloc[rows_to_remove:]

print(len(mig_df_f))
mig_df_f = mig_df_f.reset_index()
mig_df_f.head()

53101
37171


Unnamed: 0,index,sample,mig.CentralEurope_to_NorthAmerica,mig.CentralEurope_to_SouthAmerica,mig.CentralEurope_to_SouthernEurope,mig.CentralEurope_to_WesternEurope,mig.NorthAmerica_to_CentralEurope,mig.NorthAmerica_to_SouthAmerica,mig.NorthAmerica_to_SouthernEurope,mig.NorthAmerica_to_WesternEurope,...,mig.SouthAmerica_to_SouthernEurope,mig.SouthAmerica_to_WesternEurope,mig.SouthernEurope_to_CentralEurope,mig.SouthernEurope_to_NorthAmerica,mig.SouthernEurope_to_SouthAmerica,mig.SouthernEurope_to_WesternEurope,mig.WesternEurope_to_CentralEurope,mig.WesternEurope_to_NorthAmerica,mig.WesternEurope_to_SouthAmerica,mig.WesternEurope_to_SouthernEurope
0,15930,15930000,0.043389152206161,0.0163429295728428,0.1628850133085988,0.3153999585304822,0.0477523797610299,0.4859461920188331,0.3503118586190569,0.5285982474603492,...,0.2473769731421294,0.1663306651804479,0.1677633866537059,0.3510854971102834,0.2419130368214358,1.6540640861826357,0.319471261093722,0.5179175528286454,0.1736534810104251,1.6726007457841328
1,15931,15931000,0.0573058331602363,0.0200114095872844,0.1205256858079968,0.2060223427182111,0.0603357015575001,0.4086073445700076,0.4402734654692688,0.6744679658867523,...,0.2850319733516773,0.1960461479226467,0.1241354058252247,0.4412457776852909,0.2723199667097877,0.9995991991452404,0.2086817574369111,0.6948534966694988,0.2022944379908389,1.0108014435124748
2,15932,15932000,0.1515325848304453,0.0761823313551183,0.2442329964218546,0.3485353627693134,0.155407877187796,0.5397370164098606,0.5702263823330294,0.7484076734075628,...,0.4284396374430823,0.3360799974019317,0.2522308417588083,0.5752109034595533,0.4159817598593018,0.9706389472210718,0.3518493050107741,0.7743112838896807,0.3423698131749236,0.9781055011538607
3,15933,15933000,0.1070146256239837,0.0437335129192797,0.2022828292173146,0.317292355183869,0.1114027597561452,0.5874217075411439,0.6062112163581739,0.902590855317988,...,0.4171454022635773,0.3036585273105008,0.1982784812048784,0.6072768331942207,0.401322292887926,1.2113606542267663,0.3209221214578038,0.893725548778168,0.311386943178927,1.233406728070407
4,15934,15934000,0.0597692687274124,0.0267302802776233,0.2300988223101021,0.4208855445547474,0.0622163000271121,0.5699578322785352,0.3582314601561049,0.522782558838975,...,0.2748537014837581,0.1941959389141612,0.2312345128172054,0.3585079021659547,0.2707856953722064,1.7673707714876656,0.4256970635414517,0.5176528781465641,0.2064535561215288,1.7988758878258444


In [542]:
def calc_backwards_mig_rates(mig_df_f, targetName):    
    mig_rates = {}
    target = targetName
    target_columns = []
    ne_interval= 0
    mig_list = []

    for name in mig_df_f.columns:
        if target in name.split("_")[0]:
            target_columns.append(name)

    for name in Ne_df.columns:
        if target in name:
            ne_interval+=1

    for interval in range(0, ne_interval,1):
        mig_rates[str(interval)+ "." + "bmr" + "." +  str(target)] = []
        for index_1, row_1 in mig_df_f.iterrows():
            mig_list = []
            for direction in target_columns:
                other_loc = direction.split("_")[2]
                mig_rate_b = pd.to_numeric(row_1[direction]) * ((pd.to_numeric(Ne_df.loc[index_1,"Ne."+ str(target) + "." + str(interval)]))/(pd.to_numeric(Ne_df.loc[index_1, "Ne."+ str(other_loc) + "." + str(interval)])))
                #print(mig_rate_b)
                mig_list.append(mig_rate_b)
            combined_mig = np.sum(np.array(mig_list))
            #print(combined_mig)
            mig_rates[str(interval)+ "." + "bmr" + "." + str(target)].append(combined_mig)

    return(mig_rates) 


In [543]:
unique_column= []
unique_target = []
for name in mig_df_f.columns:
    try:

        if name.split("_")[0].split(".")[1] not in unique_target:
            unique_column.append(name)
            unique_target.append(name.split("_")[0].split(".")[1])
    except:
        pass



In [544]:
mig_rates = []
for target in unique_target:
    mig_rates.append(calc_backwards_mig_rates(mig_df_f, target))
    


In [545]:
mr_b_df = pd.DataFrame()
for x in mig_rates:
    x_df = pd.DataFrame(x)
    mr_b_df = pd.concat([mr_b_df, x_df], axis=1)


In [546]:
mr_b_df

Unnamed: 0,0.bmr.CentralEurope,1.bmr.CentralEurope,2.bmr.CentralEurope,3.bmr.CentralEurope,4.bmr.CentralEurope,5.bmr.CentralEurope,6.bmr.CentralEurope,7.bmr.CentralEurope,8.bmr.CentralEurope,9.bmr.CentralEurope,...,29.bmr.WesternEurope,30.bmr.WesternEurope,31.bmr.WesternEurope,32.bmr.WesternEurope,33.bmr.WesternEurope,34.bmr.WesternEurope,35.bmr.WesternEurope,36.bmr.WesternEurope,37.bmr.WesternEurope,38.bmr.WesternEurope
0,0.126401,0.155856,0.127117,0.104730,0.098428,0.089358,0.078203,0.097500,0.082107,0.058433,...,6.687487,6.871370,9.903239,6.729769,6.206198,6.206198,6.206198,6.206198,6.206198,6.206198
1,0.091237,0.112285,0.092398,0.076088,0.071898,0.064917,0.056225,0.069420,0.059145,0.042390,...,6.162000,6.346756,11.085452,6.739850,4.945403,4.945403,4.945403,4.945403,4.945403,4.945403
2,0.154057,0.190708,0.157197,0.128896,0.121884,0.108897,0.093071,0.114299,0.097963,0.069918,...,9.913826,10.053188,15.379191,9.279736,5.847801,5.847801,5.847801,5.847801,5.847801,5.847801
3,0.139323,0.171780,0.141727,0.116799,0.110279,0.099016,0.085033,0.105004,0.089735,0.064072,...,9.220100,9.432137,15.590237,9.436349,6.529326,6.529326,6.529326,6.529326,6.529326,6.529326
4,0.150949,0.191409,0.151895,0.123499,0.114934,0.102912,0.088478,0.113185,0.093665,0.064142,...,9.143194,9.388806,13.011548,8.612401,7.413485,7.413485,7.413485,7.413485,7.413485,7.413485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.048131,0.061539,0.048073,0.038505,0.035993,0.032401,0.028130,0.035925,0.029523,0.020243,...,6.318278,6.541367,13.805221,7.820102,6.263061,6.263061,6.263061,6.263061,6.263061,6.263061
37167,0.032114,0.041487,0.031684,0.024937,0.023325,0.021176,0.018724,0.023978,0.019397,0.013246,...,5.615845,5.861575,13.663797,7.708431,6.837860,6.837860,6.837860,6.837860,6.837860,6.837860
37168,0.042877,0.053623,0.042598,0.034383,0.032395,0.029630,0.026340,0.032927,0.027317,0.019357,...,5.481051,5.733474,12.170178,7.325652,6.630535,6.630535,6.630535,6.630535,6.630535,6.630535
37169,0.060136,0.076297,0.059878,0.047959,0.045025,0.040819,0.035838,0.045308,0.037351,0.025975,...,7.073885,7.357699,15.425345,8.938199,7.279569,7.279569,7.279569,7.279569,7.279569,7.279569


In [547]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    count = 0
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        #if count %7 == 0:
        deme = i.split(".")[2]
        interval = i.split(".")[0]
        local_series = input_df[i].astype('float').to_numpy()
        mean_percent = local_series.mean()
        hpd_95 = az.hdi(local_series, 0.95)
        lower_hpd_log_95 = hpd_95[0]
        upper_hpd_log_95 = hpd_95[1]
        hpd_50 = az.hdi(local_series, 0.50)
        lower_hpd_log_50 = hpd_50[0]
        upper_hpd_log_50 = hpd_50[1]




        try:
            local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                               "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                               "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
            new_df = new_df.append(local_df)
        except:
            pass
        #count +=1  
    return(new_df)

In [548]:
south_mrb_df = generate_summary_df(mr_b_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [549]:
south_mrb_df[south_mrb_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,WesternEurope,0,2.083787,3.01193,1.312327,2.12986,1.630524
0,WesternEurope,1,1.48387,2.110577,0.957265,1.545128,1.190042
0,WesternEurope,2,2.570748,3.759999,1.650644,2.660448,2.04092
0,WesternEurope,3,3.814769,5.720597,2.361197,3.847616,2.896508
0,WesternEurope,4,3.806826,5.638086,2.365441,3.912294,2.979052
0,WesternEurope,5,3.520008,5.240855,2.211139,3.575544,2.72386
0,WesternEurope,6,2.853987,4.288169,1.782587,2.916322,2.218167
0,WesternEurope,7,2.509718,3.788966,1.570894,2.548347,1.933592
0,WesternEurope,8,3.469588,5.238775,2.164525,3.514257,2.668543
0,WesternEurope,9,4.315454,6.620205,2.685152,4.373599,3.296842


In [550]:
south_mrb_df['days'] = south_mrb_df.interval.astype(int)  *7
south_mrb_df['date'] = dt.strptime("2023-01-06",  "%Y-%m-%d") - south_mrb_df.days.map(timedelta)
south_mrb_df.date = south_mrb_df.date.astype(str)



In [551]:
south_mrb_df = south_mrb_df[south_mrb_df.date > "2022-05-01"]

In [552]:
south_mrb_plot = alt.Chart(south_mrb_df).mark_area(interpolate='monotone', opacity = 0.5, color = "orange").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('upper_hpd_log_50',axis=alt.Axis(title="Number of Estimated Introductions", grid=False)),
    alt.Y2('lower_hpd_log_50' ), 
    alt.Color("deme",title = "Phylo Estimates", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_) )
).properties(
    width=800,
    height=400
)

median = alt.Chart(south_mrb_df).mark_line(interpolate = "monotone").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="", grid=False)), 
    alt.Color("deme")
).properties(
    width = 800,
    height = 400
)

In [553]:
 south_mrb_plot

In [554]:
eii_df = pd.read_csv("estimated_importation_intensity_region.csv")

In [555]:
eii_df.columns

Index(['Unnamed: 0', 'index', 'place', 'year-month', 'import_risk'], dtype='object')

In [556]:
eii_df = eii_df[eii_df['year-month'] > "2022-04-01"]

In [557]:
eii_plot = alt.Chart(eii_df).mark_line().encode(
    alt.X("year-month:T",axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y("import_risk", title = "Number of Estimated Introductions",axis = alt.Axis(grid = False)), 
    alt.Color("place:N",title = "EII", legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110),  scale=alt.Scale(range=range_) )).properties(
    width = 800,
    height = 400
)
eii_plot

In [558]:
intro_plot = (eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent")
(eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

###  percent of new cases from intros


In [559]:
def generate_percent_intro_df(input_df):
    
    temp_df = pd.DataFrame()
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        
        interval = i.split(".")[0]
        deme = i.split(".")[2]

        try:
            temp_df["total."+ str(interval)] = seir_growth_rate["Ne." + str(deme) + ".diff." + str(interval)].astype("float") +  input_df[i].astype("float")

            new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0) 


        except KeyError:
            pass 
            
            
    return(new_df)

In [560]:
percent_df = generate_percent_intro_df(mr_b_df)

  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_

In [561]:
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.26,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35
0,0.001556,0.001918,0.001565,0.001290,0.001212,0.001100,0.000963,0.001201,0.001011,0.000771,...,0.068379,0.073421,0.071557,0.046444,0.036104,0.056485,0.051200,0.071076,0.071076,0.071076
1,0.001124,0.001382,0.001138,0.000937,0.000886,0.000800,0.000693,0.000855,0.000729,0.000560,...,0.054935,0.064122,0.065670,0.042694,0.033149,0.062329,0.051018,0.057467,0.057467,0.057467
2,0.001896,0.002346,0.001934,0.001587,0.001500,0.001341,0.001146,0.001407,0.001206,0.000927,...,0.080827,0.098076,0.102852,0.065415,0.049793,0.081991,0.067596,0.067248,0.067248,0.067248
3,0.001715,0.002113,0.001744,0.001438,0.001358,0.001219,0.001047,0.001293,0.001105,0.000848,...,0.078170,0.092552,0.095711,0.061850,0.047683,0.084277,0.069342,0.074501,0.074501,0.074501
4,0.001858,0.002354,0.001869,0.001520,0.001415,0.001267,0.001090,0.001393,0.001153,0.000854,...,0.090938,0.098559,0.096441,0.059498,0.045386,0.068577,0.062046,0.083745,0.083745,0.083745
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.000593,0.000758,0.000592,0.000474,0.000444,0.000399,0.000347,0.000443,0.000364,0.000269,...,0.056939,0.065001,0.065780,0.042015,0.032207,0.072737,0.056807,0.071681,0.071681,0.071681
37167,0.000396,0.000511,0.000390,0.000307,0.000287,0.000261,0.000231,0.000296,0.000239,0.000176,...,0.053245,0.058454,0.057777,0.037656,0.029109,0.072370,0.056209,0.077748,0.077748,0.077748
37168,0.000528,0.000661,0.000525,0.000424,0.000399,0.000365,0.000325,0.000406,0.000337,0.000256,...,0.053471,0.057938,0.056876,0.038427,0.030353,0.068620,0.055530,0.075569,0.075569,0.075569
37169,0.000741,0.000940,0.000738,0.000591,0.000555,0.000503,0.000442,0.000558,0.000460,0.000345,...,0.064409,0.072668,0.073041,0.047688,0.037072,0.082476,0.065407,0.082357,0.082357,0.082357


In [562]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "percent" in i:
            deme = i.split(".")[2]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [563]:
final_north_df = generate_summary_df(percent_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [564]:
#making sure that any numbers >1 are excluded
percent_df =pd.DataFrame(np.where(percent_df <1, percent_df, 1), columns=percent_df.columns )
percent_df =pd.DataFrame(np.where(percent_df >0, percent_df, 0), columns=percent_df.columns )
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.26,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35
0,0.001556,0.001918,0.001565,0.001290,0.001212,0.001100,0.000963,0.001201,0.001011,0.000771,...,0.068379,0.073421,0.071557,0.046444,0.036104,0.056485,0.051200,0.071076,0.071076,0.071076
1,0.001124,0.001382,0.001138,0.000937,0.000886,0.000800,0.000693,0.000855,0.000729,0.000560,...,0.054935,0.064122,0.065670,0.042694,0.033149,0.062329,0.051018,0.057467,0.057467,0.057467
2,0.001896,0.002346,0.001934,0.001587,0.001500,0.001341,0.001146,0.001407,0.001206,0.000927,...,0.080827,0.098076,0.102852,0.065415,0.049793,0.081991,0.067596,0.067248,0.067248,0.067248
3,0.001715,0.002113,0.001744,0.001438,0.001358,0.001219,0.001047,0.001293,0.001105,0.000848,...,0.078170,0.092552,0.095711,0.061850,0.047683,0.084277,0.069342,0.074501,0.074501,0.074501
4,0.001858,0.002354,0.001869,0.001520,0.001415,0.001267,0.001090,0.001393,0.001153,0.000854,...,0.090938,0.098559,0.096441,0.059498,0.045386,0.068577,0.062046,0.083745,0.083745,0.083745
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.000593,0.000758,0.000592,0.000474,0.000444,0.000399,0.000347,0.000443,0.000364,0.000269,...,0.056939,0.065001,0.065780,0.042015,0.032207,0.072737,0.056807,0.071681,0.071681,0.071681
37167,0.000396,0.000511,0.000390,0.000307,0.000287,0.000261,0.000231,0.000296,0.000239,0.000176,...,0.053245,0.058454,0.057777,0.037656,0.029109,0.072370,0.056209,0.077748,0.077748,0.077748
37168,0.000528,0.000661,0.000525,0.000424,0.000399,0.000365,0.000325,0.000406,0.000337,0.000256,...,0.053471,0.057938,0.056876,0.038427,0.030353,0.068620,0.055530,0.075569,0.075569,0.075569
37169,0.000741,0.000940,0.000738,0.000591,0.000555,0.000503,0.000442,0.000558,0.000460,0.000345,...,0.064409,0.072668,0.073041,0.047688,0.037072,0.082476,0.065407,0.082357,0.082357,0.082357


In [565]:
final_north_df['days'] = final_north_df.interval.astype(int) *7
final_north_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - final_north_df.days.map(timedelta)
#final_north_df = final_north_df[final_north_df.date >"2022-06-15"]
final_north_df.date = final_north_df.date.astype(str)

In [566]:
line1 = alt.Chart(final_north_df).mark_area(interpolate='monotone', opacity = 0.9).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,  format="%B %Y")),
    alt.Y('lower_hpd_log_50',title = "Percent of new cases due to introductions", axis=alt.Axis(title="", grid=False, format='%')),
    alt.Y2('upper_hpd_log_50' ), 
    alt.Color("deme",title = "Region",  legend=alt.Legend(orient = "right",offset = -155, labelFontSize = 14, titleFontSize = 14, symbolSize = 110), scale=alt.Scale( range=range_))
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_50 >0) & (datum.upper_hpd_log_50 < 1)
)

band1 = alt.Chart(final_north_df).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95',title = "Percent of new cases due to introductions", axis=alt.Axis( grid=False)),
    alt.Y2('upper_hpd_log_95'),
    alt.Color("deme")
).properties(
    width=800,
    height=400
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.upper_hpd_log_95 < 1)
)
percent_plot= band1 + line1
(percent_plot).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [567]:
#creating a joint figure for manuscript!
# Add labels to each chart
intro_plot = intro_plot.properties(title='A')
percent_plot = percent_plot.properties(title='B')

combined_plot = alt.vconcat(intro_plot, percent_plot,  spacing=50).resolve_scale(x='shared', color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=22,
    titleFontSize=22
)

## working on doing Rt calculations based on percent intro

In [568]:
# a line to show an Rt of 1
one_line = alt.Chart(pd.DataFrame({'y': [1.0]})).mark_rule(strokeDash=[1,1]).encode(y='y').properties(
    width=850,
    height=300
)

In [569]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_rt_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    count = 0
    for i in input_df.columns.tolist():
        #print(i)
        if "rt" in i:
            if count %2 == 0:
                deme = i.split(".")[1]
                interval = i.split(".")[2]
                #print(deme, interval)
                local_series = input_df[i].astype('float').to_numpy()
                mean_percent = local_series.mean()
                hpd_95 = az.hdi(local_series, 0.95)
                lower_hpd_log_95 = hpd_95[0]
                upper_hpd_log_95 = hpd_95[1]
                hpd_50 = az.hdi(local_series, 0.50)
                lower_hpd_log_50 = hpd_50[0]
                upper_hpd_log_50 = hpd_50[1]




                try:
                    local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                       "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                       "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                    new_df = new_df.append(local_df)
                except:
                    pass
            count+=1

    return(new_df)

In [570]:
def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8
    uninfectious_rate = 365/4.5
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            print( (foo_ne_growth[i].astype("float")) / 8)
            print((input_df[i].astype("float")) / uninfectious_rate)
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
            

            
    return(new_df)

In [571]:
def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8

    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
            except KeyError:
                pass

            
    return(new_df)

In [572]:
rt_local_df = foo_generate_local_rt(seir_growth_rate, ne_diff_summary)
rt_local_and_intro_df = foo_generate_local_and_intro_rt(seir_growth_rate, ne_diff_summary)


  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(de

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
37166    0.0
37167    0.0
37168    0.0
37169    0.0
37170    0.0
Name: Ne.CentralEurope.diff.0, Length: 37171, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
37166    1.0
37167    1.0
37168    1.0
37169    1.0
37170    1.0
Name: Ne.CentralEurope.diff.0, Length: 37171, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
37166    0.0
37167    0.0
37168    0.0
37169    0.0
37170    0.0
Name: Ne.CentralEurope.diff.1, Length: 37171, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
37166    1.0
37167    1.0
37168    1.0
37169    1.0
37170    1.0
Name: Ne.CentralEurope.diff.1, Length: 37171, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
37166    0.0
37167    0.0
37168    0.0
37169    0.0
37170    0.0
Name: Ne.CentralEurope.diff.2, Length: 37

0       -0.416385
1       -0.422076
2       -0.444779
3       -0.433126
4       -0.463477
           ...   
37166   -0.460468
37167   -0.456969
37168   -0.415389
37169   -0.442327
37170   -0.418174
Name: Ne.SouthernEurope.diff.18, Length: 37171, dtype: float64
0        0.888920
1        0.887444
2        0.881564
3        0.884580
4        0.876735
           ...   
37166    0.877511
37167    0.878414
37168    0.889179
37169    0.882198
37170    0.888456
Name: Ne.SouthernEurope.diff.18, Length: 37171, dtype: float64
0        0.068848
1        0.069789
2        0.073542
3        0.071616
4        0.076634
           ...   
37166    0.076137
37167    0.075558
37168    0.068683
37169    0.073137
37170    0.069143
Name: Ne.SouthernEurope.diff.19, Length: 37171, dtype: float64
0        1.018944
1        1.019204
2        1.020242
3        1.019709
4        1.021097
           ...   
37166    1.020960
37167    1.020800
37168    1.018899
37169    1.020130
37170    1.019026
Name: Ne.SouthernEu

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_r

In [573]:
summary_rt_local_df = generate_rt_summary_df(rt_local_df)
summary_rt_local_and_intro_df = generate_rt_summary_df(rt_local_and_intro_df)


  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [574]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,CentralEurope,0,0.998632,0.999515,0.997400,0.999245,0.998611
0,CentralEurope,2,0.998623,0.999553,0.997401,0.999242,0.998596
0,CentralEurope,4,0.998925,0.999667,0.997934,0.999410,0.998889
0,CentralEurope,6,0.999141,0.999738,0.998350,0.999552,0.999140
0,CentralEurope,8,0.999101,0.999718,0.998259,0.999535,0.999101
...,...,...,...,...,...,...,...
0,WesternEurope,26,1.179974,1.223352,1.136090,1.196369,1.166676
0,WesternEurope,28,1.262636,1.316540,1.211272,1.282666,1.246368
0,WesternEurope,30,2.153832,2.352303,1.942157,2.226005,2.085423
0,WesternEurope,32,1.437470,1.515675,1.360742,1.463841,1.411685


In [575]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7 
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)

In [576]:
line2 = alt.Chart(summary_rt_local_and_intro_df, ).mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Phylo-estimated Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme",legend=alt.Legend(title= "Region",offset = -170, labelFontSize = 20, titleFontSize = 20, symbolSize = 150))
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="Phylo-estimated Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale(range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot = band2 + line2 +one_line
phlyo_rt_plot

In [577]:
line = alt.Chart(summary_rt_local_df, title = "Rt (local only phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme")
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0.4) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(summary_rt_local_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0.4) #& (datum.upper_hpd_log_95 < 4)
)

local_phlyo_rt_plot = band + line +one_line
local_phlyo_rt_plot

In [578]:


(phlyo_rt_plot & local_phlyo_rt_plot).resolve_scale(y = "shared").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)

In [579]:
case_rt = pd.read_csv("../case-rt-analysis/estimates/case-rt-estimates_region.tsv", sep="\t")

In [580]:
case_rt

Unnamed: 0,date,region,median_R,R_upper_95,R_lower_95
0,2022-05-07,Western Europe,4.462712,5.386952,3.585849
1,2022-05-08,Western Europe,4.471126,5.369559,3.632242
2,2022-05-09,Western Europe,4.468350,5.345257,3.670224
3,2022-05-10,Western Europe,4.465077,5.323310,3.695369
4,2022-05-11,Western Europe,4.465657,5.287955,3.713896
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.230695,1.406764,0.028778
1218,2023-01-23,Central Europe,0.230555,1.446675,0.027869
1219,2023-01-24,Central Europe,0.228316,1.489627,0.027010
1220,2023-01-25,Central Europe,0.229833,1.534110,0.026198


In [581]:
band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.5 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('R_lower_95',axis=alt.Axis(title="Case Based Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_95' ),
    alt.Color("region",legend=None,  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300).transform_filter(
    (datum.R_upper_95 <7) #& (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("region")
).properties(
    width=850,
    height=300).transform_filter(
    (datum.median_R <5) #& (datum.upper_hpd_log_95 < 4)
)

case_rt_plot = band3 + line3 + one_line
case_rt_plot

In [582]:
#creating a joint figure for manuscript!
# Add labels to each chart
phlyo_rt_plot = phlyo_rt_plot.properties(title='A')
case_rt_plot = case_rt_plot.properties(title='B')

combined_plot = alt.vconcat(phlyo_rt_plot, case_rt_plot,  spacing=50).resolve_scale(x='shared', y = "shared", color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=28,
    titleFontSize=28
)





In [607]:
vac_df =pd.read_csv("../data/us_mpox_vaccinaiton.csv")
vac_df.index = vac_df["Unnamed: 0"]
vac_df = vac_df.transpose()
vac_df = vac_df.drop(index = "Unnamed: 0").reset_index()
vac_df = vac_df.replace(',','', regex=True)
vac_df.columns.name = None
vac_df["First doses"] = pd.to_numeric(vac_df["First doses"])
vac_df = vac_df.rename(columns={"index": "date"} )
long_vac = pd.melt(vac_df, id_vars='date', value_vars=['First doses', 'Second doses'])
long_vac.date = long_vac.date.apply(pd.to_datetime)
long_vac = long_vac[long_vac.date >"2022-05-25"]

vac_plot = alt.Chart(long_vac).mark_bar(interpolate='monotone', opacity = 0.5 , width = 20).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('value',axis=alt.Axis(title="Vaccination Cases", grid=False)),
    alt.Color("variable",legend=alt.Legend(title= "Dosage",offset = -220, labelFontSize = 20, titleFontSize = 20), scale=alt.Scale(scheme='accent'))
         ).properties(
    width=850,
    height=300)

summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-05-15"]
case_rt = case_rt[case_rt.date >"2022-05-15"]

line2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(interpolate='monotone', opacity = 1 ,color = "#2664A5").encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Overall Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) & (datum.deme == "NorthAmerica") #(datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone',color = "#2664A5"
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.deme == "NorthAmerica") # (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot_NA = band2 + line2 +one_line



band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.3 ,color = "#2664A5", strokeDash=[3,5]).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('R_lower_95',axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_95' ),
    alt.Color("region",legend=alt.Legend(title= "Region",offset = -220, labelFontSize = 20, titleFontSize = 20))
, alt.StrokeDash("region", legend=alt.Legend(title= "Region",offset = -220, labelFontSize = 20, titleFontSize = 20))).properties(
    width=850,
    height=300).transform_filter(
    (datum.region == "North America") # (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#2664A5", strokeDash=[3,5]
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("region"),
    
).properties(
    width=850,
    height=300).transform_filter(
    (datum.region == "North America") 
)

case_rt_plot_NA =   band3 + line3+  one_line


vac_comb = (vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent", color = "independent", strokeDash = "independent")
(vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent", color = "independent", strokeDash = "independent").configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


In [608]:
case_rt

Unnamed: 0,date,region,median_R,R_upper_95,R_lower_95
9,2022-05-16,Western Europe,4.373605,5.023422,3.750862
10,2022-05-17,Western Europe,4.332077,4.957124,3.732784
11,2022-05-18,Western Europe,4.287687,4.888767,3.704044
12,2022-05-19,Western Europe,4.242378,4.820107,3.674084
13,2022-05-20,Western Europe,4.195495,4.746343,3.642665
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.230695,1.406764,0.028778
1218,2023-01-23,Central Europe,0.230555,1.446675,0.027869
1219,2023-01-24,Central Europe,0.228316,1.489627,0.027010
1220,2023-01-25,Central Europe,0.229833,1.534110,0.026198


## now we separate out Rt by contribution 

In [609]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
#summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.date >"2022-06-17"]
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
#summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-06-17"]
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df.

In [610]:
summary_rt_local_df['Contribution'] = "Local"
summary_rt_local_and_intro_df['Contribution'] = "Local + Intoductions"
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "NorthAmerica"]
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.deme == "NorthAmerica"]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['Contribution'] = "Local"


In [611]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date,Contribution
0,NorthAmerica,0,0.583764,0.636911,0.534075,0.599915,0.563902,0,2023-01-03,Local
0,NorthAmerica,2,0.702712,0.740362,0.663387,0.716382,0.68933,14,2022-12-20,Local
0,NorthAmerica,4,0.806436,0.835482,0.777984,0.81634,0.795897,28,2022-12-06,Local
0,NorthAmerica,6,0.80672,0.835573,0.778677,0.816323,0.796103,42,2022-11-22,Local
0,NorthAmerica,8,0.84381,0.871197,0.815837,0.854686,0.835072,56,2022-11-08,Local
0,NorthAmerica,10,0.817065,0.844971,0.78758,0.828531,0.808363,70,2022-10-25,Local
0,NorthAmerica,12,0.830724,0.857807,0.802101,0.840949,0.821444,84,2022-10-11,Local
0,NorthAmerica,14,0.849011,0.8758,0.819341,0.860445,0.841094,98,2022-09-27,Local
0,NorthAmerica,16,0.886362,0.905014,0.867685,0.893177,0.880083,112,2022-09-13,Local
0,NorthAmerica,18,1.014049,1.031069,0.996616,1.020198,1.008614,126,2022-08-30,Local


In [612]:
combined_rt = pd.concat([ summary_rt_local_and_intro_df, summary_rt_local_df], ignore_index=True)


In [613]:
combined_rt = combined_rt[combined_rt.date > "2022-05-15"]


In [623]:
stream_rt = alt.Chart(combined_rt, title = "North American Rt by Contribution").mark_area(interpolate='monotone', opacity = 0.8 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B")),
    alt.Y('mean_percent',axis=alt.Axis(title="Estimated Rt", grid=False),stack = False, scale=alt.Scale(domain=(0, 3.5))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N',legend=alt.Legend(title= "Contribution",offset = -180, labelFontSize = 16, titleFontSize = 20))#, scale=alt.Scale(domain = ['Local',  "lo"], range = ["#4c90c0",  "#df4327"]))
).properties(
    width=800,
    height=300
)

#.transform_filter(
#    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
#)

In [624]:
contribution_plot = stream_rt + one_line
(stream_rt + one_line).configure_axis(
    labelFontSize=20,
    titleFontSize=20
)


In [625]:
#creating a joint figure for manuscript!
# Add labels to each chart
contribution_plot = contribution_plot.properties(title='A')
vac_comb = vac_comb.properties(title='B')

combined_plot = alt.vconcat(contribution_plot, vac_comb,  spacing=50).resolve_scale(x='shared', y = "shared", color ='independent')


combined_plot.configure_view(
    strokeWidth=0
).configure_title(
    anchor='start', fontSize= 35
).configure_axis(
    labelFontSize=22,
    titleFontSize=22
).configure_legend(
     labelFontSize=28,
    titleFontSize=28
).transform_filter(
    (datum.date > "2022-05-25") # (datum.upper_hpd_log_95 < 4)
)





In [593]:
## Now we calculate the percent of RT that is influenced by introductions

In [594]:
rt_local_intro_northamerica_df = rt_local_and_intro_df.filter(regex='.NorthAmerica.')
rt_local_northamerica_df = rt_local_df.filter(regex='.NorthAmerica.')

In [595]:
rt_local_intro_northamerica_df

Unnamed: 0,rt.NorthAmerica.0,rt.NorthAmerica.1,rt.NorthAmerica.2,rt.NorthAmerica.3,rt.NorthAmerica.4,rt.NorthAmerica.5,rt.NorthAmerica.6,rt.NorthAmerica.7,rt.NorthAmerica.8,rt.NorthAmerica.9,...,rt.NorthAmerica.26,rt.NorthAmerica.27,rt.NorthAmerica.28,rt.NorthAmerica.29,rt.NorthAmerica.30,rt.NorthAmerica.31,rt.NorthAmerica.32,rt.NorthAmerica.33,rt.NorthAmerica.34,rt.NorthAmerica.35
0,0.616599,0.571883,0.739991,1.067453,0.854339,0.854348,0.852724,0.861548,0.899643,0.872760,...,1.147199,1.344113,2.793395,2.839210,2.398714,1.0,1.0,1.0,1.0,1.0
1,0.611936,0.566773,0.736684,1.068389,0.852421,0.852430,0.850786,0.859721,0.898306,0.871076,...,1.149276,1.349144,2.823846,2.870534,2.421784,1.0,1.0,1.0,1.0,1.0
2,0.593486,0.546581,0.723555,1.072126,0.844788,0.844798,0.843072,0.852451,0.892977,0.864372,...,1.157578,1.369302,2.946933,2.997169,2.514891,1.0,1.0,1.0,1.0,1.0
3,0.602925,0.556906,0.730281,1.070207,0.848702,0.848712,0.847028,0.856179,0.895710,0.867810,...,1.153313,1.358938,2.883436,2.931837,2.466888,1.0,1.0,1.0,1.0,1.0
4,0.578476,0.530190,0.712822,1.075209,0.838525,0.838536,0.836744,0.846484,0.888600,0.858869,...,1.164436,1.386010,3.050223,3.103461,2.592848,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37166,0.580881,0.532814,0.714545,1.074713,0.839532,0.839542,0.837761,0.847443,0.889304,0.859754,...,1.163331,1.383315,3.033482,3.086232,2.580224,1.0,1.0,1.0,1.0,1.0
37167,0.583682,0.535871,0.716550,1.074136,0.840703,0.840713,0.838944,0.848559,0.890122,0.860783,...,1.162047,1.380184,3.014074,3.066258,2.565582,1.0,1.0,1.0,1.0,1.0
37168,0.617417,0.572780,0.740570,1.067289,0.854675,0.854684,0.853064,0.861868,0.899878,0.873055,...,1.146836,1.343233,2.788078,2.833742,2.394684,1.0,1.0,1.0,1.0,1.0
37169,0.595466,0.548746,0.724968,1.071723,0.845611,0.845621,0.843904,0.853234,0.893552,0.865095,...,1.156680,1.367119,2.933519,2.983367,2.504755,1.0,1.0,1.0,1.0,1.0


In [596]:
rt_diff_df = rt_local_northamerica_df.div(rt_local_intro_northamerica_df.values)

In [597]:
rt_df_summary = generate_rt_summary_df(rt_diff_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [598]:
rt_df_summary['days'] = rt_df_summary.interval.astype(int) *7
rt_df_summary['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - rt_df_summary.days.map(timedelta)
rt_df_summary = rt_df_summary[rt_df_summary.date >"2022-06-17"]
rt_df_summary.date = rt_df_summary.date.astype(str)

In [599]:
line = alt.Chart(rt_df_summary, title = "North America" ).mark_area( interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Percent of Rt due to Introductions ", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(rt_df_summary).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

rt_diff_plot = line + band +one_line
(vac_plot + (rt_diff_plot)).resolve_scale(y="independent")