In [1]:
import baltic as bt
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
from datetime import datetime as dt
from datetime import timedelta
import time
#import pymc3
import math
import arviz as az
import re
#from hpd import hpd
import scipy.stats as stats
from io import StringIO
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')


### Load Log files

In [2]:
from datetime import date
current_date = str(date.today())

In [3]:
log_file_path = "../../../mpox_rhino/300_region_glm_air_travel.log"


In [4]:
domain = ['CentralEurope', 'NorthAmerica', 'SouthAmerica' ,"SouthernEurope", "WesternEurope"]
range_ = ['#EEC060', '#2664A5', '#A76BB1', "#EEA160", "#356D4C"]

## we can then estimate the transmission rates via the change in Ne's over time plus the uninfectious rate

In [5]:
def read_in_Ne_changes_mascot(log_file_path):
    
    Ne_skyline_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
            
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    Ne_column_indices = []   # list to store column indices
                    Nes_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "Ne." in col:
                            Ne_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in Ne_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        interval = name.split(".")[2]
                       
                        Nes_key[n] = name
                        Ne_skyline_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    Ne_skyline_dict["sample"].append(sample)

                    for index in Ne_column_indices:
                        name = Nes_key[index]
                        Ne_skyline_dict[name].append(line.split("\t")[index])
                    
                
    return(Ne_skyline_dict)

In [6]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            #print(deme)
            interval = i.split(".")[2]
            #print(interval)
            #print(i)
            next_interval = int(interval)+7
            local_series = input_df[i].astype('float').to_numpy()
            #print(local_series)
            mean_log = local_series.mean()
            mean_linear = 10**mean_log
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            lower_hpd_linear_95 = 10**lower_hpd_log_95
            upper_hpd_log_95 = hpd_95[1]
            upper_hpd_linear_95 = 10**upper_hpd_log_95
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            lower_hpd_linear_50 = 10**lower_hpd_log_50
            upper_hpd_log_50 = hpd_50[1]
            upper_hpd_linear_50 = 10**upper_hpd_log_50
            
            try:
                next_local_series = input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float').to_numpy()
                diff_series = np.subtract(local_series, next_local_series)
                #print(local_series)
                #print(next_local_series)
                #print(diff_series)
                diff_mean_log = diff_series.mean()
                diff_hpd_95 = az.hdi(diff_series, 0.95)
                diff_lower_hpd_log_95 = diff_hpd_95[0]
                diff_lower_hpd_linear_95 = math.exp(diff_lower_hpd_log_95)
                diff_upper_hpd_log_95 = diff_hpd_95[1]
                diff_upper_hpd_linear_95 = math.exp(diff_upper_hpd_log_95)
                diff_hpd_50 = az.hdi(diff_series, 0.50)
                diff_lower_hpd_log_50 = diff_hpd_50[0]
                diff_lower_hpd_linear_50 = math.exp(diff_lower_hpd_log_50)
                diff_upper_hpd_log_50 = diff_hpd_50[1]
                diff_upper_hpd_linear_50 = math.exp(diff_upper_hpd_log_50)
            except KeyError:
                pass   
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_Ne_log":mean_log,"mean_Ne_linear":mean_linear, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50,
                                                   "upper_hpd_linear":upper_hpd_linear_95,"lower_hpd_linear":lower_hpd_linear_95,
                                                   "diff_mean_Ne_log":diff_mean_log, 
                                                   "diff_upper_hpd_log_95":diff_upper_hpd_log_95,"diff_lower_hpd_log_95":diff_lower_hpd_log_95, 
                                                   "diff_upper_hpd_log_50":diff_upper_hpd_log_50,"diff_lower_hpd_log_50":diff_lower_hpd_log_50,
                                                   "diff_upper_hpd_linear":diff_upper_hpd_linear_95,"diff_lower_hpd_linear":diff_lower_hpd_linear_95,
                                                   "diff_upper_hpd_linear_50":diff_upper_hpd_linear_50,"diff_lower_hpd_linear_50":diff_lower_hpd_linear_50})
                new_df = new_df.append(local_df)
                #print(new_df)
            except:
                pass
            
    return(new_df)

In [7]:
Ne_skyline = read_in_Ne_changes_mascot(log_file_path)

In [8]:
Ne_df = pd.DataFrame.from_dict(Ne_skyline)
print(len(Ne_df))
burnin_percent = 0.3

rows_to_remove = int(len(Ne_df)* burnin_percent)
Ne_df = Ne_df.iloc[rows_to_remove:]

print(len(Ne_df))
Ne_df = Ne_df.reset_index()
Ne_df

20828
14580


Unnamed: 0,index,sample,Ne.CentralEurope.0,Ne.CentralEurope.1,Ne.CentralEurope.2,Ne.CentralEurope.3,Ne.CentralEurope.4,Ne.CentralEurope.5,Ne.CentralEurope.6,Ne.CentralEurope.7,...,Ne.WesternEurope.29,Ne.WesternEurope.30,Ne.WesternEurope.31,Ne.WesternEurope.32,Ne.WesternEurope.33,Ne.WesternEurope.34,Ne.WesternEurope.35,Ne.WesternEurope.36,Ne.WesternEurope.37,Ne.WesternEurope.38
0,6248,6248000,0.17149726409873828,0.17149726409873828,0.17149726409873828,0.17149726409873828,0.17149726409873828,0.17149726409873828,0.17149726409873828,0.17149726409873828,...,2.3352146661015833,2.0795960530001922,1.609146071371652,0.8536512304576634,0.3832229091020474,0.3832229091020474,0.3832229091020474,0.3832229091020474,0.3832229091020474,0.3832229091020474
1,6249,6249000,0.182829848243187,0.182829848243187,0.182829848243187,0.182829848243187,0.182829848243187,0.182829848243187,0.182829848243187,0.182829848243187,...,2.1486863837083443,1.9252579211324738,1.5100741823680641,0.8284172909955214,0.38799172193968995,0.38799172193968995,0.38799172193968995,0.38799172193968995,0.38799172193968995,0.38799172193968995
2,6250,6250000,0.22529918459959855,0.22529918459959855,0.22529918459959855,0.22529918459959855,0.22529918459959855,0.22529918459959855,0.22529918459959855,0.22529918459959855,...,2.385595277038128,2.1480539490134616,1.7032261071256545,0.9598081039070678,0.4650400462162867,0.4650400462162867,0.4650400462162867,0.4650400462162867,0.4650400462162867,0.4650400462162867
3,6251,6251000,0.19197809845408687,0.19197809845408687,0.19197809845408687,0.19197809845408687,0.19197809845408687,0.19197809845408687,0.19197809845408687,0.19197809845408687,...,2.113743744044876,1.9008721532526358,1.5030313616160917,0.8411711919078873,0.4040224824117912,0.4040224824117912,0.4040224824117912,0.4040224824117912,0.4040224824117912,0.4040224824117912
4,6252,6252000,0.21339003819015032,0.21339003819015032,0.21339003819015032,0.21339003819015032,0.21339003819015032,0.21339003819015032,0.21339003819015032,0.21339003819015032,...,2.3903090357227517,2.147946151989791,1.6955310002716129,0.9449542735523817,0.451485262462401,0.451485262462401,0.451485262462401,0.451485262462401,0.451485262462401,0.451485262462401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,20823,20823000,0.21806142959385705,0.21806142959385705,0.21806142959385705,0.21806142959385705,0.21806142959385705,0.21806142959385705,0.21806142959385705,0.21806142959385705,...,2.121320183994583,1.9196552657778365,1.5390329621063694,0.8912889210630168,0.446998721914444,0.446998721914444,0.446998721914444,0.446998721914444,0.446998721914444,0.446998721914444
14576,20824,20824000,0.1914716028081317,0.1914716028081317,0.1914716028081317,0.1914716028081317,0.1914716028081317,0.1914716028081317,0.1914716028081317,0.1914716028081317,...,2.7358045254699617,2.4339304272888986,1.879210379584147,0.9915487940201955,0.4421002748686486,0.4421002748686486,0.4421002748686486,0.4421002748686486,0.4421002748686486,0.4421002748686486
14577,20825,20825000,0.21611199243625676,0.21611199243625676,0.21611199243625676,0.21611199243625676,0.21611199243625676,0.21611199243625676,0.21611199243625676,0.21611199243625676,...,2.3585363824000813,2.1207848886927185,1.676520275251349,0.9377146550023782,0.4500599533281953,0.4500599533281953,0.4500599533281953,0.4500599533281953,0.4500599533281953,0.4500599533281953
14578,20826,20826000,0.20191934487715088,0.20191934487715088,0.20191934487715088,0.20191934487715088,0.20191934487715088,0.20191934487715088,0.20191934487715088,0.20191934487715088,...,2.9454283756823196,2.618310655443354,2.0179632623020813,1.0600733630070596,0.4700260647970965,0.4700260647970965,0.4700260647970965,0.4700260647970965,0.4700260647970965,0.4700260647970965


## calculating transmission rate

In [364]:
def generate_summary_diff_df(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            next_interval = int(interval)+2 #averaging over three weeks to reduce noise
            local_series = input_df[i].astype('float').to_numpy()
           
            try:
                new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
            
            
            except KeyError:
                pass 
            
            
    return(new_df)

In [365]:
ne_diff_summary = generate_summary_diff_df(Ne_df)

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(input_df[i].astype("float")) - np.log(input_df["Ne"+"."+ str(deme) +"." + str(next_interval)].astype('float')))
  new_df["Ne"+"."+ str(deme) +".diff." + str(interval)] = (52/2)*(np.log(in

In [366]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.664121,8.068780,9.682405,23.150562,37.305895,20.823553,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.152829,7.641892,9.170145,21.925754,35.332181,19.721858,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.743496,7.300131,8.760038,20.945189,33.752054,18.839856,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.848653,7.387929,8.865394,21.197095,34.157987,19.066441,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.912233,7.441013,8.929094,21.349402,34.403421,19.203439,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.327239,6.952589,8.342993,19.948039,32.145199,17.942936,0.0,0.0,0.0,0.0
14576,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.746481,8.137545,9.764921,23.347858,37.623826,21.001017,0.0,0.0,0.0,0.0
14577,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8.857585,7.395386,8.874343,21.218492,34.192466,19.085687,0.0,0.0,0.0,0.0
14578,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.813738,8.193699,9.832304,23.508971,37.883452,21.145936,0.0,0.0,0.0,0.0


In [367]:
uninfectious_rate = 365/4.5

#taken from https://www.medrxiv.org/content/10.1101/2022.08.17.22278897v1.full.pdf


In [368]:
incubation_period = 365/8

In [369]:
seir_growth_rate = ((ne_diff_summary*2 + uninfectious_rate + incubation_period)**2 - (incubation_period- uninfectious_rate)**2)/(4*incubation_period)



seir_growth_rate



Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,110.002911,104.951354,110.061451,157.165045,215.242262,148.458322,81.111111,81.111111,81.111111,81.111111
1,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,108.371785,103.618556,108.426840,152.552721,206.617441,144.419018,81.111111,81.111111,81.111111,81.111111
2,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.074188,102.557294,107.126483,148.907555,199.835609,141.223555,81.111111,81.111111,81.111111,81.111111
3,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.406838,102.829443,107.459840,149.839971,201.567407,142.041212,81.111111,81.111111,81.111111,81.111111
4,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.608200,102.994152,107.661629,150.405076,202.617987,142.536673,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,105.762175,101.483332,105.811693,145.243957,193.051306,138.009014,81.111111,81.111111,81.111111,81.111111
14576,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,110.266729,105.166792,110.325835,157.914161,216.647543,149.113960,81.111111,81.111111,81.111111,81.111111
14577,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,107.435115,102.852574,107.488177,149.919297,201.714836,142.110766,81.111111,81.111111,81.111111,81.111111
14578,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,110.482386,105.342874,110.541954,158.527162,217.798395,149.650385,81.111111,81.111111,81.111111,81.111111


In [253]:
ne_diff_summary += uninfectious_rate

In [248]:
ne_diff_summary

Unnamed: 0,Ne.CentralEurope.diff.0,Ne.CentralEurope.diff.1,Ne.CentralEurope.diff.2,Ne.CentralEurope.diff.3,Ne.CentralEurope.diff.4,Ne.CentralEurope.diff.5,Ne.CentralEurope.diff.6,Ne.CentralEurope.diff.7,Ne.CentralEurope.diff.8,Ne.CentralEurope.diff.9,...,Ne.WesternEurope.diff.27,Ne.WesternEurope.diff.28,Ne.WesternEurope.diff.29,Ne.WesternEurope.diff.30,Ne.WesternEurope.diff.31,Ne.WesternEurope.diff.32,Ne.WesternEurope.diff.33,Ne.WesternEurope.diff.34,Ne.WesternEurope.diff.35,Ne.WesternEurope.diff.36
0,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,90.775232,89.179892,90.793516,104.261673,118.417006,101.934664,81.111111,81.111111,81.111111,81.111111
1,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,90.263940,88.753003,90.281256,103.036865,116.443292,100.832969,81.111111,81.111111,81.111111,81.111111
2,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,89.854607,88.411242,89.871149,102.056300,114.863165,99.950967,81.111111,81.111111,81.111111,81.111111
3,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,89.959764,88.499040,89.976505,102.308206,115.269098,100.177553,81.111111,81.111111,81.111111,81.111111
4,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,90.023344,88.552124,90.040205,102.460513,115.514532,100.314550,81.111111,81.111111,81.111111,81.111111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,89.438350,88.063700,89.454104,101.059150,113.256310,99.054047,81.111111,81.111111,81.111111,81.111111
14576,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,90.857593,89.248656,90.876032,104.458969,118.734937,102.112128,81.111111,81.111111,81.111111,81.111111
14577,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,89.968696,88.506497,89.985454,102.329603,115.303577,100.196798,81.111111,81.111111,81.111111,81.111111
14578,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,81.111111,...,90.924849,89.304810,90.943415,104.620082,118.994563,102.257047,81.111111,81.111111,81.111111,81.111111


In [311]:
seir_growth_rate.filter(regex='Ne.NorthAmerica.diff.2')

Unnamed: 0,Ne.NorthAmerica.diff.2,Ne.NorthAmerica.diff.20,Ne.NorthAmerica.diff.21,Ne.NorthAmerica.diff.22,Ne.NorthAmerica.diff.23,Ne.NorthAmerica.diff.24,Ne.NorthAmerica.diff.25,Ne.NorthAmerica.diff.26,Ne.NorthAmerica.diff.27,Ne.NorthAmerica.diff.28,Ne.NorthAmerica.diff.29
0,60.356054,97.761240,107.505490,110.521475,111.876710,106.210738,102.994224,92.841187,108.515710,223.519554,227.148355
1,61.384304,96.844229,106.022466,108.859451,110.133701,104.804027,101.775663,92.202215,106.972920,214.300735,217.667657
2,62.213129,96.112991,104.842151,107.537396,108.747558,103.684177,100.804997,91.692142,105.745266,207.055923,210.218817
3,61.999727,96.300599,105.144781,107.876306,109.102868,103.971327,101.053945,91.823054,106.060014,208.905594,212.120425
4,61.870861,96.414112,105.327955,108.081460,109.317960,104.145123,101.204602,91.902247,106.250529,210.027800,213.274193
...,...,...,...,...,...,...,...,...,...,...,...
14575,63.061100,95.372037,103.648231,106.200765,107.346437,102.551172,99.822378,91.174792,104.503671,199.812272,202.772820
14576,60.191150,97.909332,107.745284,110.790307,112.158684,106.438158,103.191149,92.944307,108.765197,225.022149,228.693863
14577,61.981616,96.316542,105.170505,107.905115,109.133073,103.995734,101.075103,91.834177,106.086769,209.063069,212.282326
14578,60.056638,98.030343,107.941289,111.010065,112.389195,106.624041,103.352090,93.028555,108.969131,226.252804,229.959713


In [295]:
for i in range(190,252, 1):

    seir_growth_rate = seir_growth_rate.drop(columns="Ne.NorthAmerica.diff."+str(i))

KeyError: "['Ne.NorthAmerica.diff.190'] not found in axis"

In [260]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "Ne" in i:
            deme = i.split(".")[1]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [300]:
#just checking to make sure the changes in Ne look okay
test_north = generate_summary_df(seir_growth_rate)

In [301]:
test_north

In [298]:
test_north['days'] = test_north.interval.astype(int) *7
test_north['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - test_north.days.map(timedelta)
test_north.date = test_north.date.astype(str)

AttributeError: 'DataFrame' object has no attribute 'interval'

In [299]:
line = alt.Chart(test_north).mark_area().encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="transmission rate"), scale = alt.Scale(zero= False)),
    alt.Y2('upper_hpd_log_50'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band = alt.Chart(test_north).mark_area(
    opacity=0.3
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95'),
    alt.Y2('upper_hpd_log_95'),
    color=alt.Color('deme:N')
).properties(
    width=850,
    height=300
)

band + line

ValueError: lower_hpd_log_95 encoding field is specified without a type; the type cannot be inferred because it does not match any column in the data.

alt.LayerChart(...)

## calculating backward migration rates

In [20]:
def read_in_forward_migration_rates_mascot(log_file_path):
    
    mig_rates_dict = {"sample":[]}
    
    with open(log_file_path, "r") as infile:
        line_number = 0
        for line in infile:
            #print(line_number)
            line_number += 1
            if not line.startswith("#"):  # log combiner will sometimes put the entire xml at the start of the log file
                # use the first line to find the migration rate columns
                
            # use the first line to find the migration rate columns
                if "posterior" in line:
                    all_cols = line.split("\t")
                    mig_column_indices = []   # list to store column indices
                    mig_key = {}   # dictionary to store the column index to map to column name

                    for i in range(len(all_cols)):
                        col = all_cols[i]
                        if "mig." in col:
                            mig_column_indices.append(i)

                    # make an empty dictionary to store Nes and generate dictionary to convert index to name
                    for n in mig_column_indices:
                        name = line.split("\t")[n]
                        deme = name.split(".")[1]# the syntax here is "NeLog.state01" where 0 is deme and 1 is interval 1
                        #interval = name.split(".")[2]
                       
                        mig_key[n] = name
                        mig_rates_dict[name] = []


                # read in actual parameter estimates and store in dictionary
                else:
                    sample = line.split("\t")[0]
                    mig_rates_dict["sample"].append(sample)

                    for index in mig_column_indices:
                        name = mig_key[index]
                        mig_rates_dict[name].append(line.split("\t")[index])
                    
                
                
                
    return(mig_rates_dict)

In [21]:
migration_rates_f = read_in_forward_migration_rates_mascot(log_file_path)

In [22]:
mig_df_f = pd.DataFrame.from_dict(migration_rates_f)


In [23]:
burnin_percent = 0.3
print(len(mig_df_f))
rows_to_remove = int(len(mig_df_f)* burnin_percent)
mig_df_f = mig_df_f.iloc[rows_to_remove:]

print(len(mig_df_f))
mig_df_f = mig_df_f.reset_index()
mig_df_f.head()

20828
14580


Unnamed: 0,index,sample,mig.CentralEurope_to_NorthAmerica,mig.CentralEurope_to_SouthAmerica,mig.CentralEurope_to_SouthernEurope,mig.CentralEurope_to_WesternEurope,mig.NorthAmerica_to_CentralEurope,mig.NorthAmerica_to_SouthAmerica,mig.NorthAmerica_to_SouthernEurope,mig.NorthAmerica_to_WesternEurope,...,mig.SouthAmerica_to_SouthernEurope,mig.SouthAmerica_to_WesternEurope,mig.SouthernEurope_to_CentralEurope,mig.SouthernEurope_to_NorthAmerica,mig.SouthernEurope_to_SouthAmerica,mig.SouthernEurope_to_WesternEurope,mig.WesternEurope_to_CentralEurope,mig.WesternEurope_to_NorthAmerica,mig.WesternEurope_to_SouthAmerica,mig.WesternEurope_to_SouthernEurope
0,6248,6248000,0.0230191151773491,0.0038908902726254,0.0818596895333981,0.1929343217849323,0.026041811849736,0.6505462035976334,0.7094069213364713,1.537803687206762,...,0.3562941099881785,0.1868339665562161,0.0833445534664238,0.6981622117124185,0.3239856922242947,2.855517779027983,0.1993010393648946,1.4977251438014925,0.1937044849993873,2.949840466336304
1,6249,6249000,0.0304117905843345,0.0064377386089075,0.091759605251035,0.193456767456294,0.0338516478607259,0.5598844873929666,0.603163895053174,1.1884396693393389,...,0.3321107429348099,0.1888782746796169,0.0921421718586459,0.5918095546150538,0.3079431327584309,2.0603389124348084,0.1994262193113729,1.1605074046662596,0.1942784546133692,2.094884919655771
2,6250,6250000,0.0449341537688709,0.0118499618570454,0.1837113422019167,0.396451615126732,0.0492467929927087,0.8333398059828189,0.651590120700336,1.1686012600584426,...,0.4132651283980588,0.248017625217344,0.1872554770943246,0.6413433907374555,0.3862007368394263,3.42678600071646,0.4080184274382105,1.1517777788200674,0.251251463958338,3.520260607501221
3,6251,6251000,0.1119905867083576,0.0455610922566255,0.3336166251539106,0.5808299968069213,0.1209591556148983,0.927550961322504,0.716906455356746,1.0459409268136557,...,0.5260407407273846,0.3691336000192193,0.3411515202047898,0.6891393106745948,0.501105530002333,2.593821483642381,0.5953899214082818,1.0269438659036576,0.3801675118590034,2.6535625427524927
4,6252,6252000,0.1088506320183881,0.0418563560020543,0.2075269359517726,0.3268018354719497,0.1146721298319434,0.6355383578148428,0.6721440507581152,0.9922744020896668,...,0.4602564047166944,0.3291760316885648,0.2115588015932193,0.6461106387764076,0.4257528540521499,1.3734684828819974,0.3305308622470763,0.9742520675842612,0.3390155567635849,1.4051022951312917


In [24]:
def calc_backwards_mig_rates(mig_df_f, targetName):    
    mig_rates = {}
    target = targetName
    target_columns = []
    ne_interval= 0
    mig_list = []

    for name in mig_df_f.columns:
        if target in name.split("_")[0]:
            target_columns.append(name)

    for name in Ne_df.columns:
        if target in name:
            ne_interval+=1

    for interval in range(0, ne_interval,1):
        mig_rates[str(interval)+ "." + "bmr" + "." +  str(target)] = []
        for index_1, row_1 in mig_df_f.iterrows():
            mig_list = []
            for direction in target_columns:
                other_loc = direction.split("_")[2]
                mig_rate_b = pd.to_numeric(row_1[direction]) * ((pd.to_numeric(Ne_df.loc[index_1,"Ne."+ str(target) + "." + str(interval)]))/(pd.to_numeric(Ne_df.loc[index_1, "Ne."+ str(other_loc) + "." + str(interval)])))
                #print(mig_rate_b)
                mig_list.append(mig_rate_b)
            combined_mig = np.sum(np.array(mig_list))
            #print(combined_mig)
            mig_rates[str(interval)+ "." + "bmr" + "." + str(target)].append(combined_mig)

    return(mig_rates) 


In [25]:
unique_column= []
unique_target = []
for name in mig_df_f.columns:
    try:

        if name.split("_")[0].split(".")[1] not in unique_target:
            unique_column.append(name)
            unique_target.append(name.split("_")[0].split(".")[1])
    except:
        pass



In [26]:
mig_rates = []
for target in unique_target:
    mig_rates.append(calc_backwards_mig_rates(mig_df_f, target))
    


In [27]:
mr_b_df = pd.DataFrame()
for x in mig_rates:
    x_df = pd.DataFrame(x)
    mr_b_df = pd.concat([mr_b_df, x_df], axis=1)


In [28]:
mr_b_df

Unnamed: 0,0.bmr.CentralEurope,1.bmr.CentralEurope,2.bmr.CentralEurope,3.bmr.CentralEurope,4.bmr.CentralEurope,5.bmr.CentralEurope,6.bmr.CentralEurope,7.bmr.CentralEurope,8.bmr.CentralEurope,9.bmr.CentralEurope,...,29.bmr.WesternEurope,30.bmr.WesternEurope,31.bmr.WesternEurope,32.bmr.WesternEurope,33.bmr.WesternEurope,34.bmr.WesternEurope,35.bmr.WesternEurope,36.bmr.WesternEurope,37.bmr.WesternEurope,38.bmr.WesternEurope
0,0.077704,0.096926,0.077126,0.062269,0.058754,0.053860,0.048052,0.059851,0.049718,0.035407,...,8.893854,9.291355,19.332997,11.764177,10.756357,10.756357,10.756357,10.756357,10.756357,10.756357
1,0.087305,0.106573,0.087355,0.071892,0.068129,0.062526,0.055609,0.068216,0.057793,0.042092,...,7.002122,7.302110,13.915708,8.779545,7.762209,7.762209,7.762209,7.762209,7.762209,7.762209
2,0.183641,0.221826,0.183597,0.152363,0.144482,0.133273,0.119420,0.145566,0.123852,0.091236,...,9.589274,9.842670,15.301515,10.588967,10.835680,10.835680,10.835680,10.835680,10.835680,10.835680
3,0.286555,0.341642,0.290391,0.244908,0.232319,0.212414,0.186903,0.226497,0.196069,0.145266,...,10.839233,11.155893,15.735232,11.026038,9.766201,9.766201,9.766201,9.766201,9.766201,9.766201
4,0.172592,0.205461,0.175699,0.147714,0.140959,0.128475,0.112408,0.134740,0.117757,0.087974,...,7.991339,8.231283,13.012291,8.477935,6.437144,6.437144,6.437144,6.437144,6.437144,6.437144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.171632,0.202298,0.173905,0.147170,0.140665,0.129385,0.114707,0.136777,0.119689,0.090606,...,7.996437,8.306460,13.526212,9.090962,7.542244,7.542244,7.542244,7.542244,7.542244,7.542244
14576,0.130661,0.159094,0.132885,0.110420,0.104529,0.094401,0.081612,0.100028,0.086031,0.062291,...,8.061368,8.304087,13.857475,8.608885,6.259337,6.259337,6.259337,6.259337,6.259337,6.259337
14577,0.140376,0.167468,0.142427,0.119545,0.113944,0.104106,0.091490,0.110036,0.095707,0.071339,...,6.472446,6.700126,10.933837,7.123951,5.513246,5.513246,5.513246,5.513246,5.513246,5.513246
14578,0.118150,0.148468,0.116731,0.093721,0.088075,0.080820,0.072431,0.090949,0.074767,0.052697,...,9.301724,9.431029,14.866505,10.021809,11.739367,11.739367,11.739367,11.739367,11.739367,11.739367


In [133]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    count = 0
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        #if count %7 == 0:
        deme = i.split(".")[2]
        interval = i.split(".")[0]
        local_series = input_df[i].astype('float').to_numpy()
        mean_percent = local_series.mean()
        hpd_95 = az.hdi(local_series, 0.95)
        lower_hpd_log_95 = hpd_95[0]
        upper_hpd_log_95 = hpd_95[1]
        hpd_50 = az.hdi(local_series, 0.50)
        lower_hpd_log_50 = hpd_50[0]
        upper_hpd_log_50 = hpd_50[1]




        try:
            local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                               "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                               "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
            new_df = new_df.append(local_df)
        except:
            pass
        #count +=1  
    return(new_df)

In [134]:
south_mrb_df = generate_summary_df(mr_b_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [135]:
south_mrb_df[south_mrb_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50
0,WesternEurope,0,3.20442,5.833484,1.329448,3.070766,1.747374
0,WesternEurope,1,2.172163,3.839924,0.959613,2.228767,1.367402
0,WesternEurope,2,4.070427,7.54693,1.601475,3.959076,2.246892
0,WesternEurope,3,6.352747,12.2695,2.350789,6.04178,3.264275
0,WesternEurope,4,6.252104,11.913322,2.402928,6.061645,3.365249
0,WesternEurope,5,5.702938,10.814965,2.281757,5.532739,3.112974
0,WesternEurope,6,4.468097,8.311535,1.844863,4.328904,2.488193
0,WesternEurope,7,3.9915,7.579665,1.616737,3.835151,2.149545
0,WesternEurope,8,5.631243,10.687818,2.176448,5.345731,2.958332
0,WesternEurope,9,6.962505,13.32983,2.872632,6.579644,3.65321


In [136]:
south_mrb_df['days'] = south_mrb_df.interval.astype(int)  *7
south_mrb_df['date'] = dt.strptime("2023-01-06",  "%Y-%m-%d") - south_mrb_df.days.map(timedelta)
south_mrb_df.date = south_mrb_df.date.astype(str)



In [137]:
south_mrb_plot = alt.Chart(south_mrb_df).mark_area(interpolate='monotone', opacity = 0.5, color = "orange").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('upper_hpd_log_50',axis=alt.Axis(title="introductions", grid=False)),
    alt.Y2('lower_hpd_log_50' ), 
    alt.Color("deme", legend = alt.Legend(title = "Phylo Introductions"),  scale=alt.Scale( range=range_))
).properties(
    width=800,
    height=600
)

median = alt.Chart(south_mrb_df).mark_line(interpolate = "monotone").encode(
    alt.X('date:T',axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="", grid=False)), 
    alt.Color("deme")
).properties(
    width = 800,
    height = 600
)

In [138]:
 south_mrb_plot

In [139]:
eii_df = pd.read_csv("estimated_importation_intensity_region.csv")

In [140]:
eii_df

Unnamed: 0.1,Unnamed: 0,index,place,year-month,import_risk
0,0,North America.2022-04,North America,2022-04-01,0.0
1,1,Southern Europe.2022-04,Southern Europe,2022-04-01,0.0
2,2,Western Europe.2022-04,Western Europe,2022-04-01,0.0
3,3,Central Europe.2022-04,Central Europe,2022-04-01,0.0
4,4,South America.2022-04,South America,2022-04-01,0.0
5,5,North America.2022-05,North America,2022-05-01,0.081573
6,6,Southern Europe.2022-05,Southern Europe,2022-05-01,0.249455
7,7,Western Europe.2022-05,Western Europe,2022-05-01,0.415544
8,8,Central Europe.2022-05,Central Europe,2022-05-01,0.002354
9,9,South America.2022-05,South America,2022-05-01,0.005387


In [141]:
eii_plot = alt.Chart(eii_df).mark_line().encode(
    alt.X("year-month:T",axis=alt.Axis(title=None, grid=False, format="%B %Y")),
    alt.Y("import_risk"), 
    alt.Color("place:N",legend = alt.Legend(title = "EII"),  scale=alt.Scale(range=range_) )).properties(
    width = 800,
    height = 600
)
eii_plot

In [142]:
(eii_plot + south_mrb_plot).resolve_scale(y = "shared", x = "shared", color= "independent")

###  percent of new cases from intros


In [370]:
def generate_percent_intro_df(input_df):
    
    temp_df = pd.DataFrame()
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        
        interval = i.split(".")[0]
        deme = i.split(".")[2]

        try:
            temp_df["total."+ str(interval)] = seir_growth_rate["Ne." + str(deme) + ".diff." + str(interval)].astype("float") +  input_df[i].astype("float")

            new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0) 


        except KeyError:
            pass 
            
            
    return(new_df)

In [371]:
percent_df = generate_percent_intro_df(mr_b_df)

  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_df["intro.percent"+"." + str(deme) + "." + str(interval)] = input_df[i].astype("float").div(temp_df["total."+ str(interval)], axis = 0)
  new_

In [372]:
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35,intro.percent.WesternEurope.36
0,0.000957,0.001194,0.000950,0.000767,0.000724,0.000664,0.000592,0.000737,0.000613,0.000436,...,0.087948,0.093468,0.074766,0.055819,0.082417,0.073424,0.117086,0.117086,0.117086,0.117086
1,0.001075,0.001312,0.001076,0.000886,0.000839,0.000770,0.000685,0.000840,0.000712,0.000519,...,0.071102,0.075864,0.060662,0.045680,0.063100,0.057308,0.087340,0.087340,0.087340,0.087340
2,0.002259,0.002727,0.002258,0.001875,0.001778,0.001640,0.001470,0.001791,0.001525,0.001124,...,0.097665,0.101643,0.082159,0.062001,0.071124,0.069750,0.117847,0.117847,0.117847,0.117847
3,0.003520,0.004194,0.003567,0.003010,0.002856,0.002612,0.002299,0.002785,0.002411,0.001788,...,0.109988,0.116182,0.091626,0.069293,0.072412,0.072034,0.107466,0.107466,0.107466,0.107466
4,0.002123,0.002527,0.002161,0.001818,0.001735,0.001581,0.001384,0.001658,0.001450,0.001083,...,0.080193,0.087860,0.069098,0.051888,0.060345,0.056140,0.073527,0.073527,0.073527,0.073527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.002112,0.002488,0.002139,0.001811,0.001731,0.001593,0.001412,0.001683,0.001473,0.001116,...,0.081605,0.087266,0.070262,0.054096,0.065478,0.061801,0.085076,0.085076,0.085076,0.085076
14576,0.001608,0.001958,0.001636,0.001359,0.001287,0.001162,0.001005,0.001232,0.001060,0.000767,...,0.080176,0.088778,0.068093,0.049959,0.060118,0.054582,0.071641,0.071641,0.071641,0.071641
14577,0.001728,0.002060,0.001753,0.001472,0.001403,0.001282,0.001127,0.001355,0.001179,0.000879,...,0.066324,0.072227,0.056795,0.042780,0.051417,0.047737,0.063645,0.063645,0.063645,0.063645
14578,0.001455,0.001827,0.001437,0.001154,0.001085,0.000995,0.000892,0.001120,0.000921,0.000649,...,0.094538,0.097748,0.077615,0.056151,0.063897,0.062765,0.126433,0.126433,0.126433,0.126433


In [373]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_summary_df(input_df):
    
    
    new_df = pd.DataFrame()

    for i in input_df.columns.tolist():
        if "percent" in i:
            deme = i.split(".")[2]
            interval = i.split(".")[3]
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]
            

            
            
            try:
                local_df = pd.DataFrame.from_dict({"deme":deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            
    return(new_df)

In [374]:
final_north_df = generate_summary_df(percent_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)


In [375]:
#making sure that any numbers >1 are excluded
percent_df =pd.DataFrame(np.where(percent_df <1, percent_df, 1), columns=percent_df.columns )
percent_df =pd.DataFrame(np.where(percent_df >0, percent_df, 0), columns=percent_df.columns )
percent_df

Unnamed: 0,intro.percent.CentralEurope.0,intro.percent.CentralEurope.1,intro.percent.CentralEurope.2,intro.percent.CentralEurope.3,intro.percent.CentralEurope.4,intro.percent.CentralEurope.5,intro.percent.CentralEurope.6,intro.percent.CentralEurope.7,intro.percent.CentralEurope.8,intro.percent.CentralEurope.9,...,intro.percent.WesternEurope.27,intro.percent.WesternEurope.28,intro.percent.WesternEurope.29,intro.percent.WesternEurope.30,intro.percent.WesternEurope.31,intro.percent.WesternEurope.32,intro.percent.WesternEurope.33,intro.percent.WesternEurope.34,intro.percent.WesternEurope.35,intro.percent.WesternEurope.36
0,0.000957,0.001194,0.000950,0.000767,0.000724,0.000664,0.000592,0.000737,0.000613,0.000436,...,0.087948,0.093468,0.074766,0.055819,0.082417,0.073424,0.117086,0.117086,0.117086,0.117086
1,0.001075,0.001312,0.001076,0.000886,0.000839,0.000770,0.000685,0.000840,0.000712,0.000519,...,0.071102,0.075864,0.060662,0.045680,0.063100,0.057308,0.087340,0.087340,0.087340,0.087340
2,0.002259,0.002727,0.002258,0.001875,0.001778,0.001640,0.001470,0.001791,0.001525,0.001124,...,0.097665,0.101643,0.082159,0.062001,0.071124,0.069750,0.117847,0.117847,0.117847,0.117847
3,0.003520,0.004194,0.003567,0.003010,0.002856,0.002612,0.002299,0.002785,0.002411,0.001788,...,0.109988,0.116182,0.091626,0.069293,0.072412,0.072034,0.107466,0.107466,0.107466,0.107466
4,0.002123,0.002527,0.002161,0.001818,0.001735,0.001581,0.001384,0.001658,0.001450,0.001083,...,0.080193,0.087860,0.069098,0.051888,0.060345,0.056140,0.073527,0.073527,0.073527,0.073527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.002112,0.002488,0.002139,0.001811,0.001731,0.001593,0.001412,0.001683,0.001473,0.001116,...,0.081605,0.087266,0.070262,0.054096,0.065478,0.061801,0.085076,0.085076,0.085076,0.085076
14576,0.001608,0.001958,0.001636,0.001359,0.001287,0.001162,0.001005,0.001232,0.001060,0.000767,...,0.080176,0.088778,0.068093,0.049959,0.060118,0.054582,0.071641,0.071641,0.071641,0.071641
14577,0.001728,0.002060,0.001753,0.001472,0.001403,0.001282,0.001127,0.001355,0.001179,0.000879,...,0.066324,0.072227,0.056795,0.042780,0.051417,0.047737,0.063645,0.063645,0.063645,0.063645
14578,0.001455,0.001827,0.001437,0.001154,0.001085,0.000995,0.000892,0.001120,0.000921,0.000649,...,0.094538,0.097748,0.077615,0.056151,0.063897,0.062765,0.126433,0.126433,0.126433,0.126433


In [376]:
final_north_df['days'] = final_north_df.interval.astype(int) *7
final_north_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - final_north_df.days.map(timedelta)
#final_north_df = final_north_df[final_north_df.date >"2022-06-15"]
final_north_df.date = final_north_df.date.astype(str)

In [377]:
final_north_df[final_north_df.deme == "WesternEurope"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date
0,WesternEurope,0,0.040697,0.072069,0.017382,0.04109,0.024635,0,2023-01-03
0,WesternEurope,1,0.059405,0.100537,0.028634,0.061621,0.039603,7,2022-12-27
0,WesternEurope,2,0.070016,0.124288,0.03016,0.071306,0.042909,14,2022-12-20
0,WesternEurope,3,0.076559,0.140424,0.030676,0.075637,0.043016,21,2022-12-13
0,WesternEurope,4,0.072727,0.132539,0.030525,0.072824,0.042332,28,2022-12-06
0,WesternEurope,5,0.054123,0.098679,0.022434,0.05346,0.030977,35,2022-11-29
0,WesternEurope,6,0.054706,0.09836,0.024042,0.053386,0.031304,42,2022-11-22
0,WesternEurope,7,0.091595,0.164852,0.039296,0.089312,0.051455,49,2022-11-15
0,WesternEurope,8,0.101085,0.181414,0.042908,0.100679,0.058679,56,2022-11-08
0,WesternEurope,9,0.08318,0.15018,0.036251,0.080048,0.045993,63,2022-11-01


In [378]:
#final_north_df.to_csv("../data-files/north_percent_intro.csv")

In [379]:
line1 = alt.Chart(final_north_df).mark_area(interpolate='monotone', opacity = 0.9).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="", grid=False)),
    alt.Y2('upper_hpd_log_50' ), 
    alt.Color("deme",  scale=alt.Scale( range=range_))
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_50 >0) & (datum.upper_hpd_log_50 < 1)
)

band1 = alt.Chart(final_north_df).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),
    alt.Y2('upper_hpd_log_95'),
    alt.Color("deme")
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.upper_hpd_log_95 < 1)
)

band1 + line1

## working on doing Rt calculations based on percent intro

In [322]:
one_line = alt.Chart(pd.DataFrame({'y': [1.0]})).mark_rule(strokeDash=[1,1]).encode(y='y').properties(
    width=850,
    height=300
)

In [323]:
# make a new dataframe that summarizes the 95% HPD estimate with mean for each deme and interval 
def generate_rt_summary_df(input_df):
    
    
    new_df = pd.DataFrame()
    count = 0
    for i in input_df.columns.tolist():
        #print(i)
        if "rt" in i:
            #if count %7 == 0:
            deme = i.split(".")[1]
            interval = i.split(".")[2]
            #print(deme, interval)
            local_series = input_df[i].astype('float').to_numpy()
            mean_percent = local_series.mean()
            hpd_95 = az.hdi(local_series, 0.95)
            lower_hpd_log_95 = hpd_95[0]
            upper_hpd_log_95 = hpd_95[1]
            hpd_50 = az.hdi(local_series, 0.50)
            lower_hpd_log_50 = hpd_50[0]
            upper_hpd_log_50 = hpd_50[1]




            try:
                local_df = pd.DataFrame.from_dict({"deme": deme, "interval":interval, "mean_percent":mean_percent, 
                                                   "upper_hpd_log_95":upper_hpd_log_95,"lower_hpd_log_95":[lower_hpd_log_95], 
                                                   "upper_hpd_log_50":upper_hpd_log_50,"lower_hpd_log_50":lower_hpd_log_50})
                new_df = new_df.append(local_df)
            except:
                pass
            #count+=1
            
    return(new_df)

In [324]:
def generate_local_rt(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = (input_df[i].astype("float") * (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float")))/ uninfectious_rate
            except KeyError:
                pass

            
    return(new_df)

In [325]:
def generate_local_and_intro_rt(input_df):
    
    
    new_df = pd.DataFrame()
   
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (input_df[i].astype("float") )/ uninfectious_rate
            

            
    return(new_df)

In [326]:
# def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
#     new_df = pd.DataFrame()
#     incubation_period = 8
#     #uninfectious_rate = 4.5
    
#     for i in input_df.columns.tolist():
#         if "Ne" in i:
#             interval = i.split(".")[3]
#             deme = i.split(".")[1]
#             #print((input_df[i].astype("float") ))
#             print( (foo_ne_growth[i].astype("float")) / 8)
#             print((input_df[i].astype("float")) / uninfectious_rate)
#             new_df["rt" + "." + str(deme)+ "." + str(interval)] = ((input_df[i].astype("float")) / uninfectious_rate) * ( (foo_ne_growth[i].astype("float")) / incubation_period)
            

            
#     return(new_df)

In [327]:
def foo_generate_local_and_intro_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    #incubation_period = 8
    #uninfectious_rate = 4.5
    
    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            #print((input_df[i].astype("float") ))
            print( (foo_ne_growth[i].astype("float")) / 8)
            print((input_df[i].astype("float")) / uninfectious_rate)
            new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
            

            
    return(new_df)

In [328]:
# def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
#     new_df = pd.DataFrame()
#     incubation_period = 365/8

#     for i in input_df.columns.tolist():
#         if "Ne" in i:
#             interval = i.split(".")[3]
#             deme = i.split(".")[1]
#             try:
#                 new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((input_df[i].astype("float")) / uninfectious_rate) * ( (foo_ne_growth[i].astype("float")) / incubation_period) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
#             except KeyError:
#                 pass

            
#     return(new_df)

In [329]:
def foo_generate_local_rt(input_df, foo_ne_growth):
    
    
    new_df = pd.DataFrame()
    incubation_period = 365/8

    for i in input_df.columns.tolist():
        if "Ne" in i:
            interval = i.split(".")[3]
            deme = i.split(".")[1]
            try:
                new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
                
            except KeyError:
                pass

            
    return(new_df)

In [330]:
rt_local_df = foo_generate_local_rt(seir_growth_rate, ne_diff_summary)
rt_local_and_intro_df = foo_generate_local_and_intro_rt(seir_growth_rate, ne_diff_summary)


  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(deme)+"."+str(interval)].astype("float"))
  new_df["rt"+"."+ str(deme) + "." + str(interval)] = ((1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))) *  (1- percent_df["intro.percent." +str(de

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
14575    0.0
14576    0.0
14577    0.0
14578    0.0
14579    0.0
Name: Ne.CentralEurope.diff.0, Length: 14580, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
14575    1.0
14576    1.0
14577    1.0
14578    1.0
14579    1.0
Name: Ne.CentralEurope.diff.0, Length: 14580, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
14575    0.0
14576    0.0
14577    0.0
14578    0.0
14579    0.0
Name: Ne.CentralEurope.diff.1, Length: 14580, dtype: float64
0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
14575    1.0
14576    1.0
14577    1.0
14578    1.0
14579    1.0
Name: Ne.CentralEurope.diff.1, Length: 14580, dtype: float64
0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
14575    0.0
14576    0.0
14577    0.0
14578    0.0
14579    0.0
Name: Ne.CentralEurope.diff.2, Length: 14

0       -0.612365
1       -0.579967
2       -0.554029
3       -0.560693
4       -0.564721
           ...   
14575   -0.527653
14576   -0.617583
14577   -0.561259
14578   -0.621845
14579   -0.643713
Name: Ne.SouthernEurope.diff.13, Length: 14580, dtype: float64
0        0.838714
1        0.846922
2        0.853519
3        0.851822
4        0.850797
           ...   
14575    0.860252
14576    0.837395
14577    0.851678
14578    0.836319
14579    0.830806
Name: Ne.SouthernEurope.diff.13, Length: 14580, dtype: float64
0       -0.572665
1       -0.542368
2       -0.518112
3       -0.524343
4       -0.528111
           ...   
14575   -0.493446
14576   -0.577546
14577   -0.524872
14578   -0.581531
14579   -0.601982
Name: Ne.SouthernEurope.diff.14, Length: 14580, dtype: float64
0        0.848777
1        0.856493
2        0.862694
3        0.861099
4        0.860135
           ...   
14575    0.869020
14576    0.847537
14577    0.860964
14578    0.846525
14579    0.841341
Name: Ne.SouthernEu

  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_rate)) * (1+ (foo_ne_growth[i].astype("float") / incubation_period))
  new_df["rt" + "." + str(deme)+ "." + str(interval)] = (1+ (foo_ne_growth[i].astype("float") / uninfectious_r

In [331]:
rt_local_df

Unnamed: 0,rt.CentralEurope.0,rt.CentralEurope.1,rt.CentralEurope.2,rt.CentralEurope.3,rt.CentralEurope.4,rt.CentralEurope.5,rt.CentralEurope.6,rt.CentralEurope.7,rt.CentralEurope.8,rt.CentralEurope.9,...,rt.WesternEurope.26,rt.WesternEurope.27,rt.WesternEurope.28,rt.WesternEurope.29,rt.WesternEurope.30,rt.WesternEurope.31,rt.WesternEurope.32,rt.WesternEurope.33,rt.WesternEurope.34,rt.WesternEurope.35
0,0.999043,0.998806,0.999050,0.999233,0.999276,0.999336,0.999408,0.999263,0.999387,0.933834,...,1.157670,1.189868,1.241031,1.576656,2.127242,1.805693,1.395040,0.882914,0.882914,0.882914
1,0.998925,0.998688,0.998924,0.999114,0.999161,0.999230,0.999315,0.999160,0.999288,0.937178,...,1.165413,1.196695,1.244785,1.557503,2.072748,1.798863,1.396041,0.912660,0.912660,0.912660
2,0.997741,0.997273,0.997742,0.998125,0.998222,0.998360,0.998530,0.998209,0.998475,0.939320,...,1.118864,1.149175,1.196634,1.497636,1.983717,1.735740,1.352842,0.882153,0.882153,0.882153
3,0.996480,0.995806,0.996433,0.996990,0.997144,0.997388,0.997701,0.997215,0.997589,0.937950,...,1.110619,1.136108,1.181841,1.492463,1.984819,1.743795,1.354471,0.892534,0.892534,0.892534
4,0.997877,0.997473,0.997839,0.998182,0.998265,0.998419,0.998616,0.998342,0.998550,0.938227,...,1.153088,1.177252,1.220439,1.527937,2.026516,1.779513,1.385084,0.926473,0.926473,0.926473
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.997888,0.997512,0.997861,0.998189,0.998269,0.998407,0.998588,0.998317,0.998527,0.942125,...,1.132800,1.157619,1.199674,1.482995,1.940010,1.704980,1.344291,0.914924,0.914924,0.914924
14576,0.998392,0.998042,0.998364,0.998641,0.998713,0.998838,0.998995,0.998768,0.998940,0.932952,...,1.176139,1.202884,1.250230,1.592231,2.150821,1.871419,1.433181,0.928359,0.928359,0.928359
14577,0.998272,0.997940,0.998247,0.998528,0.998597,0.998718,0.998873,0.998645,0.998821,0.938799,...,1.166350,1.193865,1.238394,1.540538,2.035777,1.795097,1.396586,0.936355,0.936355,0.936355
14578,0.998545,0.998173,0.998563,0.998846,0.998915,0.999005,0.999108,0.998880,0.999079,0.932620,...,1.147564,1.185521,1.240981,1.584003,2.148419,1.869121,1.422125,0.873567,0.873567,0.873567


In [332]:
summary_rt_local_df = generate_rt_summary_df(rt_local_df)
summary_rt_local_and_intro_df = generate_rt_summary_df(rt_local_and_intro_df)


  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

In [333]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7 
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)

In [334]:
summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "NorthAmerica"]

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date
0,NorthAmerica,0,0.609665,0.685012,0.533542,0.637475,0.58603,0,2023-01-03
0,NorthAmerica,1,0.564385,0.647182,0.481319,0.594799,0.538435,7,2022-12-27
0,NorthAmerica,2,0.73492,0.788029,0.68039,0.754743,0.71823,14,2022-12-20
0,NorthAmerica,3,1.068967,1.084679,1.054105,1.073653,1.063305,21,2022-12-13
0,NorthAmerica,4,0.851332,0.881992,0.819472,0.862872,0.841683,28,2022-12-06
0,NorthAmerica,5,0.851342,0.882,0.819484,0.862881,0.841694,35,2022-11-29
0,NorthAmerica,6,0.849686,0.880674,0.817491,0.861348,0.839935,42,2022-11-22
0,NorthAmerica,7,0.858681,0.887874,0.828324,0.869674,0.849493,49,2022-11-15
0,NorthAmerica,8,0.89753,0.918881,0.875251,0.90559,0.890808,56,2022-11-08
0,NorthAmerica,9,0.870112,0.897013,0.842109,0.880249,0.861644,63,2022-11-01


In [335]:
line2 = alt.Chart(summary_rt_local_and_intro_df, title = "Rt (Local + Intro Phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%d %B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title=" Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%d %B %Y")),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale(range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot = band2 + line2 +one_line
phlyo_rt_plot

In [336]:
line = alt.Chart(summary_rt_local_df, title = "Rt (local only phylo)").mark_area(interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme")
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0.4) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(summary_rt_local_df).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="local only Rt (phylo)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme",  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0.4) #& (datum.upper_hpd_log_95 < 4)
)

local_phlyo_rt_plot = band + line +one_line
local_phlyo_rt_plot

In [337]:
(phlyo_rt_plot & local_phlyo_rt_plot).resolve_scale(y = "shared")

In [338]:
case_rt = pd.read_csv("../case-rt-analysis/estimates/case-rt-estimates_region.tsv", sep="\t")

In [339]:
case_rt

Unnamed: 0,date,location,median_R,R_upper_80,R_lower_80
0,2022-05-07,Western Europe,15.350703,15.970606,14.856801
1,2022-05-08,Western Europe,14.309429,14.859400,13.862485
2,2022-05-09,Western Europe,13.353642,13.841060,12.949375
3,2022-05-10,Western Europe,12.476653,12.906902,12.102924
4,2022-05-11,Western Europe,11.672797,12.049124,11.324210
...,...,...,...,...,...
1217,2023-01-22,Central Europe,0.771716,0.771716,0.771716
1218,2023-01-23,Central Europe,0.771716,0.771716,0.771715
1219,2023-01-24,Central Europe,0.771715,0.771715,0.771715
1220,2023-01-25,Central Europe,0.771715,0.771715,0.771715


In [340]:
band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.5 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('R_lower_80',axis=alt.Axis(title="Overall Rt (Cases Only)", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_80' ),
    alt.Color("location",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12),  scale=alt.Scale( range=range_))
).properties(
    width=850,
    height=300).transform_filter(
    (datum.R_upper_80 <7) #& (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("location")
).properties(
    width=850,
    height=300).transform_filter(
    (datum.median_R <7) #& (datum.upper_hpd_log_95 < 4)
)

case_rt_plot = band3 + line3 + one_line
case_rt_plot

In [341]:
(phlyo_rt_plot & case_rt_plot).resolve_scale(color='independent', x='shared', y = 'shared'
)

In [342]:
vac_df =pd.read_csv("../data/us_mpox_vaccinaiton.csv")
vac_df.index = vac_df["Unnamed: 0"]
vac_df = vac_df.transpose()
vac_df = vac_df.drop(index = "Unnamed: 0").reset_index()
vac_df = vac_df.replace(',','', regex=True)
vac_df.columns.name = None
vac_df["First doses"] = pd.to_numeric(vac_df["First doses"])
vac_df = vac_df.rename(columns={"index": "date", "First doses" : "dose_first", "Second doses" : "dose_second"} )
long_vac = pd.melt(vac_df, id_vars='date', value_vars=['dose_first', 'dose_second'])

vac_plot = alt.Chart(long_vac).mark_bar(interpolate='monotone', opacity = 0.3 ,color = "#f58518", width = 20).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('value',axis=alt.Axis(title="Vaccinaiton Cases", grid=False)),
    alt.Color("variable",legend=alt.Legend(title= "Dosage",offset = -130, labelFontSize = 12, titleFontSize = 12))
         ).properties(
    width=850,
    height=300)

line2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(interpolate='monotone', opacity = 1 ).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Overall Rt", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("deme",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) & (datum.deme == "NorthAmerica") #(datum.upper_hpd_log_50 < 4)
)
band2 = alt.Chart(summary_rt_local_and_intro_df).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("deme")
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.deme == "NorthAmerica") # (datum.upper_hpd_log_95 < 4)
)

phlyo_rt_plot_NA = band2 + line2 +one_line



band3 = alt.Chart(case_rt).mark_area(interpolate='monotone', opacity = 0.3 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('R_lower_80',axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('R_upper_80' ),
    alt.Color("location",legend=alt.Legend(title= "Region",offset = -120, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=850,
    height=300).transform_filter(
    (datum.location == "North America") # (datum.upper_hpd_log_95 < 4)
)

line3 = alt.Chart(case_rt).mark_line(
    opacity=1, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('median_R', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))), 
    alt.Color("location")
).properties(
    width=850,
    height=300).transform_filter(
    (datum.location == "North America") # (datum.upper_hpd_log_95 < 4)
)

case_rt_plot_NA = band3 + line3 + one_line



(vac_plot+ (case_rt_plot_NA + phlyo_rt_plot_NA).resolve_scale(y ="shared")).resolve_scale(y = "independent")


## now we separate out Rt by contribution 

In [343]:
summary_rt_local_df['days'] = summary_rt_local_df.interval.astype(int) *7
summary_rt_local_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_df.days.map(timedelta)
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.date >"2022-06-17"]
summary_rt_local_df.date = summary_rt_local_df.date.astype(str)

summary_rt_local_and_intro_df['days'] = summary_rt_local_and_intro_df.interval.astype(int) *7
summary_rt_local_and_intro_df['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - summary_rt_local_and_intro_df.days.map(timedelta)
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.date >"2022-06-17"]
summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df.date = summary_rt_local_df.date.astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_and_intro_df.date = summary_rt_local_and_intro_df.date.astype(str)


In [344]:
summary_rt_local_df['Contribution'] = "Local"
summary_rt_local_and_intro_df['Contribution'] = "Local + Intoductions"
summary_rt_local_and_intro_df = summary_rt_local_and_intro_df[summary_rt_local_and_intro_df.deme == "NorthAmerica"]
summary_rt_local_df = summary_rt_local_df[summary_rt_local_df.deme == "NorthAmerica"]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_df['Contribution'] = "Local"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  summary_rt_local_and_intro_df['Contribution'] = "Local + Intoductions"


In [345]:
summary_rt_local_df

Unnamed: 0,deme,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date,Contribution
0,NorthAmerica,0,0.564368,0.651124,0.482442,0.592278,0.538,0,2023-01-03,Local
0,NorthAmerica,1,0.527247,0.616854,0.439528,0.555552,0.497847,7,2022-12-27,Local
0,NorthAmerica,2,0.686268,0.754951,0.625433,0.70421,0.663167,14,2022-12-20,Local
0,NorthAmerica,3,0.986477,1.02506,0.942033,1.00614,0.977387,21,2022-12-13,Local
0,NorthAmerica,4,0.792064,0.842637,0.741475,0.809001,0.777423,28,2022-12-06,Local
0,NorthAmerica,5,0.793839,0.84555,0.745704,0.810481,0.779362,35,2022-11-29,Local
0,NorthAmerica,6,0.792425,0.845146,0.744976,0.808803,0.777611,42,2022-11-22,Local
0,NorthAmerica,7,0.786024,0.84375,0.727028,0.809326,0.772525,49,2022-11-15,Local
0,NorthAmerica,8,0.829497,0.877766,0.779646,0.848499,0.817294,56,2022-11-08,Local
0,NorthAmerica,9,0.811868,0.85957,0.763539,0.828912,0.799221,63,2022-11-01,Local


In [346]:
combined_rt = pd.concat([ summary_rt_local_and_intro_df, summary_rt_local_df], ignore_index=True)


In [347]:
stream_rt = alt.Chart(combined_rt, title = "North American Rt by Contribution").mark_area(interpolate='monotone', opacity = 0.8 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="Local Rt", grid=False),stack = False, scale=alt.Scale(domain=(0, 2))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N',legend=alt.Legend(title= "Contribution",offset = -130, labelFontSize = 12, titleFontSize = 12))#, scale=alt.Scale(domain = ['Local',  "lo"], range = ["#4c90c0",  "#df4327"]))
).properties(
    width=800,
    height=300
)#.transform_filter(
#    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
#)

In [348]:
stream_rt + one_line

In [349]:
## Now we calculate the percent of RT that is influenced by introductions

In [350]:
rt_local_intro_northamerica_df = rt_local_and_intro_df.filter(regex='.NorthAmerica.')
rt_local_northamerica_df = rt_local_df.filter(regex='.NorthAmerica.')

In [351]:
rt_local_intro_northamerica_df

Unnamed: 0,rt.NorthAmerica.0,rt.NorthAmerica.1,rt.NorthAmerica.2,rt.NorthAmerica.3,rt.NorthAmerica.4,rt.NorthAmerica.5,rt.NorthAmerica.6,rt.NorthAmerica.7,rt.NorthAmerica.8,rt.NorthAmerica.9,...,rt.NorthAmerica.26,rt.NorthAmerica.27,rt.NorthAmerica.28,rt.NorthAmerica.29,rt.NorthAmerica.30,rt.NorthAmerica.31,rt.NorthAmerica.32,rt.NorthAmerica.33,rt.NorthAmerica.34,rt.NorthAmerica.35
0,0.622424,0.578272,0.744116,1.066288,0.856729,0.856738,0.855139,0.863824,0.901310,0.874858,...,1.144617,1.337865,2.755721,2.800459,2.370150,1.0,1.0,1.0,1.0,1.0
1,0.640385,0.597997,0.756793,1.062732,0.864054,0.864063,0.862544,0.870800,0.906413,0.881287,...,1.136740,1.318844,2.642064,2.683574,2.283837,1.0,1.0,1.0,1.0,1.0
2,0.654927,0.613997,0.767011,1.059889,0.869940,0.869948,0.868492,0.876403,0.910508,0.886449,...,1.130451,1.303709,2.552744,2.591739,2.215849,1.0,1.0,1.0,1.0,1.0
3,0.651177,0.609869,0.764380,1.060619,0.868426,0.868435,0.866962,0.874962,0.909455,0.885121,...,1.132065,1.307589,2.575548,2.615183,2.233220,1.0,1.0,1.0,1.0,1.0
4,0.648915,0.607379,0.762791,1.061061,0.867512,0.867520,0.866038,0.874091,0.908819,0.884319,...,1.133041,1.309938,2.589384,2.629408,2.243755,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14575,0.669862,0.630458,0.777466,1.057001,0.875944,0.875952,0.874560,0.882118,0.914681,0.891713,...,1.124073,1.288401,2.463439,2.499939,2.147726,1.0,1.0,1.0,1.0,1.0
14576,0.619552,0.575121,0.742083,1.066862,0.855551,0.855561,0.853949,0.862702,0.900489,0.873824,...,1.145889,1.340941,2.774246,2.819513,2.384198,1.0,1.0,1.0,1.0,1.0
14577,0.650859,0.609519,0.764157,1.060681,0.868298,0.868306,0.866832,0.874839,0.909366,0.885009,...,1.132202,1.307919,2.577490,2.617179,2.234699,1.0,1.0,1.0,1.0,1.0
14578,0.617211,0.572554,0.740424,1.067330,0.854590,0.854600,0.852978,0.861787,0.899819,0.872981,...,1.146927,1.343455,2.789418,2.835120,2.395700,1.0,1.0,1.0,1.0,1.0


In [352]:
rt_diff_df = rt_local_northamerica_df.div(rt_local_intro_northamerica_df.values)

In [353]:
rt_df_summary = generate_rt_summary_df(rt_diff_df)

  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.append(local_df)
  new_df = new_df.ap

In [356]:
rt_df_summary['days'] = rt_df_summary.interval.astype(int) *7
rt_df_summary['date'] = dt.strptime("2023-01-03",  "%Y-%m-%d") - rt_df_summary.days.map(timedelta)
rt_df_summary = rt_df_summary[rt_df_summary.date >"2022-06-17"]
rt_df_summary.date = rt_df_summary.date.astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rt_df_summary.date = rt_df_summary.date.astype(str)


In [357]:
line = alt.Chart(rt_df_summary, title = "North America" ).mark_area( interpolate='monotone', opacity = 1 ,color = "#f58518").encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False,format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Percent of Rt due to Introductions ", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_50' )
).properties(
    width=850,
    height=300
).transform_filter(
   (datum.lower_hpd_log_50 >0) #& (datum.upper_hpd_log_50 < 4)
)
band = alt.Chart(rt_df_summary).mark_area(
    opacity=0.3, interpolate='monotone', color = "#f58518"
).encode(
    alt.X('date:T', axis=alt.Axis(title="Date", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),# scale=alt.Scale(domain=(0.6, 1.3))),
    alt.Y2('upper_hpd_log_95')
).properties(
    width=850,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) #& (datum.upper_hpd_log_95 < 4)
)

rt_diff_plot = line + band +one_line
(vac_plot + (rt_diff_plot)).resolve_scale(y="independent")