In [1]:
import pandas as pd
import numpy as np
import os, glob
import altair as alt

## the following analyses are based on calculations from https://www.sciencedirect.com/science/article/pii/S0092867420304840#sec4

In [2]:
# Read the air-traffic tables for the countries of interest (one CSV per file
# in the "specific_countries" folder) and stack them into a single frame.
air_folder = "../data/2022_apr_dec/"

# sorted() makes the row order independent of the filesystem's listing order.
air_files = sorted(glob.glob(air_folder + "specific_countries/*"))
if not air_files:
    raise FileNotFoundError(
        "no air traffic files found under " + air_folder + "specific_countries/"
    )

# Concatenate once instead of growing the frame file by file; the per-file
# row labels are kept as they were read.
concat_air = pd.concat([pd.read_csv(path) for path in air_files])

# "Month" holds the month number; the year is hard-coded to 2022 and every row
# is dated to the first day of its month. The explicit format pins the
# month-first interpretation of the "<month>-01-2022" strings.
concat_air["Date"] = pd.to_datetime(
    concat_air["Month"].astype(str) + "-01-2022", format="%m-%d-%Y"
)

In [3]:
# Show the combined air-traffic table, including the derived "Date" column.
concat_air

Unnamed: 0,Month,Orig Country,Dest Country,Total Market Pax,Date
0,12,Austria,Austria,6428,2022-12-01
1,12,Austria,Belgium,19280,2022-12-01
2,12,Austria,Canada,4165,2022-12-01
3,12,Austria,Colombia,986,2022-12-01
4,12,Austria,France,40413,2022-12-01
...,...,...,...,...,...
210,11,United States,Slovenia,811,2022-11-01
211,11,United States,Spain,125618,2022-11-01
212,11,United States,Switzerland,60465,2022-11-01
213,11,United States,United Kingdom,527992,2022-11-01


In [4]:
# Monthly period key ("YYYY-MM") derived from the first-of-month "Date" column.
concat_air["year-month"] = concat_air["Date"].pipe(pd.to_datetime).dt.to_period("M")

In [5]:
# Population size per country, taken from the World Bank:
# https://data.worldbank.org/indicator/SP.POP.TOTL
population_csv = "../data/country_pop.csv"
pop = pd.read_csv(population_csv)

In [6]:
# Countries included in the GLM analysis (those with >5 sequences).
countries = [
    "Austria", "Belgium", "Canada", "Colombia", "France",
    "Germany", "Italy", "Peru", "Portugal", "Slovak Republic",
    "Slovenia", "Spain", "Switzerland", "United States", "United Kingdom",
]

# Region labels and the member countries of each region.
# NOTE(review): the spellings below differ from `countries` in places
# ("United States of America" vs "United States", "Slovakia" vs
# "Slovak Republic") - confirm which naming is expected before reusing them.
regions = [
    "NorthAmerica",
    "WesternEurope",
    "CentralEurope",
    "SouthernEurope",
    "SouthAmerica",
]

north_america = ["Canada", "United States of America"]
western_europe = [
    "Austria",
    "Germany",
    "Switzerland",
    "United Kingdom",
    "France",
    "Belgium",
]
central_europe = ["Slovakia", "Slovenia"]
southern_europe = ["Italy", "Portugal", "Spain"]
south_american = ["Colombia", "Peru"]

In [7]:
# Keep only the countries of interest and rename the World Bank spelling
# "Slovak Republic" to "Slovakia".
# The rename is applied before filtering, and the filter uses the renamed
# spelling, so re-running this cell does not drop the already-renamed row.
# Column-level replace avoids chained-indexing assignment, which pandas warns
# about and which does nothing under copy-on-write.
name_fixes = {"Slovak Republic": "Slovakia"}
wanted_names = {name_fixes.get(name, name) for name in countries}

pop = pop.assign(**{"Country Name": pop["Country Name"].replace(name_fixes)})
pop = pop[pop["Country Name"].isin(wanted_names)].copy()

In [8]:
# Show the population table after filtering to the countries of interest.
pop

Unnamed: 0,Country Name,2021
14,Austria,8955797.0
17,Belgium,11592952.0
35,Canada,38246108.0
37,Switzerland,8703405.0
45,Colombia,51516562.0
55,Germany,83196078.0
70,Spain,47415750.0
77,France,67749632.0
81,United Kingdom,67326569.0
116,Italy,59109668.0


In [9]:
## Prevalence estimates from Marlin's evofr models (tab-separated file);
## the "date" column is parsed as datetimes on load.
case_prevalence = pd.read_csv(
    "../case-rt-analysis/estimates/case-prevalence-estimates_country.tsv",
    sep="\t",
    parse_dates=["date"],
)

In [10]:
# Sum the prevalence estimates per location and calendar month.
# `case_prevalence` itself is left untouched (no in-place set_index), so this
# cell can be re-run without failing on a missing "date" column.
month_prevalence = (
    case_prevalence
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .set_index("date")
    .groupby(["location"])
    .resample("1M")  # month-end bins; newer pandas versions spell this "ME"
    .sum()
    .reset_index()
)

# Monthly period key matching the "year-month" column of the air-traffic table.
month_prevalence["year-month"] = month_prevalence["date"].dt.to_period("M")

In [11]:
# Show the per-location monthly prevalence sums and their "year-month" key.
month_prevalence

Unnamed: 0,location,date,median_prev,prev_upper_80,prev_lower_80,year-month
0,Austria,2022-05-31,147.193414,147.193780,147.193085,2022-05
1,Austria,2022-06-30,1114.559632,1114.565076,1114.554897,2022-06
2,Austria,2022-07-31,2694.629072,2694.650442,2694.603314,2022-07
3,Austria,2022-08-31,2445.611126,2445.639375,2445.576886,2022-08
4,Austria,2022-09-30,1008.003817,1008.017584,1007.986420,2022-09
...,...,...,...,...,...,...
124,United States,2022-09-30,202887.613900,210180.939500,196339.247200,2022-09
125,United States,2022-10-31,77762.170300,80329.301200,74725.048000,2022-10
126,United States,2022-11-30,29969.925100,31078.777010,28645.900280,2022-11
127,United States,2022-12-31,19934.721940,20883.921470,18734.466340,2022-12


In [12]:
# Estimated importation intensity for every destination country and month:
#
#   risk(dest, month) = sum over origins != dest of
#       passengers(orig -> dest, month)
#       * monthly_prevalence(orig, month) / (population_2021(orig) * 1000)
#
# NOTE(review): the factor of 1000 is carried over unchanged from the original
# calculation; confirm the intended units of the prevalence estimates.

# Index each input once by its lookup key instead of re-filtering the full
# frames inside the loops. Rows with a missing key can never match and are
# dropped; when a key is repeated, the first row is the one used.
route_cols = ["year-month", "Dest Country", "Orig Country"]
route_rows = concat_air.dropna(subset=route_cols).drop_duplicates(subset=route_cols)
pax_by_route = dict(zip(
    route_rows[route_cols].itertuples(index=False, name=None),
    route_rows["Total Market Pax"].tolist(),
))

prev_cols = ["year-month", "location"]
prev_rows = month_prevalence.dropna(subset=prev_cols).drop_duplicates(subset=prev_cols)
prevalence_by_origin = dict(zip(
    prev_rows[prev_cols].itertuples(index=False, name=None),
    prev_rows["median_prev"].tolist(),
))

pop_rows = pop.dropna(subset=["Country Name"]).drop_duplicates(subset=["Country Name"])
population_by_country = dict(zip(
    pop_rows["Country Name"].tolist(),
    pop_rows["2021"].tolist(),
))

months = concat_air["year-month"].unique()
destinations = concat_air["Dest Country"].unique()
origins = concat_air["Orig Country"].unique()

import_dict = {}
for month in months:
    for place in destinations:
        import_list = []
        for orig in origins:
            if orig == place:
                # Domestic traffic is not an importation.
                continue
            pax = pax_by_route.get((month, place, orig))
            prevalence = prevalence_by_origin.get((month, orig))
            population = population_by_country.get(orig)
            if pax is None or prevalence is None or population is None:
                # No traffic, prevalence or population entry for this origin:
                # it contributes nothing to the sum.
                continue
            import_list.append(pax * (prevalence / (population * 1000)))

        # A destination/month with no usable origin sums to 0.0 (not NaN).
        import_dict[str(place) + "." + str(month)] = {
            "place": place,
            "year-month": month,
            "import_risk": np.sum(np.array(import_list)),
        }
        
        
    
    

In [13]:
"""this will generate a multi-index dataframe from the migrations dictionary"""
# One row per "<place>.<year-month>" key of import_dict; the key itself ends up
# in the "index" column next to place, year-month and import_risk.
import_df = pd.DataFrame.from_dict(import_dict, orient="index").reset_index()

import_df

Unnamed: 0,index,place,year-month,import_risk
0,Austria.2022-12,Austria,2022-12,0.021989
1,Belgium.2022-12,Belgium,2022-12,0.038758
2,Canada.2022-12,Canada,2022-12,0.064917
3,Colombia.2022-12,Colombia,2022-12,0.034265
4,France.2022-12,France,2022-12,0.117733
...,...,...,...,...
130,Slovenia.2022-11,Slovenia,2022-11,0.000457
131,Spain.2022-11,Spain,2022-11,0.134270
132,Switzerland.2022-11,Switzerland,2022-11,0.047357
133,United Kingdom.2022-11,United Kingdom,2022-11,0.291823


In [14]:
# Convert the monthly Period values to timestamps (presumably so the ":T"
# temporal encoding of the chart can consume them - confirm).
# Skipped when the column is already datetime, so re-running this cell does
# not fail on the missing Period accessor.
if not pd.api.types.is_datetime64_any_dtype(import_df["year-month"]):
    import_df["year-month"] = import_df["year-month"].dt.to_timestamp()

In [15]:
# Line chart of estimated importation risk over time, one line per destination.
alt.Chart(import_df).mark_line().encode(
    x=alt.X("year-month:T"),
    y=alt.Y("import_risk"),
    color=alt.Color("place:N"),
)

In [16]:
## Now repeat the importation-risk calculation by region, for comparison with the GLM.

In [17]:
# Country -> region lookup, built from the member list of each region.
region = {
    country: region_name
    for region_name, members in {
        "North America": ["Canada", "United States"],
        "Northern Europe": ["Finland"],
        "Western Europe": [
            "Austria",
            "Germany",
            "Switzerland",
            "United Kingdom",
            "France",
            "Belgium",
            "Netherlands",
        ],
        "Central Europe": ["Slovakia", "Slovenia"],
        "Southern Europe": ["Italy", "Portugal", "Spain"],
        "South America": ["Colombia", "Peru"],
    }.items()
    for country in members
}

In [18]:
# Tag every table with the region of its countries, then aggregate to regions.
# Names missing from `region` map to NaN and are left out of the grouped sums
# (groupby drops NaN keys by default).
# Columns are created with assign() rather than a NaN placeholder followed by
# attribute assignment, which also avoids writing into a filtered slice.

# Passengers per month and origin-region -> destination-region pair.
concat_air = concat_air.assign(
    region_orig=concat_air["Orig Country"].map(region),
    region_dest=concat_air["Dest Country"].map(region),
)
region_air = (
    concat_air
    .groupby(["year-month", "region_orig", "region_dest"])["Total Market Pax"]
    .sum()
    .reset_index()
)

# 2021 population per region.
pop = pop.assign(region=pop["Country Name"].map(region))
region_pop = pop.groupby(["region"])["2021"].sum().reset_index()

# Summed monthly prevalence per region.
month_prevalence = month_prevalence.assign(
    region=month_prevalence["location"].map(region)
)
region_prev = (
    month_prevalence
    .groupby(["year-month", "region"])["median_prev"]
    .sum()
    .reset_index()
)



In [19]:
# Estimated importation intensity for every destination region and month:
#
#   risk(dest, month) = sum over origin regions != dest of
#       passengers(orig -> dest, month)
#       * monthly_prevalence(orig, month) / (population_2021(orig) * 1000)
#
# NOTE(review): the factor of 1000 is carried over unchanged from the original
# calculation; confirm the intended units of the prevalence estimates.

# Index each regional table once by its lookup key instead of re-filtering the
# full frames inside the loops. Rows with a missing key can never match and
# are dropped; when a key is repeated, the first row is the one used.
route_cols = ["year-month", "region_dest", "region_orig"]
route_rows = region_air.dropna(subset=route_cols).drop_duplicates(subset=route_cols)
pax_by_route = dict(zip(
    route_rows[route_cols].itertuples(index=False, name=None),
    route_rows["Total Market Pax"].tolist(),
))

prev_cols = ["year-month", "region"]
prev_rows = region_prev.dropna(subset=prev_cols).drop_duplicates(subset=prev_cols)
prevalence_by_origin = dict(zip(
    prev_rows[prev_cols].itertuples(index=False, name=None),
    prev_rows["median_prev"].tolist(),
))

pop_rows = region_pop.dropna(subset=["region"]).drop_duplicates(subset=["region"])
population_by_region = dict(zip(
    pop_rows["region"].tolist(),
    pop_rows["2021"].tolist(),
))

months = region_air["year-month"].unique()
destinations = region_air["region_dest"].unique()
origins = region_air["region_orig"].unique()

import_dict = {}
for month in months:
    for place in destinations:
        import_list = []
        for orig in origins:
            if orig == place:
                # Traffic within a region is not an importation.
                continue
            pax = pax_by_route.get((month, place, orig))
            prevalence = prevalence_by_origin.get((month, orig))
            population = population_by_region.get(orig)
            if pax is None or prevalence is None or population is None:
                # No traffic, prevalence or population entry for this origin:
                # it contributes nothing to the sum.
                continue
            import_list.append(pax * (prevalence / (population * 1000)))

        # A destination/month with no usable origin sums to 0.0 (not NaN).
        import_dict[str(place) + "." + str(month)] = {
            "place": place,
            "year-month": month,
            "import_risk": np.sum(np.array(import_list)),
        }
        
        
    
    

In [20]:
# Show the per-region records keyed by "<region>.<year-month>".
import_dict

{'North America.2022-04': {'place': 'North America',
  'year-month': Period('2022-04', 'M'),
  'import_risk': 0.0},
 'Southern Europe.2022-04': {'place': 'Southern Europe',
  'year-month': Period('2022-04', 'M'),
  'import_risk': 0.0},
 'Western Europe.2022-04': {'place': 'Western Europe',
  'year-month': Period('2022-04', 'M'),
  'import_risk': 0.0},
 'Central Europe.2022-04': {'place': 'Central Europe',
  'year-month': Period('2022-04', 'M'),
  'import_risk': 0.0},
 'South America.2022-04': {'place': 'South America',
  'year-month': Period('2022-04', 'M'),
  'import_risk': 0.0},
 'North America.2022-05': {'place': 'North America',
  'year-month': Period('2022-05', 'M'),
  'import_risk': 0.08157338602948179},
 'Southern Europe.2022-05': {'place': 'Southern Europe',
  'year-month': Period('2022-05', 'M'),
  'import_risk': 0.24945490462950873},
 'Western Europe.2022-05': {'place': 'Western Europe',
  'year-month': Period('2022-05', 'M'),
  'import_risk': 0.41554398646033497},
 'Central 

In [21]:
"""this will generate a multi-index dataframe from the migrations dictionary"""
# One row per "<region>.<year-month>" key of import_dict; the key itself ends
# up in the "index" column next to place, year-month and import_risk.
import_df = pd.DataFrame.from_dict(import_dict, orient="index").reset_index()

import_df

Unnamed: 0,index,place,year-month,import_risk
0,North America.2022-04,North America,2022-04,0.0
1,Southern Europe.2022-04,Southern Europe,2022-04,0.0
2,Western Europe.2022-04,Western Europe,2022-04,0.0
3,Central Europe.2022-04,Central Europe,2022-04,0.0
4,South America.2022-04,South America,2022-04,0.0
5,North America.2022-05,North America,2022-05,0.081573
6,Southern Europe.2022-05,Southern Europe,2022-05,0.249455
7,Western Europe.2022-05,Western Europe,2022-05,0.415544
8,Central Europe.2022-05,Central Europe,2022-05,0.002354
9,South America.2022-05,South America,2022-05,0.005387


In [22]:
# Convert the monthly Period values to timestamps (presumably so the ":T"
# temporal encoding of the chart can consume them - confirm).
# Skipped when the column is already datetime, so re-running this cell does
# not fail on the missing Period accessor.
if not pd.api.types.is_datetime64_any_dtype(import_df["year-month"]):
    import_df["year-month"] = import_df["year-month"].dt.to_timestamp()

In [23]:
# Line chart of estimated importation risk over time, one line per region;
# the x axis has no title or grid and is labelled with month name and year.
month_axis = alt.Axis(title=None, grid=False, format="%B %Y")
alt.Chart(import_df).mark_line().encode(
    x=alt.X("year-month:T", axis=month_axis),
    y=alt.Y("import_risk"),
    color=alt.Color("place:N"),
)

In [101]:
# Save the regional estimates to the working directory; the row labels are
# written as the first (unnamed) CSV column.
import_df.to_csv("estimated_importation_intensity_region.csv", index=True)