In [1]:
import pandas as pd
import numpy as np
import geopandas

##### The GeoPandas introduction [site](https://geopandas.org/en/stable/getting_started/introduction.html)

In [2]:
# Load the variant-sequence counts. Reading the CSV through GeoPandas gives a
# frame that already has a (still empty) geometry column to fill in later.
covid_cases = geopandas.read_file('covid-variants.csv')
# Parse the date strings so rows can later be filtered by day.
covid_cases['date'] = pd.to_datetime(covid_cases['date'])


In [3]:
# Every variant row repeats the per-day ``num_sequences_total``, so the rows of
# a single variant are enough to recover the totals; Omicron is the one kept.
is_omicron = covid_cases['variant'] == 'Omicron'
omicron_cases = covid_cases[is_omicron]
omicron_cases.head()

Unnamed: 0,location,date,variant,num_sequences,perc_sequences,num_sequences_total,geometry
19,Angola,2020-07-06,Omicron,0,0.0,3,
43,Angola,2020-08-31,Omicron,0,0.0,1,
67,Angola,2020-09-28,Omicron,0,0.0,10,
91,Angola,2020-10-12,Omicron,0,0.0,29,
115,Angola,2020-10-26,Omicron,0,0.0,7,


In [4]:
# Restrict to a single day so the first map stays simple.
on_snapshot_day = omicron_cases['date'] == '2021-12-27'
restricted_day = omicron_cases[on_snapshot_day]

In [5]:
# First-pass lookup: geocode every country name into Point data.
country_locations = geopandas.tools.geocode(restricted_day['location'])

# Sanity check: the geocoded frame has to share restricted_day's index so its
# geometry can be assigned row-for-row in the next step.
restricted_day.index.equals(country_locations.index)

True

In [6]:
# Fill the geometry column with the geocoded points (rows align on the index).
geocoded_points = country_locations['geometry']
restricted_day = restricted_day.assign(geometry=geocoded_points)

In [7]:
# The GeoPandas geocode call grabbed the wrong Morocco, Switzerland, and Georgia, so we have to manually correct.

from geopy.geocoders import Photon
from shapely.geometry import Point

# Geocoder client used for the manual correction requests.
photon_fixer = Photon()

# With exactly_one=False the geocoder returns a list of candidates, so the
# right entry for each country is picked by position.
# NOTE(review): these positions depend on the order the service returns
# results in — re-check them if the corrected markers look wrong.
candidate_options = {'exactly_one': False, 'language': 'en'}
Morocco = photon_fixer.geocode("Morocco", **candidate_options)[0]
Switzerland = photon_fixer.geocode("Switzerland", **candidate_options)[1]
Georgia = photon_fixer.geocode("Georgia", **candidate_options)[2]

In [8]:
def fix_geometry(gdf, points):
    """Overwrite the geometry of rows whose location matches a geocoded point.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame with ``location`` and ``geometry`` columns. Modified in place.
    points : iterable
        Geocoder results (e.g. geopy ``Location`` objects) that expose
        ``latitude`` and ``longitude`` and whose string form starts with the
        place name, optionally followed by ``', '`` and more address parts.

    Returns
    -------
    None
    """
    for point in points:
        # The text before the first ', ' of the result's string form is the key
        # matched against the ``location`` column.
        place_name = str(point).split(', ')[0]
        # Shapely points are (x, y), i.e. (longitude, latitude). Passing the
        # latitude first would place the marker at transposed coordinates.
        gdf.loc[gdf.location == place_name, ['geometry']] = Point(point.longitude, point.latitude)

# Apply the three manual corrections; restricted_day is updated in place.
corrected_locations = [Morocco, Switzerland, Georgia]
fix_geometry(restricted_day, corrected_locations)

In [9]:
# Cast both sequence-count columns to 64-bit integers before mapping.
integer_columns = dict.fromkeys(['num_sequences', 'num_sequences_total'], 'int64')
fixed_day = restricted_day.astype(integer_columns)

In [11]:
# Order the rows by Omicron sequence count and leave the date column out of
# the frame that gets mapped.
cases_sorted = fixed_day.sort_values(by='num_sequences').drop(columns='date')

# Interactive map: one circle marker per country, keyed on the Omicron count.
omicron_map_options = {
    'column': 'num_sequences',
    'tooltip': 'location',
    'popup': 'num_sequences',
    'marker_type': 'circle_marker',
    'marker_kwds': {'radius': 10},
}
cases_sorted.explore(**omicron_map_options)

In [12]:
# Every variant has the same number of rows: even on dates before Omicron was
# observed there is still an Omicron entry carrying that day's total
# sequences, which is why the Omicron rows alone can give data-set-wide totals.
covid_cases['variant'].value_counts()

Alpha             4184
B.1.1.277         4184
others            4184
S:677P.Pelican    4184
S:677H.Robin1     4184
Omicron           4184
Mu                4184
Lambda            4184
Kappa             4184
Iota              4184
Gamma             4184
Eta               4184
Epsilon           4184
Delta             4184
Beta              4184
B.1.620           4184
B.1.367           4184
B.1.258           4184
B.1.221           4184
B.1.177           4184
B.1.160           4184
B.1.1.519         4184
B.1.1.302         4184
non_who           4184
Name: variant, dtype: int64

In [13]:
# Cast the totals to integers so they can be summed numerically.
fixed_sequence_type = omicron_cases.astype({'num_sequences_total': 'int64'})

# Sum the total sequences per country, then turn the ``location`` group key
# back into an ordinary column.
total_country_cases = (
    fixed_sequence_type
    .loc[:, ['location', 'num_sequences_total']]
    .groupby('location')
    .sum()
    .reset_index()
)

In [14]:
# Geocode the country names themselves. After the reset_index() above, the
# frame's index is just 0..N-1, so passing ``.index`` would send row numbers
# to the geocoder instead of place names.
total_locations = geopandas.tools.geocode(total_country_cases['location'])

In [15]:
# Attach the geocoded points and make sure the result is a GeoDataFrame.
# The column selection before the groupby drops the geometry column, so on
# recent GeoPandas versions the aggregated frame can be a plain pandas
# DataFrame, which has no ``.explore()`` method.
total_country_cases = geopandas.GeoDataFrame(
    total_country_cases.assign(geometry=total_locations['geometry']),
    geometry='geometry',
)

In [16]:
# Re-apply the manual corrections for the three mis-geocoded countries.
fix_geometry(total_country_cases, [Morocco, Switzerland, Georgia])

# Interactive map of the total sequences per country, one circle marker each.
total_map_options = {
    'column': 'num_sequences_total',
    'tooltip': 'location',
    'popup': 'num_sequences_total',
    'marker_type': 'circle_marker',
    'marker_kwds': {'radius': 10},
}
total_country_cases.explore(**total_map_options)

In [17]:
# Country polygons from the 'naturalearth_lowres' data set that GeoPandas exposes.
# NOTE(review): geopandas.datasets is deprecated in recent GeoPandas releases —
# confirm the installed version still provides get_path().
naturalearth_path = geopandas.datasets.get_path('naturalearth_lowres')
world = geopandas.read_file(naturalearth_path)
world.head()

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,920938,Oceania,Fiji,FJI,8374.0,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,53950935,Africa,Tanzania,TZA,150600.0,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,603253,Africa,W. Sahara,ESH,906.5,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,35623680,North America,Canada,CAN,1674000.0,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,326625791,North America,United States of America,USA,18560000.0,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [25]:
import pycountry

# Lookup from each pycountry country name to its alpha-3 code.
mapping = dict((country.name, country.alpha_3) for country in pycountry.countries)

# Swap every location name for its code. Names with no exact match in the
# lookup are left unchanged by ``replace``.
total_country_cases['country_codes'] = total_country_cases['location'].replace(mapping)

In [26]:
# Join the per-country totals to the world polygons on the alpha-3 code. Both
# inputs carry a ``geometry`` column, so the merged frame holds two of them
# (``geometry_x`` and ``geometry_y``). This shows how GeoPandas can store two
# different geometries per entry and switch between them: here the one coming
# from ``world`` is made the active geometry.
combined_df = (
    total_country_cases
    .merge(world, left_on='country_codes', right_on='iso_a3')
    .set_geometry('geometry_y')
)

In [27]:
# Interactive map of the merged frame, keyed on total sequences per country.
polygon_map_options = {
    'column': 'num_sequences_total',
    'tooltip': 'location',
    'popup': 'num_sequences_total',
    'marker_type': 'circle_marker',
    'marker_kwds': {'radius': 10},
}
combined_df.explore(**polygon_map_options)
                     