In [1]:
#import libraries
import pandas as pd
import numpy as np
import geopandas
import pycountry
from geopy import Nominatim
import matplotlib.pyplot as plt
import folium
from ipywidgets import interact
from ipywidgets import widgets
import plotly.express as px

In [2]:
#load the vaccine dataset from VaccineData.csv (path is relative to the working directory) into a dataframe
df = pd.read_csv('VaccineData.csv')

In [3]:
#preview the first five rows to sanity-check the load
df.head(5)

Unnamed: 0,Country,Date,Vaccine_Manufacturer,Total_Vaccinations,Severe Disease Ancestral,Infection Ancestral,Severe Disease Alpha,Infection Alpha,Severe Disease Beta,Infection Beta,Severe Disease Gamma,Infection Gamma,Severe Disease Delta,Infection Delta,Severe Disease Omicron,Infection Omicron
0,Argentina,12/29/20,Oxford/AstraZeneca,1,1,1,1,1,1,1,1,1,1,1,1,0
1,Argentina,12/29/20,Sinopharm/Beijing,1,1,1,1,1,1,1,1,1,1,1,1,0
2,Argentina,12/29/20,Sputnik V,20488,18849,17620,18849,17620,18234,17415,18234,17415,18234,17415,11268,7376
3,Argentina,12/30/20,Sputnik V,40590,37343,34907,37343,34907,36125,34502,36125,34502,36125,34502,22325,14612
4,Argentina,12/31/20,Sputnik V,43396,39924,37321,39924,37321,38622,36887,38622,36887,38622,36887,23868,15623


In [4]:
#parse the Date strings into datetime64 values
df = df.assign(Date=pd.to_datetime(df['Date']))

In [5]:
#print a summary of the dataframe: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48150 entries, 0 to 48149
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Country                   48150 non-null  object        
 1   Date                      48150 non-null  datetime64[ns]
 2   Vaccine_Manufacturer      48150 non-null  object        
 3   Total_Vaccinations        48150 non-null  int64         
 4   Severe Disease Ancestral  48150 non-null  int64         
 5   Infection Ancestral       48150 non-null  int64         
 6   Severe Disease Alpha      48150 non-null  int64         
 7   Infection Alpha           48150 non-null  int64         
 8   Severe Disease Beta       48150 non-null  int64         
 9   Infection Beta            48150 non-null  int64         
 10  Severe Disease Gamma      48150 non-null  int64         
 11  Infection Gamma           48150 non-null  int64         
 12  Severe Disease Del

In [6]:
#show every distinct country name present in the data
df['Country'].unique().tolist()

['Argentina',
 'Austria',
 'Belgium',
 'Bulgaria',
 'Canada',
 'Chile',
 'Croatia',
 'Cyprus',
 'Czechia',
 'Denmark',
 'Ecuador',
 'Estonia',
 'Finland',
 'France',
 'Germany',
 'Hong Kong',
 'Hungary',
 'Iceland',
 'Ireland',
 'Italy',
 'Japan',
 'Latvia',
 'Liechtenstein',
 'Lithuania',
 'Luxembourg',
 'Malta',
 'Nepal',
 'Netherlands',
 'Norway',
 'Peru',
 'Poland',
 'Portugal',
 'Romania',
 'Slovakia',
 'Slovenia',
 'South Africa',
 'South Korea',
 'Spain',
 'Sweden',
 'Switzerland',
 'Ukraine',
 'United States',
 'Uruguay',
 'European Union']

In [7]:
#remove the 'European Union' rows; they are not needed for this analysis
df = df.drop(index=df.index[df['Country'] == 'European Union'])

In [8]:
#count how many distinct vaccine manufacturers were used in each country
df.groupby('Country')['Vaccine_Manufacturer'].agg('nunique')

Country
Argentina        6
Austria          6
Belgium          5
Bulgaria         4
Canada           6
Chile            5
Croatia          5
Cyprus           5
Czechia          8
Denmark          4
Ecuador          4
Estonia          5
Finland          5
France           5
Germany          6
Hong Kong        2
Hungary          6
Iceland          4
Ireland          5
Italy            5
Japan            4
Latvia           7
Liechtenstein    4
Lithuania        4
Luxembourg       5
Malta            4
Nepal            5
Netherlands      5
Norway           4
Peru             4
Poland           5
Portugal         8
Romania          4
Slovakia         6
Slovenia         5
South Africa     2
South Korea      6
Spain            4
Sweden           4
Switzerland      4
Ukraine          5
United States    3
Uruguay          3
Name: Vaccine_Manufacturer, dtype: int64

In [9]:
#function for finding country codes
def countrycode(column):
    """Map country names to ISO 3166-1 alpha-3 codes using pycountry.

    Parameters
    ----------
    column : iterable of str
        Country names, looked up with ``pycountry.countries.get(name=...)``.

    Returns
    -------
    list of str
        One entry per input value, in the same order: the ``alpha_3`` code
        when the lookup succeeds, otherwise the placeholder string ``'None'``.
    """
    codes = []
    for country in column:
        try:
            match = pycountry.countries.get(name=country)
            # a missing name yields None (AttributeError on .alpha_3) or a
            # LookupError/KeyError, depending on how pycountry reports it
            codes.append(match.alpha_3 if match is not None else 'None')
        except (AttributeError, LookupError):
            # only lookup failures are mapped to the placeholder; unlike a bare
            # ``except``, genuine errors and KeyboardInterrupt still propagate
            codes.append('None')
    return codes

In [10]:
#create the Nominatim geocoder (identified by a custom user_agent) used to obtain the lat/long of each country
geolocator = Nominatim(user_agent = 'DSEI270_Proj1')

#function to get lat and long from country name
def latlong(column):
    """Geocode each value with the module-level ``geolocator``.

    Parameters
    ----------
    column : iterable of str
        Queries passed one at a time to ``geolocator.geocode``.

    Returns
    -------
    pandas.DataFrame
        Columns ``code`` (the query exactly as given), ``lat`` and ``long``.
        A query that cannot be geocoded keeps its own ``code`` and gets NaN
        coordinates, so every input stays identifiable and the coordinate
        columns remain numeric.
    """
    missing = float('nan')
    rows = []
    for country in column:
        try:
            loc = geolocator.geocode(country)
        except Exception:
            # best effort: a failed request must not abort the whole batch.
            # ``Exception`` (not a bare except) keeps the loop interruptible.
            loc = None
        if loc is None:
            # no result or request failed: keep the key, mark coordinates missing
            rows.append([country, missing, missing])
        else:
            rows.append([country, loc.latitude, loc.longitude])
    return pd.DataFrame(rows, columns=['code','lat','long'])

In [11]:
#add a 'code' column holding each row's 3-letter country code; it is the key for merging with the geopandas dataset
df = df.assign(code=countrycode(df['Country']))

In [12]:
#load the Natural Earth low-resolution world map bundled with geopandas, keyed by ISO alpha-3 code
#NOTE(review): geopandas.datasets was deprecated in GeoPandas 0.14 and removed in 1.0, so this cell
#needs an older GeoPandas or a local copy of the Natural Earth file -- confirm the pinned version
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
#rename by column name instead of overwriting every header positionally, so a change in
#column order or count cannot silently mislabel the data
world = world.rename(columns={'iso_a3': 'code'})
#the bundled file ships '-99' as the ISO code for some countries (France and Norway among them);
#patch the ones present in the vaccine data so they are not lost when merging on 'code'
world['code'] = world['name'].map({'France': 'FRA', 'Norway': 'NOR'}).fillna(world['code'])
#keep only the columns needed downstream
world = world[['continent','code','geometry']]

In [13]:
#create dataframe of lat and long info for each unique value in the 'code' column
#NOTE(review): this geocodes the 3-letter codes (plus the 'None' placeholder, if any code lookup failed)
#rather than country names -- confirm the geocoder resolves each code to the intended country
latlongdf = latlong(df['code'].unique())

In [14]:
#attach the world geometry and then the lat/long table to every row, matching on 'code';
#both joins use the default inner merge, so rows whose code is absent from either table are dropped
df = df.merge(world, on='code').merge(latlongdf, on='code')

In [15]:
#wrap the merged table in a GeoDataFrame, using its 'geometry' column as the active geometry
gdf = geopandas.GeoDataFrame(df, geometry='geometry')