In [1]:
#import libaries
import pandas as pd
import numpy as np
import geopandas
import pycountry
from geopy import Nominatim
import matplotlib.pyplot as plt
import folium
from ipywidgets import interact
from ipywidgets import widgets
import plotly.express as px

In [2]:
#import vaccine data into dataframe
df = pd.read_csv('VaccineData.csv')

In [3]:
#review top 5 rows 
df.head()

Unnamed: 0,Country,Date,Vaccine_Manufacturer,Total_Vaccinations,Severe Disease Ancestral,Infection Ancestral,Severe Disease Alpha,Infection Alpha,Severe Disease Beta,Infection Beta,Severe Disease Gamma,Infection Gamma,Severe Disease Delta,Infection Delta,Severe Disease Omicron,Infection Omicron
0,Argentina,12/29/20,Oxford/AstraZeneca,1,1,1,1,1,1,1,1,1,1,1,1,0
1,Argentina,12/29/20,Sinopharm/Beijing,1,1,1,1,1,1,1,1,1,1,1,1,0
2,Argentina,12/29/20,Sputnik V,20488,18849,17620,18849,17620,18234,17415,18234,17415,18234,17415,11268,7376
3,Argentina,12/30/20,Sputnik V,40590,37343,34907,37343,34907,36125,34502,36125,34502,36125,34502,22325,14612
4,Argentina,12/31/20,Sputnik V,43396,39924,37321,39924,37321,38622,36887,38622,36887,38622,36887,23868,15623


In [4]:
#change Date col to datetime
df['Date'] = pd.to_datetime(df['Date'])

In [5]:
#review dataframe info (data types, nulls, etc)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48150 entries, 0 to 48149
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Country                   48150 non-null  object        
 1   Date                      48150 non-null  datetime64[ns]
 2   Vaccine_Manufacturer      48150 non-null  object        
 3   Total_Vaccinations        48150 non-null  int64         
 4   Severe Disease Ancestral  48150 non-null  int64         
 5   Infection Ancestral       48150 non-null  int64         
 6   Severe Disease Alpha      48150 non-null  int64         
 7   Infection Alpha           48150 non-null  int64         
 8   Severe Disease Beta       48150 non-null  int64         
 9   Infection Beta            48150 non-null  int64         
 10  Severe Disease Gamma      48150 non-null  int64         
 11  Infection Gamma           48150 non-null  int64         
 12  Severe Disease Del

In [6]:
#list unique values for countries
list(df['Country'].unique())

['Argentina',
 'Austria',
 'Belgium',
 'Bulgaria',
 'Canada',
 'Chile',
 'Croatia',
 'Cyprus',
 'Czechia',
 'Denmark',
 'Ecuador',
 'Estonia',
 'Finland',
 'France',
 'Germany',
 'Hong Kong',
 'Hungary',
 'Iceland',
 'Ireland',
 'Italy',
 'Japan',
 'Latvia',
 'Liechtenstein',
 'Lithuania',
 'Luxembourg',
 'Malta',
 'Nepal',
 'Netherlands',
 'Norway',
 'Peru',
 'Poland',
 'Portugal',
 'Romania',
 'Slovakia',
 'Slovenia',
 'South Africa',
 'South Korea',
 'Spain',
 'Sweden',
 'Switzerland',
 'Ukraine',
 'United States',
 'Uruguay',
 'European Union']

In [7]:
#drop European Union rows, since they are unneeded for this analysis
df.drop(df.loc[df['Country'] == 'European Union'].index, inplace=True, axis=0)

In [8]:
#find number of unique vaccines giving by vaccine manufacturer
df.groupby('Country')['Vaccine_Manufacturer'].nunique()

Country
Argentina        6
Austria          6
Belgium          5
Bulgaria         4
Canada           6
Chile            5
Croatia          5
Cyprus           5
Czechia          8
Denmark          4
Ecuador          4
Estonia          5
Finland          5
France           5
Germany          6
Hong Kong        2
Hungary          6
Iceland          4
Ireland          5
Italy            5
Japan            4
Latvia           7
Liechtenstein    4
Lithuania        4
Luxembourg       5
Malta            4
Nepal            5
Netherlands      5
Norway           4
Peru             4
Poland           5
Portugal         8
Romania          4
Slovakia         6
Slovenia         5
South Africa     2
South Korea      6
Spain            4
Sweden           4
Switzerland      4
Ukraine          5
United States    3
Uruguay          3
Name: Vaccine_Manufacturer, dtype: int64

In [9]:
#function for finding country codes
def countrycode(column):
    CODE = []
    for country in column:
        try:
            code=pycountry.countries.get(name=country).alpha_3
            CODE.append(code)
        except:
            CODE.append('None')
    return CODE

In [10]:
#create nominatim object to obtain lat long of country
geolocator = Nominatim(user_agent = 'DSEI270_Proj1')

#function to get lat and long from country name
def latlong(column):
    loclist = []
    for country in column:
        try:
            loc = geolocator.geocode(country)
            loclist.append([country, loc.latitude, loc.longitude])
        except:
            loclist.append(['None','None','None'])
    return pd.DataFrame(loclist, columns=['code','lat','long'])
            

In [11]:
#create code column of 3 letter code for each country; used to merge with geopandas dataset
df['code'] = countrycode(df['Country'])

In [12]:
#import world dataset from geopandas, rename code column, and drop unneeded columns
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
world.columns = ['pop_est', 'continent', 'name', 'code', 'gdp_md_est', 'geometry']
world = world[['continent','code','geometry']]

In [13]:
#create dataframe of lat and long info for each unique country
latlongdf = latlong(df['code'].unique())

In [14]:
#merge geometry and lat/long dataframes to df
df = pd.merge(df, world, on='code')
df = pd.merge(df, latlongdf, on='code')

In [15]:
#create geopandas dataframe
gdf = geopandas.GeoDataFrame(df, geometry=df['geometry'])

In [16]:
#Dataframe for visualizing which vaccine variant was administered in which country
countryByMan = gdf.groupby('code')['Vaccine_Manufacturer'].unique()
countryByMan = pd.DataFrame(countryByMan).explode('Vaccine_Manufacturer')
countryByMan.reset_index(inplace=True)
countryByMan = pd.merge(countryByMan,world,on='code')
countryByMan = pd.merge(countryByMan,latlongdf,on='code')
countryByMan = geopandas.GeoDataFrame(countryByMan,geometry=countryByMan['geometry'])
countryByMan

Unnamed: 0,code,Vaccine_Manufacturer,continent,geometry,lat,long
0,ARG,Oxford/AstraZeneca,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
1,ARG,Sinopharm/Beijing,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
2,ARG,Sputnik V,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
3,ARG,Pfizer/BioNTech,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
4,ARG,CanSino,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
...,...,...,...,...,...,...
176,USA,Moderna,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",39.783730,-100.445882
177,USA,Pfizer/BioNTech,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",39.783730,-100.445882
178,USA,Johnson&Johnson,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",39.783730,-100.445882
179,ZAF,Johnson&Johnson,Africa,"POLYGON ((16.34498 -28.57671, 16.82402 -28.082...",13.342138,44.293177


In [17]:
#interactive world map for countries that have administered specific vaccines
def on_manuf_change(Manufacturer):
    df_manuf=countryByMan[countryByMan['Vaccine_Manufacturer'] == Manufacturer]
    fig,ax = plt.subplots(figsize=(15,15))
    df_manuf.plot(ax=ax,edgecolor='black',linewidth=1)
    world.plot(ax=ax,alpha=0.3,color='gray')
    print('Number of Countries: ' + str(len(df_manuf)))


interact(on_manuf_change,Manufacturer=list(set(countryByMan['Vaccine_Manufacturer'])))


interactive(children=(Dropdown(description='Manufacturer', options=('Novavax', 'Covaxin', 'Medicago', 'Sinopha…

<function __main__.on_manuf_change(Manufacturer)>

In [18]:
#Create DataFrame of Country Count for each Manuf
countryByManuf = pd.DataFrame(countryByMan.groupby('Vaccine_Manufacturer')['code'].unique())
numManufCountry = pd.DataFrame(countryByMan.groupby('Vaccine_Manufacturer')['code'].nunique())
numManufCountry = pd.merge(numManufCountry,countryByManuf,on='Vaccine_Manufacturer')
numManufCountry.reset_index(inplace=True)
numManufCountry.columns = ['Vaccine Manufacturer','Number of Countries','Countries']
numManufCountry['Countries'] = numManufCountry['Countries'].apply(lambda x: ','.join(map(str,x)))
numManufCountry

Unnamed: 0,Vaccine Manufacturer,Number of Countries,Countries
0,CanSino,3,"ARG,CHL,ECU"
1,Covaxin,2,"CZE,PRT"
2,Johnson&Johnson,30,"AUT,BEL,BGR,CAN,CHE,CYP,CZE,DEU,DNK,ESP,EST,FI..."
3,Medicago,1,CAN
4,Moderna,34,"ARG,AUT,BEL,BGR,CAN,CHE,CHL,CYP,CZE,DEU,DNK,ES..."
5,Novavax,21,"AUT,BEL,CAN,CHE,CYP,CZE,DEU,EST,FIN,HRV,IRL,IT..."
6,Oxford/AstraZeneca,34,"ARG,AUT,BEL,BGR,CAN,CHL,CYP,CZE,DEU,DNK,ECU,ES..."
7,Pfizer/BioNTech,37,"ARG,AUT,BEL,BGR,CAN,CHE,CHL,CYP,CZE,DEU,DNK,EC..."
8,Sinopharm/Beijing,7,"ARG,CZE,HUN,LVA,NPL,PER,PRT"
9,Sinovac,7,"CHL,CZE,ECU,LVA,PRT,UKR,URY"


In [21]:
#Treemap
tm = px.treemap(numManufCountry,path=['Vaccine Manufacturer','Countries'],values='Number of Countries',color='Number of Countries',color_continuous_scale='RdBu')
tm.data[0].textinfo = 'label+value'
#fig.data[0].customdata = numManufCountry['Countries']
tm.data[0].hovertemplate = 'Manufacturer=%{label}<br>Number of Countries=%{value}'
tm.show()

In [20]:
lp = px.scatter(df, x='Date', y='Total_Vaccinations',color = 'Country')
lp.show()

In [20]:
#Find latest date for each country/manuf pair
test = df.loc[df.groupby(['Country','Vaccine_Manufacturer']).Date.idxmax()]

In [21]:
test

Unnamed: 0,Country,Date,Vaccine_Manufacturer,Total_Vaccinations,Severe Disease Ancestral,Infection Ancestral,Severe Disease Alpha,Infection Alpha,Severe Disease Beta,Infection Beta,...,Infection Gamma,Severe Disease Delta,Infection Delta,Severe Disease Omicron,Infection Omicron,code,continent,geometry,lat,long
3197,Argentina,2022-10-15,CanSino,979735,646625,607436,646625,607436,627030,597638,...,597638,627030,597638,470273,313515,ARG,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
3201,Argentina,2022-10-16,Moderna,13252207,12854641,12192030,12854641,12192030,12854641,12059508,...,12059508,12854641,12059508,9674111,6361059,ARG,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
3188,Argentina,2022-10-13,Oxford/AstraZeneca,26759220,25153667,16858309,25153667,16858309,25153667,18463862,...,18463862,25153667,18463862,18999046,9633319,ARG,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
3202,Argentina,2022-10-16,Pfizer/BioNTech,19136214,18179403,16457144,18179403,16457144,18179403,16074420,...,16074420,18179403,16074420,13778074,8419934,ARG,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
3203,Argentina,2022-10-16,Sinopharm/Beijing,28878166,21081061,19637153,21081061,19637153,20503498,19348371,...,19348371,20503498,19348371,15305428,10107358,ARG,South America,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000...",31.184568,-7.919683
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33404,United States,2022-10-12,Moderna,235757021,228684310,216896459,228684310,216896459,228684310,214538889,...,214538889,228684310,214538889,172102625,113163370,USA,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",39.783730,-100.445882
33405,United States,2022-10-12,Pfizer/BioNTech,372455530,353832754,320311756,353832754,320311756,353832754,312862645,...,312862645,353832754,312862645,268167982,163880433,USA,North America,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",39.783730,-100.445882
34586,Uruguay,2022-10-12,Oxford/AstraZeneca,91141,85673,57419,85673,57419,85673,62887,...,62887,85673,62887,64710,32811,URY,South America,"POLYGON ((-57.62513 -30.21629, -56.97603 -30.1...",48.342555,2.604870
34591,Uruguay,2022-10-17,Pfizer/BioNTech,2556226,2428415,2198354,2428415,2198354,2428415,2147230,...,2147230,2428415,2147230,1840483,1124739,URY,South America,"POLYGON ((-57.62513 -30.21629, -56.97603 -30.1...",48.342555,2.604870
