## Preprocessing: Clean data into JSON 
Source: https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0#_=_

In [158]:
import pandas as pd
import geopandas as gpd
import json

In [159]:
data = pd.read_csv('../data/clean_data.csv', index_col = 0)
data.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website,population
0,Albania,ALB,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
1,Albania,ALB,2021-01-11,0.0,0.0,0.0,64.0,0.0,0.0,0.0,22.423166,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
2,Albania,ALB,2021-01-12,128.0,128.0,0.0,64.0,0.0,0.0,0.0,22.423166,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
3,Albania,ALB,2021-01-13,188.0,188.0,0.0,63.0,0.01,0.01,0.0,22.072805,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
4,Albania,ALB,2021-01-14,266.0,266.0,0.0,66.0,0.01,0.01,0.0,23.12389,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0


In [160]:
fully_vaccinations = data[['country', 'iso_code', 'date', 'people_fully_vaccinated', 'population']]

In [161]:
fully_vaccinations.loc[:, 'fully_vac_per_capita'] = fully_vaccinations.loc[:, 'people_fully_vaccinated']/fully_vaccinations.loc[:, 'population']
fully_vaccinations.loc[:, 'date'] = pd.to_datetime(fully_vaccinations.loc[:, 'date'], format='%Y-%m-%d')
fully_vaccinations = fully_vaccinations[['country', 'iso_code', 'date', 'fully_vac_per_capita']]
#max_full_vaccinations = fully_vaccinations.groupby('country', 'iso_code')['fully_vac_per_capita'].max().sort_values()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


In [162]:
max_full_vaccinations = fully_vaccinations.groupby('iso_code')['fully_vac_per_capita'].max().sort_values()

In [163]:
fully_vaccinated_df=pd.DataFrame({'iso_code':max_full_vaccinations.index.to_list(), 'fully_vac_per_capita': max_full_vaccinations.to_list()})

Import geo-location data for world map drawing

In [164]:
shape = '../data/world_shape/ne_110m_admin_0_countries.shp'
gdf = gpd.read_file(shape)[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['geo_country', 'country_code', 'geometry']
gdf = gdf[gdf['geo_country']!= 'Antarctica']

To prep for `Bokeh`, we need to have a final data format of GeoJSON

In [165]:
fully_geo_vaccinations = gdf.merge(fully_vaccinated_df, left_on = 'country_code', right_on = 'iso_code', how = 'left')

In [166]:
fully_geo_vaccinations.head()

Unnamed: 0,geo_country,country_code,geometry,iso_code,fully_vac_per_capita
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000...",,
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...",,
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948...",,
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742...",CAN,0.01501
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000...",USA,0.090715


In [167]:
fully_vaccinations = fully_geo_vaccinations.to_json()

## Plotting using Bokeh 

### Fully Vaccinated World Map (Heat Map)

In [173]:
from bokeh.io import output_notebook, show, output_file, save
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, ColorBar
from bokeh.palettes import RdYlGn, Spectral6
from bokeh.transform import linear_cmap

#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = fully_vaccinations)

#Define a continuous multi-hue color palette. Reverse color order so that we go from red to green.
palette = RdYlGn[8]

palette = palette[::-1]
mapper = linear_cmap(field_name='fully_vac_per_capita', palette=palette ,low=0 ,high=0.1)
#Create color bar. 
color_bar = ColorBar(color_mapper=mapper['transform'], label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (200,0), orientation = 'horizontal')

#Create figure object.
p = figure(title = 'Proportion of adults fully vaccinated against COVID-19', plot_height = 600 , plot_width = 1000, tools = 'hover')
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.title.align = "center"
#Add patch renderer to figure. 
p.patches('xs','ys', source = geosource,fill_color = mapper,
          line_color = 'black', line_width = 0.25, fill_alpha = 1)
#Specify figure layout.
p.add_layout(color_bar, 'below')
#Display figure inline in Jupyter Notebook.
output_notebook()
#Display figure.
output_file('fully_vac_map.html')
save(p)

'C:\\Users\\frank\\OneDrive\\Documents\\Data Projects\\covid-19-vaccines\\scripts\\fully_vac_map.html'