## Preprocessing: Clean data into JSON 
Source: https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0#_=_

In [70]:
import pandas as pd
import geopandas as gpd
import json

In [71]:
data = pd.read_csv('../data/clean_data.csv', index_col = 0)
data.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website,population
0,Albania,ALB,2021-01-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
1,Albania,ALB,2021-01-11,0.0,0.0,0.0,64.0,0.0,0.0,0.0,22.423166,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
2,Albania,ALB,2021-01-12,128.0,128.0,0.0,64.0,0.0,0.0,0.0,22.423166,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
3,Albania,ALB,2021-01-13,188.0,188.0,0.0,63.0,0.01,0.01,0.0,22.072805,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0
4,Albania,ALB,2021-01-14,266.0,266.0,0.0,66.0,0.01,0.01,0.0,23.12389,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...,2854191.0


In [72]:
total_vaccinations = data[['country', 'iso_code', 'date', 'total_vaccinations', 'population']]
daily_vaccinations = data[['country', 'iso_code', 'date', 'daily_vaccinations', 'population']]
fully_vaccinations = data[['country', 'iso_code', 'date', 'people_fully_vaccinated', 'population']]

Import geo-location data for world map drawing

In [73]:
shape = '../data/world_shape/ne_110m_admin_0_countries.shp'
gdf = gpd.read_file(shape)[['ADMIN', 'ADM0_A3', 'geometry']]
gdf.columns = ['geo_country', 'country_code', 'geometry']
gdf.head()

Unnamed: 0,geo_country,country_code,geometry
0,Fiji,FJI,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,United Republic of Tanzania,TZA,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,Western Sahara,SAH,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,Canada,CAN,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,United States of America,USA,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


To prep for `Bokeh`, we need to have a final data format of GeoJSON

In [74]:
fully_geo_vaccinations = gdf.merge(fully_vaccinations, left_on = 'country_code', right_on = 'iso_code', how = 'left')

In [75]:
fully_vaccinations = fully_geo_vaccinations.to_json()

## Plotting using Bokeh 

In [76]:
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

## Total Vaccinations per Capita (Bar Graph)

In [77]:
total_vaccinations.loc[:, 'total_vaccinations_per_capita'] = total_vaccinations.loc[:, 'total_vaccinations']/total_vaccinations.loc[:,'population']
total_vaccinations.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,country,iso_code,date,total_vaccinations,population,total_vaccinations_per_capita
0,Albania,ALB,2021-01-10,0.0,2854191.0,0.0
1,Albania,ALB,2021-01-11,0.0,2854191.0,0.0
2,Albania,ALB,2021-01-12,128.0,2854191.0,4.5e-05
3,Albania,ALB,2021-01-13,188.0,2854191.0,6.6e-05
4,Albania,ALB,2021-01-14,266.0,2854191.0,9.3e-05


In [78]:
total_vaccinations.loc[:, 'date'] = pd.to_datetime(total_vaccinations.loc[:, 'date'], format='%Y-%m-%d')
most_recent_date = max(total_vaccinations['date'])
total_vaccinations = total_vaccinations[total_vaccinations['date'] == most_recent_date]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


In [79]:
total_vaccinations_plot = total_vaccinations[['country', 'total_vaccinations_per_capita']]

In [80]:
from bokeh.plotting import figure, output_file, show
import bokeh.io

sorted_total_plot = total_vaccinations_plot.sort_values(by = 'total_vaccinations_per_capita', ascending = True)

countries = sorted_total_plot['country'].to_list()

p = figure(x_axis_label= 'Total Vaccinations per Capita (By Country)', x_range = [0, 1.5], y_range = countries, tools = '')

p.hbar(source = sorted_total_plot, y = 'country', right = 'total_vaccinations_per_capita', height = 0.6)
p.ygrid.grid_line_color = None

bokeh.io.show(p)

In [65]:
total_vaccinations_plot.sort_values(by = 'total_vaccinations_per_capita', ascending = False)

Unnamed: 0,country,total_vaccinations_per_capita
1882,Gibraltar,1.262514
2450,Israel,0.954958
4644,United Arab Emirates,0.641302
365,Bahrain,0.308632
2372,Isle of Man,0.287395
2963,Maldives,0.27454
4804,United States,0.26783
1007,Chile,0.243417
4008,Serbia,0.237586
1622,Faeroe Islands,0.162805


### Fully Vaccinated World Map (Heat Map)

## Daily Vaccination per Capita (Line Graph)

In [40]:
counts

array([2.99921460e-03, 9.04547639e-02, 3.08632489e-01, 9.22271152e-03,
       5.01140284e-02, 4.11593803e-02, 4.05464045e-03, 6.19757313e-02,
       2.43416882e-01, 5.37175193e-03, 7.67078090e-02, 3.49442655e-02,
       1.62804552e-01, 1.02431854e-01, 1.26251447e+00, 1.01877211e-01,
       1.31932521e-01, 1.53118210e-02, 2.87394779e-01, 9.54958192e-01,
       8.82199291e-02, 4.28912964e-02, 8.85976411e-03, 1.06145159e-01,
       2.74540308e-01, 2.16797455e-02, 1.23179026e-01, 4.58537141e-02,
       1.04056177e-01, 9.85481454e-02, 4.56030463e-02, 1.25840365e-02,
       3.93443593e-03, 2.37586168e-01, 9.58529525e-02, 6.08511872e-03,
       3.32191900e-02, 1.18466410e-01, 3.83844553e-04, 6.41302022e-01,
       2.67829791e-01, 2.18593219e-03])