In [17]:
import pandas as pd
import numpy as np
import geopandas as gpd

from geopandas import GeoDataFrame

import matplotlib.pyplot as plt
import descartes
from mpl_toolkits.axes_grid1 import make_axes_locatable

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

import matplotlib as mpl
import seaborn as sns
# Plot styling: seaborn context/style plus matplotlib rc tweaks so figures
# look nicer than the defaults.
sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 2.0, 'lines.markersize': 5})
sns.set_style("ticks")
# Draw ticks pointing into the axes.
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

# Line width shared by major and minor ticks on both axes.
tw = 1.5
sns.set_style({"xtick.major.size": 6, "ytick.major.size": 6,
               "xtick.minor.size": 4, "ytick.minor.size": 4,
               'axes.labelsize': 18,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

# Tick label size and axes frame line width.
mpl.rc('xtick', labelsize= 18) 
mpl.rc('ytick', labelsize= 18)
mpl.rc('axes', linewidth=1.75)
# Font type 42 embeds text as TrueType, so exported PDF/PS figures can be
# edited in Illustrator / Affinity (font size, color, etc.).
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
# NOTE(review): plt.gcf() returns the current figure, creating one if none
# exists, so this margin change applies to that single figure only and not to
# figures created later — confirm this line is still needed.
plt.gcf().subplots_adjust(bottom=.5)
# NOTE(review): 'axes.labelsize' was already passed as 18 above; this call
# passes 50 afterwards — confirm which value is intended.
sns.set_style({'axes.labelsize': 50})

# This enables high res graphics inline
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import warnings; warnings.simplefilter('ignore')

import bokeh
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer
from bokeh.models import HoverTool 
import bokeh.models as bmo
import bokeh.plotting as bpl
from bokeh.palettes import d3
from bokeh.models import ColumnDataSource
from bokeh.transform import jitter
from bokeh.models import Plot, Range1d, BasicTickFormatter, LinearAxis, LogTicker, FixedTicker, FuncTickFormatter


# Route Bokeh output to the notebook so show() renders plots in the cell output.
bpl.output_notebook()

In [18]:
# Input paths are relative to the notebook's working directory.
shapefile = 'ne_10m_admin_1_states_provinces/ne_10m_admin_1_states_provinces.shp'
# NOTE(review): this path is assigned but not read by any cell visible here.
afghan_district_shapefile = 'afg_admbndp_region_unama_agcho_itos_20180522/afg_admbndp_region_unama_agcho_itos_20180522.shp'
# State/province polygons; later cells filter this frame on its `adm1_code` column.
gdf = gpd.read_file(shapefile)

# Records with `UniqueId`, `Cluster` and `Source` columns (used by later cells).
polis_df = pd.read_csv('polis.es.cases.comb.csv')

# Read as ISO-8859-1 (Latin-1) rather than the default UTF-8.
# NOTE(review): `case_cluster_ref` is not used by any cell visible here.
case_cluster_ref = pd.read_csv('Case_Cluster_Information_17-19_CLEAN.csv', encoding = "ISO-8859-1")

In [19]:
# Reference table that get_more_clusters() searches by `AnonymousEPID` to
# recover a `Cluster` value for records that lack one.
reference_polis_df = pd.read_csv('polis.cases.csv')

In [20]:
def get_more_clusters(polis_df, reference_df=None):
    """Add a ``Virus_cluster`` column, back-filling missing clusters from a reference table.

    For each row of ``polis_df`` the existing ``Cluster`` value is kept when it
    is present. When it is missing, the first reference row whose
    ``AnonymousEPID`` equals the row's ``UniqueId`` is consulted; its
    ``Cluster`` is used if that value is itself present, otherwise the row
    stays missing.

    Missing values are detected with ``pd.isna`` so that ``NaN``, ``None`` and
    numpy float scalars are all recognised (a ``type(value) == float`` check
    misses ``None`` and ``numpy.float64`` and mistakes genuine floats for
    missing values).

    Parameters
    ----------
    polis_df : pandas.DataFrame
        Must contain ``UniqueId`` and ``Cluster`` columns. It is modified in
        place: a ``Virus_cluster`` column is added (or overwritten).
    reference_df : pandas.DataFrame, optional
        Must contain ``AnonymousEPID`` and ``Cluster`` columns. Defaults to the
        module-level ``reference_polis_df``.

    Returns
    -------
    pandas.DataFrame
        The same ``polis_df`` object that was passed in.
    """
    if reference_df is None:
        reference_df = reference_polis_df

    # Build the ID -> cluster lookup once instead of rescanning the reference
    # frame for every row. setdefault keeps the FIRST occurrence of each ID,
    # matching a "take the first matching row" search. Missing IDs are skipped
    # because a missing ID never compares equal to anything.
    first_cluster_by_id = {}
    for epid, ref_cluster in zip(reference_df['AnonymousEPID'], reference_df['Cluster']):
        if not pd.isna(epid):
            first_cluster_by_id.setdefault(epid, ref_cluster)

    virus_clusters = []
    for unique_id, own_cluster in zip(polis_df['UniqueId'], polis_df['Cluster']):
        resolved = own_cluster
        if pd.isna(own_cluster):
            candidate = first_cluster_by_id.get(unique_id)
            # pd.isna(None) is True, so "no match" and "match without a
            # cluster" both leave the original missing value in place.
            if not pd.isna(candidate):
                resolved = candidate
        virus_clusters.append(resolved)

    polis_df['Virus_cluster'] = virus_clusters

    return polis_df

In [21]:
# get_more_clusters() adds `Virus_cluster` to polis_df in place and returns the
# same object, so `new_df` and `polis_df` refer to one DataFrame.
new_df = get_more_clusters(polis_df)

In [22]:
# Keep only the records that have a cluster label.
polis_df.dropna(subset=['Virus_cluster'], inplace=True)

# A record can carry several clusters joined by '+'. Split the label into a
# list and give every cluster its own row, repeating the other columns.
lst_col = 'Virus_cluster'
x = polis_df.assign(**{lst_col:polis_df[lst_col].str.split('+')})

# DataFrame.explode keeps column order and dtypes and also copes with an empty
# frame; reset_index gives the result a fresh 0..n-1 index.
enumerated_polis_df = x.explode(lst_col).reset_index(drop=True)

# Splitting "A + B" on '+' leaves 'A ' and ' B'. Strip the whitespace on the
# column every later step reads (`Virus_cluster`), otherwise the same cluster
# shows up under several spellings and gets counted and coloured more than once.
enumerated_polis_df[lst_col] = enumerated_polis_df[lst_col].str.strip()

# `Cluster` mirrors the cleaned per-row label.
enumerated_polis_df['Cluster'] = enumerated_polis_df['Virus_cluster'].str.lstrip()

In [23]:
# Distinct cluster labels, sorted.
cluster_labels = set(enumerated_polis_df['Virus_cluster'].values)
unique_virus_clusters = list(np.sort(list(cluster_labels)))

print('Number of unique virus clusters = ', len(unique_virus_clusters))

# One magma colour per cluster, assigned in sorted-label order.
colors = bokeh.palettes.magma(len(unique_virus_clusters))
color_by_cluster = dict(zip(unique_virus_clusters, colors))

# Look up each row's colour from its cluster label.
color_column = [color_by_cluster[label] for label in enumerated_polis_df['Virus_cluster']]

enumerated_polis_df['Cluster_hex_color'] = color_column

Number of unique virus clusters =  22


In [24]:
# Split the per-cluster records by their `Source` label.
is_es = enumerated_polis_df['Source'] == 'ES'
is_case = enumerated_polis_df['Source'] == 'Case'
es_df = enumerated_polis_df[is_es]
patients_df = enumerated_polis_df[is_case]

In [25]:
# Row count of the 'ES' subset.
print('Number of ES with cluster information = ', es_df.shape[0])

Number of ES with cluster information =  607


In [26]:
# Row count of the 'Case' subset.
print('Number of patients with cluster information = ', patients_df.shape[0])

Number of patients with cluster information =  228


In [42]:
# Select the polygons whose `adm1_code` contains AFG and/or PAK.
# na=False makes rows with a missing code count as "no match" instead of
# producing a NaN mask that cannot be used for boolean indexing.
adm1_codes = gdf['adm1_code']
afg_df = gdf[adm1_codes.str.contains('AFG', na=False)]
pak_df = gdf[adm1_codes.str.contains('PAK', na=False)]
# Explicit copy: columns are added to this frame below and in later cells.
afg_and_pak_df = gdf[adm1_codes.str.contains('AFG|PAK', na=False)].copy()

# Placeholder coverage values, one uniform random number in [0, 1) per row.
# Sized from the frame itself so a shapefile with a different number of
# matching polygons does not raise a length-mismatch error.
# NOTE(review): unseeded, so the values differ on every run.
afg_and_pak_df['Artificial_vaccine_coverage'] = np.random.rand(len(afg_and_pak_df))

In [43]:
lqas_df = pd.read_csv('polis.lqas.csv')
lqas_provinces = list(set(lqas_df['Province'].values))

# Upper-cased map names, compared for equality with the LQAS `Province` column.
gis_provinces = [name.upper() for name in afg_and_pak_df['name'].values]


def _coverage_for(province):
    """Return 1 - (unvaccinated / checked), summed over the LQAS rows of one province."""
    rows = lqas_df.loc[lqas_df['Province'] == province]
    return 1 - np.sum(rows['Children Unvaccinated']) / np.sum(rows['Children Checked'])


# One coverage value per map polygon, in the same order as afg_and_pak_df.
gis_provinces_list = list(gis_provinces)
vaccination_coverage_list = [_coverage_for(province) for province in gis_provinces]

afg_and_pak_df['Vaccination_coverage'] = vaccination_coverage_list

In [44]:
# Provinces without a computed coverage value get 0.8.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place form acts on a temporary object and
# leaves the frame unchanged under pandas Copy-on-Write.
afg_and_pak_df['Vaccination_coverage'] = afg_and_pak_df['Vaccination_coverage'].fillna(0.8)

In [45]:
# Serialise the AFG/PAK polygons (with their attribute columns) to GeoJSON and
# wrap them in a Bokeh data source for the patch renderers.
geosource = GeoJSONDataSource(geojson = afg_and_pak_df.to_json())

In [53]:
## Base map: province outlines with ES and patient markers on top.

# Fresh Bokeh data sources for the two marker layers.
es_source = ColumnDataSource(data=es_df)
patient_source = ColumnDataSource(data=patients_df)

p = figure(
    title='Polio in AFG and PAK',
    plot_height=600,
    plot_width=950,
    toolbar_location='below',
    tools="pan, wheel_zoom, box_zoom, reset",
)

# Label the axes and hide both grids.
p.xaxis.axis_label = 'Longitude'
p.yaxis.axis_label = 'Latitude'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Province polygons, drawn as plain white patches.
states = p.patches(
    'xs', 'ys',
    source=geosource,
    fill_color='white',
    fill_alpha=0.5,
    line_color='gray',
    line_width=1,
)

# Environmental samples: large diamonds with jitter applied to both coordinates
# (presumably so co-located samples do not sit exactly on top of each other).
es_scatter = p.scatter(
    x=jitter('Longitude', 0.01),
    y=jitter('Latitude', 0.01),
    source=es_source,
    marker='diamond',
    size=20,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
    legend_label='Environmental_sample',
)

# Patients: smaller default markers at their exact coordinates. No per-cluster
# legend is attached in this figure.
patient_scatter = p.scatter(
    x='Longitude',
    y='Latitude',
    source=patient_source,
    size=10,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
)

# Separate hover tools so markers and polygons show different tooltips.
hover1 = HoverTool(renderers=[es_scatter, patient_scatter],
                   tooltips=[("cluster", '@Virus_cluster')])
hover2 = HoverTool(renderers=[states],
                   tooltips=[("Province", '@name')])

p.add_tools(hover1, hover2)

show(p)

In [54]:
## Coverage map: provinces shaded by vaccination coverage, with ES and patient
## markers on top.

# Fresh Bokeh data sources for the two marker layers.
es_source = ColumnDataSource(data=es_df)
patient_source = ColumnDataSource(data=patients_df)

p = figure(
    title='Polio in AFG and PAK',
    plot_height=600,
    plot_width=950,
    toolbar_location='below',
    tools="pan, wheel_zoom, box_zoom, reset",
)

# Label the axes and hide both grids.
p.xaxis.axis_label = 'Longitude'
p.yaxis.axis_label = 'Latitude'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Reversed 32-step grey palette, mapped linearly over coverage 0.8 to 1.
color_mapper = LinearColorMapper(palette=bokeh.palettes.grey(32)[::-1], low=0.8, high=1)

# Colour-bar ticks every 5 percentage points, with readable labels.
ticker = FixedTicker(ticks=[0.8,.85,.90,.95,1])
tick_labels = {
    '0.8': '80% Vaccination',
    '0.85': '85% Vaccination',
    '0.9': '90% Vaccination',
    '0.95': '95% Vaccination',
    '1': '100% Vaccination',
}

# Province polygons filled according to their `Vaccination_coverage` value.
states = p.patches(
    'xs', 'ys',
    source=geosource,
    fill_color={'field' :'Vaccination_coverage', 'transform' : color_mapper},
    fill_alpha=0.5,
    line_color='gray',
    line_width=2,
)

# Environmental samples: large diamonds with jitter applied to both coordinates
# (presumably so co-located samples do not sit exactly on top of each other).
es_scatter = p.scatter(
    x=jitter('Longitude', 0.01),
    y=jitter('Latitude', 0.01),
    source=es_source,
    marker='diamond',
    size=20,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
    legend_label='Environmental_sample',
)

# Patients: smaller default markers at their exact coordinates. No per-cluster
# legend is attached in this figure.
patient_scatter = p.scatter(
    x='Longitude',
    y='Latitude',
    source=patient_source,
    size=10,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
)

color_bar = ColorBar(
    color_mapper=color_mapper,
    ticker=ticker,
    major_label_overrides=tick_labels,
    label_standoff=30,
    width=50,
    height=500,
    border_line_color='white',
    orientation='vertical',
)

# Separate hover tools so markers and polygons show different tooltips.
hover1 = HoverTool(renderers=[es_scatter, patient_scatter],
                   tooltips=[("cluster", '@Virus_cluster')])
hover2 = HoverTool(renderers=[states],
                   tooltips=[("Vaccine coverage", '@Vaccination_coverage'), ("Province", '@name')])

p.add_tools(hover1, hover2)
p.add_layout(color_bar, 'left')

show(p)

In [55]:
## Coverage map with a per-cluster legend: provinces shaded by vaccination
## coverage, ES and patient markers on top, patients grouped in the legend by
## `Virus_cluster`.

# Fresh Bokeh data sources for the two marker layers.
es_source = ColumnDataSource(data=es_df)
patient_source = ColumnDataSource(data=patients_df)

p = figure(
    title='Polio in AFG and PAK',
    plot_height=600,
    plot_width=950,
    toolbar_location='below',
    tools="pan, wheel_zoom, box_zoom, reset",
)

# Label the axes and hide both grids.
p.xaxis.axis_label = 'Longitude'
p.yaxis.axis_label = 'Latitude'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Reversed 32-step grey palette, mapped linearly over coverage 0.8 to 1.
color_mapper = LinearColorMapper(palette=bokeh.palettes.grey(32)[::-1], low=0.8, high=1)

# Colour-bar ticks every 5 percentage points, with readable labels.
ticker = FixedTicker(ticks=[0.8,.85,.90,.95,1])
tick_labels = {
    '0.8': '80% Vaccination',
    '0.85': '85% Vaccination',
    '0.9': '90% Vaccination',
    '0.95': '95% Vaccination',
    '1': '100% Vaccination',
}

# Province polygons filled according to their `Vaccination_coverage` value.
states = p.patches(
    'xs', 'ys',
    source=geosource,
    fill_color={'field' :'Vaccination_coverage', 'transform' : color_mapper},
    fill_alpha=0.5,
    line_color='gray',
    line_width=2,
)

# Environmental samples: large diamonds with jitter applied to both coordinates
# (presumably so co-located samples do not sit exactly on top of each other).
es_scatter = p.scatter(
    x=jitter('Longitude', 0.01),
    y=jitter('Latitude', 0.01),
    source=es_source,
    marker='diamond',
    size=20,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
    legend_label='Environmental_sample',
)

# Patients: smaller default markers at their exact coordinates, with one legend
# entry per distinct `Virus_cluster` value.
patient_scatter = p.scatter(
    x='Longitude',
    y='Latitude',
    source=patient_source,
    size=10,
    fill_color='Cluster_hex_color',
    fill_alpha=0.5,
    line_color='Cluster_hex_color',
    legend_field='Virus_cluster',
)

color_bar = ColorBar(
    color_mapper=color_mapper,
    ticker=ticker,
    major_label_overrides=tick_labels,
    label_standoff=30,
    width=50,
    height=500,
    border_line_color='white',
    orientation='vertical',
)

# Separate hover tools so markers and polygons show different tooltips.
hover1 = HoverTool(renderers=[es_scatter, patient_scatter],
                   tooltips=[("cluster", '@Virus_cluster')])
hover2 = HoverTool(renderers=[states],
                   tooltips=[("Vaccine coverage", '@Vaccination_coverage'), ("Province", '@name')])

p.add_tools(hover1, hover2)
p.add_layout(color_bar, 'left')

show(p)