In [1]:
import os
import folium
from os.path import join

import pandas as pd
import geopandas as gpd
from folium import Map
from shapely.geometry import mapping

from covidcaremap.data import external_data_path, processed_data_path

In [2]:
# Load the geocoded hospital points and order them by 'orig_index'.
geoc = (
    gpd.read_file(processed_data_path('dh_geocoded_v1_0326202.geojson'))
    .sort_values(by='orig_index')
)

In [3]:
# Load the original (pre-geocoding) hospital points and order them by 'orig_index'.
orig = (
    gpd.read_file(processed_data_path('dh_orig_for_mapping_v1_0326202.geojson'))
    .sort_values(by='orig_index')
)

In [4]:
# Split the geocoded rows by whether their 'orig_index' also appears in `orig`.
# NOTE: this cell rebinds `geoc`, so re-running it without re-running the load
# cell leaves `no_orig` empty.
has_orig = geoc['orig_index'].isin(orig['orig_index'])
no_orig = geoc[~has_orig].copy()
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the loaded data (avoids pandas' SettingWithCopyWarning).
geoc = geoc[has_orig].copy()
geoc['distance'] = geoc['distance'].apply(lambda x: round(float(x)))

In [5]:
def _distance_label(row):
    """Return the rounded 'distance' of ``row``, or 'N/A' when it is absent or unusable."""
    if 'distance' not in row:
        return 'N/A'
    try:
        value = float(row['distance'])
        # NaN cannot be rounded to an int, so report it like a missing value.
        # A distance of 0 is a real measurement and is shown as 0.
        return 'N/A' if pd.isna(value) else round(value)
    except (TypeError, ValueError, OverflowError):
        # None, empty/non-numeric strings, infinities, ...
        return 'N/A'


def gdf_for_folium(gdf):
    """Build ``(coords, tooltip)`` pairs for plotting each row of ``gdf`` with folium.

    Parameters
    ----------
    gdf : DataFrame-like
        Each row must carry a point ``geometry`` (exposing ``x`` and ``y``) and
        the columns ``HOSP10_Name``, ``Street_Addr``, ``STATE_NAME``,
        ``COUNTY_NAME``, ``CITY_NAME`` and ``ZIP_CODE``. The columns
        ``distance``, ``source`` and ``confirmation_source`` are optional.

    Returns
    -------
    list of tuple
        One ``((y, x), tooltip_html)`` entry per row, in row order. The
        coordinate pair is (y, x), i.e. the geometry's axes swapped, and the
        tooltip is the row's fields joined with ``<br>``.
    """
    info = []

    for _, r in gdf.iterrows():
        point = r.geometry
        # folium markers take (y, x); the geometry stores (x, y).
        coords = (point.y, point.x)

        name = "<b>Name:</b> {}".format(r['HOSP10_Name'])
        addr = "<b>Addr:</b> {}".format(r['Street_Addr'])
        state = "<b>State:</b> {}".format(r['STATE_NAME'])
        county = "<b>County:</b> {}".format(r['COUNTY_NAME'])
        city = "<b>City:</b> {}".format(r['CITY_NAME'])
        zip_code = "<b>Zip Code:</b> {}".format(r['ZIP_CODE'])
        distance = "<b>Distance discrepancy:</b> {} meters".format(_distance_label(r))

        # Rows without a 'source' column are treated as the original coordinates.
        # format() (rather than '+') keeps non-string source values from raising.
        g_source = "<b>Geocode source:</b> {}".format(
            'geocoded:{}'.format(r['source']) if 'source' in r else 'original')
        c_source = "<b>Validation source:</b> {}".format(
            r['confirmation_source'] if 'confirmation_source' in r else 'N/A')

        tooltip = '<br>'.join(
            [name, addr, state, county, city, zip_code, distance, g_source, c_source])
        info.append((coords, tooltip))

    return info

In [6]:
class HospMap(Map):
    """folium ``Map`` with helpers for plotting hospital point layers.

    Each ``add_*`` method wraps its markers in a named ``FeatureGroup`` so the
    layers can be toggled from a layer control, which is added at most once.
    """

    def __init__(self, location, zoom_start):
        """Create the map centred on ``location`` at zoom level ``zoom_start``."""
        super().__init__(location, zoom_start=zoom_start)
        # Tracks whether a LayerControl has already been attached to this map.
        self.has_layer_control = False

    def add_point_subset(self, gdf, name, color, show=True):
        """Add the rows of ``gdf`` as one toggleable layer of ``color`` markers.

        ``show`` is forwarded to the ``FeatureGroup`` and sets whether the
        layer is visible when the map first opens.
        """
        fg = folium.FeatureGroup(name, show=show)
        self.add_points(fg, gdf, color)
        self.add_child(fg)

    def add_layer_selector(self):
        """Attach an expanded layer control; does nothing if one is already attached."""
        if self.has_layer_control:
            return
        self.add_child(folium.map.LayerControl(collapsed=False))
        self.has_layer_control = True

    def add_set(self, gdf1, gdf2, name, colors, lines=True, sort_col='OBJECTID', show=True):
        """Add two paired point sets as a single toggleable layer.

        Parameters
        ----------
        gdf1, gdf2 : frames accepted by ``gdf_for_folium``
            ``gdf2`` is added first and ``gdf1`` afterwards. Neither frame is
            modified.
        name : str
            Layer name given to the ``FeatureGroup``.
        colors : sequence of two colors
            ``colors[0]`` is used for ``gdf1`` and ``colors[1]`` for ``gdf2``.
        lines : bool
            When true, connect the i-th row of ``gdf1`` to the i-th row of
            ``gdf2`` after sorting both by ``sort_col``.
        sort_col : str
            Column used to align the two frames before drawing lines.
        show : bool
            Whether the layer is visible when the map first opens.
        """
        fg = folium.FeatureGroup(name, show=show)
        self.add_points(fg, gdf2, colors[1])
        self.add_points(fg, gdf1, colors[0])

        if lines:
            # Sort copies rather than in place so the caller's frames keep
            # their own row order.
            self.add_connecting_lines(
                fg, gdf1.sort_values(sort_col), gdf2.sort_values(sort_col))

        self.add_child(fg)

    def __repr__(self):
        # Side effect kept from the original design: requesting the text repr
        # makes sure the layer control exists. Presumably this relies on the
        # notebook asking for the text repr when it displays the map; confirm.
        self.add_layer_selector()
        return ''

    @staticmethod
    def add_connecting_lines(fg, gdf1, gdf2):
        """Draw a grey line between the i-th point of ``gdf1`` and of ``gdf2``.

        Rows are paired purely by position, so both frames must already be in
        matching order. ``zip`` stops at the shorter frame; a warning is
        emitted when the lengths differ because the pairing is then suspect.
        """
        if len(gdf1) != len(gdf2):
            import warnings
            warnings.warn(
                'add_connecting_lines: frames differ in length ({} vs {}); '
                'lines are paired by position and may be misaligned'.format(
                    len(gdf1), len(gdf2)))

        starts = [coords for coords, _ in gdf_for_folium(gdf1)]
        ends = [coords for coords, _ in gdf_for_folium(gdf2)]
        for segment in zip(starts, ends):
            fg.add_child(folium.PolyLine(segment, color='grey', opacity=0.5))

    @staticmethod
    def add_points(fg, gdf, color):
        """Add one circle marker with a tooltip to ``fg`` for every row of ``gdf``."""
        for coords, tt in gdf_for_folium(gdf):
            fg.add_child(folium.CircleMarker(
                coords,
                control=True,
                fill_color=color,
                fill_opacity=0.25,
                color=color,
                weight=1.5,
                tooltip=tt))

In [7]:
# Base map; the start location looks like a point in the central US (confirm).
m = HospMap(location=(39.8333333, -98.585522), zoom_start=4)

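All points (the layer with every geocoded/original pair is commented out; only geocoded points with no matching original row are added)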

In [8]:
# Disabled: layer showing every geocoded/original pair.
# m.add_set(geoc, orig, 'All DH',  ('purple', 'orange'), show=False)
# Plot the geocoded rows that have no counterpart in `orig` as their own layer.
m.add_point_subset(gdf=no_orig, name='No DH geom', color='blue')

Greater than 1 km discrepancy between geocoded and original locations

In [9]:
# Geocoded rows whose 'distance' (shown in meters in the tooltips) exceeds 1000.
gt_1km_geoc = geoc[geoc['distance'] > 1000].copy()
# Select the originals by 'orig_index' instead of filtering `orig` on its own
# 'distance': the geocoded distances were rounded earlier, so two independent
# filters can pick different rows and the connecting lines would then join
# unrelated hospitals.
gt_1km_orig = orig[orig['orig_index'].isin(gt_1km_geoc['orig_index'])].copy()
m.add_set(gt_1km_geoc, gt_1km_orig, '> 1km discrepancy',  ('purple', 'orange'))

Validated by state

In [10]:
# Geocoded rows whose 'confirmation_source' is 'state', plus the original rows
# sharing their 'orig_index'.
is_state_validated = geoc['confirmation_source'].eq('state')
state_val_geoc = geoc.loc[is_state_validated].copy()
state_val_orig = orig.loc[orig['orig_index'].isin(state_val_geoc['orig_index'])].copy()
m.add_set(
    gdf1=state_val_geoc,
    gdf2=state_val_orig,
    name='Validated by state',
    colors=('purple', 'orange'),
)

Geocoded with Mapbox

In [11]:
# Geocoded rows whose 'source' is 'mapbox', plus the original rows sharing
# their 'orig_index'.
is_mapbox = geoc['source'].eq('mapbox')
mapbox_val_geoc = geoc.loc[is_mapbox].copy()
mapbox_val_orig = orig.loc[orig['orig_index'].isin(mapbox_val_geoc['orig_index'])].copy()
m.add_set(
    gdf1=mapbox_val_geoc,
    gdf2=mapbox_val_orig,
    name='Mapbox geocoded',
    colors=('purple', 'orange'),
)

Top 50 most extreme discrepancies

In [12]:
# The 50 geocoded rows with the largest 'distance'.
top_50_geoc = geoc.sort_values(by = 'distance', ascending=False).head(50).copy()
# Take the matching originals by 'orig_index' rather than ranking `orig`
# independently: ties or rounding differences could otherwise select a
# different set of hospitals and misalign the connecting lines.
top_50_orig = orig[orig['orig_index'].isin(top_50_geoc['orig_index'])].copy()
m.add_set(top_50_geoc, top_50_orig, 'Top 50 most extreme discrepancies', ('purple', 'orange'))

In [13]:
# Bare last expression: the notebook shows the map as this cell's output.
m