In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_1

import pandas as pd
import numpy as np
import altair as alt
from altair_saver import save
import datetime
import dateutil.parser
from os.path import join

from constants_1_1 import SITE_FILE_TYPES
from utils_1_1 import (
    get_site_file_paths,
    get_site_file_info,
    get_site_ids,
    get_visualization_subtitle,
    get_country_color_map,
    apply_theme,
)
from web import for_website

alt.data_transformers.disable_max_rows(); # Lift Altair's row limit (5000 rows) so larger data frames can be charted; the trailing ';' hides the cell output

# Data Preprocessing

## Site Descriptions From Figshare

In [None]:
# Data release date string (YYYY-MM-DD); only referenced by the chart subtitles,
# which are currently commented out in the map cells.
DATA_RELEASE = "2021-02-04"

In [None]:
# Positional column index in the TSV -> column name used in this notebook.
# Columns that are not listed here are discarded.
sites_column_map = {
    0: "site_name",
    1: "siteid",
    2: "city",
    3: "country",
    4: "patient_type",
    6: "adult_num_hosp",
    7: "adult_num_beds",
    8: "adult_num_yearly_discharge",
    10: "ped_num_hosp",
    11: "ped_num_beds",
    12: "ped_num_yearly_discharge",
    32: "lat",
    33: "lon",
}

sites_df = (
    pd.read_csv(
        join("..", "data", "Health_Systems_Participating_1.1.tsv"),
        sep="\t",
        skiprows=2,     # the first two lines of the file are skipped
        header=None,    # columns are addressed by position (see sites_column_map)
        thousands=",",  # numbers are written with ',' as the thousands separator
    )
    .rename(columns=sites_column_map)
    .loc[:, list(sites_column_map.values())]
    .dropna(subset=["site_name"])  # rows without a site name are dropped
    .set_index("siteid")
)

# Rows with missing lat/lon are kept: no dropna is applied to the coordinates.
# Spell out "UK" so it matches the country name used in the rest of the notebook.
sites_df["country"] = sites_df["country"].replace({"UK": "United Kingdom"})
sites_df

## Participating Sites

In [None]:
# Restrict the site descriptions to the sites listed in the 1.1 site registry.
registered_df = pd.read_csv(join("..", "data", "site.register_1.1.csv"))
registered = registered_df.SiteID.unique().tolist()

# siteid was made the index when the descriptions were loaded; turn it back into
# a column so it can be filtered on.
sites_df = sites_df.reset_index()
sites_df = sites_df[sites_df.siteid.isin(registered)]

# Sanity check: every registered SiteID should have a description row.
# Report exactly which ones are missing instead of a generic message.
missing_site_ids = (
    registered_df.SiteID[~registered_df.SiteID.isin(sites_df.siteid)].unique().tolist()
)
if missing_site_ids:
    print(
        "Warning: {} registered site(s) have no matching site description: {}".format(
            len(missing_site_ids), missing_site_ids
        )
    )

sites_df

In [None]:
# Number of rows in sites_df per region, grouped by the country column.
site_country = sites_df["country"]

num_sites_na = int((site_country == "USA").sum())
num_sites_eur = int(
    site_country.isin(["Spain", "United Kingdom", "Italy", "France", "Germany"]).sum()
)
num_sites_asia = int(site_country.isin(["Singapore"]).sum())
num_sites_sa = int(site_country.isin(["Brazil"]).sum())

## Topology of Countries

In [None]:
# Remote world-110m TopoJSON file; the 'countries' feature of that file is the
# geometry source for the map charts below.
topo_url = (
    "https://raw.githubusercontent.com/danutzthe/topojson_world_map/"
    "master/world-110m.json"
)
countries = alt.topo_feature(url=topo_url, feature="countries")

## Country Codes

In [None]:
# Country code table downloaded as JSON; it is joined on its "id" column later.
code_df = pd.read_json(
    "https://raw.githubusercontent.com/alisle/world-110m-country-codes/master/world-110m-country-codes.json"
)

# Singapore is missing from the downloaded table, so append a row for it.
singapore_df = pd.DataFrame(
    {"code": ["Singapore"], "id": [702], "name": ["Singapore"]}
)
code_df = pd.concat([code_df, singapore_df])

## Merge Data

In [None]:
# Custom encodings for the countries that have participating sites.
# Every listed country shares fill="lightgray" and opacity=1; only the colour
# and the name differ.
country_styles = pd.DataFrame(
    [
        (250, "#0072B2", "France"),
        (276, "#E69F00", "Germany"),
        (380, "#009E73", "Italy"),
        (702, "#CC79A7", "Singapore"),
        (840, "#D55E00", "USA"),
        (826, "#382633", "United Kingdom"),
        (724, "#A3333D", "Spain"),
        (76, "#0E3B43", "Brazil"),
    ],
    columns=["id", "color", "country"],
).assign(fill="lightgray", opacity=1)[["id", "color", "fill", "opacity", "country"]]

# Attach the custom encodings to the country code table (left join on id).
code_df = code_df.set_index("id")
df = code_df.join(country_styles.set_index("id")).reset_index()

# Countries without a custom encoding fall back to the default opacity.
df.loc[df["opacity"] != 1, "opacity"] = 0.2

# Add the site descriptions: one output row per (country, site) pair; countries
# without sites keep a single row with empty site columns.
sites_df = sites_df.set_index("country")
df = df.set_index("country").join(sites_df).reset_index()

# For every city, store the comma-separated list of its site names in "sites".
UNIQUE_CITIES = sites_df["city"].unique().tolist()
for city_name in UNIQUE_CITIES:
    city_site_names = (
        sites_df.loc[sites_df["city"] == city_name, "site_name"].unique().tolist()
    )
    df.loc[df["city"] == city_name, "sites"] = ", ".join(city_site_names)

# Visualization: Symbol Map

## Map Background

In [None]:
# Base map layer: every country is drawn in light gray and only its opacity
# varies (the value comes from the "opacity" column of df).
#
# df holds one row per site, so a country id can occur many times. The lookup
# only needs one row per id, and opacity/color are identical across the rows of
# a country, so a de-duplicated two-column view is looked up instead of the
# full table. "Hospitals" is not requested: df has no such column at this point
# (the columns are renamed in a later cell) and this layer does not encode it.
background_lookup_df = df[["id", "opacity", "color"]].drop_duplicates(subset="id")

background = alt.Chart(countries).mark_geoshape(
    fill="lightgray",
    stroke="lightgray"
).encode(
    opacity=alt.Opacity("opacity:Q", legend=None),
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(background_lookup_df, "id", ["opacity", "color"])
)

background

In [None]:
# Switch to the capitalised column names that the charts below refer to.
# Note that adult_num_hosp becomes "Hospitals".
display_column_names = {
    "country": "Country",
    "sites": "Sites",
    "city": "City",
    "adult_num_hosp": "Hospitals",
    "lon": "Longitude",
    "lat": "Latitude",
    "siteid": "SiteID",
}
df = df.rename(columns=display_column_names)
df

## Symbols

In [None]:
# Colour palette: COUNTRY_COLOR[i] is the colour of COUNTRIES[i].
# NOTE(review): "Singapore" and "United Kingdom" are not part of this domain --
# confirm that no site from those countries is plotted with this scale.
COUNTRIES = ['ALL', 'Brazil', 'France', 'Germany', 'Italy', 'Spain', 'USA']
COUNTRY_COLOR = ['black', '#CB7AA7', '#0072B2', '#E79F00', '#029F73', '#57B4E9', '#D45E00']

color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)

# Shared position channels for the circles and their text labels.
site_longitude = 'Longitude_Mean:Q'
site_latitude = 'Latitude_Mean:Q'

# One circle per (City, Country, Sites, SiteID) group, sized by the summed
# number of hospitals. Rows without a city (countries with no site) are removed.
circle = (
    alt.Chart(df)
    .transform_aggregate(
        groupby=['City', 'Country', 'Sites', 'SiteID'],
        Latitude_Mean='mean(Latitude)',
        Longitude_Mean='mean(Longitude)',
        Hospitals_Total='sum(Hospitals)',
        Sites_Total='count()',
    )
    .transform_filter(alt.datum["City"] != None)
    .mark_circle()
    .encode(
        longitude=site_longitude,
        latitude=site_latitude,
        size=alt.Size(
            'Hospitals_Total:Q',
            title='Number of Hospitals',
            scale=alt.Scale(domain=[1, 40], range=[60, 400]),
            legend=None,
        ),
        color=alt.Color('Country:N', scale=color_scale, legend=None),
        tooltip=[
            alt.Tooltip('Country:N'),
            alt.Tooltip('City:N'),
            alt.Tooltip('Sites:N', title="Participating sites"),
            alt.Tooltip('Sites_Total:Q', title="Number of sites"),
            alt.Tooltip('Hospitals_Total:Q', title="Number of hospitals"),
        ],
    )
)

# Text layer derived from the circles: shows the SiteID in small black text,
# overriding the size and colour encodings inherited from `circle`.
labels = circle.mark_text(align='center', baseline='top', dy=7).encode(
    longitude=site_longitude,
    latitude=site_latitude,
    text='SiteID',
    size=alt.value(8),
    color=alt.value('black'),
    opacity=alt.value(1),
)

## Map Visualizations by Continent

In [None]:
# Per-city label offsets (dx/dy passed to mark_text) used to keep the site
# labels on the North America map from overlapping.
# NOTE(review): "Bergamo" also appears in the Europe offsets, and "Durgham, NC"
# looks like a misspelling of "Durham, NC". The keys must match the City values
# in the data exactly, so confirm against the site table before changing either.
label_offsets = {
    # Custom dx and dy
    "Boston, MA": {"dx": 0, "dy": -12},
    "Bedford, MA": {"dx": 0, "dy": 13},
    "Ann Arbor, MI": {"dx": 0, "dy": -10},
    "Pittsburgh, PA": {"dx": 12, "dy": -15},
    "Philadelphia, PA": {"dx": 20, "dy": 10},
    "Bethlehem, PA": {"dx": 36, "dy": 0},
    "Kansas City, MO": {"dx": -36, "dy": 9},
    "Dallas, TX": {"dx": -24, "dy": 0},
    "Lexington, KY": {"dx": -16, "dy": 3},
    "Birmingham, AL": {"dx": -15, "dy": 10},
    "Bergamo": {"dx": 0, "dy": -10},
    "Charleston, SC": {"dx": 0, "dy": 13},
    "Newport News, VA": {"dx": 39, "dy": 3},
    "Chapel Hill, NC": {"dx": 13, "dy": 9},
    "Chicago, IL": {"dx": -20, "dy": 0},
    "Vancouver, WA": {"dx": 20, "dy": -12},
    "Mare Island, CA": {"dx": 20, "dy": -12},
    "Los Angeles, CA": {"dx": 0, "dy": 12},
    "Mesa, AZ": {"dx": 33, "dy": 0},
    "Minneapolis, MN": {"dx": 0, "dy": -12},
    "Arlington, TX": {"dx": -24, "dy": 12},
    "Hines, IL": {"dx": 0, "dy": -12},
    "Glendale, CO": {"dx": -5, "dy": -12},
    "Nashville, TN": {"dx": -32, "dy": 6},
    "Bronx, NY": {"dx": 15, "dy": 12},
    "Linthicum, MD": {"dx": 30, "dy": 2},
    "Durgham, NC": {"dx": 34, "dy": 2},
    "Cincinnati, OH": {"dx": 30, "dy": 8},
    "Long Beach, CA": {"dx": 0, "dy": -12},
}

# Labels of the cities WITHOUT a custom offset keep the default placement.
# The exclusion filter is built from the keys of label_offsets so that a city
# can never be listed in one place and forgotten in the other (which would
# draw its label twice: once at the default position and once with the offset).
uses_default_offset = None
for city in label_offsets:
    not_this_city = alt.datum["City"] != city
    uses_default_offset = (
        not_this_city
        if uses_default_offset is None
        else uses_default_offset & not_this_city
    )

labels_with_offset = (
    labels
    if uses_default_offset is None  # no custom offsets at all: nothing to exclude
    else labels.transform_filter(uses_default_offset)
)

# Add one text layer per customised city, shifted by its dx/dy.
for city, offset in label_offsets.items():
    labels_with_offset += labels.transform_filter(
        alt.datum["City"] == city
    ).mark_text(dx=offset["dx"], dy=offset["dy"])

nor_am = ( background + circle + labels_with_offset ).project(
    type= 'mercator',
    scale= 280,
    center= [-126, 59],
    translate = [0, 0]
).properties(
    title={
        "text": 'Sites in North America',
#         "subtitle": get_visualization_subtitle(num_sites=num_sites_na, data_release=DATA_RELEASE),
        "subtitleColor": "gray"
    },
    width=300, height=300
)

nor_am

In [None]:
# South America panel: base map, site circles and labels at their default
# placement. The subtitle (site count / data release) is currently disabled.
sou_am_projection = dict(
    type='mercator',
    scale=180,
    center=[-110, 23],
    translate=[0, 0],
)
sou_am_title = {
    "text": 'Sites in South America',
    "subtitleColor": "gray",
}

sou_am = (
    (background + circle + labels)
    .project(**sou_am_projection)
    .properties(title=sou_am_title, width=300, height=300)
)

sou_am

In [None]:
# Per-city label offsets (dx/dy passed to mark_text) used to keep the site
# labels on the Europe map from overlapping.
label_offsets = {
    # Custom dx and dy
    "Paris": {"dx": 0, "dy": 16},
    "Mannheim": {"dx": 0, "dy": -10},
    "Erlangen": {"dx": 22, "dy": 0},
    "Freiburg": {"dx": 18, "dy": 0},
    "Lumezzane/Brescia": {"dx": 20, "dy": 0},
    "Pavia": {"dx": -25, "dy": 0},

    "Milano": {"dx": -20, "dy": 0},
    "Bergamo": {"dx": 15, "dy": -10},
    "Milan":  {"dx": 5, "dy": 16},
}

# Labels of the cities WITHOUT a custom offset keep the default placement.
# The exclusion filter is built from the keys of label_offsets instead of a
# second hand-written list, so the two cannot drift apart.
uses_default_offset = None
for city in label_offsets:
    not_this_city = alt.datum["City"] != city
    uses_default_offset = (
        not_this_city
        if uses_default_offset is None
        else uses_default_offset & not_this_city
    )

labels_with_offset = (
    labels
    if uses_default_offset is None  # no custom offsets at all: nothing to exclude
    else labels.transform_filter(uses_default_offset)
)

# Add one text layer per customised city, shifted by its dx/dy.
for city, offset in label_offsets.items():
    labels_with_offset += labels.transform_filter(
        alt.datum["City"] == city
    ).mark_text(dx=offset["dx"], dy=offset["dy"])

eu = (
    background +
    circle + #.transform_filter(alt.datum["Country"] != "Germany") +
    labels_with_offset#.transform_filter(alt.datum["Country"] != "Germany")
).project(
    type='mercator',
    scale=420,
    center=[-20, 58],
    translate=[0, 0],
).properties(
    title={
        "text": 'Sites in Europe',
#         "subtitle": get_visualization_subtitle(num_sites=num_sites_eur-2-3, data_release=DATA_RELEASE),
        "subtitleColor": "gray"
    },
    width=300, height=300
)

eu

In [None]:
# Asia panel: base map, site circles and labels at their default placement.
# The subtitle (site count / data release) is currently disabled.
asia_projection = dict(
    type='mercator',
    scale=900,
    center=[94, 10],
    translate=[0, 0],
)
asia_title = {
    "text": 'Sites in Asia',
    "subtitleColor": "gray",
}

asia = (
    (background + circle + labels)
    .project(**asia_projection)
    .properties(title=asia_title, width=300, height=300)
)

asia

In [None]:
# Horizontal layout: North America, Europe and South America side by side.
# The Asia panel is not part of this layout.
horizontal_panels = alt.hconcat(nor_am, eu, sou_am, spacing=10)
h = apply_theme(horizontal_panels).properties(background="transparent")

# Hand the themed chart to the project's website helper under the "Map" name.
for_website(h, "Map", "Sites by continent horizontal")

h

In [None]:
# Vertical layout: all four continent panels stacked top to bottom.
vertical_panels = alt.vconcat(nor_am, eu, asia, sou_am, spacing=10)
v = apply_theme(vertical_panels).properties(background="transparent")

# Hand the themed chart to the project's website helper under the "Map" name.
for_website(v, "Map", "Sites by continent vertical")

v

In [None]:
# Choropleth of the number of hospitals per country.
#
# df holds one row per site, so a country id occurs once per site. A lookup
# keyed on "id" can only return one row per key, which would colour a country
# by the hospital count of a single site. Aggregate to one row per country id
# first, summing the hospitals of all its sites.
# Countries without any site keep a missing value (min_count=1) instead of 0.
# NOTE(review): assumes the intended measure is the country total, matching the
# sum(Hospitals) used for the circle sizes -- confirm.
choropleth_df = (
    df.assign(Hospitals=pd.to_numeric(df["Hospitals"], errors="coerce"))
    .groupby("id")
    .agg({
        "opacity": "first",
        "color": "first",
        "Hospitals": lambda hospitals: hospitals.sum(min_count=1),
    })
    .reset_index()
)

choropleth = alt.Chart(countries).mark_geoshape(
).encode(
    opacity=alt.Opacity("opacity:Q", legend=None),
    color=alt.Color("Hospitals:Q")
).transform_lookup(
    lookup="id",
    from_=alt.LookupData(choropleth_df, "id", ["opacity", "color", "Hospitals"])
)

choropleth