In [None]:
%load_ext autoreload
%autoreload 2
%aimport utils_1_0

import altair as alt
import pandas as pd
import requests
import io

from constants_1_0 import COLUMNS
from utils_1_0 import apply_theme, get_visualization_subtitle
from web import for_website

# Data Preprocessing

## Site Descriptions From Figshare

In [None]:
# Site descriptions are published on Figshare:
# https://figshare.com/articles/Healthcare_Systems/12118911
# NOTE(review): the download URL below carries a fixed file id; confirm it
# still corresponds to the latest upload of that article.
SITE_DATA_URL = "https://ndownloader.figshare.com/files/22312305"
site_df = pd.read_csv(SITE_DATA_URL)

# Show the first rows as the cell output.
site_df.head()

## Topology of Countries

In [None]:
# World TopoJSON file; its "countries" feature is the geometry source that
# the map background chart is built from.
topo_url = "https://raw.githubusercontent.com/danutzthe/topojson_world_map/master/world-110m.json"
countries = alt.topo_feature(url=topo_url, feature="countries")

## Country Codes

In [None]:
# Table of country codes; the merge step later joins on its "id" column.
COUNTRY_CODES_URL = (
    "https://raw.githubusercontent.com/alisle/world-110m-country-codes/master/world-110m-country-codes.json"
)
code_df = pd.read_json(COUNTRY_CODES_URL)

# Singapore is treated as missing from the downloaded table, so append it by hand.
# NOTE(review): "code" holds the country name here; confirm whether the other
# rows use short country codes and whether this value should match them.
singapore_row = {"code": "Singapore", "id": 702, "name": "Singapore"}
singapore_df = pd.DataFrame(data=[singapore_row])
code_df = pd.concat([code_df, singapore_df])

## Merge Data

In [None]:
# Custom visual encodings, one row per participating country, keyed by the
# numeric country "id" that code_df also carries.
country_style_df = pd.DataFrame(data=[
    {"id": 250, "color": "#0072B2", "fill": "lightgray", "opacity": 1, "Country": "France"},
    {"id": 276, "color": "#E69F00", "fill": "lightgray", "opacity": 1, "Country": "Germany"},
    {"id": 380, "color": "#009E73", "fill": "lightgray", "opacity": 1, "Country": "Italy"},
    {"id": 702, "color": "#CC79A7", "fill": "lightgray", "opacity": 1, "Country": "Singapore"},
    {"id": 840, "color": "#D55E00", "fill": "lightgray", "opacity": 1, "Country": "USA"},
])

# Left-join the encodings onto every country code.
# code_df and site_df are deliberately NOT rebound to their set_index() results:
# doing so made a second run of this cell raise KeyError, because "id" and
# "Country" were then already the index rather than columns.
df = code_df.set_index("id").join(country_style_df.set_index("id")).reset_index()

# Countries without a custom encoding have NaN opacity after the join
# (NaN != 1 is True), so they receive the default opacity.
df.loc[df["opacity"] != 1, "opacity"] = 0.2  # default opacity

# Add site descriptions by joining on the country name.
df = df.set_index("Country").join(site_df.set_index("Country")).reset_index()

# Add, for each city, a comma-separated list of its distinct healthcare systems.
UNIQUE_CITIES = site_df["City"].unique().tolist()
sites_by_city = (
    site_df.groupby("City")["Healthcare System"]
    .agg(lambda systems: ", ".join(systems.unique().tolist()))
)
# Rows whose City is missing (countries without sites) keep NaN in "Sites".
df["Sites"] = df["City"].map(sites_by_city)

# Visualization: Symbol Map

## Map Background

In [None]:
# Per-country attributes pulled from df by matching the geometry "id".
country_lookup = alt.LookupData(data=df, key="id", fields=["opacity", "color"])

# Opacity comes from the looked-up "opacity" field; no legend is drawn for it.
country_opacity = alt.Opacity("opacity:Q", legend=None)

# Gray country shapes used as the backdrop for every map.
background = (
    alt.Chart(countries)
    .mark_geoshape(fill="lightgray", stroke="lightgray")
    .encode(opacity=country_opacity)
    .transform_lookup(lookup="id", from_=country_lookup)
)

## Symbols

In [None]:
# Fixed country -> color mapping shared by all symbols.
COUNTRIES = ["France", "Germany", "Italy", "Singapore", "USA"]
COUNTRY_COLOR = ["#0072B2", "#E69F00", "#009E73", "#CC79A7", "#D55E00"]
color_scale = alt.Scale(domain=COUNTRIES, range=COUNTRY_COLOR)

# Aggregate df to one record per (City, Country, Sites): mean coordinates,
# summed hospital count and number of rows. Records with a null City are dropped.
city_summary = alt.Chart(df).transform_aggregate(
    groupby=["City", "Country", "Sites"],
    Latitude_Mean="mean(Latitude)",
    Longitude_Mean="mean(Longitude)",
    Hospitals_Total="sum(Hospitals)",
    Sites_Total="count()",
).transform_filter(
    # `!= None` (not `is not None`) is required: the comparison builds a chart expression.
    alt.datum["City"] != None  # noqa: E711
)

# Circle area encodes the total number of hospitals; no legend is shown.
hospital_size = alt.Size(
    "Hospitals_Total:Q",
    title="Number of Hospitals",
    scale=alt.Scale(domain=[1, 40], range=[60, 400]),
    legend=None,
)

# Circle color encodes the country; no legend is shown.
country_color = alt.Color("Country:N", scale=color_scale, legend=None)

# Fields listed in the hover tooltip.
hover_fields = [
    alt.Tooltip("Country:N"),
    alt.Tooltip("City:N"),
    alt.Tooltip("Sites:N", title="Participating sites"),
    alt.Tooltip("Sites_Total:Q", title="Number of sites"),
    alt.Tooltip("Hospitals_Total:Q", title="Number of hospitals"),
]

circle = city_summary.mark_circle().encode(
    longitude="Longitude_Mean:Q",
    latitude="Latitude_Mean:Q",
    size=hospital_size,
    color=country_color,
    tooltip=hover_fields,
)

# City name labels placed below each circle, in small black text.
labels = circle.mark_text(
    dy=7,
    baseline="top",
    align="center",
).encode(
    text="City",
    longitude="Longitude_Mean:Q",
    latitude="Latitude_Mean:Q",
    color=alt.value("black"),
    size=alt.value(8),
)

## Map Visualizations by Continent

In [None]:
# Title block for the North America panel.
# NOTE(review): the argument to get_visualization_subtitle is hard-coded;
# its meaning is defined in utils_1_0 — confirm it is still current.
north_america_title = {
    "text": "Sites in North America",
    "subtitle": get_visualization_subtitle(12),
    "subtitleColor": "gray",
}

# Background, circles and labels layered, then projected onto North America.
north_america_layers = background + circle + labels
usa = north_america_layers.project(
    type="mercator",
    scale=280,
    center=[-126, 59],
    translate=[0, 0],
).properties(
    width=300,
    height=300,
    title=north_america_title,
)

usa

In [None]:
# Cities whose labels would otherwise collide on the Europe map get a custom
# pixel offset. This dict is the single source of truth: the default-label
# filter below is derived from its keys.
label_offsets = {
    # Custom dx and dy
    "Paris": {"dx": 0, "dy": 16},
    "Milano": {"dx": -20, "dy": 0},
    "Bergamo": {"dx": 0, "dy": -10},
    "Erlangen": {"dx": 8, "dy": 10},
}

# Build "City != a && City != b && ..." from the keys of label_offsets, so that
# adding or removing an offset entry can never leave a city labelled twice.
default_label_filter = None
for city in label_offsets:
    is_other_city = alt.datum["City"] != city
    if default_label_filter is None:
        default_label_filter = is_other_city
    else:
        default_label_filter = default_label_filter & is_other_city

# Labels that keep the default placement.
if default_label_filter is None:
    labels_with_offset = labels
else:
    labels_with_offset = labels.transform_filter(default_label_filter)

# One extra text layer per city with a custom offset.
# NOTE(review): mark_text() is called again on `labels` here, which presumably
# replaces the earlier mark settings (align/baseline) rather than extending
# them — confirm this is intended before changing the offsets.
for city, offset in label_offsets.items():
    labels_with_offset += labels.transform_filter(
        alt.datum["City"] == city
    ).mark_text(dx=offset["dx"], dy=offset["dy"])

# Background, circles and labels layered, then projected onto Europe.
eu = ( background + circle + labels_with_offset ).project(
    type='mercator',
    scale=820,
    center=[-3, 53],
    translate=[0, 0],
).properties(
    title={
        "text": 'Sites in Europe',
        "subtitle": get_visualization_subtitle(6),
        "subtitleColor": "gray"
    },
    width=300, height=300
)

eu

In [None]:
# Title block for the Asia panel.
# NOTE(review): the argument to get_visualization_subtitle is hard-coded;
# its meaning is defined in utils_1_0 — confirm it is still current.
asia_title = {
    "text": "Sites in Asia",
    "subtitle": get_visualization_subtitle(1),
    "subtitleColor": "gray",
}

# Background, circles and labels layered, then projected onto Asia.
asia_layers = background + circle + labels
asia = asia_layers.project(
    type="mercator",
    scale=900,
    center=[94, 10],
    translate=[0, 0],
).properties(
    width=300,
    height=300,
    title=asia_title,
)

asia

In [None]:
# The three continent maps side by side, themed, on a transparent background.
continent_row = alt.hconcat(usa, eu, asia, spacing=10)
h = apply_theme(continent_row).properties(background="transparent")

# NOTE(review): for_website comes from the local `web` module; presumably it
# exports the chart for the website under the given names — confirm there.
for_website(h, "Map", "Sites by continent horizontal")

h

In [None]:
# The three continent maps stacked vertically, themed, on a transparent background.
continent_column = alt.vconcat(usa, eu, asia, spacing=10)
v = apply_theme(continent_column).properties(background="transparent")

# NOTE(review): for_website comes from the local `web` module; presumably it
# exports the chart for the website under the given names — confirm there.
for_website(v, "Map", "Sites by continent vertical")

v