In [1]:
import pathlib
import shutil
import urllib.request

import numpy as np
import pandas as pd
import plotly.express as px

from IPython.display import display, Pretty


In [2]:
def get_url_to_file(url, file):
    """Download ``url`` to the local path ``file`` unless ``file`` already exists.

    The payload is streamed into a sibling ``<file>.part`` file and only
    renamed to ``file`` once the transfer has finished. Because the mere
    existence of ``file`` is what makes later calls skip the download, a
    transfer that fails midway must never leave a truncated ``file`` behind.

    Parameters:
        url: Anything accepted by ``urllib.request.urlopen``.
        file: Destination path (``str`` or path-like).

    Returns:
        None.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file operations raise;
        in that case neither ``file`` nor the ``.part`` file is left on disk.
    """
    target = pathlib.Path(file)
    if target.exists():
        # Already downloaded on a previous run; act as a simple cache.
        return

    partial = target.with_name(target.name + ".part")
    try:
        with urllib.request.urlopen(url) as instream:
            with open(partial, "wb") as outstream:
                # Copy in 8 KiB blocks, the same block size as before.
                shutil.copyfileobj(instream, outstream, 1024 * 8)
        # Publish the finished download under its final name in one step.
        partial.replace(target)
    finally:
        # After a successful rename this is a no-op; after a failure it
        # discards the incomplete data.
        if partial.exists():
            partial.unlink()


In [3]:

# Download the three global time-series CSVs (confirmed, deaths, recovered)
# from the CSSEGISandData/COVID-19 GitHub repository. Files that are already
# present locally are skipped by get_url_to_file.
CSSE_BASE_URL = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/"

display_handle = display(Pretty("Getting data..."), display_id=True)

for kind in ("confirmed", "deaths", "recovered"):
    # Local name, e.g. "confirmed_global.csv"; the remote file carries the
    # same name with a "time_series_covid19_" prefix.
    local_name = "%s_global.csv" % kind
    display_handle.update(Pretty("Getting %s..." % local_name))
    get_url_to_file(CSSE_BASE_URL + "time_series_covid19_" + local_name, local_name)
    display_handle.update(Pretty("Got %s" % local_name))

display_handle.update(Pretty("Got CSV files."))


Got CSV files.

In [4]:
# Load each downloaded CSV into its own DataFrame.
confirmed, deaths, recovered = (
    pd.read_csv(csv_name)
    for csv_name in ("confirmed_global.csv", "deaths_global.csv", "recovered_global.csv")
)

In [5]:
# State shared with the reshaping cell:
#   offset - presumably the number of leading non-date columns in each CSV
#            (the reshaping loop begins at this column index) - TODO confirm
#   series - list the reshaping cell fills with one dict per date column
offset, series = 4, []

In [6]:
# Reshape the data.
# The original tables are wide: one row per location and, after the leading
# metadata columns, one column per date. For every date, keep only the rows
# whose value is non-zero, together with their Lat/Long, so each date gets
# its own compact table of points.


def _nonzero_points(frame, col_idx):
    """Return the Lat, Long and value columns of ``frame`` for the rows
    whose value in column position ``col_idx`` is non-zero."""
    value_col = frame.columns[col_idx]
    return frame.loc[frame.iloc[:, col_idx] != 0, ["Lat", "Long", value_col]]


display_handle = display(Pretty("processing data"), display_id=True)

# Start from an empty list so that re-running this cell rebuilds `series`
# instead of appending a second copy of every date.
series = []

# Date columns start at `offset`; the loop is driven by the deaths table and
# the same column position is used for all three tables.
for i in range(offset, len(deaths.columns)):
    date = deaths.columns[i]
    display_handle.update(Pretty("processing %s" % date))

    series.append({
        "date": date,
        "death_series": _nonzero_points(deaths, i),
        # Taken from `confirmed` itself; this used to index `deaths`, which
        # put death counts into the confirmed series.
        "confirmed_series": _nonzero_points(confirmed, i),
        "recovered_series": _nonzero_points(recovered, i),
    })

display_handle.update(Pretty("Done!"))


Done!

In [7]:
# Last column (the per-date value column) of the "recovered" table stored at
# index 120 of `series`.
vals = series[120]["recovered_series"].iloc[:,-1]

In [8]:
# Show which date label index 120 of `series` corresponds to.
series[120]["date"]

'5/21/20'

In [9]:
# Plot the "recovered" table stored at index 120 of `series` as markers on a
# world map.
date = series[120]["date"]
plot_series = series[120]["recovered_series"]
values = plot_series.iloc[:, -1]  # last column holds this date's values

fig = px.scatter_geo(plot_series, lat="Lat", lon="Long", text=values)

# Dark paper and map background with light land.
fig.update_layout(
    title="COVID-19 recoveries (" + date + ")",
    font=dict(color="rgb(240,240,240)"),
    paper_bgcolor="rgb(60,60,60)",
    geo=dict(
        scope="world",
        showland=True,
        showcountries=True,
        bgcolor="rgb(90,90,90)",
        landcolor="rgb(250,250,250)",
    ),
)

# Marker-only trace; the hover box shows just the value passed as `text`.
fig.update_traces(
    name="Recovered",
    mode="markers",
    geo="geo",
    hovertemplate="%{text}",
    marker=dict(symbol="diamond", color="#DC7633"),
)
fig.show()