# Bar and line charts. Covid-19 Humdata

> Altair example: bar and line charts of confirmed Covid-19 cases in Norway, built from data published on humdata.org

Datasource: https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

In [1]:
import altair as alt
import dateutil
import deetly
import pandas as pd

## Create datapackage

In [2]:
# Create datapackage
# Free-text description stored in the package metadata (data source and data license).
description = "\n".join([
    "",
    "Altair visualization of development in covid-19 in Norway",
    "",
    "Source: https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases",
    "",
    "License: Creative Commons Attribution International",
    "",
])

# Metadata passed to deetly when the package is created.
# NOTE(review): in the recorded run deetly reported 'title' as a missing
# mandatory property and 'name', 'author', 'license', 'keywords' as not part of
# the DCAT schema -- confirm the expected keys against the deetly documentation.
package = dict(
    name="Altair Covid-19 Norway",
    description=description,
    author="deetly",
    theme="examples",
    license="MIT",
    keywords=["altair, covid-19, coronavirus, line"],
)

dp = deetly.package(package)

Missing mandatory props(s): ['title']
Missing recommended props(s): ['contactPoint', 'distribution', 'keyword', 'publisher', 'spatial', 'temporal']
Property not in DCAT schema: ['name', 'author', 'license', 'keywords']
Invalid type: ['String:name', 'String:description', 'String:author', 'String or list of strings:theme', 'String:license', 'String:keywords', 'ISO date or datetime:issued', 'ISO date or datetime:modified']


## Get data

Source: https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

In [3]:
# Humdata HXL-proxy CSV endpoint for the global "confirmed" time series.
# The literal is split across lines for readability; the value is unchanged.
DATA_URL_CONFIRMED = (
    'https://data.humdata.org/hxlproxy/api/data-preview.csv'
    '?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19'
    '%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series'
    '%2Ftime_series_covid19_confirmed_global.csv'
    '&filename=time_series_covid19_confirmed_global.csv'
)

In [4]:
# Humdata HXL-proxy CSV endpoint for the global "deaths" time series.
# The literal is split across lines for readability; the value is unchanged.
DATA_URL_DEATHS = (
    'https://data.humdata.org/hxlproxy/api/data-preview.csv'
    '?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19'
    '%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series'
    '%2Ftime_series_covid19_deaths_global.csv'
    '&filename=time_series_covid19_deaths_global.csv'
)

In [5]:
# Humdata HXL-proxy CSV endpoint for the global "recovered" time series.
# The literal is split across lines for readability; the value is unchanged.
DATA_URL_RECOVERED = (
    'https://data.humdata.org/hxlproxy/api/data-preview.csv'
    '?url=https%3A%2F%2Fraw.githubusercontent.com%2FCSSEGISandData%2FCOVID-19'
    '%2Fmaster%2Fcsse_covid_19_data%2Fcsse_covid_19_time_series'
    '%2Ftime_series_covid19_recovered_global.csv'
    '&filename=time_series_covid19_recovered_global.csv'
)

In [6]:
# Download the three time series (confirmed, deaths, recovered), in that order,
# each into its own DataFrame.
df_confirmed, df_deaths, df_recovered = [
    pd.read_csv(url)
    for url in (DATA_URL_CONFIRMED, DATA_URL_DEATHS, DATA_URL_RECOVERED)
]

In [7]:
# Show a single row to inspect the layout: identifying columns followed by one column per date.
df_confirmed.head(1)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,10/14/20,10/15/20,10/16/20,10/17/20,10/18/20,10/19/20,10/20/20,10/21/20,10/22/20,10/23/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,39994,40026,40073,40141,40200,40287,40357,40510,40626,40687


In [8]:
# Keep only the rows for Norway, drop the columns not needed for plotting and
# tag the rows with the data source. Built as one chain so the cell produces a
# fresh frame without mutating anything in place.
selection = (
    df_confirmed.loc[df_confirmed['Country/Region'].isin(['Norway'])]
    .drop(columns=['Province/State', 'Lat', 'Long'])
    .assign(Source='Humdata')
)

In [9]:
# Display the filtered frame to check the selection before reshaping it.
selection

Unnamed: 0,Country/Region,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,10/15/20,10/16/20,10/17/20,10/18/20,10/19/20,10/20/20,10/21/20,10/22/20,10/23/20,Source
196,Norway,0,0,0,0,0,0,0,0,0,...,16137,16272,16369,16457,16603,16772,16964,17234,17532,Humdata


In [10]:
# Reshape from wide (one column per date) to long: one row per date, with the
# original column header in 'variable' and the cumulative count in 'value'.
source = pd.melt(selection, id_vars=['Source', 'Country/Region'])

# New cases per day = difference between consecutive cumulative values.
# The first row has no predecessor and therefore becomes NaN.
source['change'] = source['value'].diff()

# The column headers are month/day/two-digit-year strings (e.g. '1/22/20').
# Parse them in one vectorized call with an explicit format instead of guessing
# the format row by row; this also avoids depending on the `dateutil.parser`
# submodule having been loaded by some other import.
source['date'] = pd.to_datetime(source['variable'], format='%m/%d/%y')

In [11]:
# Dates on which the cumulative count was still zero.
zero_case_dates = source.loc[source['value'] == 0, 'date']

# Latest such date; used as the start of the plotted period.
first = max(zero_case_dates)
first

Timestamp('2020-02-25 00:00:00')

In [12]:
# Drop everything before `first` so the charts start at the last zero-case day.
source = source.loc[source['date'].ge(first)]

## Create charts

In [13]:
# Chart object bound to the prepared data; the line chart cell reuses it.
base = alt.Chart(source)

# Encodings for the bar chart of daily new cases.
bar_y = alt.Y('change:Q', axis=alt.Axis(title='Confirmed cases pr day'))
bar_x = alt.X(
    'date:T',
    axis=alt.Axis(title='', format="%d %b %Y", labelAngle=45, grid=False),
)
bar_tooltip = [
    alt.Tooltip('variable:O', title='Dato'),
    alt.Tooltip('change:Q', title='Antall'),
]

fig = base.mark_bar(color='rgba(124,194,189,1)').encode(
    y=bar_y,
    x=bar_x,
    tooltip=bar_tooltip,
)

fig

In [14]:
# Text stored alongside the bar chart in the datapackage.
description = "\nNumber of confirmed cases per day\n"

# Add the current figure to the datapackage under the given title.
dp.vega(fig, "Cases per day", description)

In [15]:
# Encodings for the line chart of cumulative confirmed cases.
line_y = alt.Y('value:Q', axis=alt.Axis(title='Total confirmed cases'))
line_x = alt.X(
    'date:T',
    axis=alt.Axis(title='', format="%d %b %Y", labelAngle=45, grid=False),
)
line_tooltip = [
    alt.Tooltip('variable:O', title='Dato'),
    alt.Tooltip('value:Q', title='Antall'),
]

line = base.mark_line(color='rgba(124,194,189,1)').encode(
    y=line_y,
    x=line_x,
    tooltip=line_tooltip,
)

# `fig` is rebound to the line chart so the following cell adds this figure.
fig = line
fig

In [16]:
# Text stored alongside the line chart in the datapackage.
description = "\nTotal number of confirmed\n"

# Add the current figure to the datapackage under the given title.
dp.vega(fig, "Total Cases", description)

## Publish package

In [17]:
# Publish the datapackage; in the recorded run this printed the "View" and
# "Metadata" URLs shown below. The return value is kept in `item`.
item = dp.publish()

View: https://public.deetly.com/examples/483162a5b1a67895019b1bbb606b0a48 

Metadata: https://storage.googleapis.com/deetly/examples/483162a5b1a67895019b1bbb606b0a48/datapackage.json 

