### Visualization 101

In [1]:
import pandas as pd
import numpy as np
import altair as alt

#### TL;DR:

<img src="images/choose.png" alt="choose" width="1000">

### Points

In [2]:
# Seeded generator so that "Restart & Run All" reproduces the same point cloud.
rng = np.random.default_rng(42)

# 1,000 (x, y) pairs; each coordinate is drawn from a normal(0, 1) distribution.
df = pd.DataFrame({
    'x': rng.normal(0, 1, 1000),
    'y': rng.normal(0, 1, 1000)
})

In [3]:
# Scatter plot: one circle per row of `df`, positioned by its x and y columns.
alt.Chart(df).mark_circle().encode(
    x='x',
    y='y'
)

### Bars

In [4]:
# source: https://ourworldindata.org/grapher/covid-vaccination-doses-per-capita

# One (country, doses_per_100) record per row.
vaccines = pd.DataFrame(
    [
        ('Israel', 24.96),
        ('UK', 4.94),
        ('US', 3.37),
        ('Canada', 1.11),
    ],
    columns=['country', 'doses_per_100']
)

In [5]:
# Vertical bars: one bar per country, bar height taken from doses_per_100.
(
    alt.Chart(vaccines)
    .mark_bar()
    .encode(
        x=alt.X('country'),
        y=alt.Y('doses_per_100'),
    )
)

In [6]:
# Same data with the channels swapped: countries on y, doses_per_100 on x.
(
    alt.Chart(vaccines)
    .mark_bar()
    .encode(
        x=alt.X('doses_per_100'),
        y=alt.Y('country'),
    )
)

### Lines

In [7]:
# First and last day of the daily date range built below.
MIN = pd.Timestamp('2019-04-20')
MAX = pd.Timestamp('2020-05-01')

# Comma-separated readings; the DataFrame below pairs them one-to-one with the dates.
data = '1.9,2.0,3.8,2.9,2.7,1.5,1.4,2.0,1.8,2.6,2.1,1.4,2.8,3.2,3.0,3.6,2.4,4.2,3.3,4.3,2.0,4.0,2.0,2.2,2.5,1.8,1.8,1.6,2.6,2.6,2.8,2.1,2.4,1.9,1.4,1.2,3.9,2.9,1.7,1.8,1.7,2.4,2.3,1.5,2.4,2.6,1.6,1.2,1.9,2.5,2.3,2.6,2.0,1.8,2.5,1.9,2.5,2.7,2.5,2.0,1.6,1.4,2.4,2.4,0.7,3.5,3.6,2.9,3.4,1.6,1.8,1.8,1.1,1.9,1.9,1.3,1.6,2.1,1.7,3.1,4.4,3.7,2.8,3.6,4.0,5.6,2.5,1.4,1.6,1.6,2.9,2.0,2.9,2.0,1.9,1.9,1.9,1.2,2.1,1.8,2.5,2.0,2.0,2.1,2.3,2.9,1.4,1.6,1.4,2.2,2.2,2.4,1.6,1.2,1.8,1.8,2.2,1.8,5.3,0.8,2.1,3.3,4.5,1.4,1.3,2.8,0.9,1.7,1.6,1.3,1.8,2.4,3.6,2.6,3.6,5.8,2.4,1.2,1.5,2.1,2.5,3.1,1.8,2.0,1.6,1.8,3.6,2.2,2.1,2.2,1.0,1.7,2.0,2.3,2.0,1.6,1.6,1.2,1.1,1.6,1.7,2.2,1.5,1.9,1.6,2.0,2.3,1.8,3.2,2.7,2.0,2.3,1.3,1.4,1.0,2.1,1.6,1.6,2.7,2.7,2.9,2.7,2.9,2.5,2.2,2.7,2.5,1.7,3.0,2.9,2.4,3.0,3.1,3.0,3.4,2.2,1.7,4.3,2.8,2.8,2.0,4.3,4.2,7.9,9.1,3.6,2.7,4.9,4.1,4.5,3.1,4.1,3.1,3.0,3.7,2.9,3.0,4.1,4.4,4.8,1.9,2.6,2.3,2.2,1.8,3.7,1.7,1.9,3.5,6.3,3.2,2.2,5.0,1.4,2.8,2.2,2.3,3.6,2.4,3.9,1.7,2.2,1.9,2.5,2.6,3.4,4.8,3.2,5.0,5.8,3.3,3.8,2.6,2.8,3.5,3.2,3.6,3.1,6.3,7.5,3.2,3.5,3.4,4.2,2.7,2.9,6.9,4.3,4.3,2.7,2.7,3.1,4.4,5.8,2.8,3.2,4.1,2.0,3.0,5.1,5.4,6.5,3.0,2.7,2.8,3.1,2.6,4.4,5.7,3.6,3.4,4.1,4.2,4.3,5.6,4.9,2.7,2.1,3.5,3.5,3.1,2.7,0.7,3.1,1.9,3.9,2.9,2.9,2.4,2.5,2.5,3.2,2.1,2.3,2.1,2.3,4.7,4.7,4.4,4.5,4.0,3.4,3.0,1.9,3.8,1.4,2.6,1.7,2.8,2.7,2.7,2.2,2.6,4.3,6.7,7.0,4.2,4.9,3.8,4.8,4.8,3.5,3.0,1.8,1.3,2.4,4.1,4.5,4.6,4.5,4.2,3.4,2.8,1.4,3.0,2.4,2.2,1.9,2.1,1.6,2.8,2.8,4.2,3.1,3.7,2.0,2.6,1.7,2.1,1.6,3.5,1.6,1.8,2.1,3.0,5.4,2.9,3.0'

# Daily dates from MIN through MAX alongside the readings parsed as floats.
phone = pd.DataFrame({
    'date': pd.date_range(MIN, MAX),
    'time': list(map(float, data.split(','))),
})

In [8]:
# Line chart of the `time` column against `date`.
alt.Chart(phone).mark_line().encode(
    x='date',
    y='time'
)

### My Dad...

<img src="images/dad.jpg" alt="dad" width="200">

In [9]:
# Load the marathon splits and parse the `time` column in a single vectorized
# call; `Series.apply(pd.to_datetime)` would convert one element at a time.
marathon = pd.read_csv('data/marathon.csv')
marathon['time'] = pd.to_datetime(marathon['time'])

# Rows belonging to the runner named 'Rich H.' only.
rich = marathon[marathon['runner'] == 'Rich H.']

In [10]:
# Preview the first rows of the 'Rich H.' subset.
rich.head()

Unnamed: 0,time,distance,runner,dsplit,mph,hour,gender
376,2019-03-24 18:00:00,0.0,Rich H.,10.43034,5.21517,0,male
377,2019-03-24 20:00:00,10.43034,Rich H.,10.43034,5.21517,2,male
378,2019-03-24 22:00:00,20.86068,Rich H.,7.822755,3.911377,4,male
379,2019-03-24 23:59:00,28.683435,Rich H.,7.822755,3.911377,6,male
380,2019-03-25 02:00:00,36.50619,Rich H.,7.822755,3.911377,8,male


In [11]:
# Distance over time for the single-runner subset.
(
    alt.Chart(rich)
    .mark_line()
    .encode(
        x=alt.X('time'),
        y=alt.Y('distance'),
    )
)

In [12]:
# Keep only the rows for the two runners being compared.
two = marathon.loc[
    marathon['runner'].isin(['Emma S.', 'Rich H.'])
]

In [13]:
# One line per runner, coloured by runner; `:T` marks `time` as temporal.
(
    alt.Chart(two)
    .mark_line()
    .encode(
        x=alt.X('time:T'),
        y=alt.Y('distance'),
        color=alt.Color('runner'),
    )
    .properties(title='Ultra Runners')
)

In [14]:
# Colour channel: fixed colour per runner, legend without a title placed on top.
runner_colour = alt.Color(
    'runner',
    scale=alt.Scale(domain=['Rich H.', 'Emma S.'], range=['blue', 'pink']),
    legend=alt.Legend(title=None, orient='top'),
)

# Same two-runner line chart with explicit axis titles, chart title and size.
alt.Chart(two).mark_line(size=3).encode(
    x=alt.X('time:T', title='Timestamp'),
    y=alt.Y('distance', title='Miles'),
    color=runner_colour,
).properties(
    title='My Dad vs. a 9 Year-Old...',
    width=400,
    height=200,
)


### My cousin...

<img src="images/eli.jpg" alt="eli" width="400">

In [15]:
# Load the skydiving CSV, then preview its first five rows.
eli = pd.read_csv('data/skydiving.csv')

eli.head(5)

Unnamed: 0,jump,exit_alt,deploy_alt,freefall
0,1,13500,6000,7500
1,2,13500,6000,7500
2,3,14000,6000,8000
3,4,13500,6000,7500
4,5,11000,6000,5000


In [16]:
# Add a constant `ground` column of zeros (used later as a y2 endpoint).
eli = eli.assign(ground=0)

<img src="images/candlestick.png" alt="candle" width="400">

In [17]:
# One vertical rule per jump, spanning deploy_alt to exit_alt.
(
    alt.Chart(eli)
    .mark_rule()
    .encode(
        x=alt.X('jump'),
        y=alt.Y('deploy_alt'),
        y2=alt.Y2('exit_alt'),
    )
)

In [18]:
# Work with the first 100 rows only.
eli100 = eli.head(100)

# Thicker rule (size 3) from deploy_alt to exit_alt for each jump.
top = (
    alt.Chart(eli100)
    .mark_rule(size=3)
    .encode(x='jump', y='deploy_alt', y2='exit_alt')
)

# Thinner rule (size 1) from deploy_alt to the `ground` column for each jump.
bottom = (
    alt.Chart(eli100)
    .mark_rule(size=1)
    .encode(x='jump', y='deploy_alt', y2='ground')
)

# Layer the two rule charts into a single chart (same result as `top + bottom`).
alt.layer(top, bottom)

In [19]:
def _jump_rule(y2_field, stroke):
    """Build an axis-free rule chart over `eli100` from `deploy_alt` to `y2_field`.

    `stroke` is forwarded to `mark_rule(size=...)`.
    """
    return alt.Chart(eli100).mark_rule(size=stroke).encode(
        x=alt.X('jump', axis=None),
        y=alt.Y('deploy_alt', axis=None),
        y2=alt.Y2(y2_field),
    )


top = _jump_rule('exit_alt', stroke=3)
bottom = _jump_rule('ground', stroke=1)

# Layer both rule charts, set the overall size, then turn off grid lines and
# set the view stroke width to 0.
chart = (
    alt.layer(top, bottom)
    .properties(height=100, width=700)
    .configure_axis(grid=False)
    .configure_view(strokeWidth=0)
)

chart

### Interactivity + The Web

In [20]:
# Every runner's distance over time, coloured by runner; `.interactive()` is
# applied to the finished chart.
(
    alt.Chart(marathon)
    .mark_line()
    .encode(
        x=alt.X('time'),
        y=alt.Y('distance'),
        color=alt.Color('runner'),
    )
    .interactive()
)

In [21]:
# Colour by gender while `detail` still keeps one separate line per runner.
(
    alt.Chart(marathon)
    .mark_line(size=3)
    .encode(
        x=alt.X('hour'),
        y=alt.Y('distance'),
        color=alt.Color('gender'),
        detail=alt.Detail('runner'),
    )
    .interactive()
)

In [22]:
# Same per-runner lines coloured by gender, with the runner shown as a tooltip.
(
    alt.Chart(marathon)
    .mark_line(size=3)
    .encode(
        x=alt.X('hour'),
        y=alt.Y('distance'),
        color=alt.Color('gender'),
        detail=alt.Detail('runner'),
        tooltip=alt.Tooltip('runner'),
    )
    .interactive()
)

In [23]:
# Single selection on the `gender` field, bound to the chart legend.
selector = alt.selection_single(
    bind='legend',
    empty='all',
    fields=['gender'],
)

# Lines satisfying the selection get opacity 1, all others opacity 0.
runners = (
    alt.Chart(marathon)
    .mark_line(point=True)
    .encode(
        x='hour',
        y='distance',
        color='gender',
        detail='runner',
        opacity=alt.condition(selector, alt.value(1), alt.value(0)),
    )
    .add_selection(selector)
)

runners

In [24]:
# Write the `runners` chart to index.html.
runners.save('index.html')