# Module 5: Designing Plots for Communication

### Descriptive titles and labels

In [1]:
import altair as alt
from vega_datasets import data

In [2]:
# Load the example stock-price dataset shipped with vega_datasets.
stocks = data.stocks()

# The title states the takeaway of the chart. The subtitle is given as a list
# of two strings (a multi-line subtitle in Vega-Lite).
# Fixed the typo "hte" -> "the" in the subtitle, which was shown on the chart.
stock_title = alt.TitleParams(
    "Google's stock experiencing heavier fluctuations than competitors",
    subtitle=[
        "Prices have been surging since 2009 but have still",
        "not reached the same levels as in late 2007.",
    ],
)

# One line per stock symbol, with descriptive axis titles.
alt.Chart(stocks, title=stock_title).mark_line().encode(
    alt.X('date', title='Date'),
    alt.Y('price', title='Price (USD)'),
    color='symbol')

In [3]:
# Same line chart, but with the x-axis title and the legend title removed.
# The chart is kept in `lines` so that it can be layered with other marks.
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('price', title='Price (USD)'),
        color=alt.Color('symbol', title=None),
    )
)
lines

In [6]:
# Keep only the rows for the latest date in the data, i.e. the end point of
# each line. `.copy()` makes this an independent DataFrame so that assigning
# into it with `.loc` does not trigger pandas' SettingWithCopyWarning.
stock_max_date = stocks[stocks['date'] == stocks['date'].max()].copy()

# Text marks showing each symbol at its last data point; the colour legend is
# disabled on this layer (legend=None).
texts = alt.Chart(stock_max_date).mark_text(align='left', dx=2).encode(
    x='date',
    y='price',
    text='symbol',
    color=alt.Color('symbol', legend=None))

lines+texts

In [7]:
# Take an explicit copy before assigning: `stock_max_date` may be a filtered
# slice of `stocks`, and writing into such a slice with `.loc` raises pandas'
# SettingWithCopyWarning.
stock_max_date = stock_max_date.copy()

# Override the y-position of the IBM and AMZN labels
# (presumably to keep the two labels from overlapping -- confirm visually).
stock_max_date.loc[stock_max_date['symbol'] == "IBM", 'price'] = 140
stock_max_date.loc[stock_max_date['symbol'] == "AMZN", 'price'] = 110

# Rebuild the label layer from the adjusted positions.
texts = alt.Chart(stock_max_date).mark_text(align='left', dx=2).encode(
    x='date',
    y='price',
    text='symbol',
    color=alt.Color('symbol', legend=None))

lines+texts

Add $ sign to price axis:

In [9]:
# Format the y-axis tick labels with the '$' format string.
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('price', title='Price', axis=alt.Axis(format='$')),
        color=alt.Color('symbol', title=None),
    )
)
alt.layer(lines, texts)

In [11]:
# Use the '$s' format string for the y-axis labels and retitle the axis.
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('price', title='Closing Price', axis=alt.Axis(format='$s')),
        color=alt.Color('symbol', title=None),
    )
)
alt.layer(lines, texts)

In [12]:
# Request about three ticks on the date axis (tickCount=3).
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None, axis=alt.Axis(tickCount=3)),
        y=alt.Y('price', title='Price', axis=alt.Axis(format='$')),
        color=alt.Color('symbol', title=None),
    )
)
alt.layer(lines, texts)

In [14]:
# Additionally turn off the grid lines belonging to the x-axis (grid=False).
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None, axis=alt.Axis(tickCount=3, grid=False)),
        y=alt.Y('price', title='Price', axis=alt.Axis(format='$s')),
        color=alt.Color('symbol', title=None),
    )
)
alt.layer(lines, texts)

In [18]:
# Redefine the title with explicit styling: larger title font, and a bold,
# steel-blue subtitle given as a single string.
stock_title = alt.TitleParams(
    text="Google's stock experiencing heavier fluctuations than competitors",
    subtitle="Prices have been surging since 2009 but have still not reached the same levels as in late 2007.",
    fontSize=18,
    subtitleColor='steelblue',
    subtitleFontWeight='bold',
)

# Rebuild the line layer with larger y-axis label/title fonts and a fixed height.
lines = (
    alt.Chart(stocks, title=stock_title)
    .mark_line()
    .encode(
        x=alt.X('date', title=None, axis=alt.Axis(tickCount=3, grid=False)),
        y=alt.Y(
            'price',
            title='Closing Price',
            axis=alt.Axis(format='$s', labelFontSize=12, titleFontSize=16),
        ),
        color=alt.Color('symbol', title=None),
    )
    .properties(height=275)
)

# strokeWidth=0 on the view configuration removes the outline around the plot area.
alt.layer(lines, texts).configure_view(strokeWidth=0)

### Defining and Transforming Axis Ranges

In [19]:
import pandas as pd

In [24]:
# Read the donations CSV from the course repository; the 'date' column is
# parsed into datetimes while loading.
donations = pd.read_csv(
    'https://raw.githubusercontent.com/UBC-MDS/exploratory-data-viz/main/chapters/en/slides/module5/data/donations.csv',
    parse_dates=['date'],
)

In [25]:
# Scatter plot of the 'sum' column against date, 500 px wide.
(
    alt.Chart(donations, title="Wikipedia donations peak during Christmas")
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('sum', axis=alt.Axis(format='$s'), title='Amount donated per day'),
    )
    .properties(width=500)
)

In [26]:
title_text = 'Wikipedia donations peak during Christmas'

# Restrict the data to dates between "2020" and "2020-07"
# (Series.between includes both end points by default).
donations_2020_until_July = donations.loc[donations['date'].between("2020", "2020-07")]

(
    alt.Chart(donations_2020_until_July, title=title_text)
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('sum', axis=alt.Axis(format='$s'), title='Amount donated per day'),
    )
)

In [27]:
# Fix the y-axis domain to [0, 100000]; clip=True on the mark clips marks that
# fall outside that domain.
(
    alt.Chart(donations_2020_until_July, title=title_text)
    .mark_circle(clip=True)
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y(
            'sum',
            axis=alt.Axis(format='$s'),
            title='Amount donated per day',
            scale=alt.Scale(domain=[0, 100_000]),
        ),
    )
)

In [28]:
# Plot only the rows whose 'sum' exceeds 2.5e6, with the default y-scale.
(
    alt.Chart(donations.loc[donations['sum'] > 2.5e6])
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('sum', axis=alt.Axis(format='$s'), title='Amount donated per day'),
    )
)

In [29]:
# Same subset (rows with 'sum' above 2.5e6), but zero=False so the y-axis is
# not forced to include zero.
(
    alt.Chart(donations.loc[donations['sum'] > 2.5e6])
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y(
            'sum',
            axis=alt.Axis(format='$s'),
            title='Amount donated per day',
            scale=alt.Scale(zero=False),
        ),
    )
)

In [30]:
# Full dataset again, made interactive with .interactive().
(
    alt.Chart(donations, title="Wikipedia donations peak during Christmas")
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y('sum', axis=alt.Axis(format='$s'), title='Amount donated per day'),
    )
    .interactive()
)

In [31]:
# Put the y-axis on a log scale and show the 'week_day' field as a tooltip.
(
    alt.Chart(donations)
    .mark_circle()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y(
            'sum',
            title='Amount donated per day (log-transformed)',
            axis=alt.Axis(format='$s'),
            scale=alt.Scale(type='log'),
        ),
        tooltip=alt.Tooltip('week_day'),
    )
)

In [32]:
# Point marks with a 'symlog' y-scale instead of 'log'.
(
    alt.Chart(donations)
    .mark_point()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y(
            'sum',
            title='Amount donated per day (log-transformed)',
            axis=alt.Axis(format='$s'),
            scale=alt.Scale(type='symlog'),
        ),
    )
)

In [33]:
# Line chart on a log scale, with the row labelled 0 dropped from the data
# (presumably a value that does not work on a log axis -- TODO confirm).
(
    alt.Chart(donations.drop(index=0))
    .mark_line()
    .encode(
        x=alt.X('date', title=None),
        y=alt.Y(
            'sum',
            title='Amount donated per day (log-transformed)',
            axis=alt.Axis(format='$s'),
            scale=alt.Scale(type='log'),
        ),
    )
)

### Effective use of color for categorical data

* Hue - which color?
* Saturation - how vibrant/colorful?
* Lightness - how bright?

In [34]:
# Load the cars example dataset from vega_datasets; it is used by the colour
# examples that follow.
cars = data.cars()

In [35]:
# Encode 'Origin' with both colour (the 'set1' scheme) and shape.
# Fixed the y-axis title typo: "efficieny" -> "efficiency".
alt.Chart(cars).mark_point(size=70, filled=True).encode(
    alt.X('Horsepower', title='Engine power (hp)'),
    alt.Y('Miles_per_Gallon', title='Fuel efficiency (miles/gallon)'),
    color=alt.Color('Origin', title=None, scale=alt.Scale(scheme='set1')),
    shape='Origin')

In [36]:
# Custom colour range: colours can be given by name or as hex codes.
colors = ['coral', '#4682b4', 'rebeccapurple']

# Same scatter plot, using the custom colours for 'Origin'.
# Fixed the y-axis title typo: "efficieny" -> "efficiency".
alt.Chart(cars).mark_point(size=70, filled=True).encode(
    alt.X('Horsepower', title='Engine power (hp)'),
    alt.Y('Miles_per_Gallon', title='Fuel efficiency (miles/gallon)'),
    color=alt.Color('Origin', title=None, scale=alt.Scale(range=colors)),
    shape='Origin')

Don't use more than 5-8 distinct hues:

In [37]:
# Counter-example: colouring by 'Name' produces one hue per car name.
# Fixed the y-axis title typo: "efficieny" -> "efficiency".
alt.Chart(cars).mark_point(size=70, filled=True).encode(
    alt.X('Horsepower', title='Engine power (hp)'),
    alt.Y('Miles_per_Gallon', title='Fuel efficiency (miles/gallon)'),
    color=alt.Color('Name', title=None))

In [38]:
# Derive a 'Brand' column from the first whitespace-separated word of 'Name'.
cars['Brand'] = cars['Name'].str.split().str.get(0)

# Horizontal bars of mean horsepower per brand, sorted by the x value.
chart = (
    alt.Chart(cars, width=200, height=450)
    .mark_bar()
    .encode(
        y=alt.Y('Brand', sort='x'),
        x=alt.X('mean(Horsepower)'),
    )
)

# Side by side: the plain chart and the same chart coloured per brand
# (the 'tableau20' scheme, legend hidden).
alt.hconcat(
    chart,
    chart.encode(
        color=alt.Color('Brand', legend=None, scale=alt.Scale(scheme='tableau20'))
    ),
)

In [39]:
# Bar chart of mean horsepower per origin, coloured by origin.
chart = (
    alt.Chart(cars)
    .mark_bar()
    .encode(
        x=alt.X('Origin', title=None),
        y=alt.Y('mean(Horsepower)'),
        color=alt.Color('Origin', title=None),
    )
)

# Next to it: the same encoding as lines over 'Year', where 'Origin' is also
# mapped to the stroke dash pattern.
alt.hconcat(
    chart,
    chart.mark_line().encode(
        x=alt.X('Year', title=None),
        strokeDash=alt.StrokeDash('Origin', title=None),
    ),
)

### Effective use of color for quantitative data

In [40]:
# Quantitative colour: 'Horsepower' mapped to the reversed 'viridis' scheme.
(
    alt.Chart(cars)
    .mark_circle(size=50)
    .encode(
        x=alt.X('Horsepower', title='Engine power (hp)'),
        y=alt.Y('Miles_per_Gallon', title='Mileage (miles/gallon)'),
        color=alt.Color(
            'Horsepower',
            title='Engine power (hp)',
            scale=alt.Scale(scheme='viridis', reverse=True),
        ),
    )
)

In [41]:
# Same plot using the reversed 'cividis' scheme.
(
    alt.Chart(cars)
    .mark_circle(size=50)
    .encode(
        x=alt.X('Horsepower', title='Engine power (hp)'),
        y=alt.Y('Miles_per_Gallon', title='Mileage (miles/gallon)'),
        color=alt.Color(
            'Horsepower',
            title='Engine power (hp)',
            scale=alt.Scale(scheme='cividis', reverse=True),
        ),
    )
)

In [42]:
# Same plot using the 'turbo' scheme (not reversed).
(
    alt.Chart(cars)
    .mark_circle(size=50)
    .encode(
        x=alt.X('Horsepower', title='Engine power (hp)'),
        y=alt.Y('Miles_per_Gallon', title='Mileage (miles/gallon)'),
        color=alt.Color(
            'Horsepower',
            title='Engine power (hp)',
            scale=alt.Scale(scheme='turbo'),
        ),
    )
)

In [43]:
# Colour by a third variable, 'Weight_in_lbs', with the default colour scale.
(
    alt.Chart(cars)
    .mark_circle(size=50)
    .encode(
        x=alt.X('Horsepower', title='Engine power (hp)'),
        y=alt.Y('Miles_per_Gallon', title='Mileage (miles/gallon)'),
        color=alt.Color('Weight_in_lbs', title='Weight (lbs)'),
    )
)

In [45]:
# Read the Toronto daily weather CSV from the course repository, parse 'date'
# as datetimes, and keep only the date and average-temperature columns.
toronto_temp = pd.read_csv('https://raw.githubusercontent.com/UBC-MDS/exploratory-data-viz/main/chapters/en/slides/module5/weatherstats_toronto_daily.csv',
                           parse_dates=['date'])[['date', 'avg_temperature']]

# Temperature over time, coloured by temperature with the default scale.
# Fixed the y-axis title typo: "Celsuis" -> "Celsius".
alt.Chart(toronto_temp).mark_circle(size=50).encode(
    alt.X('date', title=None),
    alt.Y('avg_temperature', title='Average temperature (Celsius)'),
    color=alt.Color('avg_temperature', title='Average temperature (C)'))

In [46]:
# Diverging 'blueorange' colour scheme with the scale midpoint set at 0
# (domainMid=0).
# Fixed the y-axis title typo: "Celsuis" -> "Celsius".
alt.Chart(toronto_temp).mark_circle(size=50).encode(
    alt.X('date', title=None),
    alt.Y('avg_temperature', title='Average temperature (Celsius)'),
    color=alt.Color('avg_temperature', title='Average temperature (C)',
                   scale = alt.Scale(scheme='blueorange', domainMid=0)))

In [48]:
# Diverging 'blueorange' colour scheme with an explicit colour domain of
# [-30, 30], which is symmetric around 0.
# Fixed the y-axis title typo: "Celsuis" -> "Celsius".
alt.Chart(toronto_temp).mark_circle(size=50).encode(
    alt.X('date', title=None),
    alt.Y('avg_temperature', title='Average temperature (Celsius)'),
    color=alt.Color('avg_temperature', title='Average temperature (C)',
                   scale = alt.Scale(scheme='blueorange', domain=[-30,30])))

### Annotating with text and color

In [53]:
from calendar import day_abbr
# Weekday label with the largest total of the 'sum' column.
top_day = donations.groupby('week_day')['sum'].agg('sum').idxmax()

# Bars of the total per weekday, ordered by the calendar module's abbreviated
# day names; the bar for `top_day` is coral, all other bars are steel blue.
chart = (
    alt.Chart(donations)
    .mark_bar()
    .encode(
        y=alt.Y('week_day', sort=list(day_abbr), title=None),
        x=alt.X('sum(sum)', axis=alt.Axis(format='$s'), title='Total donated amount'),
        color=alt.condition(
            alt.datum.week_day == top_day,
            alt.value('coral'),
            alt.value('steelblue'),
        ),
    )
)
chart

In [54]:
# Layer a text mark on the bars: the annotation text is set only for the
# `top_day` row, every other row gets an empty string. dx/dy shift the text
# relative to its anchor position.
alt.layer(
    chart,
    chart.mark_text(align='left', dx=-125, dy=-15).encode(
        text=alt.condition(
            alt.datum.week_day == top_day,
            alt.value('Salaries are paid on Wed'),
            alt.value(''),
        )
    ),
)

In [55]:
# Label each bar with its aggregated value, without any number formatting.
alt.layer(
    chart,
    chart.mark_text(align='left').encode(text=alt.Text('sum(sum)')),
)

In [56]:
# Label each bar using the '$,d' format string, offset 2 px to the right.
alt.layer(
    chart,
    chart.mark_text(align='left', dx=2).encode(
        text=alt.Text('sum(sum)', format='$,d')
    ),
)

In [57]:
# Label each bar using the '$.3~s' format string, offset 2 px to the right.
alt.layer(
    chart,
    chart.mark_text(align='left', dx=2).encode(
        text=alt.Text('sum(sum)', format='$.3~s')
    ),
)

In [59]:
# Title and subtitle anchored at the start of the chart and nudged with dx/dy.
title = alt.TitleParams(
    text="Wikipedia receives most donations on Wednesdays",
    subtitle='Values represent the total amount donated in 2020',
    anchor='start',
    dx=26,
    dy=-5,
)

# Bars without an x-axis (axis=None); the values are shown by the text labels
# layered on top instead.
chart = (
    alt.Chart(donations, title=title)
    .mark_bar()
    .encode(
        y=alt.Y('week_day', sort=list(day_abbr), title=None),
        x=alt.X('sum(sum)', axis=None),
        color=alt.condition(
            alt.datum.week_day == top_day,
            alt.value('coral'),
            alt.value('steelblue'),
        ),
    )
)

# strokeWidth=0 on the view configuration removes the outline around the plot area.
alt.layer(
    chart,
    chart.mark_text(align='left', dx=2).encode(
        text=alt.Text('sum(sum)', format='$.3s')
    ),
).configure_view(strokeWidth=0)

In [60]:
# Enable Altair's 'dark' theme, then draw the labelled bar chart again with
# the subtitle colour set to white.
alt.themes.enable('dark')
alt.layer(
    chart,
    chart.mark_text(align='left', dx=2).encode(
        text=alt.Text('sum(sum)', format='$.3s')
    ),
).configure_view(strokeWidth=0).configure_title(subtitleColor='white')