### Customization

In [1]:
import altair as alt
import pandas as pd

#### Colours

[Inspiration](https://www.youtube.com/watch?v=dwSe2imUyvg)

In [2]:
# Load the owl data set from the repository's data folder.
OWL_CSV = '../data/owl.csv'
owl = pd.read_csv(OWL_CSV)

In [8]:
# Display the frame. The rendered preview shows the columns X, Y and Colour,
# plus an 'Unnamed: 0' column.
owl

Unnamed: 0,X,Y,Colour
0,1,1,Blue
1,1,2,Blue
2,1,3,Blue
3,1,4,Blue
4,1,5,Blue
...,...,...,...
116,11,7,Blue
117,11,8,Blue
118,11,9,Blue
119,11,10,Blue


In [3]:
# First pass: one rectangle per (X, Y) cell, with Colour treated as a nominal
# field and no custom colour scale supplied.
(
    alt.Chart(owl)
    .mark_rect()
    .encode(
        x=alt.X('X:O'),
        y=alt.Y('Y:O'),
        color=alt.Color('Colour:N'),
    )
)

Let's get these colours right...

In [4]:
# Map each colour name onto the hex value it should be drawn with.
# NOTE(review): the preview of `owl` shows rows with Colour == 'Blue', which is
# not listed here — confirm whether those cells are meant to stay unmapped.
owl_palette = {
    'Black': "#000000",
    'Dark Blue': "#088DA5",
    'Brown': "#753A10",
    'Dark Brown': "#48240A",
    'Light Blue': "#21CDF2",
    'White': "#FFFFFF",
    'Yellow': "#FFBA3F",
}

alt.Chart(owl).mark_rect().encode(
    x='X:O',
    y='Y:O',
    color=alt.Color(
        'Colour',
        # Scale domain and range are matched by position.
        scale=alt.Scale(
            domain=list(owl_palette),
            range=list(owl_palette.values()),
        ),
        legend=None,  # no legend for this chart
    ),
)

In [5]:
# Same chart as the previous cell, but with the Y axis sorted in descending order.
colour_scale = alt.Scale(
    domain=['Black', 'Dark Blue', 'Brown', 'Dark Brown', 'Light Blue', 'White', 'Yellow'],
    range=["#000000", "#088DA5", "#753A10", "#48240A", "#21CDF2", "#FFFFFF", "#FFBA3F"],
)

alt.Chart(owl).mark_rect().encode(
    alt.X('X:O'),
    # whoops! -- the rows need to run in descending order
    alt.Y('Y:O', sort='descending'),
    alt.Color('Colour', scale=colour_scale, legend=None),
)

Clean it all up...

In [9]:
# Cleaned-up owl: no axes, no grid, no view border, no legend.
# NOTE(review): the preview of `owl` shows Colour == 'Blue', which is absent
# from the domain below — confirm that leaving it unmapped is intended.
owl_domain = ['Black', 'Dark Blue', 'Brown', 'Dark Brown', 'Light Blue', 'White', 'Yellow']
owl_range = ["#000000", "#088DA5", "#753A10", "#48240A", "#21CDF2", "#FFFFFF", "#FFBA3F"]

# Step 1: the bare mark.
owl_rects = alt.Chart(owl).mark_rect()

# Step 2: position and colour channels, with every guide switched off.
owl_encoded = owl_rects.encode(
    x=alt.X('X:O', axis=None),
    y=alt.Y('Y:O', sort='descending', axis=None),
    color=alt.Color(
        'Colour',
        scale=alt.Scale(domain=owl_domain, range=owl_range),
        legend=None,
    ),
)

# Step 3: chart-level configuration.
owl_chart = owl_encoded.configure_axis(grid=False).configure_view(strokeWidth=0)

owl_chart

Sidenote:

In [28]:
# Colour given as a mark property (not an encoding): a single fixed value is
# passed for all rectangles instead of being read from a data column.
(
    alt.Chart(owl)
    .mark_rect(color='#ddd', size=10)
    .encode(x='X:O', y='Y:O')
)

### Actual Data

ZA4950: International Social Survey Programme: Religion III - ISSP 2008
        
[Source](https://zacat.gesis.org/webview/index.jsp?object=http://zacat.gesis.org/obj/fStudy/ZA4950)

<img src="../images/sex.png" alt="sex" width="500">

In [29]:
# Load the survey extract from the repository's data folder.
SEX_CSV = '../data/sex.csv'
df = pd.read_csv(SEX_CSV)

In [30]:
# Peek at three random rows. The seed is fixed so that Restart & Run All
# shows the same rows every time.
df.sample(3, random_state=0)

Unnamed: 0,age2,religion,response,n,pct
30,18-24,Roman Catholic,Not wrong,1330,58.1
107,40-49,Hinduism,Wrong-ish,10,23.8
108,40-49,Islam,Not wrong,42,9.6


In [31]:
# Tidy the raw frame: rename 'age2' to 'age' and turn percentages into
# fractions (e.g. 58.1 -> 0.581).
# The guard on the raw 'age2' column keeps the cell idempotent: running it a
# second time no longer divides `pct` by 100 again.
if 'age2' in df.columns:
    df = (
        df
        .rename(columns={'age2': 'age'})
        .assign(pct=lambda frame: frame['pct'] / 100)
    )

In [32]:
# Number of rows recorded for each religion.
religion_counts = df['religion'].value_counts()
religion_counts

Buddhism                     21
Christian Orthodox           21
Islam                        21
Jewish                       21
No religion                  21
Other Christian Religions    21
Other Eastern Religions      21
Other Religions              21
Protestant                   21
Roman Catholic               21
Hinduism                     20
Name: religion, dtype: int64

#### Start Small...

In [33]:
# Start with a single group: respondents recorded as 'No religion'.
is_no_religion = df['religion'].eq('No religion')
no = df.loc[is_no_religion]

In [34]:
# Simplest possible view: one circle per row, age against pct.
(
    alt.Chart(no)
    .mark_circle()
    .encode(x='age', y='pct')
)

In [35]:
# Same scatter, now with the response encoded as colour.
(
    alt.Chart(no)
    .mark_circle()
    .encode(
        x='age',
        y='pct',
        color='response',
    )
)

Rotate...

In [36]:
# Swap the axes: age on the vertical axis, pct on the horizontal.
(
    alt.Chart(no)
    .mark_circle()
    .encode(
        x='pct',
        y='age',
        color='response',
    )
)

Colours that make sense...

In [37]:
# Traffic-light colours for the three responses.
alt.Chart(no).mark_circle().encode(
    y='age',
    x='pct',
    color=alt.Color(
        'response',
        scale=alt.Scale(
            # Domain and range are matched by position, so the responses are
            # ordered mildest -> strongest: 'Wrong-ish' is yellow, 'Wrong' is red.
            domain=['Not wrong', 'Wrong-ish', 'Wrong'],
            range=['green', 'yellow', 'red'],
        ),
    ),
)

In [38]:
# Fix the order of the age bands on the Y axis and switch to hex colours.
alt.Chart(no).mark_circle().encode(
    y=alt.Y(
        'age',
        # An explicit scale domain pins the order in which the age bands are drawn.
        scale=alt.Scale(domain=['70+','60-69', '50-59', '40-49', '30-39', '25-29', '18-24']),
    ),
    x='pct',
    color=alt.Color(
        'response',
        scale=alt.Scale(
            # Domain and range are matched by position, so 'Wrong-ish' takes
            # the middle colour and 'Wrong' the red one.
            domain=['Not wrong', 'Wrong-ish', 'Wrong'],
            # yikes... need to make a little nicer
            range=["#8EA604", "#F5BB00", "#DE1A1A"],
        ),
    ),
)

Prep for complexity... 

In [39]:
# Same chart as the previous cell, laid out one argument per line so that
# further options can be added without the call becoming unreadable.
chart = (
    alt.Chart(no)
    .mark_circle()
    .encode(
        x='pct',
        y=alt.Y(
            'age',
            # An explicit scale domain pins the order of the age bands.
            scale=alt.Scale(domain=['70+','60-69', '50-59', '40-49', '30-39', '25-29', '18-24']),
        ),
        color=alt.Color(
            'response',
            scale=alt.Scale(
                # Domain and range are matched by position, so 'Wrong-ish'
                # takes the middle colour and 'Wrong' the red one.
                domain=['Not wrong', 'Wrong-ish', 'Wrong'],
                range=["#8EA604", "#F5BB00", "#DE1A1A"],
            ),
        ),
    )
)

chart

Flex 💪

In [40]:
# Polished version of the 'No religion' chart, assembled from named pieces.

# Order in which the age bands are listed in the Y scale domain.
age_order = ['70+','60-69', '50-59', '40-49', '30-39', '25-29', '18-24']

# X axis: untitled, percent-formatted, ticks at the quarter marks.
pct_axis = alt.Axis(
    title='',
    # showing off here
    format='%',
    values=[0, 0.25, 0.5, 0.75, 1],
)

# Y axis: stripped down to untitled labels plus grid lines.
age_axis = alt.Axis(title='', grid=True)

# One fixed colour per response, with an untitled legend above the plot.
response_colour = alt.Color(
    'response',
    scale=alt.Scale(
        domain=['Not wrong', 'Wrong-ish', 'Wrong'],
        range=["#39a9db", "#f39237", "#d63230"],
    ),
    legend=alt.Legend(title='', orient='top'),
)

chart = (
    alt.Chart(no)
    .mark_circle(opacity=0.75)
    .encode(
        x=alt.X('pct:Q', axis=pct_axis),
        y=alt.Y('age', axis=age_axis, scale=alt.Scale(domain=age_order)),
        color=response_colour,
    )
    # formatting: no view border, fixed size, grey background, title
    .configure_view(strokeWidth=0)
    .properties(
        width=250,
        height=250,
        background='#F0F0F0',
        title='Sex Before Marriage',
    )
)

chart

To be continued...

#### Marathon

In [41]:
# Load the marathon data.
marathon = pd.read_csv('../data/marathon.csv')
# Parse the whole 'time' column in one vectorised call rather than calling
# pd.to_datetime once per value through .apply().
marathon['time'] = pd.to_datetime(marathon['time'])
# Keep only the two runners compared in the charts that follow.
two = marathon[marathon['runner'].isin(['Emma S.', 'Rich H.'])]

In [42]:
# Quick look: distance over time, one line per runner.
(
    alt.Chart(two)
    .mark_line()
    .encode(
        x=alt.X('time:T'),
        y=alt.Y('distance'),
        color=alt.Color('runner'),
    )
    .properties(title='Ultra Runners')
)

In [43]:
# Same line chart with axis titles, fixed runner colours and a custom size.

# colouring: one fixed colour per runner, untitled legend above the plot
runner_colour = alt.Color(
    'runner',
    scale=alt.Scale(domain=['Rich H.', 'Emma S.'], range=['blue', 'pink']),
    legend=alt.Legend(title=None, orient='top'),
)

runner_lines = alt.Chart(two).mark_line(size=3).encode(
    # axis titles
    x=alt.X('time:T', title='Timestamp'),
    y=alt.Y('distance', title='Miles'),
    color=runner_colour,
)

# formatting
runner_lines.properties(
    title='My Dad vs. a 9 year-old',
    width=400,
    height=200,
)

#### Exercise

Build an Altair chart to compare the attitudes of two different religions in the `sex` data set!