In [65]:
import pandas as pd
import numpy as np
import altair as alt
from datetime import datetime

import assets

## WOW 2018
### Week 01: Looks vs. Personality

In [3]:
# Read the Week 01 data file referenced by the project's `assets` module and
# preview its first five rows.
df08w01 = pd.read_csv(filepath_or_buffer=assets.DATA_18W01)
df08w01.head(n=5)

Unnamed: 0,Nationality,Gender,Value,Measure
0,Danish,Men,506,Unweighted Sample
1,Danish,Men,504,Weighted Sample
2,Danish,Men,77,Ranked personality higher than looks
3,Danish,Men,23,Ranked looks higher than personality
4,Danish,Women,502,Unweighted Sample


In [31]:
circleColor = "#449EA0"

# Shared foundation for the Week 01 layers: keep only the
# "Ranked personality higher than looks" rows and place nationalities on the
# y axis, sorted by their maximum Value in descending order.
base = alt.Chart(df08w01).transform_filter(
    filter='datum.Measure=="Ranked personality higher than looks"'
).encode(
    y=alt.Y(
        "Nationality:N",
        title="",
        sort=alt.EncodingSortField(field="Value", op="max", order="descending"),
        # Wide white grid lines at zindex 0 plus a solid black domain line.
        axis=alt.Axis(
            domainColor="black",
            domainWidth=2,
            grid=True,
            gridColor="white",
            gridWidth=5,
            labelPadding=10,
            tickBand="extent",
            ticks=False,
            zindex=0,
        ),
    )
)

# One circle per nationality/gender pair. Gender is encoded through the fill
# (solid for Men, white for Women) while the stroke stays `circleColor`.
points = (
    base
    # Value divided by 100 so the tooltip can format it with ".0%".
    .transform_calculate(Percentage=alt.datum.Value / 100)
    .mark_circle(fillOpacity=1, size=150, stroke=circleColor, strokeWidth=3)
    .encode(
        x=alt.X(
            "Value:Q",
            title="",
            axis=alt.Axis(
                orient="top",
                ticks=False,
                domain=False,
                gridColor="white",
                gridWidth=2,
                zindex=0,
                # Only the 0, 50 and 100 ticks keep their label text.
                labelExpr='(datum.value==0 || datum.value==50 || datum.value==100) ? datum.label : ""',
            ),
        ),
        fill=alt.Fill(
            "Gender:N",
            scale=alt.Scale(domain=["Men", "Women"], range=[circleColor, "white"]),
            legend=alt.Legend(orient="top", title=None),
        ),
        tooltip=[
            "Nationality:N",
            "Gender:N",
            alt.Tooltip("Percentage:Q", format=".0%"),
        ],
    )
)

# Connector rules for the three nationalities called out in the chart: pivot
# Gender into "Men"/"Women" columns so one rule can span both values.
connected_nationalities = alt.FieldOneOfPredicate(
    field="Nationality",
    oneOf=["Egyptian", "Saudi Arabian", "Vietnamese"],
)

lines = (
    base
    .transform_filter(connected_nationalities)
    .transform_pivot(pivot="Gender", value="Value", groupby=["Nationality"])
    .mark_rule()
    .encode(
        x="Men:Q",
        x2="Women:Q",
        size=alt.value(2),
    )
)

# Vertical reference rules at constant x positions: a dashed one at 50 (this
# layer also switches the x grid on) and a solid one at 100.
rule50 = (
    alt.Chart()
    .mark_rule(strokeDash=[4, 4], strokeWidth=1.5)
    .encode(x=alt.XDatum(50, axis=alt.Axis(grid=True)))
)
rule100 = (
    alt.Chart()
    .mark_rule(strokeWidth=2)
    .encode(x=alt.XDatum(100))
)

# Headline and subtitle for the Week 01 chart; each list item is one line.
headline_lines = [
    "Across the world women are more likely than men to value",
    "personality over looks",
]
subtitle_lines = [
    "% of people who ranked a romantic partner having a personality they liked as",
    "more important than them being good looking",
]

title = alt.Title(
    headline_lines,
    subtitle=subtitle_lines,
    fontSize=16,
    anchor="start",
    offset=20,
)

# --- Callout annotations -----------------------------------------------------
# Each callout is a rounded white box, a block of text inside it, and one or
# more plain rules ("arrows") pointing from the box towards the data.
# All positions are constants (`alt.datum`) expressed in the chart's own
# x (Value) / y (Nationality) coordinates.

# Template for the rounded background box of a callout.
rect = alt.Chart().mark_rect(
    fill="white",
    cornerRadius=10,
    strokeWidth=1,
    stroke="black",
)

# Callout copy: a single inline record; each field is a list of strings.
annotation = alt.Data(values=[{
    "ann1": [
        "The biggest differences",
        "in opinion between a",
        "nation's men and",
        "women were in Egypt",
        "and Saudi Arabia (both",
        " 28%)",
    ],
    "ann2": [
        "Vietnamese men",
        "were the only group",
        "who were more",
        "likely to value a",
        "partner's looks more",
        "than their",
        "personality",
    ],
}])

# Template for the callout text, nudged 5px right/down from its anchor.
text = alt.Chart(annotation).mark_text(
    align="left",
    baseline="top",
    dx=5, dy=5,
)


def _callout_box(left, right, top, bottom):
    """Return the `rect` template positioned between two x values and two y categories."""
    return rect.encode(
        x=alt.datum(left),
        x2=alt.datum(right),
        y=alt.datum(top),
        y2=alt.datum(bottom),
    )


def _callout_text(left, top, field):
    """Return the `text` template anchored at (left, top) showing `field` of `annotation`."""
    return text.encode(
        x=alt.datum(left),
        y=alt.datum(top),
        text=field,
    )


def _pointer(x_start, y_start, x_end, y_end):
    """Return a rule drawn from (x_start, y_start) to (x_end, y_end)."""
    return alt.Chart().mark_rule().encode(
        x=alt.datum(x_start), x2=alt.datum(x_end),
        y=alt.datum(y_start), y2=alt.datum(y_end),
    )


# Callout 1: box spanning x 10..45 between the "Egyptian" and "Saudi Arabian"
# rows, with two pointers that share the same starting point.
x1, x2, y1, y2 = 10, 45, "Egyptian", "Saudi Arabian"
rect1 = _callout_box(x1, x2, y1, y2)
text1 = _callout_text(x1, y1, "ann1:N")
arrow1 = _pointer(x2, "Filipino", 65, "Egyptian")
arrow2 = _pointer(x2, "Filipino", 60, "Saudi Arabian")

# Callout 2: box spanning x 5..36 between the "UAE" and "Vietnamese" rows.
x_1, x_2, y_1, y_2 = 5, 36, 'UAE', 'Vietnamese'
rect2 = _callout_box(x_1, x_2, y_1, y_2)
text2 = _callout_text(x_1, y_1, "ann2:N")
arrow3 = _pointer(x_2, "Indian", 53, "Vietnamese")

# Layers are listed in the order they are passed to `alt.layer`: reference
# rules, then both callouts, then the connector lines and the data points.
week01_layers = [
    rule50,
    rule100,
    rect1,
    text1,
    arrow1,
    arrow2,
    rect2,
    text2,
    arrow3,
    lines,
    points,
]

(
    alt.layer(*week01_layers, title=title)
    .properties(width=400, height=500)
    .configure_view(fill="#D5D6D8", strokeOpacity=0)
)

### Week 09: Highlighting all points in the year

In [45]:
# Load the Week 09 data, convert the "% of Players" strings (e.g. "1.6%") to
# fractions, collapse Ethnicity into "White" vs "Players_of_Color", and sum the
# numeric columns per group and year.
df08w09 = (
    pd.read_csv(assets.DATA_18W09, parse_dates=["Year"], engine="pyarrow")
    .assign(
        Percent_of_Players=lambda frame: (
            frame["% of Players"].str.rstrip("%").astype(float) / 100.0
        ),
        Ethnicity_agg=lambda frame: np.where(
            frame["Ethnicity"] == "White", "White", "Players_of_Color"
        ),
    )
    .groupby(by=["Ethnicity_agg", "Year"])
    .sum(numeric_only=True)
    .reset_index()
)
df08w09

Unnamed: 0,Ethnicity_agg,Year,Percent_of_Players
0,Players_of_Color,1947-01-01,0.016
1,Players_of_Color,1948-01-01,0.014
2,Players_of_Color,1949-01-01,0.034
3,Players_of_Color,1950-01-01,0.047
4,Players_of_Color,1951-01-01,0.057
...,...,...,...
135,White,2012-01-01,0.639
136,White,2013-01-01,0.636
137,White,2014-01-01,0.637
138,White,2015-01-01,0.634


In [102]:
# Palette for the Week 09 chart.
COLOR = "#8CD17D"
WHITE = "white"
FILL_COLOR = "#21252B"
VLINE_COLOR = "#898989"

# NOTE(review): this is set on the shared renderer, so it presumably also
# applies to any chart rendered after this cell runs — confirm if that matters.
alt.renderers.set_embed_options(theme="dark")

# Common x encoding (Year) shared by the line and area layers.
base = alt.Chart(df08w09).encode(
    x=alt.X(
        "Year:T",
        axis=alt.Axis(domain=False, grid=False, title=None),
    ),
)

# One line per aggregated ethnicity group: white for "White", green for
# "Players_of_Color"; the legend is disabled.
chart = base.mark_line(strokeWidth=2.5).encode(
    y=alt.Y(
        "Percent_of_Players:Q",
        axis=alt.Axis(
            domain=False,
            format=".0%",
            grid=False,
            ticks=False,
            title=None,
        ),
    ),
    color=alt.Color(
        "Ethnicity_agg:N",
        scale=alt.Scale(
            domain=["White", "Players_of_Color"],
            range=[WHITE, COLOR],
        ),
        legend=None,
    ),
    tooltip=[
        alt.Tooltip("Percent_of_Players:Q", title="Percent:", format=".1%"),
        alt.Tooltip("Ethnicity_agg:N", title="Ethnicity:"),
    ],
)

# Shaded band between the two lines: pivot the groups into columns so a single
# area mark can run from the "White" value to the "Players_of_Color" value.
fill_between = (
    base
    .transform_pivot(
        pivot="Ethnicity_agg",
        value="Percent_of_Players",
        groupby=["Year"],
    )
    .mark_area(fill=FILL_COLOR)
    .encode(
        y="White:Q",
        y2="Players_of_Color:Q",
    )
)

# Single-row frame describing the 2004 annotation: its x position and the
# lines of label text.
ann_data = pd.DataFrame([
    {
        "x": datetime(2004, 1, 1),
        "label": ["2004 The percent of", "Minority Players", "begins to decline"],
    }
])

# Both annotation layers share the same data and x position.
ann_base = alt.Chart(ann_data).encode(x="x:T")

# Dashed vertical rule at the annotated year.
ann_line = ann_base.mark_rule(stroke=VLINE_COLOR, strokeDash=[4, 4])

# Label anchored at y = 1 and shifted 80px down via `dy`.
ann_text = ann_base.mark_text(color=WHITE, baseline="top", dy=80).encode(
    y=alt.datum(1),
    text="label:N",
)

# Headline and three-line subtitle for the Week 09 chart.
# Fixes to the displayed text: "Barrir" -> "Barrier", missing space after
# "2016,", and "of league" -> "of the league".
title = alt.Title(
    "The MLB Diversity Gap",
    subtitle=[
        "Since Jackie Robinson broke the Color Barrier in 1947, the MLB has grown more diverse.",
        "However, in 2016, White Players still represented 63% of the league. Additionally, since 2004,",
        "the percentage of Players of color is declining, falling from 39% to 36%.",
    ],
    anchor="start",
    offset=10,
    fontSize=32,
    subtitleFontSize=14,
    subtitlePadding=10,
)

# Layers in the order passed to `alt.layer`: shaded band, the two lines, then
# the 2004 annotation rule and its label.
week09_layers = [fill_between, chart, ann_line, ann_text]

(
    alt.layer(*week09_layers, title=title)
    .properties(height=400, width=600)
    .configure_view(strokeWidth=0)
)