# Baby Boomers Through Time
## Demographics Report by Katsiaryna Aliashkevich



### Creating Interactive Charts to Visualize Population Shifts over Time with Altair


In [None]:
# Import the necessary libraries and data
import altair as alt
import pandas as pd
from vega_datasets import data

# Load the population table from vega_datasets; it has one row per
# (year, age, sex) combination with a `people` count.
df_population = data.population()

In [None]:
# Preview the first rows to check the column layout before transforming it.
df_population.head()

Unnamed: 0,year,age,sex,people
0,1850,0,1,1483789
1,1850,0,2,1450376
2,1850,5,1,1411067
3,1850,5,2,1359668
4,1850,10,1,1260099


## Adding the "Boomer" label

In [None]:
# Label every row by generation.
# A row's birth year is the census year minus the age. Baby Boomers are the
# cohort born from 1946 through 1964, so both endpoints are included
# (`between` is inclusive on both sides by default).
birth_year = df_population['year'] - df_population['age']
is_boomer = birth_year.between(1946, 1964)

# Vectorised labelling instead of a Python-level loop over the rows:
# True -> "Baby Boomer", False -> "Other".
df_population['Generation'] = is_boomer.map({True: "Baby Boomer", False: "Other"})

## Changing the encoding for `sex`


In [None]:
# Swap the numeric sex codes for readable labels.
# Run this cell once only: after the first run the column no longer holds
# 1/2, so mapping it again would find no matches and produce NaN.
sex = df_population['sex']
sex_labels = {1: 'Male', 2: 'Female'}
df_population['sex'] = sex.map(sex_labels)


In [None]:
# Preview again to confirm the new `Generation` column and the relabelled `sex` column.
df_population.head()

Unnamed: 0,year,age,sex,people,Generation
0,1850,0,Male,1483789,Other
1,1850,0,Female,1450376,Other
2,1850,5,Male,1411067,Other
3,1850,5,Female,1359668,Other
4,1850,10,Male,1260099,Other


## Juxtaposing Bar Charts Horizontally 


In [None]:

# Rows for the two census years that are compared side by side.
six = df_population.query("year == 1960")
nine = df_population.loc[df_population['year'] == 1990]

# One colour scale for both charts so each generation keeps the same colour.
purple = "#7D3C98"
gold = "#F4D03F"
purple_gold = alt.Scale(domain=('Baby Boomer', 'Other'),
                        range=[purple, gold])


def _age_distribution_chart(rows, year):
    """Return a bar chart of the total number of people per age.

    rows: the subset of ``df_population`` for a single year.
    year: the year shown in the chart title.
    """
    return alt.Chart(rows).mark_bar().encode(
        x=alt.X('age:N', axis=alt.Axis(title='Age')),
        # Fixed y-domain so both charts are drawn at the same scale.
        y=alt.Y('sum(people)', scale=alt.Scale(domain=[0, 24000000]), axis=alt.Axis(title='Number of People')),
        color=alt.Color('Generation', scale=purple_gold),
    ).properties(
        title=f'Distribution of Ages in {year}'
    )


chart1 = _age_distribution_chart(six, 1960)
chart2 = _age_distribution_chart(nine, 1990)

# `|` concatenates the two charts horizontally; as the last expression of the
# cell it is what the notebook displays.
chart1 | chart2

## Showing the Population Change Over Time with a Slider


In [None]:
# Slider widget bound to a single-value selection on `year`; the chart below
# is filtered down to whichever year the slider currently holds.
# NOTE(review): selection_single / init / add_selection are the Altair 4
# spellings - confirm the pinned Altair version before upgrading.
slider = alt.binding_range(min=1900, max=2000, step=10, name='Select Year:')
select_year = alt.selection_single(
    name="year",
    fields=['year'],
    bind=slider,
    init={'year': 2000},
)

purple = "#7D3C98"
gold = "#F4D03F"
purple_gold = alt.Scale(domain=('Baby Boomer', 'Other'), range=[purple, gold])

# Encodings, named separately so the chart expression stays short.
age_axis = alt.X('age:O', title='Age')
people_axis = alt.Y(
    'sum(people)',
    scale=alt.Scale(domain=(0, 24000000)),
    axis=alt.Axis(title='Number of People'),
)
generation_color = alt.Color('Generation', scale=purple_gold)

population_bars = alt.Chart(df_population).mark_bar().encode(
    x=age_axis,
    y=people_axis,
    color=generation_color
)

# Attach the slider selection and filter by it; this final expression is the
# chart the notebook displays.
population_bars.properties(
    title='Population Distribution by Age in the USA',
    width=700
).add_selection(
    select_year
).transform_filter(
    select_year
).configure_facet(
    spacing=8
)

## Linking Two Charts with a Shared Selection



In [None]:
purple = "#7D3C98"
gold = "#F4D03F"
purple_gold = alt.Scale(domain=('Baby Boomer', 'Other'),
                        range=[purple, gold])

# Clicking a bar in the top chart selects its x value (the age); the bottom
# chart is filtered by the same selection.
pts = alt.selection(type="single", encodings=['x'])

two = df_population.loc[df_population['year'] == 2000]

# Top chart: age distribution for 2000. Bars outside the current selection
# are drawn grey.
chart1 = alt.Chart(two).mark_bar().encode(
    x=alt.X('age:N', axis=alt.Axis(title='Age')),
    y=alt.Y('sum(people)', scale=alt.Scale(domain=[0, 24000000]), axis=alt.Axis(title='Number of People')),
    color=alt.condition(pts,
                        alt.Color("Generation:N", scale=purple_gold),
                        alt.value("#bababa"))
).properties(
    title='Distribution of Ages in 2000',
    width=700
).add_selection(pts)

# Bottom chart: people by sex, restricted to the age picked in the top chart.
chart2 = alt.Chart(two).mark_bar().encode(
    x=alt.X('people:Q', scale=alt.Scale(), axis=alt.Axis(title='Number of People')),
    y=alt.Y('sex:N', axis=alt.Axis(title='Sex')),
    color=alt.Color('Generation', scale=purple_gold),
    order=alt.Order(
        'Generation',
        sort='descending'
    )
).properties(
    title='Distribution of Sex for Above Age Selection',
    width=700
).transform_filter(
    pts
)

# Pass the two charts to vconcat directly. Wrapping `chart1 & chart2` (which
# is already a vertical concatenation) nests one concat inside another, so
# resolve_legend would act on a one-child wrapper instead of on the two charts.
alt.vconcat(
    chart1, chart2
).resolve_legend(
    color="independent",
    size="independent"
)

## Combining into One Chart


In [None]:
purple = "#7D3C98"
gold = "#F4D03F"
purple_gold = alt.Scale(domain=('Baby Boomer', 'Other'),
                        range=[purple, gold])

# Click selection on the x value (age) of the top chart.
pts = alt.selection(type="single", encodings=['x'])

# Year slider; both charts are filtered to the selected year.
slider = alt.binding_range(min=1900, max=2000, step=10, name='Select Year:')
select_year = alt.selection_single(name="year", fields=['year'],
                                   bind=slider, init={'year': 2000})

# Top chart: age distribution for the selected year. Bars outside the click
# selection are drawn grey.
chart1 = alt.Chart(df_population).mark_bar().encode(
    x=alt.X('age:N', axis=alt.Axis(title='Age')),
    y=alt.Y('sum(people)', scale=alt.Scale(domain=[0, 24000000]), axis=alt.Axis(title='Number of People')),
    color=alt.condition(pts,
                        alt.Color("Generation:N", scale=purple_gold),
                        alt.value("#bababa")),
    # The tooltip uses the same aggregate as the y axis. A raw 'people' field
    # here would become an extra group-by, splitting each bar and showing a
    # partial count instead of the bar's total.
    tooltip=['age', alt.Tooltip('sum(people)', title='Number of People')]
).interactive(
).properties(
    title='Distribution of Ages in Selected Year',
    width=700
).add_selection(
    select_year
).transform_filter(
    select_year
).add_selection(pts)

# Bottom chart: people by sex for the clicked age and the selected year.
chart2 = alt.Chart(df_population).mark_bar().encode(
    x=alt.X('people:Q', scale=alt.Scale(), axis=alt.Axis(title='Number of People')),
    y=alt.Y('sex:N', axis=alt.Axis(title='Sex')),
    color=alt.Color('Generation:N', scale=purple_gold),
    order=alt.Order(
        'Generation',
        sort='descending'
    )
).properties(
    title='Distribution of Sex for Above Age Selection',
    width=700
).transform_filter(
    pts
).transform_filter(
    select_year
)

# Pass the two charts to vconcat directly. Wrapping `chart1 & chart2` (which
# is already a vertical concatenation) nests one concat inside another, so
# resolve_legend would act on a one-child wrapper instead of on the two charts.
alt.vconcat(
    chart1, chart2
).resolve_legend(
    color="independent",
    size="independent"
)