### Getting the data for [this visualization](https://edwin-torres.github.io/page/#/projects/congress).

In [1]:
import numpy as np
import pandas as pd
import warnings
import random
# Install blanket 'ignore' filters so that no warnings are shown by later cells.
# NOTE(review): both calls are given only the 'ignore' action, so they look
# redundant with each other, and they also hide deprecation warnings from
# pandas -- consider narrowing to a specific category. TODO confirm intent.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [2]:
# Read the current and historical legislator files, keep only the columns
# used by the rest of the notebook, and stack them into one frame with a
# fresh 0..n-1 index.
keep_cols = ['id', 'name', 'bio', 'terms']
current = pd.read_json('legislators-current.json')[keep_cols]
historical = pd.read_json('legislators-historical.json')[keep_cols]
df = pd.concat([current, historical], ignore_index=True)

In [3]:
def _year_of(date_str):
    """Return the leading year of a 'YYYY-MM-DD' style string as an int."""
    return int(date_str.split('-')[0])


# For every legislator, turn each term into a [start_year, end_year] pair.
df['term_years'] = df['terms'].apply(
    lambda terms: [[_year_of(term['start']), _year_of(term['end'])] for term in terms]
)

In [4]:
# Preview the first rows to check the newly added term_years column.
df.head()

Unnamed: 0,id,name,bio,terms,term_years
0,"{'bioguide': 'B000944', 'thomas': '00136', 'li...","{'first': 'Sherrod', 'last': 'Brown', 'officia...","{'birthday': '1952-11-09', 'gender': 'M'}","[{'type': 'rep', 'start': '1993-01-05', 'end':...","[[1993, 1995], [1995, 1997], [1997, 1999], [19..."
1,"{'bioguide': 'C000127', 'thomas': '00172', 'li...","{'first': 'Maria', 'last': 'Cantwell', 'offici...","{'birthday': '1958-10-13', 'gender': 'F'}","[{'type': 'rep', 'start': '1993-01-05', 'end':...","[[1993, 1995], [2001, 2007], [2007, 2013], [20..."
2,"{'bioguide': 'C000141', 'thomas': '00174', 'li...","{'first': 'Benjamin', 'middle': 'L.', 'last': ...","{'birthday': '1943-10-05', 'gender': 'M'}","[{'type': 'rep', 'start': '1987-01-06', 'end':...","[[1987, 1989], [1989, 1991], [1991, 1993], [19..."
3,"{'bioguide': 'C000174', 'thomas': '00179', 'li...","{'first': 'Thomas', 'middle': 'Richard', 'last...","{'birthday': '1947-01-23', 'gender': 'M'}","[{'type': 'rep', 'start': '1983-01-03', 'end':...","[[1983, 1985], [1985, 1987], [1987, 1989], [19..."
4,"{'bioguide': 'C001070', 'thomas': '01828', 'li...","{'first': 'Robert', 'middle': 'P.', 'last': 'C...","{'gender': 'M', 'birthday': '1960-04-13'}","[{'type': 'sen', 'start': '2007-01-04', 'end':...","[[2007, 2013], [2013, 2019], [2019, 2025]]"


In [5]:
def get_generation(birth_year : int) -> str:
    """Map a birth year to the name of its generational cohort.

    Each cohort is described by the last birth year it includes. The cohorts
    are scanned from oldest to youngest and the first one whose cutoff is
    greater than or equal to ``birth_year`` is returned, so any year up to
    1859 is reported as 'Progressive'.

    Args:
        birth_year: Four-digit year of birth.

    Returns:
        The cohort label, or 'Unknown' when ``birth_year`` is later than the
        last cutoff (2012).
    """
    # (last birth year included, label), ordered oldest -> youngest.
    cutoffs = (
        (1859, 'Progressive'),
        (1882, 'Missionary'),
        (1900, 'Lost'),
        (1927, 'Greatest'),
        (1945, 'Silent'),
        (1964, 'Baby Boomer'),
        (1980, 'Generation X'),
        (1996, 'Millennial'),
        (2012, 'Generation Z'),
    )

    for last_year, label in cutoffs:
        if birth_year <= last_year:
            return label

    return 'Unknown'

In [6]:
# Remove legislators whose bio record has no 'birthday' entry, since the
# generation cannot be derived without a birth year.
has_no_birthday = df['bio'].map(lambda bio: 'birthday' not in bio)
index_drop = df.loc[has_no_birthday].index
df = df.drop(index=index_drop)

In [7]:
# Adding Generation Class: parse the birth year out of each bio's
# 'birthday' string, then label it with get_generation.
birth_years = df['bio'].map(lambda bio: int(bio['birthday'].split('-')[0]))
df['generation'] = birth_years.map(get_generation)

In [8]:
# Preview the first rows to check the newly added generation column.
df.head()

Unnamed: 0,id,name,bio,terms,term_years,generation
0,"{'bioguide': 'B000944', 'thomas': '00136', 'li...","{'first': 'Sherrod', 'last': 'Brown', 'officia...","{'birthday': '1952-11-09', 'gender': 'M'}","[{'type': 'rep', 'start': '1993-01-05', 'end':...","[[1993, 1995], [1995, 1997], [1997, 1999], [19...",Baby Boomer
1,"{'bioguide': 'C000127', 'thomas': '00172', 'li...","{'first': 'Maria', 'last': 'Cantwell', 'offici...","{'birthday': '1958-10-13', 'gender': 'F'}","[{'type': 'rep', 'start': '1993-01-05', 'end':...","[[1993, 1995], [2001, 2007], [2007, 2013], [20...",Baby Boomer
2,"{'bioguide': 'C000141', 'thomas': '00174', 'li...","{'first': 'Benjamin', 'middle': 'L.', 'last': ...","{'birthday': '1943-10-05', 'gender': 'M'}","[{'type': 'rep', 'start': '1987-01-06', 'end':...","[[1987, 1989], [1989, 1991], [1991, 1993], [19...",Silent
3,"{'bioguide': 'C000174', 'thomas': '00179', 'li...","{'first': 'Thomas', 'middle': 'Richard', 'last...","{'birthday': '1947-01-23', 'gender': 'M'}","[{'type': 'rep', 'start': '1983-01-03', 'end':...","[[1983, 1985], [1985, 1987], [1987, 1989], [19...",Baby Boomer
4,"{'bioguide': 'C001070', 'thomas': '01828', 'li...","{'first': 'Robert', 'middle': 'P.', 'last': 'C...","{'gender': 'M', 'birthday': '1960-04-13'}","[{'type': 'sen', 'start': '2007-01-04', 'end':...","[[2007, 2013], [2013, 2019], [2019, 2025]]",Baby Boomer


In [9]:
# Getting Generation Counts by year
#
# Builds `data`: {year: {generation label: number of terms covering that year}}
# for every calendar year from first_year to last_year inclusive.
#
# The year bounds and the generation labels are each defined once here, so the
# dictionary initialisation and the counting loop cannot drift out of sync.
first_year, last_year = 1940, 2024
generations = (
    'Progressive',
    'Missionary',
    'Lost',
    'Greatest',
    'Silent',
    'Baby Boomer',
    'Generation X',
    'Millennial',
    'Generation Z',
)

data = {
    year: dict.fromkeys(generations, 0)
    for year in range(first_year, last_year + 1)
}

for gen, spans in zip(df['generation'], df['term_years']):
    if gen not in generations:
        # get_generation() can return 'Unknown', which has no counter here;
        # skip it instead of failing with a KeyError.
        continue
    for start, end in spans:
        # The end year is exclusive (a [2007, 2013] term counts 2007..2012).
        # Clamping to the tracked window replaces the per-year bounds check.
        for year in range(max(start, first_year), min(end, last_year + 1)):
            data[year][gen] += 1

In [10]:
# Looking at data for 3 years
# random.sample draws without replacement, so the years shown are always
# distinct (random.choice inside a loop could print the same year twice).
# min() keeps the cell working if `data` ever holds fewer than 3 years.
for rand_year in random.sample(list(data), min(3, len(data))):
    print('---------------------')
    print('Year: ', rand_year)
    print(data[rand_year])
    print('---------------------')

---------------------
Year:  2014
{'Progressive': 0, 'Missionary': 0, 'Lost': 0, 'Greatest': 2, 'Silent': 95, 'Baby Boomer': 343, 'Generation X': 98, 'Millennial': 3, 'Generation Z': 0}
---------------------
---------------------
Year:  1970
{'Progressive': 0, 'Missionary': 0, 'Lost': 44, 'Greatest': 441, 'Silent': 65, 'Baby Boomer': 0, 'Generation X': 0, 'Millennial': 0, 'Generation Z': 0}
---------------------
---------------------
Year:  2010
{'Progressive': 0, 'Missionary': 0, 'Lost': 0, 'Greatest': 6, 'Silent': 156, 'Baby Boomer': 323, 'Generation X': 54, 'Millennial': 1, 'Generation Z': 0}
---------------------


In [11]:
# Getting Percentages
#
# Builds `congress_data`: {year: {generation label: share of that year's
# total count, as a percentage rounded to 2 decimals}}.
congress_data = {}
for year, counts in data.items():
    total = sum(counts.values())

    # A year with no counted members would otherwise divide by zero;
    # report 0.0 for every generation in that case.
    congress_data[year] = {
        generation: round((count / total) * 100, 2) if total else 0.0
        for generation, count in counts.items()
    }

In [12]:
# Looking at data for 3 years
# random.sample draws without replacement, so the years shown are always
# distinct (random.choice inside a loop could print the same year twice).
# min() keeps the cell working if `data` ever holds fewer than 3 years.
for rand_year in random.sample(list(data), min(3, len(data))):
    print('---------------------')
    print('Year: ', rand_year)
    print('Counts')
    print(data[rand_year])
    print('Percentage')
    print(congress_data[rand_year])
    print('---------------------')

---------------------
Year:  2012
Counts
{'Progressive': 0, 'Missionary': 0, 'Lost': 0, 'Greatest': 5, 'Silent': 124, 'Baby Boomer': 331, 'Generation X': 77, 'Millennial': 1, 'Generation Z': 0}
Percentage
{'Progressive': 0.0, 'Missionary': 0.0, 'Lost': 0.0, 'Greatest': 0.93, 'Silent': 23.05, 'Baby Boomer': 61.52, 'Generation X': 14.31, 'Millennial': 0.19, 'Generation Z': 0.0}
---------------------
---------------------
Year:  1974
Counts
{'Progressive': 0, 'Missionary': 0, 'Lost': 19, 'Greatest': 396, 'Silent': 130, 'Baby Boomer': 0, 'Generation X': 0, 'Millennial': 0, 'Generation Z': 0}
Percentage
{'Progressive': 0.0, 'Missionary': 0.0, 'Lost': 3.49, 'Greatest': 72.66, 'Silent': 23.85, 'Baby Boomer': 0.0, 'Generation X': 0.0, 'Millennial': 0.0, 'Generation Z': 0.0}
---------------------
---------------------
Year:  1949
Counts
{'Progressive': 0, 'Missionary': 62, 'Lost': 285, 'Greatest': 197, 'Silent': 0, 'Baby Boomer': 0, 'Generation X': 0, 'Millennial': 0, 'Generation Z': 0}
Percent