In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import json

# Set pandas display options and have matplotlib render charts inline in the notebook
# Allow up to 1000 rows in a rendered frame, and format floats with
# thousands separators and two decimal places.
pd.options.display.max_rows = 1000
pd.set_option('display.float_format', '{:,.2f}'.format)
%matplotlib inline

# Load the data
# Both file names are relative, so they resolve against the notebook's working
# directory - change the paths below if your data lives elsewhere.
population = pd.read_csv('Population by Age and Sex - US, States, Counties.csv')

# JSON text is UTF-8; pass the encoding explicitly so the file decodes the same
# way on every platform instead of depending on the locale's default encoding.
with open('legislators-historical.json', 'r', encoding='utf-8') as file:
    file_data = json.load(file)

# Flatten the nested records into one row per (legislator, term):
# person-level fields are repeated on every term that person served.
rows = []
for person in file_data:
    ids = person["id"]
    bio = person["bio"]
    base_info = {
        # .get() yields None when an optional identifier / bio field is absent
        "bioguide_id": ids.get("bioguide"),
        "govtrack_id": ids.get("govtrack"),
        "icpsr_id": ids.get("icpsr"),
        "wikipedia": ids.get("wikipedia"),
        "wikidata": ids.get("wikidata"),
        # first / last name are required: a missing key raises KeyError
        "first_name": person["name"]["first"],
        "last_name": person["name"]["last"],
        "gender": bio.get("gender"),
        "birthday": bio.get("birthday"),
    }
    for term in person["terms"]:
        # Person fields first, then term fields, so column order stays stable
        rows.append({
            **base_info,
            "type": term["type"],
            "start": term["start"],
            "end": term["end"],
            "state": term["state"],
            # district / party / class are optional on a term, hence .get()
            "district": term.get("district"),
            "party": term.get("party"),
            "class": term.get("class"),
        })

# Convert to DataFrame (built once from the list of dicts)
legislators = pd.DataFrame(rows)

# Note: for now, it is okay to ignore the warning about mixed types (likely the DtypeWarning that pd.read_csv emits for the population file).

In [None]:
# Convert term start & end from strings to datetimes so the .dt accessor works
legislators['start'] = pd.to_datetime(legislators['start'])
legislators['end'] = pd.to_datetime(legislators['end'])

# Keep only terms that started in 2000 or later.
# .copy() detaches the result from `legislators`, so adding or modifying
# columns on the filtered frame later does not raise SettingWithCopyWarning.
leg_filtered_df = legislators[legislators['start'].dt.year >= 2000].copy()

# Preview the first rows (the stray closing parenthesis here was a SyntaxError)
leg_filtered_df.head()

Unnamed: 0,bioguide_id,govtrack_id,icpsr_id,wikipedia,wikidata,first_name,last_name,gender,birthday,type,start,end,state,district,party,class
36776,M001141,300074,49904.0,Zell Miller,Q187516,Zell,Miller,M,1932-02-24,sen,2000-07-24,2005-01-03,GA,,Democrat,3.0
36778,A000121,300004,29148.0,George Allen (American politician),Q527553,George,Allen,M,1952-03-08,sen,2001-01-03,2007-01-03,VA,,Republican,1.0
36787,A000217,400551,15125.0,Dick Armey,Q781336,Richard,Armey,M,1940-07-07,rep,2001-01-03,2003-01-03,TX,26.0,Republican,
36791,B000081,400525,29528.0,John Baldacci,Q502949,John,Baldacci,M,1955-01-30,rep,2001-01-03,2003-01-03,ME,2.0,Democrat,
36796,B000134,400526,29363.0,James Barcia,Q1679884,James,Barcia,M,1952-02-25,rep,2001-01-03,2003-01-03,MI,5.0,Democrat,
