In [54]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import json

# These commands below set some options for pandas and to have matplotlib show the charts in the notebook
# Show up to 1000 rows when a frame is displayed, and render floats with
# thousands separators and two decimal places.
pd.options.display.max_rows = 1000
pd.set_option('display.float_format', '{:,.2f}'.format)
%matplotlib inline

# Load the data
# The paths below are relative to the notebook's working directory - please change them as needed
congress = pd.read_csv('congress-terms.csv')
population = pd.read_csv('Population by Age and Sex - US, States, Counties.csv')
# JSON text is UTF-8; pass the encoding explicitly so non-ASCII characters decode
# correctly even where the platform's default text encoding is not UTF-8.
with open('legislators-historical.json', 'r', encoding='utf-8') as file:
    file_data = json.load(file)
# Flatten the nested JSON: emit one record per (legislator, term) pair
rows = []
for person in file_data:
    person_ids = person["id"]
    person_name = person["name"]
    person_bio = person["bio"]

    # Fields that are repeated on every term this person served
    base_info = {
        "bioguide_id": person_ids.get("bioguide"),
        "govtrack_id": person_ids.get("govtrack"),
        "icpsr_id": person_ids.get("icpsr"),
        "wikipedia": person_ids.get("wikipedia"),
        "wikidata": person_ids.get("wikidata"),
        "first_name": person_name["first"],
        "last_name": person_name["last"],
        "gender": person_bio.get("gender"),
        "birthday": person_bio.get("birthday"),
    }

    for term in person["terms"]:
        # Person-level fields first, followed by the term-specific ones;
        # optional term keys fall back to None via .get()
        rows.append({
            **base_info,
            "type": term["type"],
            "start": term["start"],
            "end": term["end"],
            "state": term["state"],
            "district": term.get("district"),
            "party": term.get("party"),
            "class": term.get("class"),
        })

# Build the DataFrame in one go from the collected records
legislators = pd.DataFrame(rows)

# Note - for now, it is okay to ignore the warning about mixed types (presumably the DtypeWarning that pd.read_csv emits while loading the CSV files above).

In [None]:
# Optional peek at the raw frame (left disabled)
#print(legislators.head(2))

# Parse the term boundary columns into pandas datetimes
for date_col in ('start', 'end'):
    legislators[date_col] = pd.to_datetime(legislators[date_col])

# Keep only the terms whose start date falls in the year 2000 or later
started_2000_or_later = legislators['start'].dt.year.ge(2000)
filtered_df = legislators.loc[started_2000_or_later]

# Gabby Check - preview the first rows of the filtered frame
print(filtered_df.head())

      bioguide_id  govtrack_id  icpsr_id                           wikipedia  \
36776     M001141       300074 49,904.00                         Zell Miller   
36778     A000121       300004 29,148.00  George Allen (American politician)   
36787     A000217       400551 15,125.00                          Dick Armey   
36791     B000081       400525 29,528.00                       John Baldacci   
36796     B000134       400526 29,363.00                        James Barcia   

       wikidata first_name last_name gender    birthday type      start  \
36776   Q187516       Zell    Miller      M  1932-02-24  sen 2000-07-24   
36778   Q527553     George     Allen      M  1952-03-08  sen 2001-01-03   
36787   Q781336    Richard     Armey      M  1940-07-07  rep 2001-01-03   
36791   Q502949       John  Baldacci      M  1955-01-30  rep 2001-01-03   
36796  Q1679884      James    Barcia      M  1952-02-25  rep 2001-01-03   

             end state  district       party  class  
36776 2005-01-