In [2]:
import json
import pandas as pd
import numpy as np


def parse_congresses(df):
    """Expand the JSON-encoded ``congresses`` column into one row per congress.

    Every cell of ``df['congresses']`` is decoded with ``json.loads`` and is
    expected to yield a sequence of objects. For each object one output row
    is built from the member's remaining columns plus the object's own
    key/value pairs; when a key exists in both, the congress value wins.

    The input frame is NOT modified. Decoding happens on a local copy of the
    column, so calling this function again on the same frame (for example
    when a notebook cell is re-run) gives the same result instead of failing
    on already-decoded values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``congresses`` column. Cells are normally JSON
        text; cells that are not ``str``/``bytes`` are assumed to be decoded
        already and are used as-is.

    Returns
    -------
    pandas.DataFrame
        One row per (input row, congress entry) pair with a fresh
        ``RangeIndex``. The ``congresses`` column itself is not included.
        Input rows whose list is empty contribute no output rows.

    Raises
    ------
    KeyError
        If ``df`` has no ``congresses`` column.
    json.JSONDecodeError
        If a text cell is not valid JSON.
    """
    # Decode into a local list rather than assigning back onto ``df`` so the
    # caller's frame keeps its original JSON strings.
    decoded_lists = [
        json.loads(cell) if isinstance(cell, (str, bytes, bytearray)) else cell
        for cell in df['congresses']
    ]

    # One plain dict per input row, built once instead of dropping the
    # column from a Series for every single congress entry.
    member_records = df.drop(columns='congresses').to_dict('records')

    rows = []
    for member, congresses in zip(member_records, decoded_lists):
        for congress in congresses:
            new_row = dict(member)
            new_row.update(congress)
            rows.append(new_row)

    return pd.DataFrame(rows)

In [3]:
# Democrat members: read the raw export, then flatten it to one row per
# member per congress with parse_congresses.
democrats_df = pd.read_csv("../DATA/democrats.csv")
democrats_parsed_df = democrats_df.pipe(parse_congresses)

# Members from the second export ("else.csv"): same two steps.
other_df = pd.read_csv("../DATA/else.csv")
other_parsed_df = other_df.pipe(parse_congresses)

In [4]:
# Preview the flattened Democrat frame returned by parse_congresses.
democrats_parsed_df

Unnamed: 0,id,givenName,familyName,middleName,unaccentedGivenName,unaccentedFamilyName,unaccentedMiddleName,nickName,honorificPrefix,honorificSuffix,honorificTitle,birthYear,deathYear,position,congressNumber,stateName,parties,electionCirca,electionDate,leadershipPositions
0,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,99,HI,[Democrat],,,
1,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,102,HI,[Democrat],,,
2,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,103,HI,[Democrat],,,
3,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,104,HI,[Democrat],,,
4,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,105,HI,[Democrat],,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7726,Z000001,Clement,Zablocki,John,Clement,Zablocki,John,,,,,1912,1983.0,Representative,97,WI,[Democrat],,,
7727,Z000002,Leo,Zeferetti,C.,Leo,Zeferetti,C.,,,,,1927,2018.0,Representative,94,NY,[Democrat],,,
7728,Z000002,Leo,Zeferetti,C.,Leo,Zeferetti,C.,,,,,1927,2018.0,Representative,95,NY,[Democrat],,,
7729,Z000002,Leo,Zeferetti,C.,Leo,Zeferetti,C.,,,,,1927,2018.0,Representative,96,NY,[Democrat],,,


In [5]:
# Stack the two flattened frames (Democrat rows first) and renumber the rows
# from 0 so the index is unique across both sources.
combined_df = pd.concat(
    objs=(democrats_parsed_df, other_parsed_df),
    ignore_index=True,
)
combined_df

Unnamed: 0,id,givenName,familyName,middleName,unaccentedGivenName,unaccentedFamilyName,unaccentedMiddleName,nickName,honorificPrefix,honorificSuffix,honorificTitle,birthYear,deathYear,position,congressNumber,stateName,parties,electionCirca,electionDate,leadershipPositions
0,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,99,HI,[Democrat],,,
1,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,102,HI,[Democrat],,,
2,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,103,HI,[Democrat],,,
3,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,104,HI,[Democrat],,,
4,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,,1938,,Representative,105,HI,[Democrat],,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13787,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,,1961,,Representative,114,MT,[Republican],,,
13788,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,,1961,,Representative,115,MT,[Republican],False,,
13789,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,,1961,,Representative,118,MT,[Republican],False,,
13790,Z000014,Edwin,Zschau,Van Wyck,Edwin,Zschau,Van Wyck,,,,,1940,,Representative,98,CA,[Republican],,,


In [7]:
# Map a congress number to a year: congress 1 -> 1788, and every later
# congress adds two years (e.g. congress 99 -> 1984).
combined_df['electionYear'] = 1788 + 2 * (combined_df['congressNumber'] - 1)

# Plain year difference; month and day of birth are not taken into account.
combined_df['ageDuringElection'] = (
    combined_df['electionYear'] - combined_df['birthYear']
)
combined_df

Unnamed: 0,id,givenName,familyName,middleName,unaccentedGivenName,unaccentedFamilyName,unaccentedMiddleName,nickName,honorificPrefix,honorificSuffix,...,deathYear,position,congressNumber,stateName,parties,electionCirca,electionDate,leadershipPositions,electionYear,ageDuringElection
0,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,...,,Representative,99,HI,[Democrat],,,,1984,46
1,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,...,,Representative,102,HI,[Democrat],,,,1990,52
2,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,...,,Representative,103,HI,[Democrat],,,,1992,54
3,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,...,,Representative,104,HI,[Democrat],,,,1994,56
4,A000014,Neil,Abercrombie,,Neil,Abercrombie,,,,,...,,Representative,105,HI,[Democrat],,,,1996,58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13787,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,...,,Representative,114,MT,[Republican],,,,2014,53
13788,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,...,,Representative,115,MT,[Republican],False,,,2016,55
13789,Z000018,Ryan,Zinke,,Ryan,Zinke,,,,,...,,Representative,118,MT,[Republican],False,,,2022,61
13790,Z000014,Edwin,Zschau,Van Wyck,Edwin,Zschau,Van Wyck,,,,...,,Representative,98,CA,[Republican],,,,1982,42


In [9]:
# Write the combined frame (with whatever columns it holds at this point)
# to disk.
# NOTE(review): index=False is not passed, so the row index is also written
# as an unnamed leading column — confirm downstream readers expect that.
# NOTE(review): 'parties' appears to hold lists; CSV will store their string
# form, not JSON — verify before reading the file back.
combined_df.to_csv("../DATA/house_biographic.csv")