In [2]:
import os
import json
import pandas as pd

In [3]:
def flatten_json_concatenated(json_obj, prefix='', index_lists=False):
    """
    Recursively flattens a JSON object into a dictionary and concatenates hierarchy field names

    Nested dictionary keys are joined with '_' (``{"a": {"b": 1}}`` becomes
    ``{"a_b": 1}``). Empty dictionaries and empty lists contribute no entries.

    Args:
        json_obj: A decoded JSON value (dict, list, or scalar).
        prefix: Key prefix accumulated so far. The final character of the
            prefix is stripped when a scalar is stored, so a caller-supplied
            prefix should end with '_'.
        index_lists: When False (the default), every element of a list is
            flattened under the same prefix, so elements that produce the same
            key overwrite each other and only the last one is kept. When True,
            the element position is inserted into the key (``c_0_d``,
            ``c_1_d``, ...) so that no list element is lost.

    Returns:
        A flat dict mapping concatenated key names to scalar values.
    """
    flat_dict = {}
    if isinstance(json_obj, dict):
        for key, value in json_obj.items():
            # An empty prefix concatenates to just "key_", so no special case
            # is needed for the top level.
            flat_dict.update(
                flatten_json_concatenated(value, prefix + key + '_', index_lists)
            )
    elif isinstance(json_obj, list):
        for position, item in enumerate(json_obj):
            # Without an index every element shares one prefix and later
            # elements silently replace earlier ones on key collisions.
            item_prefix = prefix + str(position) + '_' if index_lists else prefix
            flat_dict.update(
                flatten_json_concatenated(item, item_prefix, index_lists)
            )
    else:
        # Scalar leaf: drop the trailing '_' separator from the prefix.
        flat_dict[prefix[:-1]] = json_obj
    return flat_dict

In [4]:
def read_json_files_in_directory(directory_path):
    """
    Reads all JSON files in a directory and returns a dictionary where the file name without extension is the key and the value is the JSON data

    Only regular files whose name ends in ".json" are read; sub-directories
    are not searched. Files are processed in sorted name order so the
    insertion order of the returned dictionary is the same on every run.

    Args:
        directory_path: Path of the directory to scan.

    Returns:
        dict mapping each file's base name (extension removed) to its decoded
        JSON content.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    json_data_dict = {}
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(directory_path, filename)
        # A directory that happens to be named "*.json" cannot be opened.
        if not os.path.isfile(file_path):
            continue
        # Decode as UTF-8 explicitly instead of relying on the platform's
        # locale encoding, which is not UTF-8 on every system.
        with open(file_path, "r", encoding="utf-8") as f:
            json_data_dict[os.path.splitext(filename)[0]] = json.load(f)
    return json_data_dict


In [14]:
# Machine-specific location of the resume JSON files; change this path to
# point at your own copy of the data before running the notebook.
RESUMES_DIR = r'C:\Users\aghor\OneDrive\Documents\GitHub\congress_resume\resumes'
json_file_objects = read_json_files_in_directory(RESUMES_DIR)

# Display every row and column when a frame is rendered in later cells.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Build one Series per file (flattened field name -> value, named after the
# file) and combine them in a single concat. Growing the frame with
# pd.concat inside a loop re-copies the whole frame on every iteration.
file_series_list = [
    pd.Series(flatten_json_concatenated(json_file_obj), name=json_file_name)
    for json_file_name, json_file_obj in json_file_objects.items()
]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no JSON files were found. Rows are flattened field names,
# columns are file names.
df = pd.concat(file_series_list, axis=1) if file_series_list else pd.DataFrame()

In [19]:
# Print every row label of df on its own line.
# The guard keeps an empty index from emitting a lone blank line.
if len(df.index) > 0:
    print(*df.index, sep='\n')

url
created_date
congress
session
report_date
legislative_activity_start_date
legislative_activity_end_date
legislative_activity_senate_days_in_session
legislative_activity_senate_time_in_session
legislative_activity_senate_congressional_record_pages_of_proceedings
legislative_activity_senate_public_bills_enacted_into_law
legislative_activity_senate_private_bills_enacted_into_law
legislative_activity_senate_bills_in_conference
legislative_activity_senate_measures_passed_total_total
legislative_activity_senate_measures_passed_total_senate_bills
legislative_activity_senate_measures_passed_total_house_bills
legislative_activity_senate_measures_passed_total_senate_joint_resolutions
legislative_activity_senate_measures_passed_total_house_joint_resolutions
legislative_activity_senate_measures_passed_total_senate_concurrent_resolutions
legislative_activity_senate_measures_passed_total_house_concurrent_resolutions
legislative_activity_senate_measures_passed_total_simple_resolutions
legislative

In [21]:
# Display the report metadata and Senate session rows for every file (column).
df.loc[
    [
        'created_date',
        'congress',
        'session',
        'report_date',
        'legislative_activity_start_date',
        'legislative_activity_end_date',
        'legislative_activity_senate_days_in_session',
        'legislative_activity_senate_time_in_session',
    ],
    :,
]

Unnamed: 0,100_1,100_2,101_1,101_2,102_1,102_2,103_1,103_2,104_1,104_2,105_1,105_2,106_1,106_2,107_1,107_2,108_1,108_2,109_1,109_2,110_1,110_2,111_1,111_2,112_1,112_2,113_1,113_2,114_1,114_2,115_1,115_2,116_1,116_2,117_1,117_2,80_1,80_2,81_1,81_2,82_1,82_2,83_1,83_2,84_1,84_2,85_1,85_2,86_1,86_2,87_1,87_2,88_1,88_2,89_1,89_2,90_1,90_2,91_1,91_2,92_1,92_2,93_1,93_2,94_1,94_2,95_1,95_2,96_1,96_2,97_1,97_2,98_1,98_2,99_1,99_2
created_date,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-02T08:00:00.000Z,2023-02-01T08:00:00.000Z,2023-02-01T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z,2023-03-19T08:00:00.000Z
congress,100,100,101,101,102,102,103,103,104,104,105,105,106,106,107,107,108,108,109,109,110,110,111,111,112,112,113,113,114,114,115,115,116,116,117,117,80,80,81,81,82,82,83,83,84,84,85,85,86,86,87,87,88,88,89,89,90,90,91,91,92,92,93,93,94,94,95,95,96,96,97,97,98,98,99,99
session,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,2,1,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2
report_date,1988-11-10T05:00:00.000Z,1988-11-10T05:00:00.000Z,1991-01-03T05:00:00.000Z,1991-01-03T05:00:00.000Z,1993-01-05T05:00:00.000Z,1993-01-05T05:00:00.000Z,1994-12-20T05:00:00.000Z,1994-12-20T05:00:00.000Z,1997-01-07T05:00:00.000Z,1997-01-07T05:00:00.000Z,1999-01-19T00:00:00.000Z,1999-01-19T00:00:00.000Z,2001-01-30T05:00:00.000Z,2001-01-30T05:00:00.000Z,2003-05-06T05:00:00.000Z,2003-05-06T05:00:00.000Z,2005-02-15T05:00:00.000Z,2005-02-15T05:00:00.000Z,2006-03-02T05:00:00.000Z,,,,2010-03-19T05:00:00.000Z,2011-03-29T05:00:00.000Z,2012-03-07T00:00:00.000Z,2013-03-13T00:00:00.000Z,2014-02-27T05:00:00.000Z,2015-03-04T05:00:00.000Z,2016-03-16T05:00:00.000Z,2017-03-09T05:00:00Z,2018-03-15T05:00:00.000Z,2019-04-11T05:00:00.000Z,2021-04-20T05:00:00.000Z,2020-01-07T05:00:00.000Z,2022-04-28T05:00:00.000Z,2023-01-24T05:00:00.000Z,,,1950-01-02T05:00:00.000Z,1950-01-02T05:00:00.000Z,1952-07-07T05:00:00.000Z,1952-07-07T05:00:00.000Z,1954-12-02T05:00:00.000Z,1954-12-02T05:00:00.000Z,1956-07-27T05:00:00.000Z,1956-07-27T05:00:00.000Z,1958-08-23T05:00:00.000Z,1958-08-23T05:00:00.000Z,1960-09-01T05:00:00.000Z,1960-09-01T05:00:00.000Z,1962-10-13T05:00:00.000Z,1962-10-13T05:00:00.000Z,1964-10-03T05:00:00.000Z,1964-10-03T05:00:00.000Z,1966-10-22T05:00:00.000Z,1966-10-22T05:00:00.000Z,1968-10-14T05:00:00.000Z,1968-10-14T05:00:00.000Z,1971-01-02T05:00:00.000Z,1971-01-02T05:00:00.000Z,1972-10-18T05:00:00.000Z,1972-10-18T05:00:00.000Z,1974-12-20T05:00:00.000Z,1974-12-20T05:00:00.000Z,1976-10-01T05:00:00.000Z,1976-10-01T00:00:00.000Z,1978-10-14T05:00:00.000Z,1978-10-14T05:00:00.000Z,1980-12-30T05:00:00.000Z,1980-12-30T05:00:00.000Z,1983-01-25T05:00:00.000Z,1983-01-25T05:00:00.000Z,1984-11-14T05:00:00.000Z,1984-11-14T05:00:00.000Z,1987-01-06T00:00:00.000Z,1987-01-06T00:00:00.000Z
legislative_activity_start_date,1987-01-06T05:00:00.000Z,1988-01-25T05:00:00.000Z,1989-01-03T05:00:00.000Z,1990-01-23T05:00:00.000Z,1991-01-03T05:00:00.000Z,1992-01-03T05:00:00.000Z,1993-01-05T05:00:00.000Z,1994-01-25T05:00:00.000Z,1995-01-04T05:00:00.000Z,1996-01-03T05:00:00.000Z,1997-01-03T00:00:00.000Z,1998-01-27T00:00:00.000Z,1999-01-06T05:00:00.000Z,2000-01-24T05:00:00.000Z,2001-01-03T05:00:00.000Z,2002-01-23T05:00:00.000Z,2020-01-03T05:00:00.000Z,2004-01-03T05:00:00.000Z,2005-01-04T05:00:00.000Z,2006-01-03T05:00:00.000Z,2007-01-04T05:00:00.000Z,2008-01-03T05:00:00.000Z,2009-01-06T05:00:00.000Z,2010-01-05T05:00:00.000Z,2011-01-05T00:00:00.000Z,2012-01-03T00:00:00.000Z,2013-01-03T05:00:00.000Z,2014-01-03T05:00:00.000Z,2015-01-06T05:00:00.000Z,2016-01-04T05:00:00Z,2017-01-03T05:00:00.000Z,2018-01-03T05:00:00.000Z,2020-01-03T05:00:00.000Z,2019-01-03T05:00:00.000Z,2021-01-03T05:00:00.000Z,2022-01-03T05:00:00.000Z,1947-01-03T05:00:00.000Z,1948-01-06T05:00:00.000Z,1949-01-03T05:00:00.000Z,1950-01-03T05:00:00.000Z,1951-01-03T05:00:00.000Z,1952-01-08T05:00:00.000Z,1953-01-03T05:00:00.000Z,1954-01-06T05:00:00.000Z,1955-01-05T05:00:00.000Z,1956-01-03T05:00:00.000Z,1957-01-03T05:00:00.000Z,1958-01-07T05:00:00.000Z,1959-01-07T05:00:00.000Z,1960-01-06T05:00:00.000Z,1961-01-03T05:00:00.000Z,1962-01-10T05:00:00.000Z,1963-01-09T05:00:00.000Z,1964-01-07T05:00:00.000Z,1965-01-04T05:00:00.000Z,1966-01-10T05:00:00.000Z,1967-01-10T05:00:00.000Z,1968-01-15T05:00:00.000Z,1969-01-03T05:00:00.000Z,1970-01-19T05:00:00.000Z,1971-01-21T05:00:00.000Z,1972-01-18T05:00:00.000Z,1973-01-03T05:00:00.000Z,1974-01-21T05:00:00.000Z,1975-01-14T05:00:00.000Z,1976-01-19T00:00:00.000Z,1977-01-04T00:00:00.000Z,1978-01-19T05:00:00.000Z,,1980-01-03T05:00:00.000Z,1981-01-05T05:00:00.000Z,,1983-01-03T05:00:00.000Z,1984-01-23T05:00:00.000Z,1985-01-03T00:00:00.000Z,1986-01-21T00:00:00.000Z
legislative_activity_end_date,1987-12-22T05:00:00.000Z,1988-10-22T05:00:00.000Z,1989-11-22T05:00:00.000Z,1990-10-28T05:00:00.000Z,1992-01-03T05:00:00.000Z,1992-10-09T05:00:00.000Z,1993-11-26T05:00:00.000Z,1994-12-01T05:00:00.000Z,1996-01-03T05:00:00.000Z,1996-10-03T05:00:00.000Z,1997-11-13T00:00:00.000Z,1998-10-21T00:00:00.000Z,1999-11-22T05:00:00.000Z,2000-12-15T05:00:00.000Z,2001-12-20T05:00:00.000Z,2002-11-22T05:00:00.000Z,2021-01-03T05:00:00.000Z,2004-12-31T05:00:00.000Z,2005-12-22T05:00:00.000Z,2006-12-31T05:00:00.000Z,2007-12-31T05:00:00.000Z,2009-01-03T05:00:00.000Z,2009-12-31T05:00:00.000Z,2010-12-31T05:00:00.000Z,2012-01-03T00:00:00.000Z,2013-01-03T00:00:00.000Z,2014-01-03T05:00:00.000Z,2015-01-02T05:00:00.000Z,2015-12-31T05:00:00.000Z,2017-01-03T05:00:00Z,2018-01-03T05:00:00.000Z,2019-01-03T05:00:00.000Z,2021-01-03T05:00:00.000Z,2020-01-03T05:00:00.000Z,2022-01-03T05:00:00.000Z,2023-01-03T05:00:00.000Z,1947-12-19T05:00:00.000Z,1948-12-31T05:00:00.000Z,1949-10-19T05:00:00.000Z,1951-01-02T05:00:00.000Z,1951-10-20T05:00:00.000Z,1952-07-07T05:00:00.000Z,1953-08-03T05:00:00.000Z,1954-12-02T05:00:00.000Z,1955-08-02T05:00:00.000Z,1956-07-27T05:00:00.000Z,1957-08-30T05:00:00.000Z,1958-08-24T05:00:00.000Z,1959-09-14T05:00:00.000Z,1960-09-01T05:00:00.000Z,1961-09-26T05:00:00.000Z,1962-10-13T05:00:00.000Z,1963-12-30T05:00:00.000Z,1964-10-03T05:00:00.000Z,1965-10-23T05:00:00.000Z,1966-10-22T05:00:00.000Z,1967-12-15T05:00:00.000Z,1968-10-14T05:00:00.000Z,1969-12-23T05:00:00.000Z,1971-01-02T05:00:00.000Z,1971-12-17T05:00:00.000Z,1972-10-18T05:00:00.000Z,1973-12-22T05:00:00.000Z,1974-12-20T05:00:00.000Z,1975-12-19T05:00:00.000Z,1976-10-01T00:00:00.000Z,1977-12-15T00:00:00.000Z,1978-10-15T05:00:00.000Z,,1980-12-16T05:00:00.000Z,1981-12-16T05:00:00.000Z,,1983-11-18T05:00:00.000Z,1984-10-12T05:00:00.000Z,1985-12-20T00:00:00.000Z,1986-10-18T00:00:00.000Z
legislative_activity_senate_days_in_session,170,137,136,138,158,129,153,138,211,132,153,143,162,141,173,149,167,133,159,138,190,184,191,158,170,153,156,136,168,165,195,191,192,187,192,178,143,114,186,203,172,115,125,169,105,119,133,138,140,140,146,177,189,186,177,168,200,158,176,208,186,162,184,168,178,142,178,159,167,166,165,147,150,131,170,143
legislative_activity_senate_time_in_session,P1214M52S,PT1126H52M,P1003H19M,PT1250H14M,PT1200H44M,PT1091H09M,P1269H42M,P1243H33M,P1839H10M,P1036H45M,PT1093H7M,P1095H05M,PT1183H57M,PT1017H51M,PT1236H15M,PT1043H23M,1454:05,P1031H31M,PT1222H26M,PT1027H48M,PT1375H54M,PT988H30M,PT1420H39M,PT1074H40M,P1101H44M,PT930H12M,PT1095H12M,PT908H15M,PT1073H39M,PT780H58M,PT1166H34M,PT1015H29M,PT963H52M,PT947H46M,PT1083H11M,PT958H32M,PT807H36M,PT653H37M,PT1144H53M,PT1265H09M,PT996H46M,PT651H24M,PT763H35M,PT1198H17M,P559H41M,PT801H42M,PT860H44M,PT1014H45M,PT1009H48M,PT1188H49M,PT1005H0M,PT1159H22M,PT1044H43M,PT1350H25M,PT960H50M,PT852H54M,PT1090H48M,PT870H11M,PT926H56M,P1424H51M,PT1157H14M,PT1137H23M,PT1084H13M,PT1068H09M,PT1177H11M,PT1033H01M,PT1143H42M,PT1366H22M,PT1159H01M,PT1165H10M,PT1079H54M,PT1079H55M,PT1010H47M,PT940H28M,PT1252H31M,PT1278H15M
