In [15]:
import datetime
import isodate

import os
import json
import pandas as pd
import numpy as np

In [2]:
def flatten_json_concatenated(json_obj, prefix=''):
    """
    Flatten a parsed JSON value into a single-level dictionary.

    Nested dictionary keys are joined with '_' to form the flat key, e.g.
    {"a": {"b": 1}} becomes {"a_b": 1}. Lists do not contribute to the key:
    every element is flattened under the list's own prefix, so elements that
    yield the same flat key overwrite one another (the last one wins).

    Parameters
    ----------
    json_obj : dict, list, or scalar
        The parsed JSON value to flatten.
    prefix : str
        Key prefix accumulated so far; it ends with '_' when non-empty.

    Returns
    -------
    dict
        Mapping of flattened key to scalar value. Empty dicts and lists
        contribute no entries; a bare scalar with no prefix is stored under ''.
    """
    # Leaf value: strip the trailing '_' separator from the accumulated prefix.
    if not isinstance(json_obj, (dict, list)):
        return {prefix[:-1]: json_obj}

    if isinstance(json_obj, dict):
        # Each key extends the prefix by one hierarchy level.
        children = (((prefix or '') + name + '_', child) for name, child in json_obj.items())
    else:
        # List elements share the parent's prefix unchanged.
        children = ((prefix, element) for element in json_obj)

    flattened = {}
    for child_prefix, child in children:
        flattened.update(flatten_json_concatenated(child, child_prefix))
    return flattened

In [3]:
def read_json_files_in_directory(directory_path):
    """
    Reads all JSON files in a directory and returns a dictionary where the file name without extension is the key and the value is the JSON data

    Only regular files whose name ends with ".json" are read; other entries
    (including sub-directories that happen to end in ".json") are skipped.
    Files are decoded as UTF-8 and visited in sorted file-name order, so the
    key order of the returned dictionary is the same on every platform.

    Parameters
    ----------
    directory_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    dict
        Maps each file name without its extension to the parsed JSON content.

    Raises
    ------
    OSError
        If the directory cannot be listed or a file cannot be opened.
    json.JSONDecodeError
        If a ".json" file does not contain valid JSON.
    """
    json_data_dict = {}
    # os.listdir() returns entries in arbitrary order; sort for reproducible output.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith(".json"):
            continue
        file_path = os.path.join(directory_path, filename)
        if not os.path.isfile(file_path):
            continue
        # Explicit encoding: the platform default (e.g. cp1252 on Windows)
        # would mis-decode non-ASCII characters in UTF-8 JSON files.
        with open(file_path, "r", encoding="utf-8") as f:
            json_data_dict[os.path.splitext(filename)[0]] = json.load(f)
    return json_data_dict


In [4]:
# Directory containing the resume JSON files. The default is the original
# author's local checkout; set the CONGRESS_RESUME_DIR environment variable
# to run the notebook on another machine without editing this cell.
RESUME_DIR = os.environ.get(
    'CONGRESS_RESUME_DIR',
    r'C:\Users\aghor\OneDrive\Documents\GitHub\congress_resume\resumes',
)
json_file_objects = read_json_files_in_directory(RESUME_DIR)

# Show every row and column when a frame is displayed in later cells.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Flatten each file into one {column_name: value} record, keyed by file name.
flattened_by_file = {}
for json_file_name, json_file_obj in json_file_objects.items():
    flattened_by_file[json_file_name] = flatten_json_concatenated(json_file_obj)

# Build the frame once (one row per file, columns = union of all flattened
# keys in order of first appearance) instead of growing it with pd.concat
# inside the loop, which re-copies the frame on every iteration.
# dtype=object keeps the raw JSON values un-coerced, matching what the
# previous concat-then-transpose construction produced.
df = pd.DataFrame(
    list(flattened_by_file.values()),
    index=list(flattened_by_file.keys()),
    dtype=object,
)

In [5]:
# Preview the first two rows of the flattened frame.
df.iloc[:2]

Unnamed: 0,url,created_date,congress,session,report_date,legislative_activity_start_date,legislative_activity_end_date,legislative_activity_senate_days_in_session,legislative_activity_senate_time_in_session,legislative_activity_senate_congressional_record_pages_of_proceedings,legislative_activity_senate_public_bills_enacted_into_law,legislative_activity_senate_private_bills_enacted_into_law,legislative_activity_senate_bills_in_conference,legislative_activity_senate_measures_passed_total_total,legislative_activity_senate_measures_passed_total_senate_bills,legislative_activity_senate_measures_passed_total_house_bills,legislative_activity_senate_measures_passed_total_senate_joint_resolutions,legislative_activity_senate_measures_passed_total_house_joint_resolutions,legislative_activity_senate_measures_passed_total_senate_concurrent_resolutions,legislative_activity_senate_measures_passed_total_house_concurrent_resolutions,legislative_activity_senate_measures_passed_total_simple_resolutions,legislative_activity_senate_measures_reported_total_total,legislative_activity_senate_measures_reported_total_senate_bills,legislative_activity_senate_measures_reported_total_house_bills,legislative_activity_senate_measures_reported_total_senate_joint_resolutions,legislative_activity_senate_measures_reported_total_house_joint_resolutions,legislative_activity_senate_measures_reported_total_senate_concurrent_resolutions,legislative_activity_senate_measures_reported_total_house_concurrent_resolutions,legislative_activity_senate_measures_reported_total_simple_resolutions,legislative_activity_senate_special_reports,legislative_activity_senate_conference_reports,legislative_activity_senate_measures_pending_on_calendar,legislative_activity_senate_measures_introduced_total_total,legislative_activity_senate_measures_introduced_total_bills,legislative_activity_senate_measures_introduced_total_joint_resolutions,legislative_activity_senate_measures_introduced_total_concurrent_resolutions,legislati
ve_activity_senate_measures_introduced_total_simple_resolutions,legislative_activity_senate_quorum_calls,legislative_activity_senate_yea_and_nay_votes,legislative_activity_senate_bills_vetoed,legislative_activity_senate_vetoes_overridden,legislative_activity_house_days_in_session,legislative_activity_house_time_in_session,legislative_activity_house_congressional_record_pages_of_proceedings,legislative_activity_house_public_bills_enacted_into_law,legislative_activity_house_private_bills_enacted_into_law,legislative_activity_house_bills_in_conference,legislative_activity_house_measures_passed_total_total,legislative_activity_house_measures_passed_total_senate_bills,legislative_activity_house_measures_passed_total_house_bills,legislative_activity_house_measures_passed_total_senate_joint_resolutions,legislative_activity_house_measures_passed_total_house_joint_resolutions,legislative_activity_house_measures_passed_total_senate_concurrent_resolutions,legislative_activity_house_measures_passed_total_house_concurrent_resolutions,legislative_activity_house_measures_passed_total_simple_resolutions,legislative_activity_house_measures_reported_total_total,legislative_activity_house_measures_reported_total_senate_bills,legislative_activity_house_measures_reported_total_house_bills,legislative_activity_house_measures_reported_total_house_joint_resolutions,legislative_activity_house_measures_reported_total_house_concurrent_resolutions,legislative_activity_house_measures_reported_total_simple_resolutions,legislative_activity_house_special_reports,legislative_activity_house_conference_reports,legislative_activity_house_measures_pending_on_calendar,legislative_activity_house_measures_introduced_total_total,legislative_activity_house_measures_introduced_total_bills,legislative_activity_house_measures_introduced_total_joint_resolutions,legislative_activity_house_measures_introduced_total_concurrent_resolutions,legislative_activity_house_measures_introduced_total_simple_resolutions,legi
slative_activity_house_quorum_calls,legislative_activity_house_yea_and_nay_votes,legislative_activity_house_recorded_votes,legislative_activity_house_bills_vetoed,legislative_activity_house_vetoes_overridden,legislative_activity_total_congressional_record_pages_of_proceedings,legislative_activity_total_congressional_record_extensions_of_remarks,legislative_activity_total_public_bills_enacted_into_law,legislative_activity_total_private_bills_enacted_into_law,legislative_activity_total_measures_passed_total_total,legislative_activity_total_measures_reported_total_total,legislative_activity_total_measures_introduced_total_total,legislative_activity_total_bills_vetoed,disposition_of_executive_nominations_start_date,disposition_of_executive_nominations_end_date,disposition_of_executive_nominations_civilian_nominees_total,disposition_of_executive_nominations_civilian_nominees_disposition_confirmed,disposition_of_executive_nominations_civilian_nominees_disposition_unconfirmed,disposition_of_executive_nominations_civilian_nominees_disposition_withdrawn,disposition_of_executive_nominations_civilian_nominees_disposition_rejected,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house_1,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house_2,disposition_of_executive_nominations_other_civilian_nominees_total,disposition_of_executive_nominations_other_civilian_nominees_disposition_confirmed,disposition_of_executive_nominations_other_civilian_nominees_disposition_unconfirmed,disposition_of_executive_nominations_air_force_nominees_total,disposition_of_executive_nominations_air_force_nominees_disposition_confirmed,disposition_of_executive_nominations_air_force_nominees_disposition_unconfirmed,disposition_of_executive_nominations_air_force_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_army_nominees_total,disposition_of_executive_nominations_army_nominees_disposition_confirmed,
disposition_of_executive_nominations_army_nominees_disposition_unconfirmed,disposition_of_executive_nominations_army_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_navy_nominees_total,disposition_of_executive_nominations_navy_nominees_disposition_confirmed,disposition_of_executive_nominations_navy_nominees_disposition_unconfirmed,disposition_of_executive_nominations_marine_corps_nominees_total,disposition_of_executive_nominations_marine_corps_nominees_disposition_confirmed,disposition_of_executive_nominations_marine_corps_nominees_disposition_unconfirmed,disposition_of_executive_nominations_marine_corps_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_summary_total_nominees_received_this_session,disposition_of_executive_nominations_summary_total_confirmed,disposition_of_executive_nominations_summary_total_unconfirmed,disposition_of_executive_nominations_summary_total_withdrawn,disposition_of_executive_nominations_summary_total_rejected,disposition_of_executive_nominations_summary_total_returned_to_white_house,footnote_resume.legislative_activity.senate.measures_reported_total.total,footnote_resume.legislative_activity.house.measures_reported_total.total,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house_1,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house_2,legislative_activity_house_measures_reported_total_senate_joint_resolutions,legislative_activity_house_measures_reported_total_senate_concurrent_resolutions,legislative_activity_house_measures_introduced_total_field1,legislative_activity_house_measures_introduced_total_field2,disposition_of_executive_nominations_civilian_nominees_carried_over_from_first_session,disposition_of_executive_nominations_other_civilian_nominees_carried_over_from_first_session,disposition_of_executive_nominations_air_force_nominees_carried_over_from_first_sessi
on,disposition_of_executive_nominations_army_nominees_carried_over_from_first_session,disposition_of_executive_nominations_army_nominees_disposition_withdrawn,disposition_of_executive_nominations_navy_nominees_carried_over_from_first_session,disposition_of_executive_nominations_navy_nominees_disposition_withdrawn,disposition_of_executive_nominations_marine_corps_nominees_carried_over_from_first_session,disposition_of_executive_nominations_summary_total_nominees_carried_over_from_first_session,legislative_activity_house_congressional_record_extensions_of_remarks,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_aug_sept_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_other_civilian_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_army_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_summary_total_failed_at_aug_sept_adjournment,disposition_of_executive_nominations_summary_total_failed_at_sine_die_adjournment,legislative_activity_senate_congressional_record_extensions_of_remarks,legislative_activity_senate_bills_through_conference,legislative_activity_house_bills_through_conference,disposition_of_executive_nominations_air_force_nominees_disposition_withdrawn,legislative_activity_senate_bills_not_signed,legislative_activity_total_bills_in_conference,legislative_activity_total_bills_not_signed,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_adjournment,disposition_of_executive_nominations_other_civilian_nominees_disposition_withdrawn,disposition_of_executive_nominations_summary_total_failed_at_ajournment,footnote_resume.legislative_activity.senate.bills_not_signed,disposition_of_executive_nominations_civilian_nominees_disposition_returned_at_sine_die_adjournment,disposition_of_executive_nominations_marine_corps_nominees_disposition_withdraw
n,disposition_of_executive_nominations_summary_total_returned_at_sine_die_adjournment,legislative_activity_senate_measures_reported_total_sentate_concurrent_resolutions,disposition_of_executive_nominations_civilian_nominees_disposition_return_at_sine_die_adjournment,disposition_of_executive_nominations_navy_nominees_disposition_return_at_sine_die_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_other_civilian_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_navy_nominees_disposition_returned_to_white_house,legislative_activity_senate_recorded_votes,legislative_activity_senate_measures_reported_total_senate _concurrent_resolutions,legislative_activity_senate_bill_in_conference,legislative_activity_total_public_bill_enacted_into_law,disposition_of_executive_nominations_summary_returned_to_white_house,legislative_activity_house_measures_introduced_total_quorum_calls,legislative_activity_house_measures_introduced_total_yea_and_nay_votes,legislative_activity_house_measures_introduced_total_recorded_votes,legislative_activity_house_measures_introduced_total_bills_vetoed,legislative_activity_house_measures_introduced_total_vetoes_overridden,disposition_of_executive_nominations_other_civilian_nominees_disposition_uncomfirmed,disposition_of_executive_nominations_space_force_nominees_total,disposition_of_executive_nominations_space_force_nominees_disposition_confirmed,legislative_activity_total_public_bills_enacted_into_law_total,disposition_of_executive_nominations_space_force_nominees_disposition_unconfirmed,disposition_of_executive_nominations_space_force_nominees_disposition_withdrawn,disposition_of_executive_nominations_space_force_nominees_carried_over_from_first_session,legislative_activity_senate_reported_measures_not_acted_on,legislative_activity_senate_bills_vetoed_total,legislative_activity_senate_bills_vetoed_regular_vetoes,legislative_activi
ty_senate_bills_vetoed_pocket_vetoes,legislative_activity_house_reported_measures_not_acted_on,legislative_activity_house_bills_vetoed_total,legislative_activity_house_bills_vetoed_regular_vetoes,legislative_activity_house_bills_vetoed_pocket_vetoes,legislative_activity_total_congressional_record_appendix,legislative_activity_total_bill_in_conference,legislative_activity_total_bills_through_conference,legislative_activity_total_measures_passed_total_simple_resolutions,legislative_activity_total_measures_reported_total_simple_resolutions,legislative_activity_total_conference_reports,legislative_activity_total_measures_introduced_total_bills,legislative_activity_total_measures_introduced_total_joint_resolutions,legislative_activity_total_measures_introduced_total_concurrent_resolutions,legislative_activity_total_measures_introduced_total_simple_resolutions,legislative_activity_total_quorum_calls,legislative_activity_total_yea_and_nay_votes,legislative_activity_total_bills_vetoed_total,legislative_activity_total_bills_vetoed_regular_vetoes,legislative_activity_total_bills_vetoed_pocket_vetoes,legislative_activity_total_vetoes_overridden,disposition_of_executive_nominations_postmaster_nominees_total,disposition_of_executive_nominations_postmaster_nominees_disposition_confirmed,disposition_of_executive_nominations_postmaster_nominees_disposition_withdrawn,disposition_of_executive_nominations_postmaster_nominees_disposition_unconfirmed,footnote_resume.legislative_activity.senate.reported_measures_not_acted_on,footnote_resume.legislative_activity.house.reported_measures_not_acted_on,legislative_activity_senate_regular_vetoes,legislative_activity_senate_pocket_vetoes,legislative_activity_house_regular_vetoes,legislative_activity_house_pocket_vetoes,legislative_activity_total_regular_vetoes,legislative_activity_total_pocket_vetoes,legislative_activity_senate_measures_passed_total_bills,legislative_activity_total_measures_passed_total_senate_bills,legislative_activity_total_m
easures_passed_total_house_bills,legislative_activity_total_measures_passed_total_senate_joint_resolutions,legislative_activity_total_measures_passed_total_house_joint_resolutions,legislative_activity_total_measures_passed_total_senate_concurrent_resolutions,legislative_activity_total_measures_passed_total_house_concurrent_resolutions,legislative_activity_total_measures_reported_total_senate_bills,legislative_activity_total_measures_reported_total_house_bills,legislative_activity_total_measures_reported_total_senate_joint_resolutions,legislative_activity_total_measures_reported_total_house_joint_resolutions,legislative_activity_total_measures_reported_total_senate_concurrent_resolutions,legislative_activity_total_measures_reported_total_house_concurrent_resolutions,disposition_of_executive_nominations_postmaster_nominees_disposition_rejected,footnote_resume.legislative_activity.senate.special_reports,footnote_resume.legislative_activity.house.special_reports,disposition_of_executive_nominations_summary_withdrawn,house_days_in_session,house_time_in_session,house_congressional_record_pages_of_proceedings,house_public_bills_enacted_into_law,house_private_bills_enacted_into_law,house_bills_in_conference,house_bills_through_conference,house_measures_passed_total_total,house_measures_passed_total_senate_bills,house_measures_passed_total_house_bills,house_measures_passed_total_senate_joint_resolutions,house_measures_passed_total_house_joint_resolutions,house_measures_passed_total_senate_concurrent_resolutions,house_measures_passed_total_house_concurrent_resolutions,house_measures_passed_total_simple_resolutions,house_measures_reported_total_total,house_measures_reported_total_senate_bills,house_measures_reported_total_house_bills,house_measures_reported_total_senate_joint_resolutions,house_measures_reported_total_house_joint_resolutions,house_measures_reported_total_senate_concurrent_resolutions,house_measures_reported_total_house_concurrent_resolutions,house_measures_repo
rted_total_simple_resolutions,house_special_reports,house_reported_measures_not_acted_on,house_measures_introduced_total_total,house_measures_introduced_total_bills,house_measures_introduced_total_joint_resolutions,house_measures_introduced_total_concurrent_resolutions,house_measures_introduced_total_simple_resolutions,house_quorum_calls,house_yea_and_nay_votes,house_bills_vetoed,total_congressional_record_pages_of_proceedings,total_congressional_record_appendix,total_public_bills_enacted_into_law,total_private_bills_enacted_into_law,total_measures_passed_total_simple_resolutions,total_measures_reported_total_simple_resolutions,total_measures_introduced_total_total,total_measures_introduced_total_bills,total_measures_introduced_total_joint_resolutions,total_measures_introduced_total_concurrent_resolutions,total_measures_introduced_total_simple_resolutions,total_quorum_calls,total_yea_and_nay_votes,legislative_activity_total_reported_measures_not_acted_on,footnote_resume.legislative_activity.senate.quorum_calls,footnote_resume.legislative_activity.senate.yea_and_nay_votes,footnote_resume.legislative_activity.disposition_of_executive_nominations.army_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.navy_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.summary.total_returned_to_white_house,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.unconfirmed,footnote_resume.disposition_of_executive_nominations.summary.total_nominees_received_this_session,legislative_activity_congressional_record_pages_of_proceedings,legislative_activity_congressional_record_extensions_of_remarks,legislative_activity_public_bills_enacted_into_law,legislative_activity_private_bills_enacted_into_law,l
egislative_activity_bills_in_conference,legislative_activity_bills_through_conference,legislative_activity_house_start_date,legislative_activity_house_end_date,disposition_of_executive_nominations_civilian_nominees_disposition_superseded_by_renominations_confirmations,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_ontime,disposition_of_executive_nominations_summary_total_failed_of_confirmation_ontime,disposition_of_executive_nominations_summary_total_superseded_by_renominations_confirmations,disposition_of_executive_nominations_army_nominees_disposition_failed_at_first_adjournment,disposition_of_executive_nominations_army_nominees_disposition_failed_at_second_adjournment,disposition_of_executive_nominations_air_force_nominees_disposition_failed,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_first_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_second_adjournment,disposition_of_executive_nominations_summary_total_failed_at_first_adjournment,disposition_of_executive_nominations_summary_total_failed_at_second_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_recess_appointments,disposition_of_executive_nominations_summary_total_recess_appointments,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_at_aug_sept_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_at_sine_die_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_superceded_by_recess_appointment,disposition_of_executive_nominations_summary_total_failed_of_confirmation_at_aug_sept_adjournment,disposition_of_executive_nominations_summary_total_failed_of_confirmation_at_sine_die_adjournment,disposition_of_executive_nominations_summary_total_superseded_by_recess_appointment
100_1,https://www.senate.gov/reference/resources/pdf...,2023-03-19T08:00:00.000Z,100,1,1988-11-10T05:00:00.000Z,1987-01-06T05:00:00.000Z,1987-12-22T05:00:00.000Z,170,PT1214H52M,18660,96,2,6,616,110,135,91,48,25,31,176,398,183,59,72,6,11,5,62,28,1.0,76,2685,1998,239,95,353,36,420,1,2,169,PT909H57M,13078,144,5,15,644,44,272,63,66,8,53,138,426,9,299,9,7,102,56,16,48,4857,3840,437,235,345,23,234,254,2,2,31738,5016,240.0,7.0,1260,824,7542,3,1987-01-06T05:00:00.000Z,1988-10-22T05:00:00.000Z,470,331,112,10,1.0,7.0,9.0,4248,4238,10,18667,15711,2955.0,1.0,14448,12086,2361,1.0,12101,12055,46,1995,1983,10,2.0,51929,46404,5494,10,1.0,20.0,These figures on measures reported include all...,These figures on measures reported include all...,Nominations returned to the President without ...,Nominations returned to the President without ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
100_2,https://www.senate.gov/reference/resources/pdf...,2023-03-19T08:00:00.000Z,100,2,1988-11-10T05:00:00.000Z,1988-01-25T05:00:00.000Z,1988-10-22T05:00:00.000Z,137,PT1126H52M,17397,193,10,3,814,188,273,106,51,52,32,112,511,262,100,88,3,14,1,43,13,,108,1328,932,156,74,166,26,379,6,1,129,PT749H01M,11290,280,31,4,858,131,330,85,70,32,46,164,636,34,360,5,5,95,85,49,88,2,1,241,163,263,14,308,143,10,1,28687,3776,,,1672,1147,3740,16,1988-01-25T05:00:00.000Z,1988-10-22T05:00:00.000Z,516,335,170,11,,,,2276,2022,254,15304,15304,,,12861,12858,1,,10205,10203,1,1596,1595,1,,37264,42317,428,13,,,These figures on measures reported include all...,These figures on measures reported include all...,,,2.0,1.0,412.0,745.0,112.0,10.0,2955.0,2361.0,2.0,46.0,1.0,10.0,5494.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
# Debugging aid, left disabled: prints every row label in df.index, then the first label.
# for index in df.index:
#     print(index)
# print(df.index[0])

In [7]:
# Show the rows labelled '100_1' and '80_1' together, with all columns, for comparison.
df.loc[['100_1', '80_1'], :]

Unnamed: 0,url,created_date,congress,session,report_date,legislative_activity_start_date,legislative_activity_end_date,legislative_activity_senate_days_in_session,legislative_activity_senate_time_in_session,legislative_activity_senate_congressional_record_pages_of_proceedings,legislative_activity_senate_public_bills_enacted_into_law,legislative_activity_senate_private_bills_enacted_into_law,legislative_activity_senate_bills_in_conference,legislative_activity_senate_measures_passed_total_total,legislative_activity_senate_measures_passed_total_senate_bills,legislative_activity_senate_measures_passed_total_house_bills,legislative_activity_senate_measures_passed_total_senate_joint_resolutions,legislative_activity_senate_measures_passed_total_house_joint_resolutions,legislative_activity_senate_measures_passed_total_senate_concurrent_resolutions,legislative_activity_senate_measures_passed_total_house_concurrent_resolutions,legislative_activity_senate_measures_passed_total_simple_resolutions,legislative_activity_senate_measures_reported_total_total,legislative_activity_senate_measures_reported_total_senate_bills,legislative_activity_senate_measures_reported_total_house_bills,legislative_activity_senate_measures_reported_total_senate_joint_resolutions,legislative_activity_senate_measures_reported_total_house_joint_resolutions,legislative_activity_senate_measures_reported_total_senate_concurrent_resolutions,legislative_activity_senate_measures_reported_total_house_concurrent_resolutions,legislative_activity_senate_measures_reported_total_simple_resolutions,legislative_activity_senate_special_reports,legislative_activity_senate_conference_reports,legislative_activity_senate_measures_pending_on_calendar,legislative_activity_senate_measures_introduced_total_total,legislative_activity_senate_measures_introduced_total_bills,legislative_activity_senate_measures_introduced_total_joint_resolutions,legislative_activity_senate_measures_introduced_total_concurrent_resolutions,legislati
ve_activity_senate_measures_introduced_total_simple_resolutions,legislative_activity_senate_quorum_calls,legislative_activity_senate_yea_and_nay_votes,legislative_activity_senate_bills_vetoed,legislative_activity_senate_vetoes_overridden,legislative_activity_house_days_in_session,legislative_activity_house_time_in_session,legislative_activity_house_congressional_record_pages_of_proceedings,legislative_activity_house_public_bills_enacted_into_law,legislative_activity_house_private_bills_enacted_into_law,legislative_activity_house_bills_in_conference,legislative_activity_house_measures_passed_total_total,legislative_activity_house_measures_passed_total_senate_bills,legislative_activity_house_measures_passed_total_house_bills,legislative_activity_house_measures_passed_total_senate_joint_resolutions,legislative_activity_house_measures_passed_total_house_joint_resolutions,legislative_activity_house_measures_passed_total_senate_concurrent_resolutions,legislative_activity_house_measures_passed_total_house_concurrent_resolutions,legislative_activity_house_measures_passed_total_simple_resolutions,legislative_activity_house_measures_reported_total_total,legislative_activity_house_measures_reported_total_senate_bills,legislative_activity_house_measures_reported_total_house_bills,legislative_activity_house_measures_reported_total_house_joint_resolutions,legislative_activity_house_measures_reported_total_house_concurrent_resolutions,legislative_activity_house_measures_reported_total_simple_resolutions,legislative_activity_house_special_reports,legislative_activity_house_conference_reports,legislative_activity_house_measures_pending_on_calendar,legislative_activity_house_measures_introduced_total_total,legislative_activity_house_measures_introduced_total_bills,legislative_activity_house_measures_introduced_total_joint_resolutions,legislative_activity_house_measures_introduced_total_concurrent_resolutions,legislative_activity_house_measures_introduced_total_simple_resolutions,legi
slative_activity_house_quorum_calls,legislative_activity_house_yea_and_nay_votes,legislative_activity_house_recorded_votes,legislative_activity_house_bills_vetoed,legislative_activity_house_vetoes_overridden,legislative_activity_total_congressional_record_pages_of_proceedings,legislative_activity_total_congressional_record_extensions_of_remarks,legislative_activity_total_public_bills_enacted_into_law,legislative_activity_total_private_bills_enacted_into_law,legislative_activity_total_measures_passed_total_total,legislative_activity_total_measures_reported_total_total,legislative_activity_total_measures_introduced_total_total,legislative_activity_total_bills_vetoed,disposition_of_executive_nominations_start_date,disposition_of_executive_nominations_end_date,disposition_of_executive_nominations_civilian_nominees_total,disposition_of_executive_nominations_civilian_nominees_disposition_confirmed,disposition_of_executive_nominations_civilian_nominees_disposition_unconfirmed,disposition_of_executive_nominations_civilian_nominees_disposition_withdrawn,disposition_of_executive_nominations_civilian_nominees_disposition_rejected,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house_1,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house_2,disposition_of_executive_nominations_other_civilian_nominees_total,disposition_of_executive_nominations_other_civilian_nominees_disposition_confirmed,disposition_of_executive_nominations_other_civilian_nominees_disposition_unconfirmed,disposition_of_executive_nominations_air_force_nominees_total,disposition_of_executive_nominations_air_force_nominees_disposition_confirmed,disposition_of_executive_nominations_air_force_nominees_disposition_unconfirmed,disposition_of_executive_nominations_air_force_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_army_nominees_total,disposition_of_executive_nominations_army_nominees_disposition_confirmed,
disposition_of_executive_nominations_army_nominees_disposition_unconfirmed,disposition_of_executive_nominations_army_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_navy_nominees_total,disposition_of_executive_nominations_navy_nominees_disposition_confirmed,disposition_of_executive_nominations_navy_nominees_disposition_unconfirmed,disposition_of_executive_nominations_marine_corps_nominees_total,disposition_of_executive_nominations_marine_corps_nominees_disposition_confirmed,disposition_of_executive_nominations_marine_corps_nominees_disposition_unconfirmed,disposition_of_executive_nominations_marine_corps_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_summary_total_nominees_received_this_session,disposition_of_executive_nominations_summary_total_confirmed,disposition_of_executive_nominations_summary_total_unconfirmed,disposition_of_executive_nominations_summary_total_withdrawn,disposition_of_executive_nominations_summary_total_rejected,disposition_of_executive_nominations_summary_total_returned_to_white_house,footnote_resume.legislative_activity.senate.measures_reported_total.total,footnote_resume.legislative_activity.house.measures_reported_total.total,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house_1,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house_2,legislative_activity_house_measures_reported_total_senate_joint_resolutions,legislative_activity_house_measures_reported_total_senate_concurrent_resolutions,legislative_activity_house_measures_introduced_total_field1,legislative_activity_house_measures_introduced_total_field2,disposition_of_executive_nominations_civilian_nominees_carried_over_from_first_session,disposition_of_executive_nominations_other_civilian_nominees_carried_over_from_first_session,disposition_of_executive_nominations_air_force_nominees_carried_over_from_first_sessi
on,disposition_of_executive_nominations_army_nominees_carried_over_from_first_session,disposition_of_executive_nominations_army_nominees_disposition_withdrawn,disposition_of_executive_nominations_navy_nominees_carried_over_from_first_session,disposition_of_executive_nominations_navy_nominees_disposition_withdrawn,disposition_of_executive_nominations_marine_corps_nominees_carried_over_from_first_session,disposition_of_executive_nominations_summary_total_nominees_carried_over_from_first_session,legislative_activity_house_congressional_record_extensions_of_remarks,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_aug_sept_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_other_civilian_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_army_nominees_disposition_failed_at_sine_die_adjournment,disposition_of_executive_nominations_summary_total_failed_at_aug_sept_adjournment,disposition_of_executive_nominations_summary_total_failed_at_sine_die_adjournment,legislative_activity_senate_congressional_record_extensions_of_remarks,legislative_activity_senate_bills_through_conference,legislative_activity_house_bills_through_conference,disposition_of_executive_nominations_air_force_nominees_disposition_withdrawn,legislative_activity_senate_bills_not_signed,legislative_activity_total_bills_in_conference,legislative_activity_total_bills_not_signed,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_adjournment,disposition_of_executive_nominations_other_civilian_nominees_disposition_withdrawn,disposition_of_executive_nominations_summary_total_failed_at_ajournment,footnote_resume.legislative_activity.senate.bills_not_signed,disposition_of_executive_nominations_civilian_nominees_disposition_returned_at_sine_die_adjournment,disposition_of_executive_nominations_marine_corps_nominees_disposition_withdraw
n,disposition_of_executive_nominations_summary_total_returned_at_sine_die_adjournment,legislative_activity_senate_measures_reported_total_sentate_concurrent_resolutions,disposition_of_executive_nominations_civilian_nominees_disposition_return_at_sine_die_adjournment,disposition_of_executive_nominations_navy_nominees_disposition_return_at_sine_die_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_other_civilian_nominees_disposition_returned_to_white_house,disposition_of_executive_nominations_navy_nominees_disposition_returned_to_white_house,legislative_activity_senate_recorded_votes,legislative_activity_senate_measures_reported_total_senate _concurrent_resolutions,legislative_activity_senate_bill_in_conference,legislative_activity_total_public_bill_enacted_into_law,disposition_of_executive_nominations_summary_returned_to_white_house,legislative_activity_house_measures_introduced_total_quorum_calls,legislative_activity_house_measures_introduced_total_yea_and_nay_votes,legislative_activity_house_measures_introduced_total_recorded_votes,legislative_activity_house_measures_introduced_total_bills_vetoed,legislative_activity_house_measures_introduced_total_vetoes_overridden,disposition_of_executive_nominations_other_civilian_nominees_disposition_uncomfirmed,disposition_of_executive_nominations_space_force_nominees_total,disposition_of_executive_nominations_space_force_nominees_disposition_confirmed,legislative_activity_total_public_bills_enacted_into_law_total,disposition_of_executive_nominations_space_force_nominees_disposition_unconfirmed,disposition_of_executive_nominations_space_force_nominees_disposition_withdrawn,disposition_of_executive_nominations_space_force_nominees_carried_over_from_first_session,legislative_activity_senate_reported_measures_not_acted_on,legislative_activity_senate_bills_vetoed_total,legislative_activity_senate_bills_vetoed_regular_vetoes,legislative_activi
ty_senate_bills_vetoed_pocket_vetoes,legislative_activity_house_reported_measures_not_acted_on,legislative_activity_house_bills_vetoed_total,legislative_activity_house_bills_vetoed_regular_vetoes,legislative_activity_house_bills_vetoed_pocket_vetoes,legislative_activity_total_congressional_record_appendix,legislative_activity_total_bill_in_conference,legislative_activity_total_bills_through_conference,legislative_activity_total_measures_passed_total_simple_resolutions,legislative_activity_total_measures_reported_total_simple_resolutions,legislative_activity_total_conference_reports,legislative_activity_total_measures_introduced_total_bills,legislative_activity_total_measures_introduced_total_joint_resolutions,legislative_activity_total_measures_introduced_total_concurrent_resolutions,legislative_activity_total_measures_introduced_total_simple_resolutions,legislative_activity_total_quorum_calls,legislative_activity_total_yea_and_nay_votes,legislative_activity_total_bills_vetoed_total,legislative_activity_total_bills_vetoed_regular_vetoes,legislative_activity_total_bills_vetoed_pocket_vetoes,legislative_activity_total_vetoes_overridden,disposition_of_executive_nominations_postmaster_nominees_total,disposition_of_executive_nominations_postmaster_nominees_disposition_confirmed,disposition_of_executive_nominations_postmaster_nominees_disposition_withdrawn,disposition_of_executive_nominations_postmaster_nominees_disposition_unconfirmed,footnote_resume.legislative_activity.senate.reported_measures_not_acted_on,footnote_resume.legislative_activity.house.reported_measures_not_acted_on,legislative_activity_senate_regular_vetoes,legislative_activity_senate_pocket_vetoes,legislative_activity_house_regular_vetoes,legislative_activity_house_pocket_vetoes,legislative_activity_total_regular_vetoes,legislative_activity_total_pocket_vetoes,legislative_activity_senate_measures_passed_total_bills,legislative_activity_total_measures_passed_total_senate_bills,legislative_activity_total_m
easures_passed_total_house_bills,legislative_activity_total_measures_passed_total_senate_joint_resolutions,legislative_activity_total_measures_passed_total_house_joint_resolutions,legislative_activity_total_measures_passed_total_senate_concurrent_resolutions,legislative_activity_total_measures_passed_total_house_concurrent_resolutions,legislative_activity_total_measures_reported_total_senate_bills,legislative_activity_total_measures_reported_total_house_bills,legislative_activity_total_measures_reported_total_senate_joint_resolutions,legislative_activity_total_measures_reported_total_house_joint_resolutions,legislative_activity_total_measures_reported_total_senate_concurrent_resolutions,legislative_activity_total_measures_reported_total_house_concurrent_resolutions,disposition_of_executive_nominations_postmaster_nominees_disposition_rejected,footnote_resume.legislative_activity.senate.special_reports,footnote_resume.legislative_activity.house.special_reports,disposition_of_executive_nominations_summary_withdrawn,house_days_in_session,house_time_in_session,house_congressional_record_pages_of_proceedings,house_public_bills_enacted_into_law,house_private_bills_enacted_into_law,house_bills_in_conference,house_bills_through_conference,house_measures_passed_total_total,house_measures_passed_total_senate_bills,house_measures_passed_total_house_bills,house_measures_passed_total_senate_joint_resolutions,house_measures_passed_total_house_joint_resolutions,house_measures_passed_total_senate_concurrent_resolutions,house_measures_passed_total_house_concurrent_resolutions,house_measures_passed_total_simple_resolutions,house_measures_reported_total_total,house_measures_reported_total_senate_bills,house_measures_reported_total_house_bills,house_measures_reported_total_senate_joint_resolutions,house_measures_reported_total_house_joint_resolutions,house_measures_reported_total_senate_concurrent_resolutions,house_measures_reported_total_house_concurrent_resolutions,house_measures_repo
rted_total_simple_resolutions,house_special_reports,house_reported_measures_not_acted_on,house_measures_introduced_total_total,house_measures_introduced_total_bills,house_measures_introduced_total_joint_resolutions,house_measures_introduced_total_concurrent_resolutions,house_measures_introduced_total_simple_resolutions,house_quorum_calls,house_yea_and_nay_votes,house_bills_vetoed,total_congressional_record_pages_of_proceedings,total_congressional_record_appendix,total_public_bills_enacted_into_law,total_private_bills_enacted_into_law,total_measures_passed_total_simple_resolutions,total_measures_reported_total_simple_resolutions,total_measures_introduced_total_total,total_measures_introduced_total_bills,total_measures_introduced_total_joint_resolutions,total_measures_introduced_total_concurrent_resolutions,total_measures_introduced_total_simple_resolutions,total_quorum_calls,total_yea_and_nay_votes,legislative_activity_total_reported_measures_not_acted_on,footnote_resume.legislative_activity.senate.quorum_calls,footnote_resume.legislative_activity.senate.yea_and_nay_votes,footnote_resume.legislative_activity.disposition_of_executive_nominations.army_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.navy_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.civilian_nominees.disposition.returned_to_white_house,footnote_resume.legislative_activity.disposition_of_executive_nominations.summary.total_returned_to_white_house,footnote_resume.disposition_of_executive_nominations.civilian_nominees.disposition.unconfirmed,footnote_resume.disposition_of_executive_nominations.summary.total_nominees_received_this_session,legislative_activity_congressional_record_pages_of_proceedings,legislative_activity_congressional_record_extensions_of_remarks,legislative_activity_public_bills_enacted_into_law,legislative_activity_private_bills_enacted_into_law,l
egislative_activity_bills_in_conference,legislative_activity_bills_through_conference,legislative_activity_house_start_date,legislative_activity_house_end_date,disposition_of_executive_nominations_civilian_nominees_disposition_superseded_by_renominations_confirmations,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_ontime,disposition_of_executive_nominations_summary_total_failed_of_confirmation_ontime,disposition_of_executive_nominations_summary_total_superseded_by_renominations_confirmations,disposition_of_executive_nominations_army_nominees_disposition_failed_at_first_adjournment,disposition_of_executive_nominations_army_nominees_disposition_failed_at_second_adjournment,disposition_of_executive_nominations_air_force_nominees_disposition_failed,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_first_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_at_second_adjournment,disposition_of_executive_nominations_summary_total_failed_at_first_adjournment,disposition_of_executive_nominations_summary_total_failed_at_second_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_recess_appointments,disposition_of_executive_nominations_summary_total_recess_appointments,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_at_aug_sept_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_failed_of_confirmation_at_sine_die_adjournment,disposition_of_executive_nominations_civilian_nominees_disposition_superceded_by_recess_appointment,disposition_of_executive_nominations_summary_total_failed_of_confirmation_at_aug_sept_adjournment,disposition_of_executive_nominations_summary_total_failed_of_confirmation_at_sine_die_adjournment,disposition_of_executive_nominations_summary_total_superseded_by_recess_appointment
100_1,https://www.senate.gov/reference/resources/pdf...,2023-03-19T08:00:00.000Z,100,1,1988-11-10T05:00:00.000Z,1987-01-06T05:00:00.000Z,1987-12-22T05:00:00.000Z,170,PT1214H52M,18660,96,2,6.0,616,110,135,91,48,25,31,176,398,183,59,72,6,11,5,62,28,1.0,76.0,2685,1998,239,95,353,36,420,1.0,2.0,169,PT909H57M,13078,144,5,15.0,644,44,272,63,66,8,53,138,426,9,299,9,7,102,56,16.0,48.0,4857,3840,437,235,345,23,234,254.0,2.0,2,31738,5016.0,240,7,1260.0,824.0,7542,3.0,1987-01-06T05:00:00.000Z,1988-10-22T05:00:00.000Z,470,331,112,10,1.0,7.0,9.0,4248.0,4238.0,10.0,18667.0,15711.0,2955.0,1.0,14448,12086,2361,1.0,12101,12055,46,1995,1983,10,2.0,51929,46404,5494,10,1.0,20.0,These figures on measures reported include all...,These figures on measures reported include all...,Nominations returned to the President without ...,Nominations returned to the President without ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
80_1,https://www.senate.gov/reference/resources/pdf...,2023-03-19T08:00:00.000Z,80,1,,1947-01-03T05:00:00.000Z,1947-12-19T05:00:00.000Z,143,PT807H36M,6150,120,37,,841,278,138,47,30,12,19,87,973,357,401,53,31,16,20,95,21,,,2312,1924,170,37,181,332,138,,,144,PT686H2M,5379,275,94,,1029,138,568,29,33,10,21,230,1100,102,753,48,12,169,75,,,5658,4831,284,129,414,69,84,,,1,11889,,395,131,,,7970,,1947-01-03T05:00:00.000Z,1947-12-19T05:00:00.000Z,1506,1480,20,6,,,,,,,,,,,25863,25568,285,,11144,11115,29,759,757,2,,40672,39855,570,132,,,These figures on measures reported include all...,These figures on measures reported include all...,,,14.0,2.0,,,,,,,10.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,93.0,8.0,6.0,2.0,116.0,24.0,7.0,17.0,5302.0,4.0,62.0,317.0,264.0,59.0,6755.0,454.0,166.0,595.0,401.0,222.0,32.0,13.0,19.0,1.0,1285.0,935.0,116.0,234.0,This figure does not agree with the total diff...,This figure does not agree with the total diff...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [13]:
# Peek at the raw duration string of the first row. The frame is indexed by string
# labels, so use explicit positional access (.iloc) rather than the integer key `[0]`.
df['legislative_activity_senate_time_in_session'].iloc[0]

'PT1214H52M'

In [9]:
def print_empty_cells(input_df=None):
    """
    Prints one "<row label>;;;<column name>" line for every null cell of a DataFrame

    input_df: frame to scan; when omitted, the notebook-level `df` is scanned,
              so existing zero-argument calls keep working unchanged.
    Returns None; the report is written to stdout, row by row.
    """
    frame = df if input_df is None else input_df
    # Stack the boolean null-mask into a Series indexed by (row, column),
    # then keep only the entries that are True, i.e. the empty cells.
    null_mask = frame.isnull().stack()
    for row, col in null_mask[null_mask].index:
        print(f"{row};;;{col}")

In [10]:
def filter_df(input_df):
    """
    Returns a new DataFrame without the columns whose names start with "footnote" or "url"
    """
    remaining = input_df
    # Strip one unwanted prefix per pass: footnote columns first, then url columns.
    for unwanted_prefix in ("footnote", "url"):
        kept_columns = [name for name in remaining if not name.startswith(unwanted_prefix)]
        remaining = remaining[kept_columns]
    return remaining

def split_df(input_df):
    """
    Splits input_df into an (activity, disposition) pair of DataFrames

    activity keeps the columns starting with "legislative_activity", disposition keeps
    the columns starting with "disposition"; both also keep the "congress", "session"
    and "report_date" columns.
    """
    shared_columns = ("congress", "session", "report_date")

    def columns_for(prefix):
        # Original column order is preserved: shared columns stay wherever they appear.
        return [name for name in input_df if name.startswith(prefix) or name in shared_columns]

    activity = input_df[columns_for("legislative_activity")]
    disposition = input_df[columns_for("disposition")]
    return activity, disposition
    
def save_to_pickle(input_df, activity_path='activity.pkl', disposition_path='disposition.pkl'):
    """
    Filters input_df, splits it into activity and disposition frames and pickles each one

    input_df: frame to export; it is passed through filter_df and then split_df.
    activity_path: destination of the activity pickle (defaults to 'activity.pkl').
    disposition_path: destination of the disposition pickle (defaults to 'disposition.pkl').
    Returns None; the only effect is the two files written.
    """
    # Local name deliberately differs from the notebook-level `df` to avoid shadowing it.
    filtered = filter_df(input_df)
    activity, disposition = split_df(filtered)
    activity.to_pickle(activity_path)
    disposition.to_pickle(disposition_path)

In [18]:
# Starts empty; the convert_* helpers below each write one converted column into it.
final_df = pd.DataFrame()

def convert_to_datetime(col):
    """
    Parses column `col` of the notebook-level `df` with pd.to_datetime and stores
    the result under the same name in the notebook-level `final_df`
    """
    parsed = pd.to_datetime(df[col])
    final_df[col] = parsed
    
def convert_to_int(col):
    """
    Converts column `col` of the notebook-level `df` to integers and stores the
    result under the same name in the notebook-level `final_df`

    NOTE: missing values are filled with 0 before the cast, so in the output a
    missing count cannot be told apart from a genuine zero.
    Raises ValueError (from pd.to_numeric) if a value cannot be read as a number.
    """
    # Go through pd.to_numeric first so the fill happens on a numeric column,
    # instead of relying on object-dtype replace() silently downcasting its result.
    numeric = pd.to_numeric(df[col])
    final_df[col] = numeric.fillna(0).astype(int)

def convert_to_duration(col):
    """
    Parses the ISO-8601 duration strings (e.g. 'PT1214H52M') in column `col` of the
    notebook-level `df` with isodate.parse_duration and stores the result under the
    same name in the notebook-level `final_df`

    Missing values are passed through unparsed instead of being handed to the
    parser, so a column with gaps no longer aborts the conversion.
    """
    # na_action='ignore' makes map() skip null cells rather than call the parser on them.
    final_df[col] = df[col].map(isodate.parse_duration, na_action='ignore')

def set_col_types():
    """
    Runs the matching convert_* helper on every column that should end up typed

    The table is applied top to bottom, so the helpers are called in the listed order.
    """
    conversion_plan = [
        ('congress', convert_to_int),
        ('session', convert_to_int),
        ('report_date', convert_to_datetime),
        ('legislative_activity_start_date', convert_to_datetime),
        ('legislative_activity_end_date', convert_to_datetime),
        ('legislative_activity_senate_days_in_session', convert_to_int),
        ('legislative_activity_senate_time_in_session', convert_to_duration),
        ('legislative_activity_senate_congressional_record_pages_of_proceedings', convert_to_int),
        ('legislative_activity_senate_public_bills_enacted_into_law', convert_to_int),
        ('legislative_activity_senate_private_bills_enacted_into_law', convert_to_int),
        ('legislative_activity_senate_bills_in_conference', convert_to_int),
        ('legislative_activity_senate_measures_passed_total_total', convert_to_int),
        ('legislative_activity_senate_measures_passed_total_senate_bills', convert_to_int),
        ('legislative_activity_senate_measures_passed_total_house_bills', convert_to_int),
        ('legislative_activity_senate_measures_passed_total_senate_joint_resolutions', convert_to_int),
        ('legislative_activity_senate_measures_passed_total_house_joint_resolutions', convert_to_int),
    ]
    for column_name, converter in conversion_plan:
        converter(column_name)

# Run the column conversions, pickle final_df to df.pkl,
# then show the first five rows as the cell output.
set_col_types()
final_df.to_pickle('df.pkl')
final_df.head(5)

Unnamed: 0,congress,session,report_date,legislative_activity_start_date,legislative_activity_end_date,legislative_activity_senate_days_in_session,legislative_activity_senate_time_in_session,legislative_activity_senate_congressional_record_pages_of_proceedings,legislative_activity_senate_public_bills_enacted_into_law,legislative_activity_senate_private_bills_enacted_into_law,legislative_activity_senate_bills_in_conference,legislative_activity_senate_measures_passed_total_total,legislative_activity_senate_measures_passed_total_senate_bills,legislative_activity_senate_measures_passed_total_house_bills,legislative_activity_senate_measures_passed_total_senate_joint_resolutions,legislative_activity_senate_measures_passed_total_house_joint_resolutions
100_1,100,1,1988-11-10 05:00:00+00:00,1987-01-06 05:00:00+00:00,1987-12-22 05:00:00+00:00,170,50 days 14:52:00,18660,96,2,6,616,110,135,91,48
100_2,100,2,1988-11-10 05:00:00+00:00,1988-01-25 05:00:00+00:00,1988-10-22 05:00:00+00:00,137,46 days 22:52:00,17397,193,10,3,814,188,273,106,51
101_1,101,1,1991-01-03 05:00:00+00:00,1989-01-03 05:00:00+00:00,1989-11-22 05:00:00+00:00,136,41 days 19:19:00,16944,90,0,30,605,121,128,106,50
101_2,101,2,1991-01-03 05:00:00+00:00,1990-01-23 05:00:00+00:00,1990-10-28 05:00:00+00:00,138,52 days 02:14:00,0,179,8,50,716,212,218,98,47
102_1,102,1,1993-01-05 05:00:00+00:00,1991-01-03 05:00:00+00:00,1992-01-03 05:00:00+00:00,158,50 days 00:44:00,18853,81,1,5,626,169,127,79,57
