In [1]:
import pandas as pd
import nameparser
import json

In [2]:
trans = pd.read_csv('../data/transactions_20230924.csv')
trans['disclosure_date'] = pd.to_datetime(trans['disclosure_date'])
trans['transaction_date'] = pd.to_datetime(trans['transaction_date'])
trans['parsed_name'] = trans['member'].apply(lambda x: nameparser.HumanName(x))
trans['string_name'] = trans['parsed_name'].astype('string')
trans_117 = trans[(trans['transaction_date'] < '2023-01-03') & (trans['transaction_date']  >= '2021-01-03')]
trans_116 = trans[(trans['transaction_date'] < '2021-01-03')]
trans_118 = trans[(trans['transaction_date'] > '2023-01-03')]
trans_names = trans['string_name'].unique()


In [15]:
trans['member_type'].value_counts()

member_type
representative    12300
Name: count, dtype: int64

In [3]:
with open('committees-current.json', 'r') as file:
    committees = json.load(file)
data_list = []
for committee in committees:
    committee_name = committee['name']
    committee_code = committee['thomas_id']
    if 'subcommittees' in committee:
        for subcommittee in committee['subcommittees']:
            subcommittee_name = subcommittee['name']
            subcomittee_code = subcommittee['thomas_id']
            data_list.append({'Committee': committee_name,
                              'Comittee_code': committee_code,
                              
                              'Subcommittee': subcommittee_name,
                              'Subcommittee_code': committee_code + subcomittee_code})
    else:
        data_list.append({'Committee': committee_name,
                          'Comittee_code': committee_code,
                          
                          'Subcommittee': None,
                          'Subcommittee_code': None})

committees_df = pd.DataFrame(data_list)

committees_df

Unnamed: 0,Committee,Comittee_code,Subcommittee,Subcommittee_code
0,House Committee on Agriculture,HSAG,Forestry,HSAG15
1,House Committee on Agriculture,HSAG,"Commodity Markets, Digital Assets, and Rural D...",HSAG22
2,House Committee on Agriculture,HSAG,"General Farm Commodities, Risk Management, and...",HSAG16
3,House Committee on Agriculture,HSAG,"Livestock, Dairy, and Poultry",HSAG29
4,House Committee on Agriculture,HSAG,"Conservation, Research, and Biotechnology",HSAG14
...,...,...,...,...
193,Senate Committee on Small Business and Entrepr...,SSSB,,
194,Senate Committee on Veterans' Affairs,SSVA,,
195,House Select Subcommittee on the Coronavirus P...,HSVC,,
196,House Select Subcommittee on the Weaponization...,HSFD,,


In [4]:
with open('committee-membership-current.json', 'r') as file:
    committee_membership = json.load(file)
    
comittee_data_list = []

for comittee in committee_membership.keys():
    comittee_code = comittee
    for member in committee_membership[comittee]:
        member_name = member['name']
        comittee_data_list.append({'Comittee_code': comittee_code,
                                   'Member_name': member_name
                                   })
    

In [5]:
membership = pd.DataFrame(comittee_data_list)


In [16]:
membership

Unnamed: 0,Comittee_code,Member_name
0,HSII,Bruce Westerman
1,HSII,Raúl M. Grijalva
2,HSII,Doug Lamborn
3,HSII,Grace F. Napolitano
4,HSII,Robert J. Wittman
...,...,...
3861,HSBA10,Juan Vargas
3862,HSBA10,Zachary Nunn
3863,HSBA10,Josh Gottheimer
3864,HSBA10,Monica De La Cruz


In [6]:

merged_df = pd.merge(membership, committees_df, on="Comittee_code")
merged_df['parsed_name'] = merged_df['Member_name'].apply(lambda x: nameparser.HumanName(x))
merged_df['string_name'] = merged_df['parsed_name'].astype('string')

member_comittees = merged_df[['string_name', 'Committee']]
member_subcomitties = merged_df[['string_name', 'Subcommittee']]
member_comittee_dummies = pd.get_dummies(member_comittees, columns=['Committee']).groupby('string_name').sum()
member_subcomittee_dummies = pd.get_dummies(member_subcomitties, columns=['Subcommittee']).groupby('string_name').sum().reset_index()

member_names = member_subcomittee_dummies['string_name'].unique()
member_names

<StringArray>
[     'A. Drew Ferguson IV',               'Aaron Bean',
 'Abigail Davis Spanberger',           'Adam B. Schiff',
               'Adam Smith',             'Adrian Smith',
        'Adriano Espaillat',                 'Al Green',
             'Alex Padilla',      'Alexander X. Mooney',
 ...
          'Warren Davidson',              'Wesley Hunt',
             'Wiley Nickel',       'William R. Keating',
    'William R. Timmons IV',           'Yadira Caraveo',
                'Young Kim',         'Yvette D. Clarke',
             'Zachary Nunn',              'Zoe Lofgren']
Length: 530, dtype: string

In [7]:
def compare_lists(listA, listB):
    # Convert lists to sets for efficient operations
    setA, setB = set(listA), set(listB)

    common = setA & setB
    unique_to_A = setA - setB
    unique_to_B = setB - setA
    all_unique = unique_to_A | unique_to_B

    return {
        "common_elements": len(list(common)),
        "unique_to_trans count": len(list(unique_to_A)),
        "uniques to trans": list(unique_to_A),
        "unique_to_listB": len(list(unique_to_B)),
        "all_unique_elements": len(list(all_unique))
    }

result = compare_lists(trans_names, member_names)
print(result)

{'common_elements': 83, 'unique_to_trans count': 80, 'uniques to trans': ['Lloyd K. Smucker', 'Bob Gibbs', 'Michael Patrick Guest', 'Ed Perlmutter', 'Frank Pallone', 'Trey Hollingsworth', 'Mo Brooks', 'Scott Franklin', 'Roger W. Marshall', 'Blake Moore', 'Greg Gianforte', 'John A. Yarmuth', 'Deborah Ross', 'Stephanie Bice', 'David P. Roe', 'David Cheston Rouzer', 'K. Michael Conaway', 'Abigail Spanberger', 'Alan S. Lowenthal', 'James M. Costa', 'Susan W. Brooks', 'Kathy Manning', 'Richard W. Allen', 'Thomas Suozzi', 'David B. McKinley', 'Ashley Hinson Arenholz', 'Kenny Marchant', 'Sean Patrick Maloney', 'Jim Hagedorn', 'Wm. Lacy Clay', 'James French Hill', 'Harley E. Rouda', 'Susan A. Davis', 'Patrick Fallon', 'Tom Malinowski', 'Justin Amash', 'Jonathan Jackson', 'Gilbert Cisneros', 'Peter J. Visclosky', 'Katherine M. Clark', 'Bill Pascrell', 'Donna Shalala', 'David Madison Cawthorn', 'Andrew Garbarino', 'David E. Price', 'Harold Dallas Rogers', 'David Trone', 'Tom Rice', 'Anthony E. G

In [8]:
with open('../../llm/commitees/committee_assignments.json', 'r') as file:
    data = json.load(file)

import ast

def destring_dict_values(d):
    return {k: ast.literal_eval(v) if isinstance(v, str) else v for k, v in d.items()}


converted_dict = destring_dict_values(data)

dfs = []

for senator in converted_dict['116']:
    # Create a DataFrame for each senator
    df = pd.DataFrame([senator])

    # Explode committees and subcommittees columns
    df = df.explode('committees').explode('subcomittees').reset_index(drop=True)

    # Pivot the DataFrame to have committees and subcommittees as columns
    df_committees = df.pivot(columns='committees', values='committees').fillna(0)
    df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)

    df_subcomittees = df.pivot(columns='subcomittees', values='subcomittees').fillna(0)
    df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)

    # Merge the pivoted DataFrames
    df = pd.concat([df[['full_name', 'state']], df_committees, df_subcomittees], axis=1).groupby(['full_name', 'state']).sum().reset_index()

    dfs.append(df)

# Combine all the senator DataFrames
result_df_118 = pd.concat(dfs, axis=0).reset_index(drop=True)
result_df_118['parsed_name'] = result_df_118['full_name'].apply(lambda x: nameparser.HumanName(x))
result_df_118['string_name'] = result_df_118['parsed_name'].astype('string')


llm_names = result_df_118['string_name'].unique()

  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x

  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x

  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x

  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x

  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)
  df_committees = df_committees.applymap(lambda x: 1 if x != 0 else 0)
  df_subcomittees = df_subcomittees.applymap(lambda x: 1 if x != 0 else 0)


In [9]:




result = compare_lists(trans_names, llm_names)
print(result)

{'common_elements': 0, 'unique_to_trans count': 163, 'uniques to trans': ['Lloyd K. Smucker', 'Joseph D. Morelle', 'Bob Gibbs', 'Judy Chu', 'Austin Scott', 'Teresa Leger Fernandez', 'Lois Frankel', 'Michael Patrick Guest', 'John Garamendi', 'Nicole Malliotakis', 'Ed Perlmutter', 'Frank Pallone', 'Maria Elvira Salazar', 'Kim Schrier', 'Thomas Massie', 'Trey Hollingsworth', 'Mo Brooks', 'Scott Franklin', 'Blake Moore', 'W. Gregory Steube', 'Greg Gianforte', 'Roger W. Marshall', 'Ken Buck', 'Doug Lamborn', 'John A. Yarmuth', 'Garret Graves', 'Deborah Ross', 'Stephanie Bice', 'Michael C. Burgess', 'David P. Roe', 'David Cheston Rouzer', 'Victoria Spartz', 'Earl Blumenauer', 'Seth Moulton', 'Pete Sessions', 'K. Michael Conaway', 'Abigail Spanberger', 'Alan S. Lowenthal', 'James M. Costa', 'Robert B. Aderholt', 'Susan W. Brooks', 'Kathy Manning', 'Richard W. Allen', 'Thomas Suozzi', 'Zoe Lofgren', 'David B. McKinley', 'David Kustoff', 'Tim Burchett', 'Steve Cohen', 'Ashley Hinson Arenholz', 