## Setup


In [172]:
#imports
import os, sys, json  # Provides OS-dependent functionality, system-specific parameters, JSON handling
import pandas as pd             # Provides data structures and data analysis tools
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import requests
import time
import xlsxwriter
from tqdm import tqdm
from datetime import date #date/time manipulation

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'
from IPython.display import display_markdown
# %%

#### Definitions

In [173]:
#Intial legislator data cleaning
def make_initial_df(df, state, chamber):
    result = df[(df['State Abbreviation'] == state) & (df['Chamber'] == chamber)]
    return result

In [None]:
#get legislator details such as district, tenure, party, etc
def calc_leg_details(state_chamber) :
    track_cols = ['State Abbreviation','Chamber','Title', 'First Name',
        'Last Name', 'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'
        ]
    state_chamber = state_chamber[track_cols]
    print(state_chamber.columns)
    outgoings = state_chamber[state_chamber['Name'].str.contains('Outgoing', regex = True)]
    state_chamber = state_chamber[~state_chamber['Name'].str.contains('Outgoing', regex = True)]
    
    print(f'outgoings: {str(len(outgoings))}')
    # Calculate tenure
    current_year = 2025
    # print(type(current_year))
    state_chamber['tenure'] = current_year - state_chamber['Date Assumed Office']
    state_chamber['leader'] = np.nan
    try:
        state_chamber['leader'] = state_chamber['Committee List'].apply(lambda x: str(x).split('|', 1)[0] if len(str(x).split('|', 1)) == 2 else np.nan)
    except:
        print("something wrong with leader column creation")
    
    state_chamber = state_chamber.drop("Name", axis = 1)
    if len(outgoings) > 0:
        outgoings = outgoings.drop("Name", axis = 1)

    return state_chamber, outgoings

In [175]:
# get committees definition
def get_comms(df, state, chamber):
    result = df[(df['state'] == state) & (df['branch'] == chamber)]
    comms_list = result['committee'].to_list()
    
    return comms_list

In [176]:
#create final state leg file
def create_state_leg_files(df, state_list):
    current_date = str(date.today())
    current_date = current_date.replace('-', '_')
    for state in state_list:
        house = make_initial_df(df, str(state), "House")
        house_df, old_house = calc_leg_details(house)
        senate = make_initial_df(df, state, "Senate")
        senate_df, old_senate = calc_leg_details(senate)
        if senate_df is None:
            print(f'something wrong with {state} senate')
            print(senate.to_string())
            break
        if house_df is None:
            print(f'something wrong with {state} house')
            print(house.to_string())
            break
        
        if len(senate_df) == 0:
            print('zero length')
            print(state)
            return None
        elif len(house_df) == 0 :
            print(state)
            print('zero length')
            return None
        new_path = fr'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\2025\{state}'
        if not os.path.exists(new_path):
            os.makedirs(new_path)
        os.chdir(new_path)
        
        # Create a Pandas Excel writer using XlsxWriter as the engine.
        file_name = f'{state}_legislators_info_{current_date}.xlsx'
        # writer = pd.ExcelWriter(file_name, engine='xlsxwriter')
        # Write each dataframe to a different worksheet.
        house_sheet = f'{state}_house'
        senate_sheet = f'{state}_senate'
        with pd.ExcelWriter(file_name, engine='xlsxwriter') as writer:
            house_df.to_excel(writer, sheet_name=house_sheet, index=False)
            senate_df.to_excel(writer, sheet_name=senate_sheet, index=False)
        print(f'done writing for {state}')
        
        if not old_house.empty or not old_senate.empty:
            print(f'{state} has one')
            outgoing = f'{state}_legislators_{current_date}_old.xlsx'
            house_sheet_old = f'{state}_house (old)'
            senate_sheet_old = f'{state}_senate (old)'
            with pd.ExcelWriter(outgoing, engine='xlsxwriter') as old_writer:
            
                # Write each dataframe to a different worksheet.
       
                old_house.to_excel(old_writer, sheet_name=house_sheet_old, index=False)
                old_senate.to_excel(old_writer, sheet_name=senate_sheet_old, index=False)
        else:
            continue

        # Close the Pandas Excel writer and output the Excel file.
        writer.close()
        old_writer.close()


### Data Gathering

In [177]:
#states to pull info for
states = ['ND', 'NM', 'OH', 'OK', 'VA', 'WV', 'AL', 'CT', 'IL', 'IN', 'KS', 'MO', 'NC']

In [178]:
#get officials from quorum sheets download; currently coming from data_tracking_v2 sheet on quorum
officials_file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\data imports\quorum\legislator data\2025\leg_quorum_data_12_9_2024.xlsx"
officials = pd.read_excel(officials_file)

In [179]:
#get list of committees for each state legislative chamber
comms_file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\committees_data.xlsx"
comms = pd.read_excel(comms_file)

In [180]:
# Final Call
create_state_leg_files(officials, states)
# %% one off creations
# create_state_leg_files(officials, ['WV'])

Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
done writing for ND
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
done writing for NM
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 35
Index(['State 

  warn("Calling close() on already closed file.")


done writing for OK
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
done writing for VA
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
done writing for WV
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoing

  warn("Calling close() on already closed file.")
  warn("Calling close() on already closed file.")


Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 7
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 1
done writing for IL
IL has one
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 0
done writing for IN
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 23
Ind

  warn("Calling close() on already closed file.")


Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 55
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 6
done writing for MO
MO has one


  warn("Calling close() on already closed file.")


Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 16
Index(['State Abbreviation', 'Chamber', 'Title', 'First Name', 'Last Name',
       'Party', 'District', 'Date Assumed Office', 'Committee List', 'Name'],
      dtype='object')
outgoings: 7
done writing for NC
NC has one


  warn("Calling close() on already closed file.")


In [None]:
test_df = make_initial_df(officials, "KS", "Senate")
test_df_filt = test_df[~test_df['Name'].str.contains('Outgoing', regex = True)]
other = test_df[test_df['Name'].str.contains('Outgoing', regex = True)]
test_df_filt
other


Unnamed: 0,data_tracking_v2,Title,Party,Name,State Abbreviation,First Name,Last Name,Quorum ID,Party.1,Chamber,District,Committee List,Caucus List,Date Assumed Office,State Senate Controlling Party,State House Controlling Party,Name.1
6,KS Sen. Larry Alley (R-KS-032),Kansas Senator,Republican,KS Sen. Larry Alley (R-KS-032),KS,Larry,Alley,1114828,Republican,Senate,State Senate District 32,Senate Majority Leader | Confirmation Oversigh...,,2016.0,Republican,Republican,KS Sen. Larry Alley (R-KS-032)
16,KS Sen.-elect Mike Argabright (R-KS-017),Kansas Senator,Republican,KS Sen.-elect Mike Argabright (R-KS-017),KS,Mike,Argabright,3622679,Republican,Senate,State Senate District 17,,,2024.0,Republican,Republican,KS Sen.-elect Mike Argabright (R-KS-017)
42,KS Sen. Rick Billinger (R-KS-040),Kansas Senator,Republican,KS Sen. Rick Billinger (R-KS-040),KS,Rick,Billinger,2664619,Republican,Senate,State Senate District 40,"Ways and Means (Chair), Legislative Budget (Ch...",,2016.0,Republican,Republican,KS Sen. Rick Billinger (R-KS-040)
55,KS Sen. Elaine Bowers (R-KS-036),Kansas Senator,Republican,KS Sen. Elaine Bowers (R-KS-036),KS,Elaine,Bowers,18442,Republican,Senate,State Senate District 36,"Transparency and Ethics (Chair), Local Governm...",,2012.0,Republican,Republican,KS Sen. Elaine Bowers (R-KS-036)
56,KS Sen.-elect Craig Bowser (R-KS-001),Kansas Senator,Republican,KS Sen.-elect Craig Bowser (R-KS-001),KS,Craig,Bowser,1785436,Republican,Senate,State Senate District 1,,,2024.0,Republican,Republican,KS Sen.-elect Craig Bowser (R-KS-001)
103,KS Sen.-elect Joe Claeys (R-KS-027),Kansas Senator,Republican,KS Sen.-elect Joe Claeys (R-KS-027),KS,Joe,Claeys,3622549,Republican,Senate,State Senate District 27,,,2024.0,Republican,Republican,KS Sen.-elect Joe Claeys (R-KS-027)
104,KS Sen. J.R. Claeys (R-KS-024),Kansas Senator,Republican,KS Sen. J.R. Claeys (R-KS-024),KS,J.R.,Claeys,22100,Republican,Senate,State Senate District 24,"Ways and Means (Vice Chair), Assessment and Ta...",,2020.0,Republican,Republican,KS Sen. J.R. Claeys (R-KS-024)
116,KS Sen. Ethan Corson (D-KS-007),Kansas Senator,Democrat,KS Sen. Ethan Corson (D-KS-007),KS,Ethan,Corson,52531,Democrat,Senate,State Senate District 7,"Judiciary (Ranking Minority Member), Transport...",,2020.0,Republican,Republican,KS Sen. Ethan Corson (D-KS-007)
138,KS Sen. Brenda Dietrich (R-KS-020),Kansas Senator,Republican,KS Sen. Brenda Dietrich (R-KS-020),KS,Brenda,Dietrich,905091,Republican,Senate,State Senate District 20,"Commerce (Vice Chair), Special Claims Against ...",,2020.0,Republican,Republican,KS Sen. Brenda Dietrich (R-KS-020)
156,KS Sen. Renee Erickson (R-KS-030),Kansas Senator,Republican,KS Sen. Renee Erickson (R-KS-030),KS,Renee,Erickson,1271137,Republican,Senate,State Senate District 30,Senate Assistant Majority Leader | Commerce (C...,,2020.0,Republican,Republican,KS Sen. Renee Erickson (R-KS-030)


# Individual Calls

### NC 
#### House


In [None]:
nc_house = make_initial_df(officials, "NC", "House")
nc_house_df = calc_leg_details(nc_house)
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nc\cleaned')
nc_house_df.to_csv('nc_house_officials.csv')


#### senate


In [None]:

nc_senate = make_initial_df(officials, "NC", "Senate")
nc_senate_df = calc_leg_details(nc_senate)
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nc\cleaned')
nc_senate_df.to_csv('nc_senate_officials.csv', index=False)

### Illinois

In [None]:
#senate
il_house = make_initial_df(officials, "IL", "House")
il_house_df = calc_leg_details(il_house)

In [None]:
# House
nd_house = make_initial_df(officials, "ND", "House")
nd_house_df = calc_leg_details(nd_house)

In [None]:
#Senate
nd_senate = make_initial_df(officials, "ND", "Senate")
nd_senate_df = calc_leg_details(nd_senate)

In [None]:
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\nd')
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('ND_legislators.xlsx', engine='xlsxwriter')
# Write each dataframe to a different worksheet.

### North Dakota
#### House

In [None]:
nd_house = make_initial_df(officials, "ND", "House")
nd_house_df = calc_leg_details(nd_house)

#### Senate

In [None]:
nd_senate = make_initial_df(officials, "ND", "Senate")
nd_senate_df = calc_leg_details(nd_senate)

In [None]:
#writing to excel for ND
nd_house_df.to_excel(writer, sheet_name='nd_house', index=False)
nd_senate_df.to_excel(writer, sheet_name='nd_senate', index=False)
# Close the Pandas Excel writer and output the Excel file.
writer.close()


os.chdir(f'C:\\Users\\clutz\\OneDrive - THE HUNT INSTITUTE\\Documents\\Data\\legislator data\\{state}')
# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('ND_legislators.xlsx', engine='xlsxwriter')
# Write each dataframe to a different worksheet.

### New Mexico

In [None]:
#House
nm_house = make_initial_df(officials, "NM", "House")
nm_house_df = calc_leg_details(nm_house)

In [None]:
#Senate
nm_senate = make_initial_df(officials, "NM", "Senate")
nm_senate_df = calc_leg_details(nm_senate)