## Setup

In [80]:
#imports
import os, sys, json, datetime, re, xlrd  # Provides OS-dependent functionality, system-specific parameters, JSON handling, and date/time manipulation
import pandas as pd             # Provides data structures and data analysis tools
from openpyxl import Workbook
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import requests
import glob
import time

from tqdm import tqdm
from functools import reduce
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None  # default='warn'
from IPython.display import display_markdown
from cprl_functions.state_capture import thi_states,state_ref, state_coding, state_coding_r, state_pat, state_abv_pat
from cprl_functions.defined_functions import get_recent_file


# Data Setup

Have the following been updated?
- influence score file
- activities score file
- leg_lookup file (key_creation.py)
    - will need to update all_legs_file too
        - created in influence_score.py but powers the leg_lookup file

In [81]:
#Get lookup Data
key_path = r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\legislator data\key_creation\2025'
legislators_df = get_recent_file("leg_lookup_*.csv", key_path)
# print(legislators_df)
# legislators_df = pd.read_csv(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\legislator data\connectors\leg_lookup_df.csv')

#grab scores data
compiling_files = r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\bridges\compiling_calcs\2025'

activity_file = get_recent_file("activity*.csv", compiling_files)
influence_file = get_recent_file("*infl*.csv", compiling_files)
print(influence_file)

#make dfs
legislators_df = pd.read_csv(legislators_df)
activities_df = pd.read_csv(activity_file)
influence_df = pd.read_csv(influence_file)

# print(activities_df.columns)
legislators_df

C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\bridges\compiling_calcs\2025\leg_infl_df2025_03_31.csv


Unnamed: 0,full_pk,primary_key,district_code,state abbreviation,chamber,title,first name,last name,party,district,date assumed office,name,tenure,leader,state_code,chamber_code
0,10006300.0,100063,63.0,AL,House,Alabama Representative,Cynthia,Almond,Republican,63.0,2021,AL Rep. Cynthia Almond (R-AL-063),4,,10.0,0.0
1,10006600.0,100066,66.0,AL,House,Alabama Representative,Alan,Baker,Republican,66.0,2006,AL Rep. Alan Baker (R-AL-066),19,,10.0,0.0
2,10004900.0,100049,49.0,AL,House,Alabama Representative,Russell,Bedsole,Republican,49.0,2020,AL Rep. Russell Bedsole (R-AL-049),5,,10.0,0.0
3,10008000.0,100080,80.0,AL,House,Alabama Representative,Chris,Blackshear,Republican,80.0,2016,AL Rep. Chris Blackshear (R-AL-080),9,,10.0,0.0
4,10006100.0,100061,61.0,AL,House,Alabama Representative,Ronald,Bolton,Republican,61.0,2022,"AL Rep. Ronald ""Ron"" Bolton (R-AL-061)",3,,10.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1980,57101501.0,571015,15.0,WV,Senate,West Virginia Senator,Darren,Thorne,Republican,15.0,2025,WV Sen. Darren Thorne (R-WV-015),0,,57.0,1.0
1981,57100102.0,571001,1.0,WV,Senate,West Virginia Senator,Ryan,Weld,Republican,1.0,2016,WV Sen. Ryan Weld (R-WV-001),9,,57.0,1.0
1982,57101502.0,571015,15.0,WV,Senate,West Virginia Senator,Thomas,Willis,Republican,15.0,2024,"WV Sen. Thomas ""Tom"" Willis (R-WV-015)",1,,57.0,1.0
1983,57100502.0,571005,5.0,WV,Senate,West Virginia Senator,Michael,Woelfel,Democrat,5.0,2024,"WV Sen. Michael ""Mike"" Woelfel (D-WV-005)",1,,57.0,1.0


### Clean up

In [82]:
# #get rid of nan pks
# #rewrite na pks to be numpy nan
# influence_df.loc[influence_df['full_pk'].str.contains('nan'), 'full_pk'] = np.nan

# #drop em
# influence_df = influence_df.dropna(subset=['full_pk']).reset_index(drop = True)


In [83]:
#clean influence nan's
for i,j in enumerate(influence_df['full_pk']):
    if re.search('nan', str(j)):
        print(j)
        influence_df.loc[i,'full_pk'] = np.nan


In [84]:
#forces all pks to int64
all_dfs = [legislators_df,activities_df,influence_df]
for i,df in enumerate(all_dfs):
    # print(*all.columns, sep = ',')
    # print(all.head)
    if df['full_pk'].dtypes == "object":
        df['full_pk'] = pd.to_numeric(df['full_pk'], errors='coerce').astype('Int64')  # Use nullable Int64 if NaNs are present
        all_dfs[i] = df


In [85]:
#narrow down scores dfs
# activities_df = activities_df.loc[:,['full_pk', 'activities_score']]
# influence_df = influence_df.loc[:,['full_pk', 'influence_score']]

# Merge Data (compiled_scores creation)

In [86]:
# first_merge

#merging influence and legislators
#merge influence scores
first_merge = pd.merge(legislators_df, influence_df, how="right", on='full_pk', suffixes=('', '_y'))
first_merge.drop(first_merge.filter(regex='_y$').columns, axis=1, inplace=True)


## Print Statements for troubleshooting
# print(first_merge.to_string())
# print(*first_merge.columns, sep=", ")
# print(*activities_df.columns, sep=", ")
first_merge


Unnamed: 0,full_pk,primary_key,district_code,state abbreviation,chamber,title,first name,last name,party,district,date assumed office,name,tenure,leader,state_code,chamber_code,first_name,last_name,influence_score
0,10006300.0,100063,63.0,AL,House,Alabama Representative,Cynthia,Almond,Republican,63.0,2021,AL Rep. Cynthia Almond (R-AL-063),4,,10.0,0.0,Cynthia,Almond,11.0
1,10006600.0,100066,66.0,AL,House,Alabama Representative,Alan,Baker,Republican,66.0,2006,AL Rep. Alan Baker (R-AL-066),19,,10.0,0.0,Alan,Baker,13.0
2,10004900.0,100049,49.0,AL,House,Alabama Representative,Russell,Bedsole,Republican,49.0,2020,AL Rep. Russell Bedsole (R-AL-049),5,,10.0,0.0,Russell,Bedsole,11.0
3,10008000.0,100080,80.0,AL,House,Alabama Representative,Chris,Blackshear,Republican,80.0,2016,AL Rep. Chris Blackshear (R-AL-080),9,,10.0,0.0,Chris,Blackshear,12.0
4,10006100.0,100061,61.0,AL,House,Alabama Representative,Ronald,Bolton,Republican,61.0,2022,"AL Rep. Ronald ""Ron"" Bolton (R-AL-061)",3,,10.0,0.0,Ronald,Bolton,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1881,57101501.0,571015,15.0,WV,Senate,West Virginia Senator,Darren,Thorne,Republican,15.0,2025,WV Sen. Darren Thorne (R-WV-015),0,,57.0,1.0,Darren,Thorne,10.0
1882,57100102.0,571001,1.0,WV,Senate,West Virginia Senator,Ryan,Weld,Republican,1.0,2016,WV Sen. Ryan Weld (R-WV-001),9,,57.0,1.0,Ryan,Weld,12.0
1883,57101502.0,571015,15.0,WV,Senate,West Virginia Senator,Thomas,Willis,Republican,15.0,2024,"WV Sen. Thomas ""Tom"" Willis (R-WV-015)",1,,57.0,1.0,Thomas,Willis,2.0
1884,57100502.0,571005,5.0,WV,Senate,West Virginia Senator,Michael,Woelfel,Democrat,5.0,2024,"WV Sen. Michael ""Mike"" Woelfel (D-WV-005)",1,,57.0,1.0,Michael,Woelfel,2.0


In [87]:
#second merge
#merge influence scores
second_merge = pd.merge(first_merge, activities_df, how="left", on='full_pk', suffixes=('', '_y'))
second_merge.drop(second_merge.filter(regex='_y$').columns, axis=1, inplace=True)

second_merge

Unnamed: 0,full_pk,primary_key,district_code,state abbreviation,chamber,title,first name,last name,party,district,...,name,tenure,leader,state_code,chamber_code,first_name,last_name,influence_score,activities_score,events
0,10006300.0,100063,63.0,AL,House,Alabama Representative,Cynthia,Almond,Republican,63.0,...,AL Rep. Cynthia Almond (R-AL-063),4,,10.0,0.0,Cynthia,Almond,11.0,,
1,10006600.0,100066,66.0,AL,House,Alabama Representative,Alan,Baker,Republican,66.0,...,AL Rep. Alan Baker (R-AL-066),19,,10.0,0.0,Alan,Baker,13.0,,
2,10004900.0,100049,49.0,AL,House,Alabama Representative,Russell,Bedsole,Republican,49.0,...,AL Rep. Russell Bedsole (R-AL-049),5,,10.0,0.0,Russell,Bedsole,11.0,,
3,10008000.0,100080,80.0,AL,House,Alabama Representative,Chris,Blackshear,Republican,80.0,...,AL Rep. Chris Blackshear (R-AL-080),9,,10.0,0.0,Chris,Blackshear,12.0,,
4,10006100.0,100061,61.0,AL,House,Alabama Representative,Ronald,Bolton,Republican,61.0,...,"AL Rep. Ronald ""Ron"" Bolton (R-AL-061)",3,,10.0,0.0,Ronald,Bolton,11.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1881,57101501.0,571015,15.0,WV,Senate,West Virginia Senator,Darren,Thorne,Republican,15.0,...,WV Sen. Darren Thorne (R-WV-015),0,,57.0,1.0,Darren,Thorne,10.0,,
1882,57100102.0,571001,1.0,WV,Senate,West Virginia Senator,Ryan,Weld,Republican,1.0,...,WV Sen. Ryan Weld (R-WV-001),9,,57.0,1.0,Ryan,Weld,12.0,,
1883,57101502.0,571015,15.0,WV,Senate,West Virginia Senator,Thomas,Willis,Republican,15.0,...,"WV Sen. Thomas ""Tom"" Willis (R-WV-015)",1,,57.0,1.0,Thomas,Willis,2.0,,
1884,57100502.0,571005,5.0,WV,Senate,West Virginia Senator,Michael,Woelfel,Democrat,5.0,...,"WV Sen. Michael ""Mike"" Woelfel (D-WV-005)",1,,57.0,1.0,Michael,Woelfel,2.0,,


## Clean Merge Data

In [88]:
#get count of events for legislators

# Function to count non-whitespace events
def count_events(event_str):
    if pd.isna(event_str):  # Check if the value is NaN
        return 0
    # Split by "|" and strip whitespace
    events_list = [event.strip() for event in event_str.split('|') if event.strip()]
    
    return len(events_list)

# Add a new column with the count of events
second_merge['event_count'] = second_merge['events'].apply(count_events)
# print(second_merge.columns)
# print(second_merge.to_string())



In [89]:
#copy dataframe
from datetime import date

final_df = second_merge.copy()
print(*final_df.columns, sep=', ')
final_df['full_pk'] = final_df['full_pk'].astype(int).astype(str)

full_pk, primary_key, district_code, state abbreviation, chamber, title, first name, last name, party, district, date assumed office, name, tenure, leader, state_code, chamber_code, first_name, last_name, influence_score, activities_score, events, event_count


In [90]:
#PK Cleanup
for i,j in enumerate(final_df['full_pk']):
    # print(str(j))
    j_pk = re.findall(r'^\d{6}', str(j))[0]
    seat_num = re.findall(r'\d{2}$', str(j))[0]
    # print(f'seat num is {seat_num}')
    # print(f'type is {type(seat_num)}')
    
    if seat_num == '00':
        # print('its a single seat')
        seat_num_v = np.nan
    else:
        seat_num_v = 'Seat ' + seat_num

    
    state_match = re.findall(r'^\d{2}', str(j_pk))
    state = state_coding_r.get(int(state_match[0]))
    chamber = int(re.findall(r'(?<=^\d{2})\d{1}(?=\d{3})', str(j_pk))[0])
    
    if chamber == 0:
        chamber_v = 'House'
    else:
        chamber_v = 'Senate'
    
    district = int(re.findall(r'(?<=^\d{3})\d{3}$', str(j_pk))[0].lstrip('0'))
    district_v = f'District {district}'
    
    
    
    if str(seat_num_v) != 'nan':
        final_df.loc[i,'seat_num'] = seat_num_v
        


    # print('################')
    # print(f'state is {state}')
    # print(f'chamber is {chamber_v}')
    # print(f'district is {district_v}')
    # print(seat_num_v)
    

    final_df.loc[i,'state'] = state
    final_df.loc[i,'chamber'] = chamber_v
    final_df.loc[i,'district'] = district_v

print(*final_df.columns, sep=', ')


final_df.loc[:,['year']] = "2025"
no_tenure_mask = final_df['tenure'] == 1

#clears values for new legislators
final_df.loc[no_tenure_mask, 'activities_score'] = np.nan
final_df.loc[no_tenure_mask, 'events'] = np.nan
final_df.loc[no_tenure_mask, 'event_count'] = np.nan

# final_df['pk']

full_pk, primary_key, district_code, state abbreviation, chamber, title, first name, last name, party, district, date assumed office, name, tenure, leader, state_code, chamber_code, first_name, last_name, influence_score, activities_score, events, event_count, state, seat_num


In [91]:
final_df


Unnamed: 0,full_pk,primary_key,district_code,state abbreviation,chamber,title,first name,last name,party,district,...,chamber_code,first_name,last_name,influence_score,activities_score,events,event_count,state,seat_num,year
0,10006300,100063,63.0,AL,House,Alabama Representative,Cynthia,Almond,Republican,District 63,...,0.0,Cynthia,Almond,11.0,,,0.0,AL,,2025
1,10006600,100066,66.0,AL,House,Alabama Representative,Alan,Baker,Republican,District 66,...,0.0,Alan,Baker,13.0,,,0.0,AL,,2025
2,10004900,100049,49.0,AL,House,Alabama Representative,Russell,Bedsole,Republican,District 49,...,0.0,Russell,Bedsole,11.0,,,0.0,AL,,2025
3,10008000,100080,80.0,AL,House,Alabama Representative,Chris,Blackshear,Republican,District 80,...,0.0,Chris,Blackshear,12.0,,,0.0,AL,,2025
4,10006100,100061,61.0,AL,House,Alabama Representative,Ronald,Bolton,Republican,District 61,...,0.0,Ronald,Bolton,11.0,,,0.0,AL,,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1881,57101501,571015,15.0,WV,Senate,West Virginia Senator,Darren,Thorne,Republican,District 15,...,1.0,Darren,Thorne,10.0,,,0.0,WV,Seat 01,2025
1882,57100102,571001,1.0,WV,Senate,West Virginia Senator,Ryan,Weld,Republican,District 1,...,1.0,Ryan,Weld,12.0,,,0.0,WV,Seat 02,2025
1883,57101502,571015,15.0,WV,Senate,West Virginia Senator,Thomas,Willis,Republican,District 15,...,1.0,Thomas,Willis,2.0,,,,WV,Seat 02,2025
1884,57100502,571005,5.0,WV,Senate,West Virginia Senator,Michael,Woelfel,Democrat,District 5,...,1.0,Michael,Woelfel,2.0,,,,WV,Seat 02,2025


## Export

In [92]:
#export
file_name = f'compiled_scores{str(date.today()).replace('-','_')}.xlsx'
csv_file_name = f'compiled_scores{str(date.today()).replace('-','_')}.csv'
final_df.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{file_name}', index=False)
final_df.to_csv(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{csv_file_name}', index=False)

# final_df

# Bills and Legislator Match

## Compiled plus bills
pulls in bills file and adds on the bills sponsored by legislators and the total count

In [93]:
#file set up

#this is from quorum_bill_pulling
#has bills and pk sponsors but no data to accompany it
leg_bills_info_path = r"C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Bill Data"
bills = pd.read_excel(get_recent_file("*leg_bills_info*.xlsx",leg_bills_info_path))

bills.head(2)

# for name, data in final_df.items():
#     print(data)

# for name, data in bills.items():
#     print(data)
# print(bills.columns)


Unnamed: 0,primary_key,bill_labels,bill_counts
0,100007(Yarbrough),H.B.88,1
1,100008(Collins),H.B.188,1


In [94]:
#table set ups

# bill files
mls_bills = bills[bills['primary_key'].astype(str).str.startswith(('430', '571'))].reset_index(drop = True)
norm_bills = bills[~bills['primary_key'].astype(str).str.startswith(('430', '571'))].reset_index(drop = True)

#key lookup dicts
full_dict = dict(zip(final_df['full_pk'],final_df['last_name']))
pk_dict = dict(zip(final_df['full_pk'],final_df['last_name']))


In [95]:
#look up seats
mls_bills['full_pk'] = np.nan
for i,j in enumerate(mls_bills['primary_key']):
    print('########')
    print(j)
    pk = j.split('(',1)[0].strip()
    last_name = j.split('(',1)[-1].strip().replace(')','')
    # print(last_name)
    seats = ["01", "02"]
    ceiling = 1
    for si, seat in enumerate(seats):
        # print(si)
        # print(last_name)
        # print('___________')
        full_pk = int(pk+seat)
        # print(f'full_pk: {full_pk}')
        result = full_dict.get(full_pk)
        # print(f'result: {result}')
        if last_name in str(result):
            mls_bills.loc[i,['full_pk']] = int(full_pk)
            print(True)
            break
        else:
            if si == ceiling:
                print(f'none found for {full_pk}')
                all_pks = pk_dict.get(pk)
                print(all_pks)



########
430001(Hatlestad)
none found for 43000102
None
########
430001(Richter)
none found for 43000102
None
########
430001(Sukut)
none found for 43000102
None
########
430002(Anderson)
none found for 43000202
None
########
430002(Longmuir)
none found for 43000202
None
########
430002(Skarphol)
none found for 43000202
None
########
430003(Hoverson)
none found for 43000302
None
########
430003(Maragos)
none found for 43000302
None
########
430003(Streyle)
none found for 43000302
None
########
430003(VanWinkle)
none found for 43000302
None
########
430004(Fegley)
none found for 43000402
None
########
430004(Finley-DeVille)
none found for 43000402
None
########
430004(Froseth)
none found for 43000402
None
########
430004(Jones)
none found for 43000402
None
########
430004(Oliver)
none found for 43000402
None
########
430004(Onstad)
none found for 43000402
None
########
430005(Brabandt)
none found for 43000502
None
########
430005(Fisher)
none found for 43000502
None
########
430005(Lous

In [96]:

#cleaning bills
#dropna bills
mls_bills = mls_bills.dropna(subset='full_pk')

#creates fullpk for ssl (single seat legislatures)
norm_bills['full_pk'] = norm_bills['primary_key'].astype(str).str.split("(").str[0]
norm_bills['full_pk'] = norm_bills['full_pk'] + "00"
# norm_bills.head()

In [97]:

#finding leg
trouble = []

for i,j in enumerate(norm_bills['full_pk']):
    print('###########')
    print(str(j))
    result = full_dict.get(int(j))
    if result == None:
        continue
    print(fr'result: {result}')
    result2 = norm_bills.loc[i,['primary_key']][-1]
    print(fr'result2: {(result2)}')
    last_name = result2.split("(")[-1].replace(")", "").strip()
    if last_name is None:
        trouble.append(str(int(j)))
        continue
    else:
        if last_name in result:
            print(True)
        else:
            trouble_output = norm_bills.loc[i,['primary_key']][0] + str(norm_bills.loc[i,['bill_labels']][0])
            trouble.append(str(trouble_output))

norm_bills['full_pk'] = norm_bills['full_pk'].astype(int)
    
    # print(norm_bills.loc[i,['primary_key']])

###########
10000700
###########
10000800
###########
10001300
###########
10001500
###########
10001700
###########
10001800
###########
10003100
###########
10003800
###########
10003900
###########
10004000
###########
10004400
###########
10004500
###########
10005100
###########
10005200
###########
10005300
###########
10006000
###########
10006300
###########
10006600
###########
10006900
###########
10007500
###########
10007900
###########
10008100
###########
10008200
###########
10008300
###########
10008600
###########
10100300
###########
10100400
###########
10100500
###########
10100700
###########
10100900
###########
10101000
###########
10101100
###########
10101400
###########
10101600
###########
10101700
###########
10101800
###########
10102100
###########
10102200
###########
10102300
###########
10102400
###########
10102500
###########
10102600
###########
10102700
###########
10102800
###########
10102900
###########
10103300
###########
12000100
###########
1

In [98]:
#move full_pk to front
col_to_move = norm_bills.pop('full_pk')
norm_bills.insert(0,'full_pk', col_to_move)

col_to_move = mls_bills.pop('full_pk')
mls_bills.insert(0,'full_pk', col_to_move)

#create all_bills (final bills format for compiled plus bills)
all_bills = pd.concat([norm_bills, mls_bills]).reset_index(drop=True)

In [99]:
#forces all pks to int64
all_dfs = [final_df,all_bills]
for i,df in enumerate(all_dfs):
    # print(*df.columns, sep = ',')
    # print(all.head)
    if df['full_pk'].dtypes == "object":
        df['full_pk'] = pd.to_numeric(df['full_pk'], errors='coerce').astype('Int64')  # Use nullable Int64 if NaNs are present
        all_dfs[i] = df


In [None]:
#bill and leg merger
#final df comes from compiling data of legislators
#this file has unique rows for legislators and the bills they sponsored along with overall count of bills
compiled_plus_bills = pd.merge(final_df, all_bills, how="left", on='full_pk')
compiled_plus_bills =  compiled_plus_bills.drop('primary_key_y', axis=1)
compiled_plus_bills = compiled_plus_bills.rename(columns={'primary_key_x':'primary_key'})
compiled_plus_bills = compiled_plus_bills.reset_index(drop = True)
# compiled_plus_bills = compiled_plus_bills[~(compiled_plus_bills['tenure'] == 1)]

compiled_plus_bills


Unnamed: 0,full_pk,primary_key,district_code,state abbreviation,chamber,title,first name,last name,party,district,...,last_name,influence_score,activities_score,events,event_count,state,seat_num,year,bill_labels,bill_counts
0,10006300,100063,63.0,AL,House,Alabama Representative,Cynthia,Almond,Republican,District 63,...,Almond,11.0,,,0.0,AL,,2025,H.B.175|H.B.163,2.0
1,10006600,100066,66.0,AL,House,Alabama Representative,Alan,Baker,Republican,District 66,...,Baker,13.0,,,0.0,AL,,2025,H.B.439,1.0
2,10004900,100049,49.0,AL,House,Alabama Representative,Russell,Bedsole,Republican,District 49,...,Bedsole,11.0,,,0.0,AL,,2025,,
3,10008000,100080,80.0,AL,House,Alabama Representative,Chris,Blackshear,Republican,District 80,...,Blackshear,12.0,,,0.0,AL,,2025,,
4,10006100,100061,61.0,AL,House,Alabama Representative,Ronald,Bolton,Republican,District 61,...,Bolton,11.0,,,0.0,AL,,2025,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1962,57101501,571015,15.0,WV,Senate,West Virginia Senator,Darren,Thorne,Republican,District 15,...,Thorne,10.0,,,0.0,WV,Seat 01,2025,,
1963,57100102,571001,1.0,WV,Senate,West Virginia Senator,Ryan,Weld,Republican,District 1,...,Weld,12.0,,,0.0,WV,Seat 02,2025,,
1964,57101502,571015,15.0,WV,Senate,West Virginia Senator,Thomas,Willis,Republican,District 15,...,Willis,2.0,,,,WV,Seat 02,2025,,
1965,57100502,571005,5.0,WV,Senate,West Virginia Senator,Michael,Woelfel,Democrat,District 5,...,Woelfel,2.0,,,,WV,Seat 02,2025,,


In [101]:
#export
#this exports a file with all of the main info for legislators and the bills they have sponsored
file_name_cb = f'compiled_plus_billsv2{str(date.today()).replace('-','_')}.xlsx'
csv_file_name_cb = f'compiled_plus_bills{str(date.today()).replace('-','_')}.csv'
compiled_plus_bills.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{file_name_cb}',sheet_name=f'{file_name_cb.replace(".xlsx", "")}', index=False)
compiled_plus_bills.to_csv(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{csv_file_name_cb}', index=False)


## Bills_and_legisltors


In [46]:
#bills and their legislators with the primary focus on bills``
bills_and_legislators = compiled_plus_bills.assign(
    bills=compiled_plus_bills['bill_labels'].str.split('|')  # Split the string into a list
).explode('bills')  # Create a new row for each list element



# Optionally, clean up the brackets
# bills_and_legislators['Values'] = df_expanded['Values'].str.strip('[]')
# print(compiled_plus_bills.columns)
bills_and_legislators['bill_lookup'] = bills_and_legislators['bills'] + "-[" + bills_and_legislators['state'] + "]"

In [47]:

#export
file_name_bl = f'bills_and_legislators{str(date.today()).replace('-','_')}.xlsx'
csv_file_name_bl = f'bills_and_legislators{str(date.today()).replace('-','_')}.csv'
bills_and_legislators.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{file_name_bl}', sheet_name=f'{file_name_bl.replace(".xlsx", "")}', index=False)
bills_and_legislators.to_csv(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\{csv_file_name_bl}', index=False)
#goes to final ed bills export
