## Setup

In [2]:
#imports
import os, sys, json, datetime, re # Provides OS-dependent functionality, system-specific parameters, JSON handling, and date/time manipulation
from datetime import date
import pandas as pd             # Provides data structures and data analysis tools
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import ast
import requests
import urllib3
import time
import glob
from tqdm import tqdm
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from bs4 import BeautifulSoup
# from pandas.core.common import SettingWithCopyWarning

from cprl_functions.state_capture import thi_states,state_ref, state_coding, state_pat, state_abv_pat
from cprl_functions.defined_functions import create_pk, add_seats, get_recent_file

from IPython.display import display_markdown


## File Gathering

In [3]:
#new file gathering

path = r"C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\bill_data\quorum downloads"
glob_pat = os.path.join(path, r'[!old]*')

bill_files = glob.glob(glob_pat)




In [4]:
# File declaration (deprecated)
thi_bills = r"C:\Users\clutz\Downloads\2024_thi_states_bills.xlsx"
nd_bills = r"C:\Users\clutz\Downloads\nd-bills.xlsx"
non_thi_bills = r"C:\Users\clutz\Downloads\2024_non_thi_states_bills.xlsx"

bill_files = [thi_bills, nd_bills, non_thi_bills]


In [5]:
#format dfs
dfs = []
for f in bill_files:
    df = pd.read_excel(f)
    df.columns = ['full_bill_label','bill_label', 'state','title', 'bill_sum','ai_sum','sponsors','status','status_date','detailed_status','detailed_status','quorum_link']
    dfs.append(df)


all_bills = pd.concat(dfs)


## Filtering Bills

In [6]:
#education bills filter

#getting only education bills with regex
ed_bills = all_bills.loc[
    (all_bills['title'].notna() & 
     (all_bills['title'].str.contains(
         r'[Ii]ccb-?|[Oo]ce-|[Ee]duc-.?|[Ee]arly [Cc]h(i)?ld|^Sch\s?|[Hg]ighr [Ee]d|[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?|.*[Ee]ducation.*|.*[Cc]hild.*|.*[Ss]chool.*|.*[Cc]harter.*|.*[Mm]ath.*|.*[Rr]ead.*|.*[Tt]each.*|.*[Pp]arent.*|.*[Kk]id.*|.*[Ss]tudent.*|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', 
         regex=True, na=False
     ))
    ) 
    | 
    all_bills['bill_sum'].str.contains(
        r'[Ee]arly\s?-?[Cc]hildhood|[Ee]ducational\s?[Rr]equirements|[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation|[Cc]hild\s?[Ee]mploy', 
        regex=True, na=True
    )
]

ed_bills = ed_bills[~((ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)) |
                    (ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True))) ]

ed_bills = ed_bills.reset_index(drop=True)
# ed_bills = ed_bills[~ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)]

# ed_bills

  (all_bills['title'].str.contains(
  ed_bills = ed_bills[~((ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)) |
  (ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True))) ]


## Final Bills exporting (creates ed_bills)

In [7]:

# splitting and getting sponsors
ed_bills['pk_sponsors'] = np.nan
for i,j in enumerate(ed_bills['sponsors']):

    # print(j)
    print('####')
    is_nan = pd.isna(ed_bills.loc[i,'sponsors'])  # True
    if is_nan == True:
        continue
    try:
        sponsor_split = j.split(',')
    except:
        print('************')
        print(f'sponsors: {j}')
        print(f'title: {str(ed_bills.loc[i,'title'])}')
        print('************')
    if len(sponsor_split) == 1:
        sponsor_split = [j]
    
        
    pks = []
    
    for s in enumerate(sponsor_split):
        s = s[-1]
        print(str(s).split('('))
        last_name = str(s).split('(')[0].strip().split(' ')[-1]
        chamber_match = re.findall(r'[Ss]en\.|[Rr]ep\.|[Ss]peaker|[Dd]el\.', str(s))
        if len(chamber_match) == 0:
            print(f'here is what has no chamber: {s}')
            continue
        # if not chamber_match[0]:
        #     print(chamber_match)
        #     print(s)
        #     continue

        if 'rep' in str(chamber_match[0]).lower() or 'speaker' in str(chamber_match[0]).lower() or 'del' in str(chamber_match[0]).lower():
            chamber_code = '0'
        elif 'sen' in str(chamber_match[0]).lower():
            chamber_code = '1'
        get_dis_and_state = re.findall(r'(?!\()[A-Z]{1}-[A-Z]{2}-\d{1,3}(?<!\))', str(s))

        if len(get_dis_and_state) == 0:
            print('regex failed')
            print(s)
            continue
        else:
            broken_up_val = str(get_dis_and_state[0]).split('-')
            state_code = state_coding.get(broken_up_val[1])
        # print(type(state_code))
            district_code = broken_up_val[2]
            desired_length = 3

            if len(district_code) < desired_length:
                district_code = district_code.zfill(desired_length)

        pk = str(state_code)+chamber_code+district_code
        pk = f'{pk}({last_name})'
        print(pk)

        pks.append(pk)



    pks_for_bill = '|'.join(pks)
    ed_bills.loc[i,'pk_sponsors'] = pks_for_bill
            
# print(ed_bills.head())
# ed_bills


####
['OH Rep. D.J. Swearingen ', 'R-OH-089)']
440089(Swearingen)
[' OH Former Rep. Sara Carruthers ', 'R-OH-051)']
440051(Carruthers)
####
['OH Rep. Darnell Brewer ', 'D-OH-022)']
440022(Brewer)
[' OH Rep. Latyna Humphrey ', 'D-OH-002)']
440002(Humphrey)
####
['OH Rep. Monica Robb Blasdel ', 'R-OH-079)']
440079(Blasdel)
[' OH Rep. Gary Click ', 'R-OH-088)']
440088(Click)
[' OH Rep. Rodney Creech ', 'R-OH-040)']
440040(Creech)
[' OH Rep. Bob Peterson ', 'R-OH-091)']
440091(Peterson)
####
['OH Rep. Sarah Fowler Arthur ', 'R-OH-099)']
440099(Arthur)
[' OH Rep. Jennifer Gross ', 'R-OH-045)']
440045(Gross)
[' OH Rep. James "Jim" Hoops ', 'R-OH-081)']
440081(Hoops)
####
['OH Sen. Andrew "Andy" Brenner ', 'R-OH-019)']
441019(Brenner)
[' OH Sen. Jerry Cirino ', 'R-OH-018)']
441018(Cirino)
####
['OH Former Sen. Stephanie Kunze ', 'R-OH-016)']
441016(Kunze)
####
['IL Speaker Emanuel "Chris" Welch ', 'D-IL-007)']
220007(Welch)
[' IL Rep. William "Will" Davis ', 'D-IL-030)']
220030(Davis)
[' IL S

## Tagging

In [8]:
pat = r'[Dd]iversity'


for a,b in zip(ed_bills.title, ed_bills.bill_sum):
    # print(a)
    if re.search(pat, str(a)):
        print('###### title #######')
        print(a)
        print('/n')
    if re.search(pat, str(b)):
        print('##### summary ########')
        print(b)
        print('/n')

##### summary ########
Affirms the importance and targeted value of the Minority Teachers of Illinois scholarship in increasing diversity in the teacher workforce. Celebrates the support that the Minority Teachers of Illinois scholarship has offered to aspiring teachers to date and the scholarship's role in growing the diversity of teacher preparation in Illinois. Urges the Illinois Student Assistance Commission to continue to administer the Minority Teachers of Illinois scholarship in order to support aspiring teachers of color to enter the teaching profession. Urges the State of Illinois and its entities to continue to support efforts grounded in research and data that increase the diversity of the educator workforce in order to improve outcomes for all students in this State.
/n
##### summary ########
Affirms the importance and targeted value of the Minority Teachers of Illinois scholarship in increasing diversity in the teacher workforce. Celebrates the support that the Minority Te

In [9]:
#tagging bills
ed_bills['HE_tag'] = False
ed_bills['EC_tag'] = False
ed_bills['ED_tag'] = False
i = 0
for a,b in zip(ed_bills.title, ed_bills.bill_sum):
    if re.search(r'[Ii]ccb|[Ee]duc-.?|[Hg]ighr [Ee]d|[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', str(a)):
        ed_bills.loc[i,'HE_tag'] = True
    
    if re.search('[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation', str(b)):
        ed_bills.loc[i,'HE_tag'] = True


    if re.search('[Ee]arly [Cc]h(i)?ld', str(a)):
        ed_bills.loc[i,'EC_tag'] = True

    if re.search(r'[Ee]arly\s?-?[Cc]hildhood', str(b)):
        ed_bills.loc[i,'EC_tag'] = True

    i += 1


# ec_bills = ed_bills[ed_bills['EC_tag']==True]
# he_bills = ed_bills[ed_bills['HE_tag']==True]

# test_dupes = ed_bills[ed_bills['bill_label'].duplicated(keep=False)]
# test_dupes


ed_bills[ed_bills['bill_label'].duplicated(keep="last")]
ed_bills['lookup'] = ed_bills['bill_label'] + "-[" + ed_bills['state'] + "]"

ed_bills.columns

  if re.search('[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation', str(b)):


Index(['full_bill_label', 'bill_label', 'state', 'title', 'bill_sum', 'ai_sum',
       'sponsors', 'status', 'status_date', 'detailed_status',
       'detailed_status', 'quorum_link', 'pk_sponsors', 'HE_tag', 'EC_tag',
       'ED_tag', 'lookup'],
      dtype='object')

# leg_info file
Takes ed bills and compiles all of the sponsors together (in pk form)
goes into compiling act_and_infl_scores.ipynb

In [10]:
#Leg_bills_info creation
##creates list of bills and counts of bills per legislator
sponsor_bill_lookup = []
for i,j in enumerate(ed_bills['pk_sponsors']):
    is_nan = pd.isna(ed_bills.loc[i,'pk_sponsors'])  # True

    if is_nan == True:
        continue 
    sponsors_ls = []
    bill_nums = []
    sponsors_split = str(j).split('|')
    sponsors_split = [x for x in sponsors_split if len(x) > 0]
    n = len(sponsors_split)
    sponsors_ls.extend(sponsors_split)
    bill_nums.extend([ed_bills.loc[i,'bill_label']]*n)
    append_df = pd.DataFrame({'primary_key':sponsors_ls, 'bill_labels':bill_nums})
    sponsor_bill_lookup.append(append_df)

leg_bill_lookup = pd.concat(sponsor_bill_lookup)
leg_bill_lookup = leg_bill_lookup.reset_index(drop=True)



#bills per primary key
leg_bill_lookup_grouped = leg_bill_lookup.groupby(['primary_key']).agg({
    'bill_labels': ['|'.join, 'count']
}).reset_index()
leg_bill_lookup_grouped.columns = ['primary_key', 'bill_labels', 'bill_counts']



#print statements for assistance
# print(leg_bill_lookup_grouped.columns)
# leg_bill_lookup_grouped


In [18]:
leg_bill_lookup_grouped

Unnamed: 0,primary_key,bill_labels,bill_counts
0,100007(Yarbrough),H.B.88,1
1,100008(Collins),H.B.188,1
2,100013(Woods),H.B.331|H.B.330|H.B.168|H.B.169,4
3,100015(Hulsey),H.B.173,1
4,100017(Estes),H.B.83|H.B.57,2
...,...,...,...
3576,591001(Driskill),S.F.0061,1
3577,591011(Hicks),S.F.0124,1
3578,591012(Kolb),S.F.0086,1
3579,591015(Schuler),S.F.0046,1


In [11]:
#export
# leg_filename = r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\bills\leg_bills_info.xlsx'
# leg_bill_lookup_grouped.to_excel(leg_filename, index=False)


leg_filename = f'leg_bills_info_{str(date.today()).replace('-','_')}.xlsx'
leg_bill_lookup_grouped.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Bill Data\{leg_filename}',sheet_name=f'{leg_filename.replace(".xlsx", "")}', index=False)




# Tableau Ed Bills
Pulls together and unions data to get bills and legislators and other info associated with them
Used in making:
- Legislative Bill Tracking

In [12]:
#pull in bills and legislators
key_path = r"C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores"


compiled_file = get_recent_file("bills_and_legislators*.xlsx", key_path)
print(compiled_file)
compiled_df = pd.read_excel(compiled_file)
print(*compiled_df.columns, sep = " | ")

leg_bills = compiled_df['bills'].to_list()
leg_bills = list(set(leg_bills))
print(compiled_df.head(2).to_string())

C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Compiled Scores\bills_and_legislators2025_03_20.xlsx
full_pk | primary_key | district_code | state abbreviation | chamber | title | first name | last name | party | district | date assumed office | name | tenure | leader | state_code | chamber_code | first_name | last_name | influence_score | activities_score | events | event_count | state | seat_num | year | bill_labels | bill_counts | bills | bill_lookup
    full_pk  primary_key  district_code state abbreviation chamber                   title first name last name       party     district  date assumed office                               name  tenure leader  state_code  chamber_code first_name last_name  influence_score  activities_score events  event_count state seat_num  year      bill_labels  bill_counts    bills   bill_lookup
0  10006300       100063             

In [13]:
#moving columns around
# Move the 'C' column to the first position
column_to_move = ed_bills.pop('bill_label')
ed_bills.insert(0, 'bill_label', column_to_move)

column2_to_move = ed_bills.pop('lookup')
ed_bills.insert(0, 'lookup', column2_to_move)
ed_bills

cpf_column_to_move = compiled_df.pop('bill_lookup')
compiled_df.insert(0,'bill_lookup', cpf_column_to_move)

In [19]:
#troubleshooting
print(*ed_bills.columns, sep = "|")
print(*compiled_df.columns, sep = "|")
ed_bills

lookup|bill_label|full_bill_label|state|title|bill_sum|ai_sum|sponsors|status|status_date|detailed_status|detailed_status|quorum_link|pk_sponsors|HE_tag|EC_tag|ED_tag
bill_lookup|full_pk|primary_key|district_code|state abbreviation|chamber|title|first name|last name|party|district|date assumed office|name|tenure|leader|state_code|chamber_code|first_name|last_name|influence_score|activities_score|events|event_count|state|seat_num|year|bill_labels|bill_counts|bills


Unnamed: 0,lookup,bill_label,full_bill_label,state,title,bill_sum,ai_sum,sponsors,status,status_date,detailed_status,detailed_status.1,quorum_link,pk_sponsors,HE_tag,EC_tag,ED_tag
0,H.B.8-[OH],H.B.8,H.B.8: Enact the Parents' Bill of Rights,OH,Enact the Parents' Bill of Rights,"To amend sections 3313.6022, 3314.03, and 3326...",The legislative text introduces several key pr...,"OH Rep. D.J. Swearingen (R-OH-089), OH Former ...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3217061,440089(Swearingen)|440051(Carruthers),False,False,False
1,H.B.29-[OH],H.B.29,H.B.29: Regards driver's license suspensions-f...,OH,Regards driver's license suspensions-failure t...,"To amend sections 1901.44, 1905.202, 1907.25, ...",The legislation introduces significant amendme...,"OH Rep. Darnell Brewer (D-OH-022), OH Rep. Lat...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3217890,440022(Brewer)|440002(Humphrey),False,False,False
2,H.B.206-[OH],H.B.206,H.B.206: Regards public school expulsion for a...,OH,Regards public school expulsion for actions da...,"To amend sections 3313.66, 3313.661, 3313.7117...",The legislation introduces significant amendme...,"OH Rep. Monica Robb Blasdel (R-OH-079), OH Rep...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3280458,440079(Blasdel)|440088(Click)|440040(Creech)|4...,False,False,False
3,H.B.70-[OH],H.B.70,H.B.70: Require schools adopt a policy regardi...,OH,Require schools adopt a policy regarding over-...,To amend section 3313.713 of the Revised Code ...,The legislative text amends section 3313.713 o...,"OH Rep. Sarah Fowler Arthur (R-OH-099), OH Rep...",Enacted,12/19/2024,Became Public Law,3/20/2025,https://quorum.us/bill/3230311,440099(Arthur)|440045(Gross)|440081(Hoops),False,False,False
4,S.B.104-[OH],S.B.104,S.B.104: Regards the College Credit Plus Program,OH,Regards the College Credit Plus Program,"To amend sections 3302.03, 3314.03, 3326.11, 3...",The legislation amends the Ohio Revised Code t...,"OH Sen. Andrew ""Andy"" Brenner (R-OH-019), OH S...",Enacted,11/27/2024,Became Public Law,2/25/2025,https://quorum.us/bill/3261235,441019(Brenner)|441018(Cirino),True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12188,H.F.9-[MN],H.F.9,"H.F.9: Individual income provisions modified, ...",MN,"Individual income provisions modified, depende...","Individual income provisions modified, depende...",The Minnesota House of Representatives introdu...,MN Rep. Carlie Kotyza-Witthuhn (D-MN-049B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129676,320049(Kotyza-Witthuhn),False,False,False
12189,H.F.18-[MN],H.F.18,H.F.18: School district special education serv...,MN,School district special education services ful...,School district special education services ful...,"On January 4, 2023, a bill (H.F. No. 18) was i...",MN Rep. Dan Wolgamott (D-MN-014B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129701,320014(Wolgamott),False,False,False
12190,H.F.8-[MN],H.F.8,"H.F.8: Student personnel aid established, and ...",MN,"Student personnel aid established, and money a...","Student personnel aid established, and money a...",The Minnesota House of Representatives introdu...,MN Rep. Kaela Berg (D-MN-055B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129710,320055(Berg),False,False,False
12191,H.F.54-[MN],H.F.54,H.F.54: Trust established for foster children ...,MN,Trust established for foster children receivin...,Trust established for foster children receivin...,The Minnesota House of Representatives introdu...,MN Rep. Duane Quam (R-MN-024A),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129732,320024(Quam),False,False,False


In [14]:
#ed bills is the finalized list of education bills and compiled_df comes in from bills and legislators are the exploded list of legislators with emphasis on bills with repeated legislator values
full_df = pd.merge(ed_bills, compiled_df, how='left', left_on='lookup', right_on='bill_lookup')
# full_df_v2 = pd.merge(ed_bills, compiled_df, how='left', left_on='bill_label', right_on='bills')

#this is just for testing
# print(full_df_v1.head(2).to_string())
# print(len(full_df_v1))
# print(full_df_v2.head(2).to_string())
# print(len(full_df_v2))

no_chamber = full_df[full_df['chamber'].isnull() == True]

no_chamber

Unnamed: 0,lookup,bill_label,full_bill_label,state_x,title_x,bill_sum,ai_sum,sponsors,status,status_date,...,influence_score,activities_score,events,event_count,state_y,seat_num,year,bill_labels,bill_counts,bills
11,S.B.163-[OH],S.B.163,S.B.163: Create Dublin City Schools license plate,OH,Create Dublin City Schools license plate,"To amend sections 4501.21, 4503.53, 4503.583, ...",The legislation amends and enacts various sect...,OH Former Sen. Stephanie Kunze (R-OH-016),Enacted,1/8/2025,...,,,,,,,,,,
123,H.1999-[MA],H.1999,H.1999: An Act Relative To Student Mental Health,MA,An Act Relative To Student Mental Health,By Representatives Lewis of Framingham and Hig...,The proposed legislation mandates that all pub...,MA Rep. Jack Lewis (D-MA-Massachusetts House 7...,Passed Second Chamber,12/26/2024,...,,,,,,,,,,
133,S.B.125-[OH],S.B.125,S.B.125: Create St. Vincent-St. Mary High Scho...,OH,Create St. Vincent-St. Mary High School licens...,To amend section 4501.21 and to enact section ...,The legislation amends section 4501.21 and ena...,Commissioner Vernon Sykes (OH),Passed Original Chamber,4/18/2023,...,,,,,,,,,,
140,H.D.5498-[MA],H.D.5498,H.D.5498: An Act Mitigating Mbta Communities F...,MA,An Act Mitigating Mbta Communities Funding Imp...,By Representatives Robertson of Tewksbury and ...,The legislative text introduces an amendment t...,,Introduced or Prefiled,11/27/2024,...,,,,,,,,,,
253,H.B. 1374-[VA],H.B. 1374,H.B. 1374: In-state tuition; eligibility for c...,VA,In-state tuition; eligibility for certain memb...,In-state tuition eligibility; certain members\...,The legislative text amends § 23.1-506 of the ...,VA Del. Jason Ballard (R-VA-042),Introduced or Prefiled,1/15/2024,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16178,H.F.9-[MN],H.F.9,"H.F.9: Individual income provisions modified, ...",MN,"Individual income provisions modified, depende...","Individual income provisions modified, depende...",The Minnesota House of Representatives introdu...,MN Rep. Carlie Kotyza-Witthuhn (D-MN-049B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
16179,H.F.18-[MN],H.F.18,H.F.18: School district special education serv...,MN,School district special education services ful...,School district special education services ful...,"On January 4, 2023, a bill (H.F. No. 18) was i...",MN Rep. Dan Wolgamott (D-MN-014B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
16180,H.F.8-[MN],H.F.8,"H.F.8: Student personnel aid established, and ...",MN,"Student personnel aid established, and money a...","Student personnel aid established, and money a...",The Minnesota House of Representatives introdu...,MN Rep. Kaela Berg (D-MN-055B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
16181,H.F.54-[MN],H.F.54,H.F.54: Trust established for foster children ...,MN,Trust established for foster children receivin...,Trust established for foster children receivin...,The Minnesota House of Representatives introdu...,MN Rep. Duane Quam (R-MN-024A),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,


1


In [15]:
starts_w_a = []
for i,(j,k) in enumerate(zip(full_df['lookup'], full_df['chamber'])):
    
    if not isinstance(k, str):
        # print(type(k))
        
        # print("None here")

        # print(j)
        first_3 = str(j)[:3]
        if re.search(r'H|S', first_3):
            if re.search(r'^H', first_3):
                chamber_fix = "House"
            elif re.search(r'^S', first_3):
                chamber_fix = "Senate"
        
        elif re.search(r'A', first_3):
            state_res = str(full_df.loc[i,'state_x'])
            starts_w_a.append(state_res)
            print(first_3)
            print(k)
            print(full_df.loc[i,['sponsors']])
            print(full_df.loc[i,['quorum_link']])
        full_df.loc[i,['chamber']] = chamber_fix      
        # title_exs = full_df.loc[i,['sponsors']]
        # for t in title_exs:
        #     if 'assemb' in str(t).lower().strip():
        #         chamber_patch = "House"
        #         break
        # try:
        #     chamber_patch
        #     # print(f'it works!: {chamber_patch}')
            
        # except:
        #     print(first_3)
        #     print(k)
        #     print(full_df.loc[i,['sponsors']])
        

A.1
nan
sponsors    NJ Assemb. Pamela "Pam" Lampitt (D-NJ-006), NJ...
Name: 7682, dtype: object
quorum_link    https://quorum.us/bill/3322732
Name: 7682, dtype: object
A.4
nan
sponsors    NJ Assemb. Linda Carter (D-NJ-022), NJ Assemb....
Name: 7684, dtype: object
quorum_link    https://quorum.us/bill/3395781
Name: 7684, dtype: object
A.4
nan
sponsors    NJ Assemb. Cody Miller (D-NJ-004), NJ Assemb. ...
Name: 7685, dtype: object
quorum_link    https://quorum.us/bill/3395788
Name: 7685, dtype: object
A.4
nan
sponsors    NJ Assemb. Lisa Swain (D-NJ-038), NJ Assemb. H...
Name: 7686, dtype: object
quorum_link    https://quorum.us/bill/3427297
Name: 7686, dtype: object
A.4
nan
sponsors    NJ Assemb. Linda Carter (D-NJ-022), NJ Assemb....
Name: 7687, dtype: object
quorum_link    https://quorum.us/bill/3428830
Name: 7687, dtype: object
A.5
nan
sponsors    NJ Assemb. Carmen Morales (D-NJ-034), NJ Assem...
Name: 7689, dtype: object
quorum_link    https://quorum.us/bill/3435403
Name: 7689, dtype:

In [109]:
full_df.columns

Index(['lookup', 'bill_label', 'full_bill_label', 'state_x', 'title_x',
       'bill_sum', 'ai_sum', 'sponsors', 'status', 'status_date',
       'detailed_status', 'detailed_status', 'quorum_link', 'pk_sponsors',
       'HE_tag', 'EC_tag', 'ED_tag', 'bill_lookup', 'full_pk', 'primary_key',
       'district_code', 'state abbreviation', 'chamber', 'title_y',
       'first name', 'last name', 'party', 'district', 'date assumed office',
       'name', 'tenure', 'leader', 'state_code', 'chamber_code', 'first_name',
       'last_name', 'influence_score', 'activities_score', 'events',
       'event_count', 'state_y', 'seat_num', 'year', 'bill_labels',
       'bill_counts', 'bills'],
      dtype='object')

In [16]:
#stats for bills

uniq_bill_labels = list(set(full_df['lookup'].to_list()))
print(len(full_df))
print(len(uniq_bill_labels))

res = full_df.groupby(['state_x', 'chamber'])['lookup'].nunique().reset_index()

# nc_rows =full_df[full_df['state_x'] == "NC"].drop_duplicates(subset='lookup').sort_values('lookup').reset_index()
# nc_rows
print(res)

16183
11135
   state_x chamber  lookup
0       AK  Senate       1
1       AL   House      42
2       AL  Senate      37
3       AZ   House      80
4       AZ  Senate      74
..     ...     ...     ...
88      WI  Senate     125
89      WV   House     191
90      WV  Senate      86
91      WY   House      15
92      WY  Senate       7

[93 rows x 3 columns]


In [17]:
file_name_full = f'full_join{str(date.today()).replace('-','_')}.xlsx'
file_name_full_csv = f'full_join{str(date.today()).replace('-','_')}.csv'
full_df.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Bill Data\{file_name_full}',sheet_name=f'ed_bills', index=False)
full_df.to_csv(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\THII\THII_build files\2025\Bill Data\{file_name_full_csv}', index=False)


