## Setup

In [22]:
#imports
import os, sys, json, datetime, re # Provides OS-dependent functionality, system-specific parameters, JSON handling, and date/time manipulation
from datetime import date
import pandas as pd             # Provides data structures and data analysis tools
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import ast
import requests
import urllib3
import time
import glob
from tqdm import tqdm
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from bs4 import BeautifulSoup
# from pandas.core.common import SettingWithCopyWarning

from cprl_functions.state_capture import thi_states,state_ref, state_coding, state_pat, state_abv_pat
from cprl_functions.defined_functions import create_pk, add_seats, get_recent_file

from IPython.display import display_markdown


## File Gathering

In [None]:
# File declaration
thi_bills = r"C:\Users\clutz\Downloads\2024_thi_states_bills.xlsx"
nd_bills = r"C:\Users\clutz\Downloads\nd-bills.xlsx"
non_thi_bills = r"C:\Users\clutz\Downloads\2024_non_thi_states_bills.xlsx"

bill_files = [thi_bills, nd_bills, non_thi_bills]

dfs = []
for f in bill_files:
    df = pd.read_excel(f)
    df.columns = ['full_bill_label','bill_label', 'state','title', 'bill_sum','ai_sum','sponsors','status','status_date','detailed_status','detailed_status','quorum_link']
    dfs.append(df)


all_bills = pd.concat(dfs)


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [8]:
#test: title search
# for i,j in enumerate(all_bills['title']):
#     print('_________________')
#     print(j)

In [9]:
# DEPRECATED

#looking through and testing various titles and descriptions
# ed_bills = all_bills.loc[all_bills['title'].notna() & all_bills['title'].str.contains(r'.*[Ee]ducation.*|.*[Cc]hild.*|.*[Ss]chool.*|.*[Cc]harter.*|.*[Mm]ath.*|.*[Rr]ead.*|.*[Tt]each.*|.*[Pp]arent.*|.*[Kk]id.*|.*[Ss]tudent.*|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', regex=True)]
# print(len(all_bills))
# print(len(ed_bills))


# import textwrap

# # ed_bills = all_bills.loc[(all_bills['title'].notna() & (all_bills['title'].str.contains(r'[Ii]ccb-?[Oo]ce[Ee]duc-.?|[Ee]arly [Cc]h(i)?ld|^Sch\s?|[Hg]ighr [Ee]d|[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?|.*[Ee]ducation.*|.*[Cc]hild.*|.*[Ss]chool.*|.*[Cc]harter.*|.*[Mm]ath.*|.*[Rr]ead.*|.*[Tt]each.*|.*[Pp]arent.*|.*[Kk]id.*|.*[Ss]tudent.*|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', regex=True, na=False))) | all_bills['bill_sum'].str.contains(r'[Ee]arly\s?-?[Cc]hildhood|[Ee]ducational\s?[Rr]equirements|[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation|[Cc]hild\s?[Ee]mploy', regex=True, na = True)]


# test_pat = r'diversity'
# count = 0
# for i,b in enumerate(all_bills['bill_sum']):
#     if re.search(test_pat, str(b)):
#     # if re.search(r'[Ee]arly [Cc]h(i)?ld', str(b)) and len(str(b)) < 100:
        
#         print(str(b))
#         print('###################')
#         # wrapped_text = textwrap.fill((all_bills.loc[i,'bill_sum']), width=100)
#         # print(wrapped_text)
#         # print('###################')
#         # print('\n')
#         count += 1

# print(count)





## Filtering Bills

In [6]:
#education bills filter

#getting only education bills with regex
ed_bills = all_bills.loc[
    (all_bills['title'].notna() & 
     (all_bills['title'].str.contains(
         r'[Ii]ccb-?|[Oo]ce-|[Ee]duc-.?|[Ee]arly [Cc]h(i)?ld|^Sch\s?|[Hg]ighr [Ee]d|[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?|.*[Ee]ducation.*|.*[Cc]hild.*|.*[Ss]chool.*|.*[Cc]harter.*|.*[Mm]ath.*|.*[Rr]ead.*|.*[Tt]each.*|.*[Pp]arent.*|.*[Kk]id.*|.*[Ss]tudent.*|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', 
         regex=True, na=False
     ))
    ) 
    | 
    all_bills['bill_sum'].str.contains(
        r'[Ee]arly\s?-?[Cc]hildhood|[Ee]ducational\s?[Rr]equirements|[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation|[Cc]hild\s?[Ee]mploy', 
        regex=True, na=True
    )
]

ed_bills = ed_bills[~((ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)) |
                    (ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True))) ]

ed_bills = ed_bills.reset_index(drop=True)
# ed_bills = ed_bills[~ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)]

# ed_bills

  (all_bills['title'].str.contains(
  ed_bills = ed_bills[~((ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True)) |
  (ed_bills.title.str.contains(r'[Cc]ongrat[Ss]?(ulations)?|[Mm]ourn|[Mm]emorial|[Cc]ommending|[Hh]onoring', regex = True, na = True))) ]


## Sponsors Cleaning

In [7]:


# splitting and getting sponsors
ed_bills['pk_sponsors'] = np.nan
for i,j in enumerate(ed_bills['sponsors']):

    # print(j)
    is_nan = pd.isna(ed_bills.loc[i,'sponsors'])  # True
    if is_nan == True:
        continue
    try:
        sponsor_split = j.split(',')
    except:
        print('************')
        print(f'sponsors: {j}')
        print(f'title: {str(ed_bills.loc[i,'title'])}')
        print('************')
    if len(sponsor_split) == 1:
        sponsor_split = [j]
    
        
    pks = []
    
    for s in enumerate(sponsor_split):
        chamber_match = re.findall(r'[Ss]en\.|[Rr]ep\.|[Ss]peaker|[Dd]el\.', str(s))
        if len(chamber_match) == 0:
            print(f'here is what has no chamber: {s}')
            continue
        # if not chamber_match[0]:
        #     print(chamber_match)
        #     print(s)
        #     continue

        if 'rep' in str(chamber_match[0]).lower() or 'speaker' in str(chamber_match[0]).lower() or 'del' in str(chamber_match[0]).lower():
            chamber_code = '0'
        elif 'sen' in str(chamber_match[0]).lower():
            chamber_code = '1'
        get_dis_and_state = re.findall(r'(?!\()[A-Z]{1}-[A-Z]{2}-\d{1,3}(?<!\))', str(s))

        if len(get_dis_and_state) == 0:
            print('regex failed')
            print(s)
        else:
            broken_up_val = str(get_dis_and_state[0]).split('-')
            state_code = state_coding.get(broken_up_val[1])
        # print(type(state_code))
            district_code = broken_up_val[2]
            desired_length = 3

            if len(district_code) < desired_length:
                district_code = district_code.zfill(desired_length)

        pk = str(state_code)+chamber_code+district_code
        pks.append(pk)

    pks_for_bill = '|'.join(pks)
    ed_bills.loc[i,'pk_sponsors'] = pks_for_bill
            
print(ed_bills.head())
ed_bills


regex failed
(0, 'MA Rep. Jack Lewis (D-MA-Massachusetts House 7th Middlesex)')
regex failed
(1, ' MA Rep. Natalie Higgins (D-MA-Massachusetts House 4th Worcester)')
here is what has no chamber: (0, 'Commissioner Vernon Sykes (OH)')
regex failed
(0, 'NH Rep. Muriel Hall (D-NH-New Hampshire House Merrimack 09)')
regex failed
(0, 'NH Rep. Joe Alexander (R-NH-New Hampshire House Hillsborough 29)')
regex failed
(0, 'NH Rep. Sallie Fellows (D-NH-New Hampshire House Grafton 08)')
regex failed
(0, 'NH Rep. Mark Pearson (R-NH-New Hampshire House Rockingham 34 (Floterial))')
regex failed
(0, 'NH Former Rep. Corinne Cascadden (D-NH-New Hampshire House Coos 05)')
regex failed
(0, 'NH Rep. Lisa Post (R-NH-New Hampshire House Hillsborough 42)')
regex failed
(0, 'NH Former Rep. Chuck Grassie (D-NH-New Hampshire House Rockingham 12)')
regex failed
(0, 'NH Former Rep. Corinne Cascadden (D-NH-New Hampshire House Coos 05)')
regex failed
(0, 'NH Rep. Rick Ladd (R-NH-New Hampshire House Grafton 05)')
rege

Unnamed: 0,full_bill_label,bill_label,state,title,bill_sum,ai_sum,sponsors,status,status_date,detailed_status,detailed_status.1,quorum_link,pk_sponsors
0,H.B.8: Enact the Parents' Bill of Rights,H.B.8,OH,Enact the Parents' Bill of Rights,"To amend sections 3313.6022, 3314.03, and 3326...",The legislative text introduces several key pr...,"OH Rep. D.J. Swearingen (R-OH-089), OH Former ...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3217061,440089|440051
1,H.B.29: Regards driver's license suspensions-f...,H.B.29,OH,Regards driver's license suspensions-failure t...,"To amend sections 1901.44, 1905.202, 1907.25, ...",The legislation introduces significant amendme...,"OH Rep. Darnell Brewer (D-OH-022), OH Rep. Lat...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3217890,440022|440002
2,H.B.206: Regards public school expulsion for a...,H.B.206,OH,Regards public school expulsion for actions da...,"To amend sections 3313.66, 3313.661, 3313.7117...",The legislation introduces significant amendme...,"OH Rep. Monica Robb Blasdel (R-OH-079), OH Rep...",Enacted,1/8/2025,Became Public Law,4/9/2025,https://quorum.us/bill/3280458,440079|440088|440040|440091
3,H.B.70: Require schools adopt a policy regardi...,H.B.70,OH,Require schools adopt a policy regarding over-...,To amend section 3313.713 of the Revised Code ...,The legislative text amends section 3313.713 o...,"OH Rep. Sarah Fowler Arthur (R-OH-099), OH Rep...",Enacted,12/19/2024,Became Public Law,3/20/2025,https://quorum.us/bill/3230311,440099|440045|440081
4,S.B.104: Regards the College Credit Plus Program,S.B.104,OH,Regards the College Credit Plus Program,"To amend sections 3302.03, 3314.03, 3326.11, 3...",The legislation amends the Ohio Revised Code t...,"OH Sen. Andrew ""Andy"" Brenner (R-OH-019), OH S...",Enacted,11/27/2024,Became Public Law,2/25/2025,https://quorum.us/bill/3261235,441019|441018
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12188,"H.F.9: Individual income provisions modified, ...",H.F.9,MN,"Individual income provisions modified, depende...","Individual income provisions modified, depende...",The Minnesota House of Representatives introdu...,MN Rep. Carlie Kotyza-Witthuhn (D-MN-049B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129676,320049
12189,H.F.18: School district special education serv...,H.F.18,MN,School district special education services ful...,School district special education services ful...,"On January 4, 2023, a bill (H.F. No. 18) was i...",MN Rep. Dan Wolgamott (D-MN-014B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129701,320014
12190,"H.F.8: Student personnel aid established, and ...",H.F.8,MN,"Student personnel aid established, and money a...","Student personnel aid established, and money a...",The Minnesota House of Representatives introdu...,MN Rep. Kaela Berg (D-MN-055B),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129710,320055
12191,H.F.54: Trust established for foster children ...,H.F.54,MN,Trust established for foster children receivin...,Trust established for foster children receivin...,The Minnesota House of Representatives introdu...,MN Rep. Duane Quam (R-MN-024A),Introduced or Prefiled,1/4/2023,Referred to House committee,1/4/2023,https://quorum.us/bill/3129732,320024


## Tagging

In [8]:
pat = r'[Dd]iversity'


for a,b in zip(ed_bills.title, ed_bills.bill_sum):
    # print(a)
    if re.search(pat, str(a)):
        print('###### title #######')
        print(a)
        print('/n')
    if re.search(pat, str(b)):
        print('##### summary ########')
        print(b)
        print('/n')

##### summary ########
Affirms the importance and targeted value of the Minority Teachers of Illinois scholarship in increasing diversity in the teacher workforce. Celebrates the support that the Minority Teachers of Illinois scholarship has offered to aspiring teachers to date and the scholarship's role in growing the diversity of teacher preparation in Illinois. Urges the Illinois Student Assistance Commission to continue to administer the Minority Teachers of Illinois scholarship in order to support aspiring teachers of color to enter the teaching profession. Urges the State of Illinois and its entities to continue to support efforts grounded in research and data that increase the diversity of the educator workforce in order to improve outcomes for all students in this State.
/n
##### summary ########
Affirms the importance and targeted value of the Minority Teachers of Illinois scholarship in increasing diversity in the teacher workforce. Celebrates the support that the Minority Te

In [9]:
#tagging bills
ed_bills['HE_tag'] = False
ed_bills['EC_tag'] = False
ed_bills['ED_tag'] = False
i = 0
for a,b in zip(ed_bills.title, ed_bills.bill_sum):
    if re.search(r'[Ii]ccb|[Ee]duc-.?|[Hg]ighr [Ee]d|[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', str(a)):
        ed_bills.loc[i,'HE_tag'] = True
    
    if re.search('[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation', str(b)):
        ed_bills.loc[i,'HE_tag'] = True


    if re.search('[Ee]arly [Cc]h(i)?ld', str(a)):
        ed_bills.loc[i,'EC_tag'] = True

    if re.search(r'[Ee]arly\s?-?[Cc]hildhood', str(b)):
        ed_bills.loc[i,'EC_tag'] = True

    i += 1


ec_bills = ed_bills[ed_bills['EC_tag']==True]
he_bills = ed_bills[ed_bills['HE_tag']==True]

test_dupes = ed_bills[ed_bills['bill_label'].duplicated(keep=False)]
test_dupes


ed_bills[ed_bills['bill_label'].duplicated(keep="last")]
ed_bills['lookup'] = ed_bills['bill_label'] + "-[" + ed_bills['state'] + "]"

  if re.search('[Pp]ost-?\s?[Ss]econdary\s?[Ee]ducation', str(b)):


# leg_info file
Takes ed bills and compiles all of the sponsors together (in pk form)
goes into compiling act_and_infl_scores.ipynb

In [10]:
#Leg_bills_info creation
##creates list of bills and counts of bills per legislator
sponsor_bill_lookup = []
for i,j in enumerate(ed_bills['pk_sponsors']):
    is_nan = pd.isna(ed_bills.loc[i,'pk_sponsors'])  # True

    if is_nan == True:
        continue 
    sponsors_ls = []
    bill_nums = []
    sponsors_split = str(j).split('|')
    sponsors_split = [x for x in sponsors_split if len(x) > 0]
    n = len(sponsors_split)
    sponsors_ls.extend(sponsors_split)
    bill_nums.extend([ed_bills.loc[i,'bill_label']]*n)
    append_df = pd.DataFrame({'primary_key':sponsors_ls, 'bill_labels':bill_nums})
    sponsor_bill_lookup.append(append_df)

leg_bill_lookup = pd.concat(sponsor_bill_lookup)
leg_bill_lookup = leg_bill_lookup.reset_index(drop=True)
leg_bill_lookup_grouped = leg_bill_lookup.groupby(['primary_key']).agg({
    'bill_labels': ['|'.join, 'count']
}).reset_index()

leg_bill_lookup_grouped.columns = ['primary_key', 'bill_labels', 'bill_counts']




print(leg_bill_lookup_grouped.columns)

leg_bill_lookup_grouped


Index(['primary_key', 'bill_labels', 'bill_counts'], dtype='object')


Unnamed: 0,primary_key,bill_labels,bill_counts
0,100007,H.B.88,1
1,100008,H.B.188,1
2,100013,H.B.331|H.B.330|H.B.168|H.B.169,4
3,100015,H.B.173,1
4,100017,H.B.83|H.B.57,2
...,...,...,...
3269,591011,S.F.0124,1
3270,591012,S.F.0086,1
3271,591015,S.F.0046,1
3272,591030,S.F.0117,1


In [11]:
#export
leg_filename = r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\bills\leg_bills_info.xlsx'
leg_bill_lookup_grouped.to_excel(leg_filename, index=False)

# Tableau Ed Bills

In [16]:
#pull in bills and legislators
key_path = r"C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\bills and legislators\2025"

compiled_file = get_recent_file("bills_and_legislators*.xlsx", key_path)
print(compiled_file)
compiled_df = pd.read_excel(compiled_file)

leg_bills = compiled_df['bills'].to_list()
leg_bills = list(set(leg_bills))
print(*leg_bills, sep='|')

C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\bills and legislators\2025\bills_and_legislators2025_01_29.xlsx
H.R. 4267|S.B.1685|H.B. 1346|H.456|H.B.1808|S.B.3553|S.B.749|H.B.1257|H.B.2076|H.B.278|H.B. 41|H.B.4649|H.B.3329|S.B. 6006|H.B.4969|H.B.2516|S.B. 1394|H.B.4597|S.947|H.B. 1950|H.B.4958|H.515|H.B. 1077|H.B.451|H.B.3669|H.3311|S.B.2838|S.B.1520|S.B. 2208|S.1127|H.C.R. 3015|S.B. 314|H.B. 372|H.B.3922|S.B. 501|S.B.1257|H.B.1616|S.B.844|S.B. 2362|H.B.381|S.B.1833|H.1817|S.B.361|H.R.0734|H.B. 1291|H.B. 1989|H.B. 281|S.B. 966|S.B. 867|S.B. 1051|S.B.2755|H.B.1288|H.B.2670|H.B.468|S.B.568|S.B. 1297|H.B.1678|S.B.165|H.B.4094|H.B.516|S.B.2222|H.B.1780|H.B. 366|H.B. 1516|H.B.0299|H.B. 2631|H.B.487|S.B. 2257|H483|H406|H509|H716|S.B.838|S.B.0289|H.B.4618|H.B.1458|S.B.1221|S.B.1881|H.B.1051|S.B.1229|S.B.53|S.B. 19|S.B. 857|S.B.1239|S.B.1619|S.B.0

In [18]:
#moving columns around
# Move the 'C' column to the first position
column_to_move = ed_bills.pop('bill_label')
ed_bills.insert(0, 'bill_label', column_to_move)

column2_to_move = ed_bills.pop('lookup')
ed_bills.insert(0, 'lookup', column2_to_move)
ed_bills

cpf_column_to_move = compiled_df.pop('bill_lookup')
compiled_df.insert(0,'bill_lookup', cpf_column_to_move)

In [19]:
print(*ed_bills.columns, sep = "|")
print(*compiled_df.columns, sep = "|")

lookup|bill_label|full_bill_label|state|title|bill_sum|ai_sum|sponsors|status|status_date|detailed_status|detailed_status|quorum_link|pk_sponsors|HE_tag|EC_tag|ED_tag
bill_lookup|full_pk|primary_key|state_abbreviation|first_name|last_name|party|tenure|influence_score|activities_score|events|event_count|state|chamber|district|seat_num|year|bill_labels|bill_counts|bills


In [20]:
#ed bills is the finalized list of education bills and compiled_df comes in from bills and legislators are the exploded list of legislators with emphasis on bills with repeated legislator values
full_df = pd.merge(ed_bills, compiled_df, how='left', left_on='lookup', right_on='bill_lookup')
# full_df_v2 = pd.merge(ed_bills, compiled_df, how='left', left_on='bill_label', right_on='bills')

#this is just for testing
# print(full_df_v1.head(2).to_string())
# print(len(full_df_v1))
# print(full_df_v2.head(2).to_string())
# print(len(full_df_v2))
full_df

Unnamed: 0,lookup,bill_label,full_bill_label,state_x,title,bill_sum,ai_sum,sponsors,status,status_date,...,events,event_count,state_y,chamber,district,seat_num,year,bill_labels,bill_counts,bills
0,H.B.8-[OH],H.B.8,H.B.8: Enact the Parents' Bill of Rights,OH,Enact the Parents' Bill of Rights,"To amend sections 3313.6022, 3314.03, and 3326...",The legislative text introduces several key pr...,"OH Rep. D.J. Swearingen (R-OH-089), OH Former ...",Enacted,1/8/2025,...,,0.0,OH,House,District 89,,2025.0,H.B.8|H.B.302,2.0,H.B.8
1,H.B.29-[OH],H.B.29,H.B.29: Regards driver's license suspensions-f...,OH,Regards driver's license suspensions-failure t...,"To amend sections 1901.44, 1905.202, 1907.25, ...",The legislation introduces significant amendme...,"OH Rep. Darnell Brewer (D-OH-022), OH Rep. Lat...",Enacted,1/8/2025,...,,0.0,OH,House,District 2,,2025.0,H.B.29|H.B.114|H.B.63,3.0,H.B.29
2,H.B.206-[OH],H.B.206,H.B.206: Regards public school expulsion for a...,OH,Regards public school expulsion for actions da...,"To amend sections 3313.66, 3313.661, 3313.7117...",The legislation introduces significant amendme...,"OH Rep. Monica Robb Blasdel (R-OH-079), OH Rep...",Enacted,1/8/2025,...,,0.0,OH,House,District 40,,2025.0,H.B.206|H.B.135|H.B.14,3.0,H.B.206
3,H.B.206-[OH],H.B.206,H.B.206: Regards public school expulsion for a...,OH,Regards public school expulsion for actions da...,"To amend sections 3313.66, 3313.661, 3313.7117...",The legislation introduces significant amendme...,"OH Rep. Monica Robb Blasdel (R-OH-079), OH Rep...",Enacted,1/8/2025,...,,0.0,OH,House,District 79,,2025.0,H.B.206,1.0,H.B.206
4,H.B.206-[OH],H.B.206,H.B.206: Regards public school expulsion for a...,OH,Regards public school expulsion for actions da...,"To amend sections 3313.66, 3313.661, 3313.7117...",The legislation introduces significant amendme...,"OH Rep. Monica Robb Blasdel (R-OH-079), OH Rep...",Enacted,1/8/2025,...,OH SLR 2023,1.0,OH,House,District 88,,2025.0,H.B.206|H.B.445|H.B.339,3.0,H.B.206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19912,H.F.9-[MN],H.F.9,"H.F.9: Individual income provisions modified, ...",MN,"Individual income provisions modified, depende...","Individual income provisions modified, depende...",The Minnesota House of Representatives introdu...,MN Rep. Carlie Kotyza-Witthuhn (D-MN-049B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
19913,H.F.18-[MN],H.F.18,H.F.18: School district special education serv...,MN,School district special education services ful...,School district special education services ful...,"On January 4, 2023, a bill (H.F. No. 18) was i...",MN Rep. Dan Wolgamott (D-MN-014B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
19914,H.F.8-[MN],H.F.8,"H.F.8: Student personnel aid established, and ...",MN,"Student personnel aid established, and money a...","Student personnel aid established, and money a...",The Minnesota House of Representatives introdu...,MN Rep. Kaela Berg (D-MN-055B),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,
19915,H.F.54-[MN],H.F.54,H.F.54: Trust established for foster children ...,MN,Trust established for foster children receivin...,Trust established for foster children receivin...,The Minnesota House of Representatives introdu...,MN Rep. Duane Quam (R-MN-024A),Introduced or Prefiled,1/4/2023,...,,,,,,,,,,


In [23]:
file_name_full = f'ed_bills_v3_w_join{str(date.today()).replace('-','_')}.xlsx'
file_name_full_csv = f'ed_bills_v3_w_join{str(date.today()).replace('-','_')}.csv'
full_df.to_excel(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\compiled scores\2025\{file_name_full}',sheet_name=f'{file_name_full.replace(".xlsx", "")}', index=False)
full_df.to_csv(fr'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\compiled scores\2025\{file_name_full_csv}', index=False)


# full_df.to_excel(r'C:\Users\clutz\THE HUNT INSTITUTE\The Hunt Institute Team Site - Documents\Development (formerly Grants Management)\!Administrative\Christian\Legislators Data\leg_data_update_10_2024\build files\bills\ed_bills_v3_w_join.xlsx', sheet_name=f'ed_bills',index = False)

In [None]:
ed_bills.head()

# END


## Tagging for Program and Content Teams



In [None]:
#[Bb]lack

count = 0
for i,b in enumerate(ed_bills['title']):
    if re.search(r'[Dd]ay\s?[Cc]are', str(b)):
    # if re.search(r'[Ee]arly [Cc]h(i)?ld', str(b)) and len(str(b)) < 100:
        # if re.search(r'[Ee]arly', str(b)):
            print(str(b))
            print('###################')
            # wrapped_text = textwrap.fill(ed_bills.loc[i,'title'], width=100)
            # print(wrapped_text)
            # print('###################')
            # print('\n')
            count += 1

print(count)

3


In [None]:

ed_bills.loc[:,"sponsors_helper"] = np.nan
for i,j in enumerate(ed_bills['sponsors']):
    
    # print(str(j))
    

    if isinstance(j,float):
        continue
    sponsors = str(j).split(',')
    s_list = []
    for s in sponsors:
        # display_markdown(f'{s}', raw=True)
        try:
            lname_identifier = re.findall(r'\w+\s\([RD]-[A-Z]{2}-\d+\)', str(s))[0]
        except:
            continue
            # print('no last name')
            # print(str(s))
            # # print(type(j))
        
        
        house_or_senate = re.findall(r'[Ss]en\.|[Rr]ep\.|[Dd]el\.',str(s))
        # print(house_or_senate)                

        if len(house_or_senate) == 0:
            if re.search(r'[Ss]peaker', str(s)):
                chamber = 'House'
            else:
                print('not speaker but something else')
                print(s)

        else:
            if house_or_senate[0] == "Sen.":
                chamber = "Senate"
                # print("found a senate")
            elif house_or_senate[0] == "Rep." or house_or_senate[0] == "Del.":
                chamber = "House"

        # print(chamber)
        state = re.findall(r'-[A-Z]{2}-',str(lname_identifier))[0]
        state = state.strip('-')
        # print(lname_identifier)
        # print(str(state))
        
        district = re.findall(r'-\d+', str(lname_identifier))[0]
        district = district.lstrip('-').strip().lstrip('0')
        name = re.findall(r'^\w+', str(lname_identifier))[0]
        # print(str(district))
        if 'ND' in str(state):
            helper = state +'-'+ chamber + "-" + district +'-' + name
        elif "CT" in str(state) and chamber == "Senate":
            helper = state +'-'+ chamber + "-S" + district

        else:
            helper = state +'-'+ chamber + "-" + district
        s_list.append(helper)

    sponsor_ext = "|".join(s_list)
    # try:
    #     print(sponsor_ext)
    # except:
    #     display_markdown(f'{s}', raw=True)


    ed_bills.loc[i, 'sponsors_helper'] = str(sponsor_ext)


    

In [97]:
first_column = ed_bills.pop('sponsors_helper') 
ed_bills.insert(0, 'sponsors_helper', first_column) 
ed_bills.reset_index(inplace=True, drop=True)



In [None]:
#This is used to check if the values match the compiled_info csv

helper_values = []
for i,j in enumerate(ed_bills['sponsors_helper']):
    helper_ls = str(j).split('|')
    helper_values.extend(helper_ls)



helper_values = list(set(helper_values))
df = pd.DataFrame({"helper":helper_values})
df.to_excel(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\quorum\bill data downloads\exports\unique_helpers.xlsx', index=False)

In [None]:
#taking bill data and providing cumulative totals
sponsors_dfs = []
for i,j in enumerate(ed_bills['sponsors_helper']):
    sponsor_list = []
    bill_label_list = []
    
    sponsors = str(j).split('|')
    bill_number = ed_bills.loc[i,'bill_label']
    n = len(sponsors)
    print(bill_number)

    sponsor_list.extend(sponsors)
    bill_label_list.extend([str(bill_number)]*n)

    df = pd.DataFrame({'sponsors':sponsor_list, "bill_labels": bill_label_list})
    print(df.to_string())
    sponsors_dfs.append(df)

bills_and_sponsors = pd.concat(sponsors_dfs)
bills_and_sponsors.reset_index(inplace=True, drop=True)

In [None]:
#grouping data together and getting list of events per legislator
grouped_df = bills_and_sponsors.groupby('sponsors')#['bill_labels']#.apply('|'.join).reset_index()


# agg({
#     'bills': lambda x: 
#         f"{sc} ({ac})" if not pd.isna(ac) else f"{sc}"
#         for sc, ac in zip(thi_states_df.loc[x.index, 'event name'], thi_states_df.loc[x.index, 'role'])),

# }).reset_index()
# # grouped_df.reset_index()
# grouped_df.rename(columns={'event name': 'events'}, inplace=True)

: 

In [None]:

bills_clean_up = non_ed_bills.loc[non_ed_bills['bill_sum'].notna() & (non_ed_bills['bill_sum'].str.contains(r'[Hh]igher ([Ee]d(ucation)?)?\s?([Ll]earning)?'))]

# for bill in non_ed_bills['title']:
#     print('##############')
#     print(bill)


In [None]:

# Iterate through each row in the 'bill_sum' column
for text in all_bills['title'].dropna():
    # Find all matches with 5 words of context before and after
    matches = re.findall(r'(?:\b\w+\b\s){0,10}\b[Ee]ducation\b(?:\s\b\w+\b){0,10}', text)
    matches_v2 = re.findall(r'.*[Ee]ducation.*|.*[Cc]hild.*|.*[Ss]chool.*|.*[Cc]harter.*|.*[Mm]ath.*|.*[Rr]ead.*|.*[Tt]each.*|.*[Pp]arent.*|.*[Kk]id.*|.*[Ss]tudent.*|.*[Cc]ollege.*|.*[Uu]niversit.*|.*[Tt]uition.*', str(text))


    
    # Print matches if found
    print('####################')
    for match in matches_v2:
        
        print("****")
        print(str(match))
        print('\n')






In [None]:
ec_subs = ['child\s{0,1}care', 'early\s{0,1}childhood', 'preschool']
ec_pat = "|".join(ec_subs)
print(ec_pat)

%%

In [None]:
print(all_bills.columns)
for bill in all_bills['bill_sum']:
    if re.search(f'{ec_pat}', str(bill).lower()):
        matches = re.findall(ec_pat, str(bill).lower())
        print('############################')
        print('***********')
        print(*matches)
        print('***********')
        print(bill)
        print('\n')
# %%

In [None]:
ec_ed_bills = all_bills[all_bills.subjects.str.contains(ec_pat, regex = True, case=False)]

In [None]:
ec_ed_bills.reset_index(inplace=True, drop=True)

% Higher Ed

In [None]:
he_keywords = [
    "post-secondary transition",
    "equity gaps",
    "college-going rates",
    "workforce readiness",
    "certificate programs",
    "wraparound services",
    "stackable credentials",
    "student persistence",
    "retention strategies",
    "lifelong learning",
    "postsecondary barriers",
    "alternative pathways",
    "higher education institutions",
    "higher education",
    "legislative support for education",
    "student success metrics",
    "high quality credential",
    "vocational training",
    "attainment",
    "persistence",
    "resistance",
    "graduation"
]

In [None]:
he_pat = r'\b(' + '|'.join(map(re.escape, he_keywords)) + r')\b'

he_pat = "|".join(he_keywords)

In [None]:
print(he_pat)

In [None]:
all_bills['bill_sum'].fillna('nan',inplace=True)

In [None]:
he_ed_bills = all_bills[all_bills.bill_sum.str.contains(he_pat, regex = True, case=False)]
he_ed_bills.reset_index(inplace=True, drop=True)

In [None]:
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\bill_data\Higher Ed')
he_ed_bills.to_csv('higher_ed_bills.csv', index=False)
# %%
# print(all_bills.columns)
for i,bill in enumerate(he_ed_bills['bill_sum']):
    # matches = re.findall(he_pat, str(bill).lower())
    matches = re.findall(r'graduation', str(bill).lower())
    if matches:
        grad_match = re.findall(r'higher ed|post-{0,1}secondary', str(bill).lower())
        if grad_match:
            continue
        
        else:
            print('############################')
            print(he_ed_bills.loc[i,'state'])
            print(he_ed_bills.loc[i,'bill'])
            print('############################')
            print('***********')
            print(*grad_match)
            print('***********')
            print(bill)
            print('\n')
    
        print('############################')
        print(he_ed_bills.loc[i,'bill'])
        print('############################')
        print('***********')
        print(*matches)
        print('***********')
        print(bill)
        print('\n')
# %%