# Full-text bill search

Analyzing the introduced versions of bills.

In [31]:
import glob
import os
import json

import pandas as pd

In [32]:
# Collect the path of every bill JSON file under ../data/bills.
# glob.glob returns matches in arbitrary, filesystem-dependent order, so the
# list is sorted to keep the downstream row order (and the DataFrame index)
# reproducible from one run or machine to the next.
files_bills = sorted(
    glob.glob(
        os.path.join(
            '..',
            'data',
            'bills',
            '*.json'
        )
    )
)

In [33]:
# Accumulator for the per-bill record dicts collected from the JSON files.
data_for_df = list()

In [34]:
# Build one record per bill file, carrying the text of its 'Introduced' version.
# The accumulator is reset here so that re-running this cell rebuilds the list
# instead of appending a second copy of every bill.
data_for_df = []

for f in files_bills:
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default locale encoding (the bill files are expected to be UTF-8 JSON).
    with open(f, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    bill_id = data.get('bill_id')

    # Files with a missing or empty bill_id are skipped entirely.
    if not bill_id:
        continue

    d = {
        'session_id': data['session_id'],
        'bill_id': bill_id,
        'bill_type': data['bill_type'],
        'bill_number': data['bill_number'],
        'bill_title': data['bill_title']
    }

    # Copy text and date from the version labelled 'Introduced'. If several
    # versions carry that label the last one wins; if none does, the record
    # simply has no 'bill_text' / 'bill_text_date' keys.
    for version in data['bill_versions']:
        if version['bill_version'] == 'Introduced':
            d['bill_text'] = version['bill_text']
            d['bill_text_date'] = version['bill_version_date']

    data_for_df.append(d)

In [35]:
# Build the frame from the collected records, parse the introduced-version
# dates, then order the rows newest first.
# The dates are parsed before sorting so the order is chronological rather than
# the lexical order of the raw date strings. from_records is called on the
# class itself; no throwaway empty DataFrame is needed to reach it.
df = (
    pd.DataFrame.from_records(data_for_df)
    .assign(bill_text_date=lambda frame: pd.to_datetime(frame['bill_text_date']))
    .sort_values('bill_text_date', ascending=False)
)

In [36]:
# Replace the bill_text_date column with its pandas datetime equivalent.
df['bill_text_date'] = df['bill_text_date'].pipe(pd.to_datetime)

In [37]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,session_id,bill_id,bill_type,bill_number,bill_title,bill_text,bill_text_date
12602,68,24341,Senate Commemoration,811,Celebrating the life and accomplishments of Ra...,"A LEGISLATIVE COMMEMORATION, Celebrating the l...",2023-02-14 10:36:29.520000-06:00
12727,68,24357,House Commemoration,8010,Celebrating the LGBTQ+ and Two Spirit communit...,"A LEGISLATIVE COMMEMORATION, Celebrating the L...",2023-02-13 10:03:19.370000-06:00
15115,68,24356,Senate Commemoration,810,Celebrating the LGBTQ+ and Two Spirit communit...,"A LEGISLATIVE COMMEMORATION, Celebrating the L...",2023-02-13 08:06:21.700000-06:00
14231,68,24209,Senate Commemoration,809,Commending and honoring those members of the U...,"A LEGISLATIVE COMMEMORATION, Commending and ho...",2023-02-12 10:45:21.463000-06:00
1207,68,24359,House Commemoration,8009,Honoring Jackie Kocak for receiving the Direct...,"A LEGISLATIVE COMMEMORATION, Honoring Jackie K...",2023-02-09 09:40:12.903000-06:00


In [38]:
# Phrase to look for in the text of each introduced bill.
phrase_search = 'gender'

# Case-insensitive, literal substring match.
# regex=False keeps phrases that contain regex metacharacters (e.g. '+', '(',
# '$') from being interpreted as patterns; na=False treats bills without any
# introduced text as non-matching instead of propagating NaN into the mask.
df[df['bill_text'].str.contains(phrase_search, case=False, regex=False, na=False)]

Unnamed: 0,session_id,bill_id,bill_type,bill_number,bill_title,bill_text,bill_text_date
12727,68,24357,House Commemoration,8010,Celebrating the LGBTQ+ and Two Spirit communit...,"A LEGISLATIVE COMMEMORATION, Celebrating the L...",2023-02-13 10:03:19.370000-06:00
15115,68,24356,Senate Commemoration,810,Celebrating the LGBTQ+ and Two Spirit communit...,"A LEGISLATIVE COMMEMORATION, Celebrating the L...",2023-02-13 08:06:21.700000-06:00
12896,68,23979,House Bill,1208,prevent government entities from entering cont...,An Act to prevent government entities from ent...,2023-01-31 22:37:34.267000-06:00
11919,68,23773,House Bill,1125,expand provisions regarding the protection of ...,An Act to expand provisions regarding the prot...,2023-01-24 14:08:24.330000-06:00
10519,68,23769,House Bill,1116,prohibit the use of state resources in hosting...,An Act to prohibit the use of state resources ...,2023-01-24 09:15:16.953000-06:00
...,...,...,...,...,...,...,...
4268,47,13313,Senate Bill,186,to ensure that enrollees receive health care s...,"FOR AN ACT ENTITLED, An Act to ensure that enr...",1998-01-23 00:00:00-06:00
14126,47,13245,House Bill,1092,to provide for the establishment of public cha...,"FOR AN ACT ENTITLED, An Act to provide for the...",1998-01-16 00:00:00-06:00
14813,47,13510,House Bill,1047,to revise certain provisions regarding the lic...,"FOR AN ACT ENTITLED, An Act to revise certain ...",1998-01-10 00:00:00-06:00
1061,45,12683,Senate Bill,208,to revise the requirements for individual and ...,"FOR AN ACT ENTITLED, An Act to revise the requ...",1997-01-29 00:00:00-06:00
