In [10]:
# import the following libraries for our web scraping project

import requests # to make https requests
from bs4 import BeautifulSoup # our web scraping library

import pandas as pd # for making dataframe later

In [2]:
# save the data from the website as a "soup" object

site = requests.get('https://translegislation.com/bills/2024/US') # gets the URL
html_code = site.content # saves the HTML code
soup = BeautifulSoup(html_code, 'lxml') # creates a soup object

In [7]:
# runs the loop on the bill cards
bill_cards = soup.find_all('div', class_ ='css-4rck61')

# creating empty lists to hold all of our data
titles = []
captions = []
categories = []
descriptions = []

# extracting the data from the bill cards
for item in bill_cards:
    title = item.h3.text
    category = item.find('span').text
    caption = item.h2.text
    if item.h2.text is not None:
        description = item.h2.text
    else:
        description = 'No bill description'
    
    # adding the items to the empty lists
    titles.append(title)
    categories.append(category)
    captions.append(caption)
    descriptions.append(description)
    # remember that "legiscan_links" is already saved as a list, so we don't have to create it here

In [8]:
urls = []
for item in bill_cards:
    extension = 'https://translegislation.com/' + item.a['href']
    urls.append(extension)

# making a soup object of *every* page that is linked
# this may take several seconds
soups = []
for item in urls:
    site = requests.get(item)
    html_code = site.content
    soup = BeautifulSoup(html_code, 'lxml')
    soups.append(soup)


legiscan_links = []
congress_links = []
for item in soups:
    # we are getting two links here, one to legiscan and one to the congress website
    links = item.find_all('a', class_='chakra-link css-oga2ct')
    anchor1 = links[0]['href'] # link to legiscan
    legiscan_links.append(anchor1)
    anchor2 = links[1]['href'] # link to congress
    congress_links.append(anchor2)

In [11]:
# creating a dataframe, with separate columns to hold each of our lists
df = pd.DataFrame(
    {'title': titles,
     'caption': captions,
     'category': categories,
     'description': descriptions,
     'url': urls,
     'legiscan': legiscan_links,
     'congress': congress_links
    })

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84 entries, 0 to 83
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        84 non-null     object
 1   caption      84 non-null     object
 2   category     84 non-null     object
 3   description  84 non-null     object
 4   url          84 non-null     object
 5   legiscan     84 non-null     object
 6   congress     84 non-null     object
dtypes: object(7)
memory usage: 4.7+ KB


In [14]:
df

Unnamed: 0,title,caption,category,description,url,legiscan,congress
0,US HB10075,Stopping the Mutilation of Children Act of 2024,HEALTHCARE,Stopping the Mutilation of Children Act of 2024,https://translegislation.com//bills/2024/US/HB...,https://legiscan.com/US/text/HB10075/id/3025659,https://www.congress.gov/bill/118th-congress/h...
1,US HB10186,Protecting Women’s Private Spaces Act,BIRTH CERTIFICATES,Protecting Women’s Private Spaces Act,https://translegislation.com//bills/2024/US/HB...,https://legiscan.com/US/text/HB10186/id/3029142,https://www.congress.gov/bill/118th-congress/h...
2,US HB1064,Ensuring Military Readiness Act of 2023,MILITARY,Ensuring Military Readiness Act of 2023,https://translegislation.com//bills/2024/US/HB...,https://legiscan.com/US/text/HB1064/id/2737306,https://www.congress.gov/bill/118th-congress/h...
3,US HB1112,Ensuring Military Readiness Act of 2023,MILITARY,Ensuring Military Readiness Act of 2023,https://translegislation.com//bills/2024/US/HB...,https://legiscan.com/US/text/HB1112/id/2742708,https://www.congress.gov/bill/118th-congress/h...
4,US HB1276,Protect Minors from Medical Malpractice Act of...,HEALTHCARE,Protect Minors from Medical Malpractice Act of...,https://translegislation.com//bills/2024/US/HB...,https://legiscan.com/US/text/HB1276/id/2755407,https://www.congress.gov/bill/118th-congress/h...
...,...,...,...,...,...,...,...
79,US SJR90,A joint resolution providing for congressional...,HEALTHCARE,A joint resolution providing for congressional...,https://translegislation.com//bills/2024/US/SJR90,https://legiscan.com/US/text/SJR90/id/3003899,https://www.congress.gov/bill/118th-congress/s...
80,US SJR96,A joint resolution providing for congressional...,EDUCATION,A joint resolution providing for congressional...,https://translegislation.com//bills/2024/US/SJR96,https://legiscan.com/US/text/SJR96/id/3009679,https://www.congress.gov/bill/118th-congress/s...
81,US SR267,A resolution supporting the designation of the...,SPORTS,A resolution supporting the designation of the...,https://translegislation.com//bills/2024/US/SR267,https://legiscan.com/US/text/SR267/id/2831179,https://www.congress.gov/bill/118th-congress/s...
82,US SR53,A resolution establishing a Women's Bill of Ri...,CIVIL RIGHTS,A resolution establishing a Women's Bill of Ri...,https://translegislation.com//bills/2024/US/SR53,https://legiscan.com/US/text/SR53/id/2696872,https://www.congress.gov/bill/118th-congress/s...


In [15]:
df['description']

0       Stopping the Mutilation of Children Act of 2024
1                 Protecting Women’s Private Spaces Act
2               Ensuring Military Readiness Act of 2023
3               Ensuring Military Readiness Act of 2023
4     Protect Minors from Medical Malpractice Act of...
                            ...                        
79    A joint resolution providing for congressional...
80    A joint resolution providing for congressional...
81    A resolution supporting the designation of the...
82    A resolution establishing a Women's Bill of Ri...
83    A resolution designating October 10, 2024, as ...
Name: description, Length: 84, dtype: object

In [16]:
df['description'].value_counts()

description
Ensuring Military Readiness Act of 2023                                                   3
Protect Children’s Innocence Act                                                          3
End Taxpayer Funding of Gender Experimentation Act of 2023                                2
SAFE Home Act Sensible Adoption For Every Home Act                                        2
Preventing Violence Against Female Inmates Act of 2023                                    2
                                                                                         ..
Protecting Women’s Private Spaces Act                                                     1
Eliminate DEI in Colleges Act                                                             1
Department of Homeland Security Appropriations Act, 2025                                  1
Department of State, Foreign Operations, and Related Programs Appropriations Act, 2025    1
A resolution designating October 10, 2024, as "American Girls in Spo

In [18]:
df.to_csv('../bill_data/transtracker_federal_bills.csv')