# US Bills Scraping

In [None]:
!pip install striprtf
!pip install openpyxl
!pip install selenium webdriver-manager

In [156]:
import os
import glob
import re
import openpyxl
import requests 
import time
import pandas as pd
import numpy as np 
from functools import reduce
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService 
from webdriver_manager.chrome import ChromeDriverManager 
from urllib.parse import urljoin
from striprtf.striprtf import rtf_to_text

### Search results

In [218]:
def get_urls(search_link, load_wait=3, scroll_pause_time=3):
    """Return the result links found on one scroll-to-load search page.

    A Chrome session is opened on ``search_link`` and the page is scrolled one
    screen height at a time until the next scroll target lies beyond the
    document height. The first ``<a>`` inside every element whose class is
    ``"col-xs-10 col-md-11"`` is then collected.

    Parameters
    ----------
    search_link : str
        URL of the search-results page; also used as the base for relative links.
    load_wait : float, optional
        Seconds to wait after the initial page load (default 3).
    scroll_pause_time : float, optional
        Seconds to wait after each scroll step (default 3).

    Returns
    -------
    list of str
        Absolute URLs in page order (duplicates are not removed).
    """
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
    try:
        driver.get(search_link)
        time.sleep(load_wait)  # time for web page loading
        # get the screen height of the web
        screen_height = driver.execute_script("return window.screen.height;")
        i = 1

        while True:
            # scroll one screen height each time
            driver.execute_script("window.scrollTo(0, {screen_height}*{i});".format(screen_height=screen_height, i=i))
            i += 1
            time.sleep(scroll_pause_time)
            # the scroll height can grow after each scroll, so re-read it every time
            scroll_height = driver.execute_script("return document.body.scrollHeight;")
            # stop once the next scroll target is past the end of the document
            if screen_height * i > scroll_height:
                break

        page_source = driver.page_source
    finally:
        # Always close the browser, even when loading or scrolling fails;
        # otherwise every call leaves a Chrome process behind.
        driver.quit()

    soup = BeautifulSoup(page_source, "html.parser")

    urls = []
    for parent in soup.find_all(class_="col-xs-10 col-md-11"):
        a_tag = parent.find("a")
        # Skip result blocks that carry no usable link instead of crashing.
        if a_tag is None or not a_tag.get("href"):
            continue
        urls.append(urljoin(search_link, a_tag["href"]))

    return urls


In [219]:
def get_url_set(links):
    """Collect the result URLs of several search pages into one list.

    Calls ``get_urls`` for every entry of ``links``, prints how many URLs each
    page yielded, and returns all URLs concatenated in input order.
    Duplicates across pages are kept.
    """
    url_set = []
    for link in links:
        # Named page_urls (not "set") so the builtin set type is not shadowed.
        page_urls = get_urls(link)
        url_set.extend(page_urls)
        print(str(len(page_urls)) + " links in page " + str(link))
    return url_set

In [227]:
# Search terms (spaces already written as "+") and the Congress numbers to query.
keywords = ["content+moderation", "moderation", "social+media", "platform"]
congress = [114, 115, 116, 117, 118]

# One search URL per (keyword, congress) pair: all congresses for the first
# keyword, then all congresses for the second keyword, and so on.
search_links = [
    "https://www.govtrack.us/congress/bills/browse?text=platform#text=" + str(term) + "&congress=" + str(session) + "&terms=6293&sort=relevance"
    for term in keywords
    for session in congress
]

In [229]:
search_links

['https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=114&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=115&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=116&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=117&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=118&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=moderation&congress=114&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=moderation&congress=115&terms=6293&sort=relevance',
 'https://www.govtrack.us/congress/bills/browse?text=platform#text=moderation&congress=116&terms=6293&sort=relevance',
 'https:

In [230]:
# Collect the result links of every search page; get_url_set prints the
# number of links found per page as it goes.
urls = get_url_set(search_links)

54 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=114&terms=6293&sort=relevance
78 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=115&terms=6293&sort=relevance
149 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=116&terms=6293&sort=relevance
163 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=117&terms=6293&sort=relevance
3 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=content+moderation&congress=118&terms=6293&sort=relevance
2 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=moderation&congress=114&terms=6293&sort=relevance
2 links in page https://www.govtrack.us/congress/bills/browse?text=platform#text=moderation&congress=115&terms=6293&sort=relevance
9 links in page https://www.govtrack.

In [231]:
len(urls)

975

In [252]:
# unique links
len(np.unique(np.array(urls)))

628

In [256]:
# Drop duplicate links; the surviving links are stored in sorted order.
urls = sorted(set(urls))

In [257]:
len(urls)

628

### Main info

In [88]:
# Read the manually maintained links file (RTF) and convert it to plain text.
with open("Links.rtf") as infile:
    content = infile.read()
    links = rtf_to_text(content)

links = links.split('\n')

# Each link is written as ("...") in the file. The pattern is a raw string
# because "\(" inside a normal string literal is an invalid escape sequence.
quoted_link_pattern = re.compile(r'\(("[^)]*")\)')

links_all = []
for link in links:
    links_all += quoted_link_pattern.findall(link)

# The captured text still includes the double quotes; remove them.
links_all = [s.replace('"', "") for s in links_all]

In [278]:
def parse(links, timeout=30):
    """Scrape the overview fields of each bill page into a DataFrame.

    Parameters
    ----------
    links : iterable of str
        Bill page URLs.
    timeout : float, optional
        Seconds to wait for each HTTP response (passed to ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        One row per link, in input order, with the columns ``code``,
        ``title``, ``date``, ``status``, ``sponsor``, ``designation``,
        ``summary`` and ``link``. When a page cannot be fetched or any field
        cannot be located, the error is printed and every field of that row
        except ``link`` is NaN. An empty ``links`` yields an empty frame that
        still has these columns.
    """
    columns = ["code", "title", "date", "status", "sponsor", "designation", "summary", "link"]
    data = []

    for counter, link in enumerate(links, start=1):
        # Start from an all-NaN row; it is only filled if every field is found.
        row = dict.fromkeys(columns, np.nan)
        row["link"] = str(link)

        try:
            # The request lives inside the try block so that a single network
            # failure does not abort the whole run.
            r = requests.get(link, timeout=timeout)
            r.raise_for_status()
            # Explicit parser: the result no longer depends on which optional
            # parsers happen to be installed.
            link_soup = BeautifulSoup(r.content, "html.parser")
            overview = link_soup.select("div.col-sm-9.col-md-10")

            # The dict is built completely before update() runs, so a missing
            # element leaves the row untouched (all NaN).
            row.update({
                "code": link_soup.select("li.active")[0].text,
                "title": link_soup.select("div.h1-multiline")[0].text,
                "date": overview[0].text,
                "status": overview[1].find('strong').text,
                "sponsor": link_soup.select("a.name")[0].text,
                "designation": link_soup.select("div.col-xs-7.col-sm-4")[0].find('p').text,
                "summary": link_soup.select("div.col-sm-8.col-sm-pull-4")[0].find('p', style="margin-bottom: 0").text,
            })
        except Exception as e:
            # Best effort: keep the placeholder row and report the problem.
            print(f"Error on link {link}: {e}")

        data.append(row)

        if counter % 20 == 0:
            print("No. links scraped: " + str(counter))

    return pd.DataFrame(data, columns=columns)

In [279]:
# Scrape the overview fields of every collected bill link; parse() prints a
# progress line after every 20 links.
df = parse(urls)

No. links scraped: 20
No. links scraped: 40
No. links scraped: 60
No. links scraped: 80
No. links scraped: 100
No. links scraped: 120
No. links scraped: 140
No. links scraped: 160
No. links scraped: 180
No. links scraped: 200
No. links scraped: 220
No. links scraped: 240
No. links scraped: 260
No. links scraped: 280
No. links scraped: 300
No. links scraped: 320
No. links scraped: 340
No. links scraped: 360
No. links scraped: 380
No. links scraped: 400
No. links scraped: 420
No. links scraped: 440
No. links scraped: 460
No. links scraped: 480
No. links scraped: 500
No. links scraped: 520
No. links scraped: 540
No. links scraped: 560
No. links scraped: 580
No. links scraped: 600
No. links scraped: 620


In [280]:
len(df)

628

In [281]:
# Strip surrounding whitespace from every string cell; non-string cells
# (e.g. NaN) are passed through unchanged. Column-wise Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
df_main = df.apply(
    lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
)

In [282]:
df_main

Unnamed: 0,code,title,date,status,sponsor,affiliation,summary,link
0,H.R. 104 (114th),H.R. 104 (114th): Cyber Privacy Fortification ...,"Jan 6, 2015\n\t\t\t\t\n\t\t\t\t114th Congress ...",Died in a previous Congress,John Conyers,Sponsor. Representative for Michigan's 13th co...,"To protect cyber privacy, and for other purposes.",https://www.govtrack.us/congress/bills/114/hr104
1,H.R. 1426 (114th),H.R. 1426 (114th): Public Access to Public Sci...,"Mar 18, 2015\n\t\t\t\t\n\t\t\t\t114th Congress...",Died in a previous Congress,James Sensenbrenner,Sponsor. Representative for Wisconsin's 5th co...,To ensure public access to published materials...,https://www.govtrack.us/congress/bills/114/hr1426
2,H.R. 1561 (114th),H.R. 1561 (114th): Weather Research and Foreca...,"Mar 24, 2015\n\t\t\t\t\n\t\t\t\t114th Congress...",Died in a previous Congress,Frank Lucas,Sponsor. Representative for Oklahoma's 3rd con...,To improve the National Oceanic and Atmospheri...,https://www.govtrack.us/congress/bills/114/hr1561
3,H.R. 1806 (114th),H.R. 1806 (114th): America COMPETES Reauthoriz...,"Apr 15, 2015\n\t\t\t\t\n\t\t\t\t114th Congress...",Died in a previous Congress,Lamar Smith,Sponsor. Representative for Texas's 21st congr...,To provide for technological innovation throug...,https://www.govtrack.us/congress/bills/114/hr1806
4,H.R. 1898 (114th),H.R. 1898 (114th): America Competes Reauthoriz...,"Apr 21, 2015\n\t\t\t\t\n\t\t\t\t114th Congress...",Died in a previous Congress,Eddie Bernice Johnson,Sponsor. Representative for Texas's 30th congr...,To provide for investment in innovation throug...,https://www.govtrack.us/congress/bills/114/hr1898
...,...,...,...,...,...,...,...,...
623,S.Res. 73 (117th),S.Res. 73 (117th): A resolution reaffirming th...,"Feb 24, 2021\n\t\t\t\t\n\t\t\t\t117th Congress...",Died in a previous Congress,Jacky Rosen,Sponsor. Junior Senator for Nevada. Democrat.,GovTrack automatically collects legislative in...,https://www.govtrack.us/congress/bills/117/sres73
624,S.Res. 768 (117th),S.Res. 768 (117th): A resolution recognizing a...,"Sep 15, 2022\n\t\t\t\t\n\t\t\t\t117th Congress...",Agreed To (Simple Resolution),Michael “Mike” Crapo,Sponsor. Senator for Idaho. Republican.,GovTrack automatically collects legislative in...,https://www.govtrack.us/congress/bills/117/sre...
625,H.R. 573 (118th),H.R. 573: CASE–IT Act,"Jan 26, 2023\n\t\t\t\t\n\t\t\t\t118th Congress...",Introduced,Gregory Steube,Sponsor. Representative for Florida's 17th con...,To amend section 230 of the Communications Act...,https://www.govtrack.us/congress/bills/118/hr573
626,S. 147 (118th),"S. 147: See Something, Say Something Online Ac...","Jan 30, 2023\n\t\t\t\t\n\t\t\t\t118th Congress...",Introduced,Joe Manchin,Sponsor. Senior Senator for West Virginia. Dem...,A bill to require reporting of suspicious tran...,https://www.govtrack.us/congress/bills/118/s147


In [283]:
# Keep only the date itself (e.g. "Jan 6, 2015") from the scraped date cell.
df_main['date'] = df_main['date'].str.extract(r'([a-zA-Z]{3,}\s*[0-9]{1,}\s*[,\s]\s*[0-9]{2,})', expand=False)

# Remove the bill code prefix ("H.R. 104 (114th): "). Only the text up to the
# FIRST colon is dropped, so a title that itself contains a colon stays whole.
df_main['title'] = df_main['title'].str.replace(r'^[^:]*:\s*', '', regex=True)

# The scraped designation reads "Sponsor. <office>. <party>."; the party is the
# last sentence. At least one earlier period is required, so this must run
# while the designation column still holds the raw scraped text.
df_main['affiliation'] = df_main['designation'].str.extract(r'\.\s*([^.]+?)\s*\.?\s*$', expand=False)

In [292]:
#df_main['# co-sponsor/s'] = ['None' if x == 'Congress.gov' else x for x in df_main['# co-sponsor/s']] 
# Reduce "Sponsor. <office>. <party>." to "<office>":
#   1. drop the leading "Sponsor." label,
#   2. drop the trailing party sentence.
# The previous `x.replace(...) and re.sub(...)` expression threw the result of
# the first step away. The .str accessor also leaves NaN rows untouched, and
# re-running the cell does not change an already cleaned value.
df_main['designation'] = (
    df_main['designation']
    .str.replace(r'^\s*Sponsor\.\s*', '', regex=True)
    .str.replace(r'\.[^.]*\.?\s*$', '', regex=True)
    .str.strip()
)


In [299]:
df_main

Unnamed: 0,code,title,date,status,sponsor,designation,summary,link,affiliation
0,H.R. 104 (114th),Cyber Privacy Fortification Act of 2015,"Jan 6, 2015",Died in a previous Congress,John Conyers,Representative for Michigan's 13th congression...,"To protect cyber privacy, and for other purposes.",https://www.govtrack.us/congress/bills/114/hr104,Democrat
1,H.R. 1426 (114th),Public Access to Public Science Act,"Mar 18, 2015",Died in a previous Congress,James Sensenbrenner,Representative for Wisconsin's 5th congression...,To ensure public access to published materials...,https://www.govtrack.us/congress/bills/114/hr1426,Republican
2,H.R. 1561 (114th),Weather Research and Forecasting Innovation A...,"Mar 24, 2015",Died in a previous Congress,Frank Lucas,Representative for Oklahoma's 3rd congressiona...,To improve the National Oceanic and Atmospheri...,https://www.govtrack.us/congress/bills/114/hr1561,Republican
3,H.R. 1806 (114th),America COMPETES Reauthorization Act of 2015,"Apr 15, 2015",Died in a previous Congress,Lamar Smith,Representative for Texas's 21st congressional ...,To provide for technological innovation throug...,https://www.govtrack.us/congress/bills/114/hr1806,Republican
4,H.R. 1898 (114th),America Competes Reauthorization Act of 2015,"Apr 21, 2015",Died in a previous Congress,Eddie Bernice Johnson,Representative for Texas's 30th congressional ...,To provide for investment in innovation throug...,https://www.govtrack.us/congress/bills/114/hr1898,Democrat
...,...,...,...,...,...,...,...,...,...
623,S.Res. 73 (117th),A resolution reaffirming the commitment to me...,"Feb 24, 2021",Died in a previous Congress,Jacky Rosen,Junior Senator for Nevada,GovTrack automatically collects legislative in...,https://www.govtrack.us/congress/bills/117/sres73,Democrat
624,S.Res. 768 (117th),A resolution recognizing and supporting the g...,"Sep 15, 2022",Agreed To (Simple Resolution),Michael “Mike” Crapo,Senator for Idaho,GovTrack automatically collects legislative in...,https://www.govtrack.us/congress/bills/117/sre...,Republican
625,H.R. 573 (118th),CASE–IT Act,"Jan 26, 2023",Introduced,Gregory Steube,Representative for Florida's 17th congressiona...,To amend section 230 of the Communications Act...,https://www.govtrack.us/congress/bills/118/hr573,Republican
626,S. 147 (118th),"See Something, Say Something Online Act of 2023","Jan 30, 2023",Introduced,Joe Manchin,Senior Senator for West Virginia,A bill to require reporting of suspicious tran...,https://www.govtrack.us/congress/bills/118/s147,Democrat


In [121]:
# Save the cleaned main table as an Excel workbook in the working directory.
df_main.to_excel('US Bills-01.xlsx')

### Co-sponsors

In [286]:
len(urls)

628

In [287]:
# Build one co-sponsor page URL per bill by appending "/cosponsors" to its link.
sponsors_links = [x + "/cosponsors" for x in urls]

In [289]:
# "Surname, Given names" as it appears before the bracket. Letters may be
# non-ASCII; surnames may contain spaces, hyphens or apostrophes. Matching
# stops at the first other character (e.g. "." or "[").
_COSPONSOR_NAME_RE = re.compile(r"[^\W\d_](?:[^\W\d_]|['’\- ])*,\s(?:[^\W\d_]|['’\-\s])+")
# Affiliation code between square brackets, e.g. "D-MN".
_COSPONSOR_AFFILIATION_RE = re.compile(r'\[(.+?)\]')


def _split_cosponsor_cell(cell_text):
    """Split a cell such as 'Klobuchar, Amy [D-MN]' into (name, affiliation)."""
    name_match = _COSPONSOR_NAME_RE.search(cell_text)
    affiliation_match = _COSPONSOR_AFFILIATION_RE.search(cell_text)
    # Fall back to the text in front of the bracket if the name pattern fails.
    name = name_match.group(0).strip() if name_match else cell_text.split('[')[0].strip()
    affiliation = affiliation_match.group(1) if affiliation_match else ""
    return name, affiliation


def _extract_cosponsors(table):
    """Return (names, affiliations) of all rows that are not the primary sponsor."""
    names, affiliations = [], []
    # Not every parser/page provides an explicit <tbody>.
    body = table.tbody if table.tbody is not None else table
    for row in body.find_all('tr'):
        columns = row.find_all('td')
        # Skip header rows and rows that lack the two cells read below.
        if len(columns) < 2:
            continue
        if columns[1].text.strip() == "Primary Sponsor":
            continue
        name, affiliation = _split_cosponsor_cell(columns[0].text.strip())
        names.append(name)
        affiliations.append(affiliation)
    return names, affiliations


def parse_sponsors(links, timeout=30):
    """Scrape the co-sponsors listed on each ``.../cosponsors`` page.

    Parameters
    ----------
    links : iterable of str
        Co-sponsor page URLs (bill link + "/cosponsors").
    timeout : float, optional
        Seconds to wait for each HTTP response (passed to ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        One row per link, in input order, with the columns ``# cosponsor/s``
        (count), ``cosponsor/s`` (names joined by "; "),
        ``cosponsor/s affiliation`` (bracket codes joined by "; ") and ``link``
        (the input link with "/cosponsors" removed). If a page cannot be
        fetched or has no table with id ``cosponsors``, the error is printed
        and the three data fields of that row are NaN. Results are never
        carried over from the previous link. An empty ``links`` yields an
        empty frame that still has these columns.
    """
    columns = ["# cosponsor/s", "cosponsor/s", "cosponsor/s affiliation", "link"]
    sponsor_data = []

    for counter, link in enumerate(links, start=1):
        # Fresh placeholder per link so nothing leaks in from earlier pages.
        row = dict.fromkeys(columns, np.nan)
        row["link"] = str(link).replace("/cosponsors", "")

        try:
            r = requests.get(link, timeout=timeout)
            r.raise_for_status()
            link_soup = BeautifulSoup(r.content, "html.parser")

            table = link_soup.find('table', id='cosponsors')
            if table is None:
                raise ValueError("no table with id 'cosponsors' found")

            names, affiliations = _extract_cosponsors(table)
            row.update({
                "# cosponsor/s": len(names),
                "cosponsor/s": '; '.join(names),
                "cosponsor/s affiliation": '; '.join(affiliations),
            })
        except Exception as e:
            # Best effort: keep the placeholder row and report the problem.
            print(f"Error on link {link}: {e}")

        sponsor_data.append(row)

        if counter % 20 == 0:
            print("No. links scraped: " + str(counter))

    return pd.DataFrame(sponsor_data, columns=columns)

In [290]:
# Scrape the co-sponsor page of every bill; parse_sponsors() prints a progress
# line after every 20 links.
sponsors = parse_sponsors(sponsors_links)

No. links scraped: 20
No. links scraped: 40
No. links scraped: 60
No. links scraped: 80
No. links scraped: 100
No. links scraped: 120
No. links scraped: 140
No. links scraped: 160
No. links scraped: 180
No. links scraped: 200
No. links scraped: 220
No. links scraped: 240
No. links scraped: 260
No. links scraped: 280
No. links scraped: 300
No. links scraped: 320
No. links scraped: 340
No. links scraped: 360
No. links scraped: 380
No. links scraped: 400
No. links scraped: 420
No. links scraped: 440
No. links scraped: 460
No. links scraped: 480
No. links scraped: 500
No. links scraped: 520
No. links scraped: 540
No. links scraped: 560
No. links scraped: 580
No. links scraped: 600
No. links scraped: 620


In [291]:
sponsors

Unnamed: 0,# cosponsor/s,cosponsor/s,cosponsor/s affiliation,link
0,1,"Johnson, Henry C",D-GA4,https://www.govtrack.us/congress/bills/114/hr104
1,1,"Johnson, Eddie",D-TX30,https://www.govtrack.us/congress/bills/114/hr1426
2,6,"Bonamici, Suzanne; Bridenstine, Jim; Johnson, ...",D-OR1; R-OK1; D-TX30; R-CA48; R-TX21; R-UT2,https://www.govtrack.us/congress/bills/114/hr1561
3,10,"Lucas, Frank; Babin, Brian; Weber, Randy; Coms...",R-OK3; R-TX36; R-TX14; R-VA10; R-IL14; R-CA25;...,https://www.govtrack.us/congress/bills/114/hr1806
4,16,"Lofgren, Zoe; Bonamici, Suzanne; Takano, Mark;...",D-CA19; D-OR1; D-CA41; D-CA7; D-VA8; D-MA5; D-...,https://www.govtrack.us/congress/bills/114/hr1898
...,...,...,...,...
623,1,"Rubio, Marco",R-FL,https://www.govtrack.us/congress/bills/117/sres73
624,6,"Blumenthal, Richard; Feinstein, Dianne; Padill...",D-CT; D-CA; D-CA; R-ID; D-GA; D-RI,https://www.govtrack.us/congress/bills/117/sre...
625,0,,,https://www.govtrack.us/congress/bills/118/hr573
626,1,"Cornyn, John",R-TX,https://www.govtrack.us/congress/bills/118/s147


Sample sponsors dataframe

In [39]:
sample = "https://www.govtrack.us/congress/bills/117/s673/cosponsors"

In [40]:
# Download the sample co-sponsors page. The timeout keeps the cell from hanging
# forever, and the explicit parser makes the parse independent of which
# optional parsers are installed.
r = requests.get(sample, timeout=30)
s = BeautifulSoup(r.content, "html.parser")

In [6]:
for table in s.find_all('table'):
    print(table.get('id'))

cosponsors
None


In [7]:
table = s.find('table', id='cosponsors')

In [8]:
# Collect one {"name", "sponsorship"} record per table row that has <td> cells.
temp = []

for tr in table.tbody.find_all('tr'):
    cells = tr.find_all('td')
    if not cells:
        # Rows without <td> cells carry no data.
        continue
    # Only the first two cells are read; the third (committee) is not used.
    temp.append({"name": cells[0].text.strip(), "sponsorship": cells[1].text.strip()})

sponsor_df = pd.DataFrame.from_dict(temp)

In [None]:
temp = []

for row in table.tbody.find_all('tr'):
    # Find all data for each column
    columns = row.find_all('td')

    if(columns != []):
        name = columns[0].text.strip()
        sponsorship = columns[1].text.strip()
        #committee = columns[2].text.strip()

        temp.append({"name":name, "sponsorship":sponsorship})

sponsor_df = pd.DataFrame.from_dict(temp)

# Raw strings: "\[" and "\s" inside normal string literals are invalid escape
# sequences. The affiliation is the text between the square brackets; it is
# read before the name column is overwritten with the cleaned name.
sponsor_df['affiliation'] = [re.findall(r'(?<=\[).+?(?=\])', x)[0] for x in sponsor_df['name']]
sponsor_df['name'] = [re.findall(r'[A-Za-z]+,\s[A-Za-z\s]+', x)[0].strip() for x in sponsor_df['name']]

# Everyone except the primary sponsor counts as a co-sponsor; build the mask once.
is_cosponsor = sponsor_df['sponsorship'] != "Primary Sponsor"
co_sponsor_name = sponsor_df.loc[is_cosponsor, 'name']
co_sponsor_aff = sponsor_df.loc[is_cosponsor, 'affiliation']

cosponsors = '; '.join(co_sponsor_name)
cosponsors_aff = '; '.join(co_sponsor_aff)


In [10]:
sponsor_df

Unnamed: 0,name,sponsorship,affiliation
0,"Klobuchar, Amy",Primary Sponsor,D-MN
1,"Durbin, Richard","Aug 6, 2022",D-IL
2,"Graham, Lindsey","Mar 10, 2022",R-SC
3,"Booker, Cory",Original Cosponsor,D-NJ
4,"Kennedy, John Neely",Original Cosponsor,R-LA
5,"Whitehouse, Sheldon",Original Cosponsor,D-RI
6,"Feinstein, Dianne","Jun 24, 2021",D-CA
7,"Hirono, Mazie","Jul 13, 2022",D-HI
8,"Blumenthal, Richard","Aug 6, 2022",D-CT
9,"Paul, Rand",Original Cosponsor,R-KY


In [38]:
'; '.join(co_sponsor_name)

'Durbin, Richard; Graham, Lindsey; Booker, Cory; Kennedy, John Neely; Whitehouse, Sheldon; Feinstein, Dianne; Hirono, Mazie; Blumenthal, Richard; Paul, Rand; Collins, Susan; Lummis, Cynthia; Cassidy, Bill; Thune, John; Manchin, Joe; Wicker, Roger'

In [35]:
'; '.join(co_sponsor_aff)

'D-IL; R-SC; D-NJ; R-LA; D-RI; D-CA; D-HI; D-CT; R-KY; R-ME; R-WY; R-LA; R-SD; D-WV; R-MS'

### Committee

In [67]:
# Build one details page URL per bill by appending "/details" to its link.
comm_links = [x + "/details" for x in urls]

In [85]:
def parse_comms(links, timeout=30):
    """Scrape the committee shown on each ``.../details`` page.

    Parameters
    ----------
    links : iterable of str
        Details page URLs (bill link + "/details").
    timeout : float, optional
        Seconds to wait for each HTTP response (passed to ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        Columns ``committee`` and ``link`` (the input link with "/details"
        removed). Only pages on which a committee element was found produce a
        row. Pages that cannot be fetched are reported with a printed message
        and skipped. An empty result still has both columns.
    """
    columns = ["committee", "link"]
    comm_data = []

    for counter, link in enumerate(links, start=1):
        try:
            r = requests.get(link, timeout=timeout)
            r.raise_for_status()
            link_soup = BeautifulSoup(r.content, "html.parser")

            committee_divs = link_soup.find_all('div', style = 'line-height: 125%;')
            # A page without such an element simply contributes no row; this
            # is checked explicitly instead of hiding it behind a bare except.
            if committee_divs:
                comm_data.append({"committee": committee_divs[0].text,
                    "link": str(link).replace("/details", "")})
        except Exception as e:
            # Report real failures (network, HTTP status) rather than swallowing them.
            print(f"Error on link {link}: {e}")

        if counter % 20 == 0:
            print("No. links scraped: " + str(counter))

    return pd.DataFrame(comm_data, columns=columns)

In [86]:
# Scrape the details page of every bill; parse_comms() prints a progress line
# after every 20 links.
comms = parse_comms(comm_links)

No. links scraped: 20
No. links scraped: 40
No. links scraped: 60
No. links scraped: 80
No. links scraped: 100


In [106]:
comms

Unnamed: 0,committee,link
0,"Senate Commerce, Science, and Transportation",https://www.govtrack.us/congress/bills/117/s4201
1,"Senate Commerce, Science, and Transportation",https://www.govtrack.us/congress/bills/117/s1896
2,House Energy and Commerce,https://www.govtrack.us/congress/bills/117/hr3611
3,House Energy and Commerce,https://www.govtrack.us/congress/bills/117/hr83
4,"Senate Commerce, Science, and Transportation",https://www.govtrack.us/congress/bills/117/s3663
...,...,...
104,"House Science, Space, and Technology",https://www.govtrack.us/congress/bills/114/hr6531
105,House Education and the Workforce,https://www.govtrack.us/congress/bills/114/hr1806
106,"House Science, Space, and Technology",https://www.govtrack.us/congress/bills/114/hr2039
107,"House Science, Space, and Technology",https://www.govtrack.us/congress/bills/114/hr1561


### Merge

In [122]:
# Tables to combine, all keyed by the bill link.
dfs = [df_main, sponsors, comms]

# Outer-join the tables one after another on "link" so that a bill missing
# from any table is still kept, then replace every missing value with a
# single space.
df_merged = dfs[0]
for frame in dfs[1:]:
    df_merged = pd.merge(df_merged, frame, on=['link'], how='outer')
df_merged = df_merged.fillna(' ')

In [123]:
df_merged

Unnamed: 0,code,title,date,status,sponsor,affiliation,# co-sponsor/s,summary,link,cosponsors,cosponsors_affiliation,committee
0,S. 4201 (117th),Digital Platform Commission Act of 2022,"May 12, 2022",Died in a previous Congress,Michael Bennet,Senator for Colorado. Democrat.,,A bill to establish a new Federal body to prov...,https://www.govtrack.us/congress/bills/117/s4201,,,"Senate Commerce, Science, and Transportation"
1,S. 1896 (117th),Algorithmic Justice and Online Platform Trans...,"May 27, 2021",Died in a previous Congress,Edward “Ed” Markey,Junior Senator for Massachusetts. Democrat.,3 Cosponsors,A bill to prohibit the discriminatory use of p...,https://www.govtrack.us/congress/bills/117/s1896,"Whitehouse, Sheldon; Warren, Elizabeth; Booker...",D-RI; D-MA; D-NJ,"Senate Commerce, Science, and Transportation"
2,H.R. 3611 (117th),Algorithmic Justice and Online Platform Trans...,"May 28, 2021",Died in a previous Congress,Doris Matsui,Representative for California's 6th congressio...,11 Cosponsors,To prohibit the discriminatory use of personal...,https://www.govtrack.us/congress/bills/117/hr3611,"Eshoo, Anna; Clarke, Yvette; Deutch, Theodore;...",D-CA18; D-NY9; D-FL22; D-MI13; D-MD7; D-CA11; ...,House Energy and Commerce
3,H.R. 83 (117th),Protecting Constitutional Rights from Online ...,,Died in a previous Congress,Scott DesJarlais,Representative for Tennessee's 4th congression...,,To amend the Communications Act of 1934 to pro...,https://www.govtrack.us/congress/bills/117/hr83,,,House Energy and Commerce
4,S. 3663 (117th),Kids Online Safety Act,"Feb 16, 2022",Died in a previous Congress,Richard Blumenthal,Senator for Connecticut. Democrat.,13 Cosponsors,A bill to protect the safety of children on th...,https://www.govtrack.us/congress/bills/117/s3663,"Blackburn, Marsha; Capito, Shelley; Markey, Ed...",R-TN; R-WV; D-MA; D-NM; D-WI; D-MN; D-MI; D-CO...,"Senate Commerce, Science, and Transportation"
...,...,...,...,...,...,...,...,...,...,...,...,...
104,H.R. 1561 (114th),Weather Research and Forecasting Innovation A...,"Mar 24, 2015",Died in a previous Congress,Frank Lucas,Representative for Oklahoma's 3rd congressiona...,6 Cosponsors,To improve the National Oceanic and Atmospheri...,https://www.govtrack.us/congress/bills/114/hr1561,"Bonamici, Suzanne; Bridenstine, Jim; Johnson, ...",D-OR1; R-OK1; D-TX30; R-CA48; R-TX21; R-UT2,"House Science, Space, and Technology"
105,H.R. 6490 (114th),American Innovation and Competitiveness Act,,Enacted Via Other Measures,Lamar Smith,Representative for Texas's 21st congressional ...,,To invest in innovation through research and d...,https://www.govtrack.us/congress/bills/114/hr6490,,,House Education and the Workforce
106,,,,,,,,,https://www.govtrack.us/congress/bills/116/s893,"Blackburn, Marsha; Sullivan, Dan; Bennet, Mich...",R-TN; R-AK; D-CO; R-NC; R-ME; R-AR; D-CA; R-FL...,"Senate Commerce, Science, and Transportation"
107,,,,,,,,,https://www.govtrack.us/congress/bills/116/s2661,"Baldwin, Tammy; Moran, Jerry; Tester, Jon; Fis...",D-WI; R-KS; D-MT; R-NE; I-AZ; R-IN; R-TN; D-MA...,"Senate Commerce, Science, and Transportation"


In [124]:
# Save the merged table as an Excel workbook in the working directory.
df_merged.to_excel('US Bills-01-complete.xlsx')