In [1]:
import pandas as pd  ## Dataframe manipulation
import requests
from requests_html import HTMLSession ## We dont have to manipulate the webpage at all, so a chrome based scraper is not needed
from bs4 import BeautifulSoup as bs ## how we read the info from the request
import csv ## how we will store the data portably
import re ## this is for helping find the bill text

In [2]:
def searchDF(df, column, query):
    searchDF = df[df[column].apply(str).str.contains(query, na=False)]

    return searchDF


In [3]:
## check for datafile in local folder, or create it
fileName = 'mnLaws.csv'
try:
    dataframe = pd.read_csv(fileName)
    dataframe.head()
    
    ##TODO ContinueScrape()  ####   Demo is set up as if starting from 0 #####
except :
    with open(fileName, 'w', newline='') as csvfile:
        csvWriter = csv.writer(csvfile, delimiter=' ')
        header = ['LegislatureName', 'LegislatureUrl', 'sessionYear', 'sessionType',
       'sessionUrl', 'chapter', 'chapterUrl', 'bill', 'billUrl', 'text',
       'PresentmentDate', 'scrapeComplete']
        ## The DOM breaks it down into bill sections and subdivisions,
        ##but we just need the text for todays project
        csvWriter.writerow(header)
        
        dataframe = pd.read_csv(file)
dataframe.head()

Unnamed: 0.1,Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,0,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022.0,Regular Session,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,A bill for an act relating to labor and indus...,"February 3, 2022",True
1,1,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022.0,Regular Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True
2,2,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022.0,Regular Session,www.revisor.mn.gov/laws/2022/0,34,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2957,revisor.mn.gov/bills/bill.php?b=senate&f=sf295...,A bill for an act relating to retirement; Min...,"March 3, 2022",True
3,3,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022.0,Regular Session,www.revisor.mn.gov/laws/2022/0,35,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF1782,revisor.mn.gov/bills/bill.php?b=senate&f=sf178...,A bill for an act relating to local governmen...,"March 3, 2022",True
4,4,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022.0,Regular Session,www.revisor.mn.gov/laws/2022/0,36,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2454,revisor.mn.gov/bills/bill.php?b=house&f=hf2454...,A bill for an act relating to local governmen...,"March 10, 2022",True


In [4]:
htmlRequester = HTMLSession()
# r = requests.get('https://www.revisor.mn.gov/laws/')  ## if just using requests, not requestsHTML, maybe we can test the speed
r = htmlRequester.get('https://www.revisor.mn.gov/laws/')  ## or efficency of these three methods (requests, html, chromium)
print(r.status_code, r.encoding)
soup = bs(r.text, 'html.parser')

200 utf-8


In [5]:
## Exploring the data a bit

In [6]:
soup.title

<title>
          
            MN Laws
          
        </title>

In [7]:
legislatures = soup.find_all('tr','alternate')  ## Using the inspect tool here to find the identifier for the data we want
legislatures[0]

## TODO the first row in the table does not have the alternate tag

<tr class="alternate">
<td>
<a href="//www.revisor.mn.gov/laws/91.0">
                    
                        2019 - 2020
                    
                    </a>
</td>
<td>
                    91st Legislature
                </td>
</tr>

In [8]:
legislatures[0].find('a', href=True)['href']  

'//www.revisor.mn.gov/laws/91.0'

In [9]:
legislatures[0].find_all('td')[1].text.strip('\n ').rstrip('\n ')

'91st Legislature'

In [10]:
sessionTracker = pd.DataFrame(data=None, index=None, columns= ['LegislatureName', 'LegislatureUrl'])
## using a df to track my progress on scraping, for a more polished project we should use another program to start these scrapes,
## so we could use more computing power / threads, and for resiliencey of the scraper to unexpected page issues
print(sessionTracker)


yearlysessionTracker = pd.DataFrame(data=None, index=None, columns= ['LegislatureName', 'sessionYear', 'sessionType', 'sessionUrl'])
print(yearlysessionTracker)

Empty DataFrame
Columns: [LegislatureName, LegislatureUrl]
Index: []
Empty DataFrame
Columns: [LegislatureName, sessionYear, sessionType, sessionUrl]
Index: []


In [11]:
## COLLECTING LIST OF LEGISLATURES  (I didnt turn these into functions because each page is too different)

for session in legislatures:
    name = session.find_all('td')[1].text.strip('\n ').rstrip('\n ')
    url = session.find('a', href=True)['href'].strip('//')
    sessionTracker.loc[len(sessionTracker.index)] = [name, url]
sessionTracker.head()
        ## TODO We are missing the 92nd legislature, it has a different layout, should manualy add to 
      ## session tracker or try to programaticly fix (only current year has this layout)? answer depends on use
      ## if i was trying to just fill the data lake, instead of just analyzing bill text, more time could
      ## be spent fixing?
newRow = pd.DataFrame({'LegislatureName':'92nd Legislature', 'LegislatureUrl':'www.revisor.mn.gov/laws/92.0'},index=[0])
sessionTracker = pd.concat([newRow, sessionTracker]).reset_index(drop = True)
sessionTracker.head()

Unnamed: 0,LegislatureName,LegislatureUrl
0,92nd Legislature,www.revisor.mn.gov/laws/92.0
1,91st Legislature,www.revisor.mn.gov/laws/91.0
2,89th Legislature,www.revisor.mn.gov/laws/89.0
3,87th Legislature,www.revisor.mn.gov/laws/87.0
4,85th Legislature,www.revisor.mn.gov/laws/85.0


In [12]:
# Collecting List of Sessions in a Legislature

iterator = 0         ## to limit requests here replace <5 with < len(sessionTracker.index) for complete scrape
while iterator < 15:
# while iterator < len(sessionTracker.index):
    sessionName = sessionTracker.loc[iterator]['LegislatureName']
    url = sessionTracker.loc[iterator]['LegislatureUrl']

    r = htmlRequester.get('http://' + url)
    soup = bs(r.text, 'html.parser')
    sessions = soup.find_all('p', 'p_session')
    for session in sessions:
        yearlySession = session.find_all('a')[0].text.replace('\n', '').strip(' ').rstrip(' ')

        sessionYear = yearlySession[0:4]
        sessionType = yearlySession[6:].strip(' ')
        sessionUrl = sessions[0].find_all('a', href=True)[0]['href'].strip('//')
        yearlysessionTracker.loc[len(yearlysessionTracker.index)] = [sessionName, sessionYear, sessionType, sessionUrl]
    iterator += 1

In [13]:
yearlysessionTracker.head(25
                         )

Unnamed: 0,LegislatureName,sessionYear,sessionType,sessionUrl
0,92nd Legislature,2022,Regular Session,www.revisor.mn.gov/laws/2022/0
1,92nd Legislature,2021,Regular Session,www.revisor.mn.gov/laws/2022/0
2,92nd Legislature,2021,1st Special Session,www.revisor.mn.gov/laws/2022/0
3,91st Legislature,2020,Regular Session,www.revisor.mn.gov/laws/2020/0
4,91st Legislature,2020,1st Special Session,www.revisor.mn.gov/laws/2020/0
5,91st Legislature,2020,2nd Special Session,www.revisor.mn.gov/laws/2020/0
6,91st Legislature,2020,3rd Special Session,www.revisor.mn.gov/laws/2020/0
7,91st Legislature,2020,4th Special Session,www.revisor.mn.gov/laws/2020/0
8,91st Legislature,2020,5th Special Session,www.revisor.mn.gov/laws/2020/0
9,91st Legislature,2020,6th Special Session,www.revisor.mn.gov/laws/2020/0


In [14]:
billTracker = pd.DataFrame(data=None, index=None, columns= (['sessionUrl', 'chapter',  'chapterUrl', 'bill', 'billUrl', 'text', 'PresentmentDate', 'scrapeComplete']))
billTracker.head()

Unnamed: 0,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete


In [15]:
# Collecting List Bills in  a session
iterator = 0 ## to limit requests here replace <5 with < len(yearlysessionTracker.index) for complete scrape
while iterator < 15:
# while iterator < len(yearlysessionTracker.index):

    sessionUrl = yearlysessionTracker.loc[iterator]['sessionUrl']
    r = htmlRequester.get('http://' + sessionUrl)
    soup = bs(r.text, 'html.parser')
    chapters = soup.find_all('tr')
    try:
        for chapter in chapters[1:]:
            chapterUrl = chapter.find_all('a',href=True)[0]['href'].strip('//')
            chapterName =chapter.find_all('a',href=True)[0].text[7:]
            BillUrl = 'revisor.mn.gov/' + chapter.find_all('a',href=True)[1]['href'].strip('//')
            BillName = chapter.find_all('a',href=True)[1].text
            PresentmentDate = chapter.find_all('td')[2].text
            text = ""
            billTracker.loc[len(billTracker.index)] = [sessionUrl, chapterName, chapterUrl, BillName, BillUrl, text, PresentmentDate, False]
    except:
        billTracker.loc[len(billTracker.index)] = [sessionUrl, chapterName, chapterUrl, BillName, BillUrl, text, PresentmentDate, 'ERROR']
    iterator += 1
## TODO ASK DOMAIN EXPERT ABOUT CHAPTERS,for now we just continue towards bill text

In [16]:
searchDF(billTracker, 'scrapeComplete', 'Error')


Unnamed: 0,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete


In [17]:
billTracker.head()

Unnamed: 0,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,,"February 3, 2022",False
1,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,,"March 3, 2022",False
2,www.revisor.mn.gov/laws/2022/0,34,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2957,revisor.mn.gov/bills/bill.php?b=senate&f=sf295...,,"March 3, 2022",False
3,www.revisor.mn.gov/laws/2022/0,35,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF1782,revisor.mn.gov/bills/bill.php?b=senate&f=sf178...,,"March 3, 2022",False
4,www.revisor.mn.gov/laws/2022/0,36,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2454,revisor.mn.gov/bills/bill.php?b=house&f=hf2454...,,"March 10, 2022",False


In [18]:
## Finally we made it down to the bill page, which has so much good info on it.  
## for today, we are just going to get the bill text

In [19]:
iterator = 0 ## to limit requests here replace <5 with < len(billTracker.index) for complete scrape
while iterator < 5:
# while iterator < len(billTracker.index):
    billUrl = billTracker.loc[iterator]['billUrl']
    print(billUrl)
    
### 
    r = htmlRequester.get('http://' + billUrl)
    soup = bs(r.text, 'html.parser')
    billcard = soup.find_all(class_="card-body")
    billtextUrl = 'http://revisor.mn.gov/bills/' + billcard[0].find('a')['href'].strip('/bills')
    print(billtextUrl)

    r = htmlRequester.get(billtextUrl)
    soup = bs(r.text, 'html.parser')
    billText = soup.find(id='document').text.replace('\n', '')
    billTracker.iat[iterator, 5] = billText
    billTracker.iat[iterator, -1] = True

###
    
    
    iterator += 1


revisor.mn.gov/bills/bill.php?b=house&f=hf1203&ssn=0&y=2022
http://revisor.mn.gov/bills/text.php?number=HF1203&type=bill&version=2&session=ls92&session_year=2021&session_number=0
revisor.mn.gov/bills/bill.php?b=senate&f=sf2774&ssn=0&y=2022
http://revisor.mn.gov/bills/text.php?number=SF2774&version=latest&session=ls92&session_year=2022&session_number=0
revisor.mn.gov/bills/bill.php?b=senate&f=sf2957&ssn=0&y=2022
http://revisor.mn.gov/bills/text.php?number=SF2957&version=latest&session=ls92&session_year=2022&session_number=0
revisor.mn.gov/bills/bill.php?b=senate&f=sf1782&ssn=0&y=2022
http://revisor.mn.gov/bills/text.php?number=SF1782&version=latest&session=ls92&session_year=2021&session_number=0
revisor.mn.gov/bills/bill.php?b=house&f=hf2454&ssn=0&y=2022
http://revisor.mn.gov/bills/text.php?number=HF2454&type=bill&version=0&session=ls92&session_year=2021&session_number=0


In [20]:
print(billTracker.head(1)['text'][0])
billTracker.head(1)




Unnamed: 0,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,A bill for an act relating to labor and indus...,"February 3, 2022",True


In [21]:
sessionTracker.merge(yearlysessionTracker.merge(billTracker))

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,A bill for an act relating to labor and indus...,"February 3, 2022",True
1,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True
2,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,34,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2957,revisor.mn.gov/bills/bill.php?b=senate&f=sf295...,A bill for an act relating to retirement; Min...,"March 3, 2022",True
3,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,35,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF1782,revisor.mn.gov/bills/bill.php?b=senate&f=sf178...,A bill for an act relating to local governmen...,"March 3, 2022",True
4,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,36,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2454,revisor.mn.gov/bills/bill.php?b=house&f=hf2454...,A bill for an act relating to local governmen...,"March 10, 2022",True
...,...,...,...,...,...,...,...,...,...,...,...,...
6561,89th Legislature,www.revisor.mn.gov/laws/89.0,2015,1st Special Session,www.revisor.mn.gov/laws/2016/0,185,www.revisor.mn.gov/laws/2016/0/Session+Law/Cha...,SF1440,revisor.mn.gov/bills/bill.php?b=senate&f=sf144...,,"May 24, 2016",False
6562,89th Legislature,www.revisor.mn.gov/laws/89.0,2015,1st Special Session,www.revisor.mn.gov/laws/2016/0,186,www.revisor.mn.gov/laws/2016/0/Session+Law/Cha...,SF2963,revisor.mn.gov/bills/bill.php?b=senate&f=sf296...,,"May 24, 2016",False
6563,89th Legislature,www.revisor.mn.gov/laws/89.0,2015,1st Special Session,www.revisor.mn.gov/laws/2016/0,187,www.revisor.mn.gov/laws/2016/0/Session+Law/Cha...,HF3980,revisor.mn.gov/bills/bill.php?b=house&f=hf3980...,,"May 24, 2016",False
6564,89th Legislature,www.revisor.mn.gov/laws/89.0,2015,1st Special Session,www.revisor.mn.gov/laws/2016/0,188,www.revisor.mn.gov/laws/2016/0/Session+Law/Cha...,HF848,revisor.mn.gov/bills/bill.php?b=house&f=hf848&...,,"May 24, 2016",False


In [22]:
sessionTracker.head(1)

Unnamed: 0,LegislatureName,LegislatureUrl
0,92nd Legislature,www.revisor.mn.gov/laws/92.0


In [23]:
yearlysessionTracker.head(1)

Unnamed: 0,LegislatureName,sessionYear,sessionType,sessionUrl
0,92nd Legislature,2022,Regular Session,www.revisor.mn.gov/laws/2022/0


In [24]:
billTracker.head(1)

Unnamed: 0,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,A bill for an act relating to labor and indus...,"February 3, 2022",True


In [25]:
legislatureSessionCombo = sessionTracker.merge(yearlysessionTracker, how='outer')
legislatureSessionCombo.head(10)  

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl
0,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0
1,92nd Legislature,www.revisor.mn.gov/laws/92.0,2021,Regular Session,www.revisor.mn.gov/laws/2022/0
2,92nd Legislature,www.revisor.mn.gov/laws/92.0,2021,1st Special Session,www.revisor.mn.gov/laws/2022/0
3,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,Regular Session,www.revisor.mn.gov/laws/2020/0
4,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,1st Special Session,www.revisor.mn.gov/laws/2020/0
5,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,2nd Special Session,www.revisor.mn.gov/laws/2020/0
6,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,3rd Special Session,www.revisor.mn.gov/laws/2020/0
7,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,4th Special Session,www.revisor.mn.gov/laws/2020/0
8,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,5th Special Session,www.revisor.mn.gov/laws/2020/0
9,91st Legislature,www.revisor.mn.gov/laws/91.0,2020,6th Special Session,www.revisor.mn.gov/laws/2020/0


In [26]:
legislatureSessionCombo.tail(10)  ## At this point i realized that the territoral legislatures were broken somehow
# Examining the page showed a different style, fixing not needed at this point

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl
73,11th Legislature,www.revisor.mn.gov/laws/11.0,,,
74,9th Legislature,www.revisor.mn.gov/laws/9.0,,,
75,7th Legislature,www.revisor.mn.gov/laws/7.0,,,
76,5th Legislature,www.revisor.mn.gov/laws/5.0,,,
77,3rd Legislature,www.revisor.mn.gov/laws/3.0,,,
78,1st Legislature,www.revisor.mn.gov/laws/1.0,,,
79,7th Territorial Legislature,www.revisor.mn.gov/laws/0.7,,,
80,5th Territorial Legislature,www.revisor.mn.gov/laws/0.5,,,
81,3rd Territorial Legislature,www.revisor.mn.gov/laws/0.3,,,
82,1st Territorial Legislature,www.revisor.mn.gov/laws/0.1,,,


In [27]:
finalDF = legislatureSessionCombo.merge(billTracker, how='outer', on='sessionUrl')

In [28]:
finalDF.head(10)

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
0,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,32,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF1203,revisor.mn.gov/bills/bill.php?b=house&f=hf1203...,A bill for an act relating to labor and indus...,"February 3, 2022",True
1,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True
2,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,34,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2957,revisor.mn.gov/bills/bill.php?b=senate&f=sf295...,A bill for an act relating to retirement; Min...,"March 3, 2022",True
3,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,35,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF1782,revisor.mn.gov/bills/bill.php?b=senate&f=sf178...,A bill for an act relating to local governmen...,"March 3, 2022",True
4,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,36,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2454,revisor.mn.gov/bills/bill.php?b=house&f=hf2454...,A bill for an act relating to local governmen...,"March 10, 2022",True
5,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,37,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2841,revisor.mn.gov/bills/bill.php?b=house&f=hf2841...,,"March 10, 2022",False
6,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,38,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF3175,revisor.mn.gov/bills/bill.php?b=house&f=hf3175...,,"March 17, 2022",False
7,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,39,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF3035,revisor.mn.gov/bills/bill.php?b=house&f=hf3035...,,"March 24, 2022",False
8,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,40,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2876,revisor.mn.gov/bills/bill.php?b=senate&f=sf287...,,"March 24, 2022",False
9,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,41,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,HF2875,revisor.mn.gov/bills/bill.php?b=house&f=hf2875...,,"March 24, 2022",False


In [29]:
finalDF.to_csv('mnLaws.csv')

In [30]:
searchDF(finalDF, 'text', 'transportation')

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete
1,92nd Legislature,www.revisor.mn.gov/laws/92.0,2022,Regular Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True
205,92nd Legislature,www.revisor.mn.gov/laws/92.0,2021,Regular Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True
409,92nd Legislature,www.revisor.mn.gov/laws/92.0,2021,1st Special Session,www.revisor.mn.gov/laws/2022/0,33,www.revisor.mn.gov/laws/2022/0/Session+Law/Cha...,SF2774,revisor.mn.gov/bills/bill.php?b=senate&f=sf277...,A bill for an act relating to human services;...,"March 3, 2022",True


In [31]:
searchDF(finalDF, 'text', 'eagle')  ## 92nd Legislature, 2021 1st Special Session
                                    ## from searchbar on revisor.gov
                                    ## couldnt find because query in CHAPTER text,
                                    ## not bill text, TODO WHAT IS CHAPTER

Unnamed: 0,LegislatureName,LegislatureUrl,sessionYear,sessionType,sessionUrl,chapter,chapterUrl,bill,billUrl,text,PresentmentDate,scrapeComplete


In [32]:
# Thank you for reading! This scraper is not totaly complete,
# I feel comfortable doing basic visualizations, but perhaps that is a different role i can grow into