## 6.1: Importing necessary packages

In [1]:
import json
import requests
from bs4 import BeautifulSoup
import re
import string
import pandas as pd
import numpy as np



## 6.2: Importing all .csv files as dataframes

In [2]:
hr_bills = pd.read_csv('hr_bills.csv')
s_bills = pd.read_csv('senate_bills.csv')
sjres = pd.read_csv('sjres.csv')
hjres = pd.read_csv('hjres.csv')

## 6.3: Adding column, 'type' for each dataframe specifying the corresponding type of legislation

In [6]:
# HR bills are house resolutions, abbreviated as hr
hr_bills['type'] = 'hr'

# S bills are senate bills, abbreviated as s
s_bills['type'] = 's'

# Hjres bills are house joint resolutions, abbreviated as hjres
hjres['type'] = 'hjres'

# Sjres bills are senate joint resolutions, abbreviated as sjres
sjres['type'] = 'sjres'

In [8]:
# Checking to make sure it worked
hr_bills.head()

Unnamed: 0,congress,bill_number,url,word_count,type
0,104,248,https://www.congress.gov/bill/104th-congress/h...,2025.0,hr
1,104,255,https://www.congress.gov/bill/104th-congress/h...,175.0,hr
2,104,325,https://www.congress.gov/bill/104th-congress/h...,345.0,hr
3,104,394,https://www.congress.gov/bill/104th-congress/h...,611.0,hr
4,104,395,https://www.congress.gov/bill/104th-congress/h...,215.0,hr


In [9]:
s_bills.head()

Unnamed: 0,congress,bill_number,url,word_count,type
0,104,1,https://www.congress.gov/bill/104th-congress/s...,10498.0,s
1,104,2,https://www.congress.gov/bill/104th-congress/s...,19733.0,s
2,104,4,https://www.congress.gov/bill/104th-congress/s...,6164.0,s
3,104,39,https://www.congress.gov/bill/104th-congress/s...,29252.0,s
4,104,178,https://www.congress.gov/bill/104th-congress/s...,184.0,s


In [10]:
hjres.head()

Unnamed: 0,congress,bill_number,url,word_count,type
0,104,69,https://www.congress.gov/bill/104th-congress/h...,168.0,hjres
1,104,78,https://www.congress.gov/bill/104th-congress/h...,414.0,hjres
2,104,108,https://www.congress.gov/bill/104th-congress/h...,2330.0,hjres
3,104,110,https://www.congress.gov/bill/104th-congress/h...,192.0,hjres
4,104,111,https://www.congress.gov/bill/104th-congress/h...,178.0,hjres


In [11]:
sjres.head()

Unnamed: 0,congress,bill_number,url,word_count,type
0,104,20,https://www.congress.gov/bill/104th-congress/s...,2618.0,sjres
1,104,38,https://www.congress.gov/bill/104th-congress/s...,1345.0,sjres
2,104,53,https://www.congress.gov/bill/104th-congress/s...,297.0,sjres
3,104,64,https://www.congress.gov/bill/104th-congress/s...,452.0,sjres
4,105,5,https://www.congress.gov/bill/105th-congress/s...,350.0,sjres


## 6.4: Concatenating all four dataframes

In [13]:
legislation = [hr_bills, s_bills, hjres, sjres]
all_leg = pd.concat(legislation)

In [14]:
all_leg.head()

Unnamed: 0,congress,bill_number,url,word_count,type
0,104,248,https://www.congress.gov/bill/104th-congress/h...,2025.0,hr
1,104,255,https://www.congress.gov/bill/104th-congress/h...,175.0,hr
2,104,325,https://www.congress.gov/bill/104th-congress/h...,345.0,hr
3,104,394,https://www.congress.gov/bill/104th-congress/h...,611.0,hr
4,104,395,https://www.congress.gov/bill/104th-congress/h...,215.0,hr


In [15]:
# Checking length of new all_leg dataframe
len(all_leg)

5164

In [19]:
# Making sure it added all the dataframes properly
total = len(hr_bills) + len(s_bills) + len(hjres) + len(sjres)
total

5164

## 6.4: Sorting new all_leg dataframe by word count

In [23]:
longest_bills = all_leg.sort_values(by=['word_count'], ascending=False)
longest_bills.head(50)

Unnamed: 0,congress,bill_number,url,word_count,type
3239,116,133,https://www.congress.gov/bill/116th-congress/h...,950369.0,hr
1442,116,1790,https://www.congress.gov/bill/116th-congress/s...,478736.0,s
456,105,4328,https://www.congress.gov/bill/105th-congress/h...,429189.0,hr
1244,114,2943,https://www.congress.gov/bill/114th-congress/s...,420040.0,s
2812,114,2029,https://www.congress.gov/bill/114th-congress/h...,411428.0,hr
3037,115,1625,https://www.congress.gov/bill/115th-congress/h...,411050.0,hr
2198,111,3590,https://www.congress.gov/bill/111th-congress/h...,395627.0,hr
2223,111,4173,https://www.congress.gov/bill/111th-congress/h...,374137.0,hr
198,104,3610,https://www.congress.gov/bill/104th-congress/h...,347227.0,hr
1458,109,3,https://www.congress.gov/bill/109th-congress/h...,343979.0,hr
