# Get Congress Members

In [62]:
import os
from io import StringIO
from itertools import groupby

import numpy as np
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

In [68]:
# Scrape the Wikipedia roster of current senators; the resulting table
# supplies the 'Senator', 'Party.1' and 'State' columns used later on.
url2 = 'https://en.wikipedia.org/wiki/List_of_current_United_States_senators'
response2 = rq.get(url2, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
response2.raise_for_status()

soup2 = BeautifulSoup(response2.content,'html.parser')

tables = soup2.find_all('table')
# NOTE(review): index 5 depends on the page's current layout -- confirm it
# still selects the senators table whenever the page is re-scraped.
# read_html gets a file-like object because passing a literal HTML string
# is deprecated in recent pandas releases.
party_state = pd.read_html(StringIO(str(tables[5])))[0]

# Split each name on whitespace; token 0 is used as the first name and the
# remaining tokens are combined into the last name further down.
split_names = party_state['Senator'].str.split(expand=True)
party_state['First_Name'] = split_names[0]

def get_last_name(row):
    """Return the last name from one row of whitespace-split name tokens.

    Position 0 of ``row`` is treated as the first name. Every later token
    that is not missing is joined with single spaces, so multi-word last
    names of any length are kept intact and rows with only two token
    columns no longer fail.

    Args:
        row: The tokens of one name, e.g. a row of the frame produced by
            ``Series.str.split(expand=True)``. Missing tokens (``None`` or
            NaN) are ignored.

    Returns:
        The last name as a string, or ``None`` when there is no token
        after the first one.
    """
    # list(row) reads the values positionally, whatever the index labels are.
    tokens = [str(token) for token in list(row)[1:] if not pd.isna(token)]
    return ' '.join(tokens) if tokens else None

# Build the last name row by row from the split name tokens and store it
# alongside First_Name.
last_names = split_names.apply(get_last_name, axis=1)
party_state['Last_Name'] = last_names


## Get Senate Committee Assignments

In [70]:
# Scrape the Senate committee-assignments page. Note that url2, response2
# and soup2 overwrite the variables of the same name from the Wikipedia cell.
url2 = 'https://www.senate.gov/general/committee_assignments/assignments.htm'
response2 = rq.get(url2, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page.
response2.raise_for_status()

soup2 = BeautifulSoup(response2.content,'html.parser')

gov_data = soup2.select('a')

# Keep the stripped text of every link whose direct parent is a <div> or a
# <strong>; the following cell groups these texts per senator.
actual_titles = [
    title.text.strip()
    for title in gov_data
    if title.parent.name in ('div', 'strong')
]


In [71]:
# Cut the flat list of link texts into groups, ending a group after every
# 'Back to top' entry.
arr = np.array(actual_titles)
idx = np.where(arr == 'Back to top')[0]
subarrays = np.split(arr, idx + 1)

result = [subarray.tolist() for subarray in subarrays if len(subarray) > 0]

# Remove blanks and the 'Back to top' separators themselves.
filtered_result = [
    [item for item in sublist if item not in ('', 'Back to top')]
    for sublist in result
]
# A group holding nothing but separators/blanks would otherwise become an
# all-NaN row in the DataFrame, so drop it.
filtered_result = [sublist for sublist in filtered_result if sublist]

# One row per group: the first entry becomes 'senator', the rest become
# committee_1, committee_2, ... Names are generated from the actual column
# count so that a senator with more than eight entries still gets named
# columns instead of bare integers.
senators_committees = pd.DataFrame(filtered_result)
senators_committees = senators_committees.rename(
    columns={
        position: 'senator' if position == 0 else f'committee_{position}'
        for position in senators_committees.columns
    }
)

In [72]:
# Display the per-senator committee table (one row per senator).
senators_committees

Unnamed: 0,senator,committee_1,committee_2,committee_3,committee_4,committee_5,committee_6,committee_7,committee_8
0,"Baldwin, Tammy",Committee on Appropriations,"Committee on Commerce, Science, and Transporta...","Committee on Health, Education, Labor, and Pen...",,,,,
1,"Barrasso, John",Committee on Energy and Natural Resources,Committee on Finance,Committee on Foreign Relations,,,,,
2,"Bennet, Michael F.","Committee on Agriculture, Nutrition, and Forestry",Committee on Finance,Committee on Rules and Administration,Select Committee on Intelligence,,,,
3,"Blackburn, Marsha","Committee on Commerce, Science, and Transporta...",Committee on Finance,Committee on the Judiciary,Committee on Veterans' Affairs,,,,
4,"Blumenthal, Richard",Commission on Security and Cooperation in Europe,Committee on Armed Services,Committee on Homeland Security and Governmenta...,Committee on the Judiciary,Committee on Veterans' Affairs,Special Committee on Aging,United States Senate Caucus on International N...,
...,...,...,...,...,...,...,...,...,...
96,"Whitehouse, Sheldon",Commission on Security and Cooperation in Europe,Committee on Environment and Public Works,Committee on Finance,Committee on the Budget,Committee on the Judiciary,United States Senate Caucus on International N...,,
97,"Wicker, Roger F.",Commission on Security and Cooperation in Europe,Committee on Armed Services,"Committee on Commerce, Science, and Transporta...",Committee on Environment and Public Works,Committee on Rules and Administration,Select Committee on Intelligence,,
98,"Wyden, Ron",Committee on Energy and Natural Resources,Committee on Finance,Committee on the Budget,Joint Committee on Taxation,Select Committee on Intelligence,,,
99,"Young, Todd","Committee on Commerce, Science, and Transporta...",Committee on Finance,Committee on Foreign Relations,Committee on Small Business and Entrepreneurship,,,,


In [73]:
# The 'senator' column is formatted "Last, First"; split it once on ', '
# and reuse the pieces for each derived column.
name_parts = senators_committees['senator'].str.split(', ')
senators_committees['first_name'] = name_parts.str[1]
senators_committees['last_name'] = name_parts.str[0]
# full_name is "<first> <last>", built before first_name is shortened.
senators_committees['full_name'] = (
    senators_committees['first_name'] + ' ' + senators_committees['last_name']
)

In [74]:
# Reduce first_name to its first space-separated token (e.g. "Michael F."
# becomes "Michael"); full_name is left as computed earlier.
first_token = senators_committees['first_name'].str.split(' ').str[0]
senators_committees['first_name'] = first_token

In [75]:
# Display the table again, now with first_name, last_name and full_name.
senators_committees

Unnamed: 0,senator,committee_1,committee_2,committee_3,committee_4,committee_5,committee_6,committee_7,committee_8,first_name,last_name,full_name
0,"Baldwin, Tammy",Committee on Appropriations,"Committee on Commerce, Science, and Transporta...","Committee on Health, Education, Labor, and Pen...",,,,,,Tammy,Baldwin,Tammy Baldwin
1,"Barrasso, John",Committee on Energy and Natural Resources,Committee on Finance,Committee on Foreign Relations,,,,,,John,Barrasso,John Barrasso
2,"Bennet, Michael F.","Committee on Agriculture, Nutrition, and Forestry",Committee on Finance,Committee on Rules and Administration,Select Committee on Intelligence,,,,,Michael,Bennet,Michael F. Bennet
3,"Blackburn, Marsha","Committee on Commerce, Science, and Transporta...",Committee on Finance,Committee on the Judiciary,Committee on Veterans' Affairs,,,,,Marsha,Blackburn,Marsha Blackburn
4,"Blumenthal, Richard",Commission on Security and Cooperation in Europe,Committee on Armed Services,Committee on Homeland Security and Governmenta...,Committee on the Judiciary,Committee on Veterans' Affairs,Special Committee on Aging,United States Senate Caucus on International N...,,Richard,Blumenthal,Richard Blumenthal
...,...,...,...,...,...,...,...,...,...,...,...,...
96,"Whitehouse, Sheldon",Commission on Security and Cooperation in Europe,Committee on Environment and Public Works,Committee on Finance,Committee on the Budget,Committee on the Judiciary,United States Senate Caucus on International N...,,,Sheldon,Whitehouse,Sheldon Whitehouse
97,"Wicker, Roger F.",Commission on Security and Cooperation in Europe,Committee on Armed Services,"Committee on Commerce, Science, and Transporta...",Committee on Environment and Public Works,Committee on Rules and Administration,Select Committee on Intelligence,,,Roger,Wicker,Roger F. Wicker
98,"Wyden, Ron",Committee on Energy and Natural Resources,Committee on Finance,Committee on the Budget,Joint Committee on Taxation,Select Committee on Intelligence,,,,Ron,Wyden,Ron Wyden
99,"Young, Todd","Committee on Commerce, Science, and Transporta...",Committee on Finance,Committee on Foreign Relations,Committee on Small Business and Entrepreneurship,,,,,Todd,Young,Todd Young


In [76]:
# Attach party and state by matching on (last name, first name).
# merge() defaults to an inner join, so senators whose names are spelled
# differently in the two sources are dropped; validate='1:1' makes pandas
# raise if a name pair is duplicated on either side.
senators_committees_merged = senators_committees.merge(party_state, left_on=['last_name','first_name'], right_on=['Last_Name','First_Name'], validate='1:1')

# Report the senators lost to the join instead of dropping them silently.
# The merged result itself is unchanged.
unmatched = senators_committees.loc[
    ~senators_committees['senator'].isin(senators_committees_merged['senator']),
    'senator',
]
if not unmatched.empty:
    print(f'Warning: {len(unmatched)} senator(s) had no party/state match and were dropped: {unmatched.tolist()}')

# Keep only the columns of interest, in a fixed order.
senators_committees_merged = senators_committees_merged[['senator', 'first_name', 'last_name', 'full_name', 'committee_1','committee_2','committee_3','committee_4','committee_5','committee_6','committee_7','committee_8','Party.1','State']]

In [77]:
# Display the merged table; compare its row count with senators_committees
# to see whether the name-based merge dropped any senators.
senators_committees_merged

Unnamed: 0,senator,first_name,last_name,full_name,committee_1,committee_2,committee_3,committee_4,committee_5,committee_6,committee_7,committee_8,Party.1,State
0,"Baldwin, Tammy",Tammy,Baldwin,Tammy Baldwin,Committee on Appropriations,"Committee on Commerce, Science, and Transporta...","Committee on Health, Education, Labor, and Pen...",,,,,,Democratic,Wisconsin
1,"Barrasso, John",John,Barrasso,John Barrasso,Committee on Energy and Natural Resources,Committee on Finance,Committee on Foreign Relations,,,,,,Republican,Wyoming
2,"Bennet, Michael F.",Michael,Bennet,Michael F. Bennet,"Committee on Agriculture, Nutrition, and Forestry",Committee on Finance,Committee on Rules and Administration,Select Committee on Intelligence,,,,,Democratic,Colorado
3,"Blackburn, Marsha",Marsha,Blackburn,Marsha Blackburn,"Committee on Commerce, Science, and Transporta...",Committee on Finance,Committee on the Judiciary,Committee on Veterans' Affairs,,,,,Republican,Tennessee
4,"Blumenthal, Richard",Richard,Blumenthal,Richard Blumenthal,Commission on Security and Cooperation in Europe,Committee on Armed Services,Committee on Homeland Security and Governmenta...,Committee on the Judiciary,Committee on Veterans' Affairs,Special Committee on Aging,United States Senate Caucus on International N...,,Democratic,Connecticut
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,"Welch, Peter",Peter,Welch,Peter Welch,"Committee on Agriculture, Nutrition, and Forestry","Committee on Commerce, Science, and Transporta...",Committee on Rules and Administration,Committee on the Judiciary,Joint Economic Committee,,,,Democratic,Vermont
81,"Whitehouse, Sheldon",Sheldon,Whitehouse,Sheldon Whitehouse,Commission on Security and Cooperation in Europe,Committee on Environment and Public Works,Committee on Finance,Committee on the Budget,Committee on the Judiciary,United States Senate Caucus on International N...,,,Democratic,Rhode Island
82,"Wicker, Roger F.",Roger,Wicker,Roger F. Wicker,Commission on Security and Cooperation in Europe,Committee on Armed Services,"Committee on Commerce, Science, and Transporta...",Committee on Environment and Public Works,Committee on Rules and Administration,Select Committee on Intelligence,,,Republican,Mississippi
83,"Wyden, Ron",Ron,Wyden,Ron Wyden,Committee on Energy and Natural Resources,Committee on Finance,Committee on the Budget,Joint Committee on Taxation,Select Committee on Intelligence,,,,Democratic,Oregon


In [None]:
DATA_PATH = '../../data'

# exist_ok=True creates the folder when missing and is a no-op otherwise.
os.makedirs(DATA_PATH+'/inputs', exist_ok=True)

# Path relative to DATA_PATH; the leading slash means it must be appended
# by string concatenation (os.path.join would discard DATA_PATH).
senators_committees_csv = '/inputs/senators_committees.csv'

# Test the real destination (DATA_PATH + relative path). Checking the bare
# relative path looked at the filesystem root, so an existing file was
# never detected.
# NOTE(review): this writes senators_committees (no party/state columns),
# not senators_committees_merged -- confirm that is the intended table.
if not os.path.exists(DATA_PATH+senators_committees_csv):
    senators_committees.to_csv(DATA_PATH+senators_committees_csv,index=False)

## Get GICS sectors (gsectors)

In [None]:
# url3 = 'https://en.wikipedia.org/wiki/Global_Industry_Classification_Standard'
# response3 = rq.get(url3)

# soup3 = BeautifulSoup(response3.content,'html.parser')

# gsect = soup3.select('table')
# gsect = pd.read_html(str(gsect))
# gsect = pd.DataFrame(gsect[0])

In [None]:
# gsect