# NY State Assembly and Senate

The goal of this project is to produce two CSV files of member names and districts for both the NY Assembly and Senate, since this information isn't publicly available as spreadsheets. The data is web scraped using the Beautiful Soup library. The members listed are those in office as of Feb 2023. The code may be reused in the future, provided that the structure of the government websites doesn't change.

### NY State Assembly Website

<img src="https://i.postimg.cc/4NDv5Kyg/Screen-Shot-2023-02-23-at-4-08-43-PM.png" width="500">

In [5]:
# importing libraries
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re

In [6]:
assem_url = "https://nyassembly.gov/mem/"
# Without a timeout, requests can wait indefinitely on an unresponsive server.
assem_response = requests.get(assem_url, timeout=30) # 200 - OK - Requested action was successfully executed
# Fail here on a 4xx/5xx status instead of parsing an error page further down.
assem_response.raise_for_status()

In [8]:
# HTML body of the Assembly directory response, decoded to a string
assem_response_text = assem_response.text
# assem_response_text  (uncomment to inspect the raw HTML)

In [10]:
# parse the downloaded HTML with the lxml parser so tags can be searched
assem_soup = BeautifulSoup(assem_response_text, 'lxml')

In [11]:
# <title> element of the parsed Assembly page
assem_soup.title

<title>Assembly Member Directory | New York State Assembly </title>

In [12]:
# Member name and district both live in the <h3 class="mem-name"> headings
# (located with the Google Chrome inspect tool).
# Each heading's text is trimmed and its tab characters are turned into commas,
# giving one 'name,,,...,district' string per member.
member_headings = assem_soup.find_all('h3', class_='mem-name')
assemblyMembers = [
    heading.get_text().strip().replace('\t', ',')
    for heading in member_headings
]


In [32]:
# preview of the raw 'name,,,...,district' strings stored in the list
assemblyMembers[0:3] # first three elements

['George Alvarez,,,,,,,,,,,,,District 78',
 'Khaleel M. Anderson,,,,,,,,,,,,,District 31',
 'Joe Angelino,,,,,,,,,,,,,District 121']

In [14]:
# creating two separate lists to store members name and district
assemblyMemName = []
assemblyDistrict = []

# Each entry looks like 'George Alvarez,,,,,,,,,,,,,District 78':
# the name, a run of padding commas (formerly tabs), then the district.
# Split on the LAST comma only and strip the padding commas off the name, so a
# name that itself contains a comma (e.g. a ', Jr.' suffix) is kept whole
# instead of being cut at its first comma.
for member in assemblyMembers:
    padded_name, sep, district = member.rpartition(',')
    # No comma at all: use the whole string for both fields, which is what
    # split(',')[0] / split(',')[-1] produced before.
    name = padded_name.rstrip(',') if sep else member
    assemblyMemName.append(name)
    assemblyDistrict.append(district)

In [18]:
# populated assembly member names
assemblyMemName[0:3] # first three elements

['George Alvarez', 'Khaleel M. Anderson', 'Joe Angelino']

In [19]:
# populated assembly districts (still carrying the 'District ' prefix)
assemblyDistrict[0:3] # first three elements

['District 78', 'District 31', 'District 121']

In [25]:
# pair each member name with its district: one (name, district) row per member
assem_rows = list(zip(assemblyMemName, assemblyDistrict))
assem_df = pd.DataFrame(assem_rows, columns=['memberName', 'assemblyDistrict'])
# preview the first two rows
assem_df.head(2)

Unnamed: 0,memberName,assemblyDistrict
0,George Alvarez,District 78
1,Khaleel M. Anderson,District 31


In [27]:
# Split 'District 78' at the first space into the literal word ('r', dropped
# later) and the district number. The split limit must be passed as n=...:
# passing it positionally is deprecated and raises TypeError on pandas >= 2.0.
assem_df[['r', 'district']] = assem_df['assemblyDistrict'].str.split(' ', n=1, expand=True)


  assem_df[['r', 'district']] = assem_df['assemblyDistrict'].str.split(' ', 1, expand=True)


In [28]:
# check that the split added the 'r' and 'district' columns
assem_df.head(2)

Unnamed: 0,memberName,assemblyDistrict,r,district
0,George Alvarez,District 78,District,78
1,Khaleel M. Anderson,District 31,District,31


In [29]:
# 'r' only holds the literal word 'District'; remove it from the frame in place
assem_df.drop(columns='r', inplace=True)

In [30]:
# preview the frame after dropping the helper column 'r'
assem_df.head()

Unnamed: 0,memberName,assemblyDistrict,district
0,George Alvarez,District 78,78
1,Khaleel M. Anderson,District 31,31
2,Joe Angelino,District 121,121
3,Juan Ardila,District 37,37
4,Jeffrion L. Aubry,District 35,35


In [31]:
# saving to a csv file
# assem_df.to_csv('ny_Assembly_District_Members.csv',index=False)

###  NY Senate Members

<img src="https://i.postimg.cc/cJnf8d0J/Screen-Shot-2023-02-23-at-4-22-07-PM.png" width="500">

In [None]:
senate_url = 'https://www.nysenate.gov/senators-committees'
# Without a timeout, requests can wait indefinitely on an unresponsive server.
senate_response = requests.get(senate_url, timeout=30) # 200 - OK - Requested action was successfully executed
# Fail here on a 4xx/5xx status instead of parsing an error page further down.
senate_response.raise_for_status()

In [None]:
# echo the response object to see the HTTP status of the request
senate_response

In [None]:
# HTML body of the Senate directory response, decoded to a string
senate_response_text = senate_response.text

In [None]:
# parse the downloaded HTML with the lxml parser so tags can be searched
senate_soup = BeautifulSoup(senate_response_text, 'lxml')

In [None]:
# <title> element of the parsed Senate page
senate_soup.title

In [None]:
# senate member names from url page: text of every <h4 class="nys-senator--name">
# Surrounding whitespace is stripped, as is already done for the district text
# and the Assembly names, so template newlines/indentation never reach the CSV.
senateMembersName = [
    member.text.strip()
    for member in senate_soup.find_all('h4', {"class": "nys-senator--name"})
]


In [None]:
# senate member district from url page: text of every
# <span class="nys-senator--district">, trimmed, with tab characters removed,
# keeping only the last line of the remaining text
district_spans = senate_soup.find_all('span', class_='nys-senator--district')
senateMembersDistrict = [
    span.get_text().strip().replace('\t', '').rsplit('\n', 1)[-1]
    for span in district_spans
]

In [None]:
# Names and districts were collected by two independent find_all() passes.
# zip() silently stops at the shorter list, so an unequal count would drop rows
# and could pair names with the wrong district. Fail loudly instead.
if len(senateMembersName) != len(senateMembersDistrict):
    raise ValueError(
        f"scraped {len(senateMembersName)} senator names but "
        f"{len(senateMembersDistrict)} districts; rows would be misaligned"
    )
sdf = pd.DataFrame(zip(senateMembersName, senateMembersDistrict), columns=['memberName', 'senateDistrict'])
# preview the first rows
sdf.head()

In [None]:
# Split the district text at the first space: the first token goes to 'district'
# and the remainder to the helper column 'r' (dropped later).
# The split limit must be passed as n=...: passing it positionally is deprecated
# and raises TypeError on pandas >= 2.0.
sdf[['district', 'r']] = sdf['senateDistrict'].str.split(' ', n=1, expand=True)


In [None]:
# the remainder column 'r' is not needed; remove it from the frame in place
sdf.drop(columns='r', inplace=True)

In [None]:
# Lower-case each district token and delete every a-z letter, keeping the rest
# (presumably the digits of values such as '1st' - confirm against the scraped data).
letters = re.compile(r'[a-z]')
districtNumber = [letters.sub('', token.lower()) for token in sdf['district']]

In [None]:
# overwrite the 'district' column with the letter-free values built above
sdf['district'] =  districtNumber

In [None]:
# display the full Senate frame for a final visual check
sdf

In [None]:
# saving to a csv file
# sdf.to_csv('ny_Senate_Members.csv',index=False)