# Scraping New York Lobbying Data on the TREES Act

In [199]:
from bs4 import BeautifulSoup

In [200]:
from playwright.async_api import async_playwright

In [201]:
import os

# Start the Playwright driver, then open a Chromium window.
# headless=False keeps the browser window visible while the cells below drive it.
# These statements rely on the notebook's support for top-level `await`.
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless=False)

In [202]:
# Open a new page (tab) in the launched browser; the cells below drive this page.
page = await browser.new_page()

In [203]:
# Navigate to the public-query page on reports.ethics.ny.gov.
# The cell output is the Response object returned by goto().
await page.goto("https://reports.ethics.ny.gov/publicquery/")

<Response url='https://reports.ethics.ny.gov/publicquery/' request=<Request url='https://reports.ethics.ny.gov/publicquery/' method='GET'>>

In [204]:
# Click the element with id "btnSearchLobbyingFocuses"
# (presumably opens the search-by-lobbying-focus form; confirm against the live page).
await page.click("#btnSearchLobbyingFocuses")

In [205]:
# Click the element with id "btnStateActivities"
# (presumably restricts the search to state-level activities; confirm against the live page).
await page.click("#btnStateActivities")

In [206]:
# Click the element with id "sbilid"
# (presumably selects the "search by bill" option, since a bill number is typed next; confirm).
await page.click("#sbilid")

In [207]:
await page.fill("#tbSearchInput", "S8898")

In [208]:
# Submit the search by clicking the element with id "btnSearch".
await page.click("#btnSearch")

In [170]:
# Wait until the page-length dropdown of the results table (gvResultsLob) appears;
# used here as the signal that the search results are on the page.
await page.wait_for_selector('select[name="gvResultsLob_length"]')

<JSHandle preview=JSHandle@node>

In [209]:
# Pick the "99999999" option of the page-length dropdown so the results are not paginated
# (presumably the dropdown's "show all" choice; confirm against the live page).
# select_option returns the list of values that ended up selected, shown as the cell output.
await page.select_option('select[name="gvResultsLob_length"]', value="99999999")

['99999999']

In [210]:
# Snapshot the page's current HTML (taken after the page-length change) for parsing below.
html = await page.content()

In [211]:
# Parse the results-page snapshot. The parser is named explicitly: without it
# BeautifulSoup picks whichever parser happens to be installed (and warns), so
# the same notebook could build different trees on different machines.
soup_doc = BeautifulSoup(html, "html.parser")

In [212]:
# Locate the search-results table by its id, then keep every <tr> after the
# first one (the first row is skipped; presumably it is the header row).
results_table = soup_doc.find(id='gvResultsLob')
filings = results_table.find_all('tr')[1:]

In [213]:
# Sanity check: how many result rows were collected from the table.
len(filings)

25

In [189]:
filings_list = []

for filing in filings: 
    filing_type = filing.find_all('td')[1].text
    filing_period = filing.find_all('td')[2].text
    principal_lobbyist = filing.find_all('td')[6].find('a').text
    link = filing.find_all('td')[9].find('a')['href']
    url = f'https://reports.ethics.ny.gov{link}'
    await page.goto(url)

    lobby_dict = {}
    if filing_type == 'BI-MONTHLY':
        await page.click('a[data-toggle="collapse"][href="#bimotable"]')
        await page.click('a[href^="/publicquery/ViewFiling/BIMO/"]')
        html = await page.content()
        soup_doc = BeautifulSoup(html)
        lobbyists = soup_doc.find(id='FormSummaryHeaderDetails').find(class_='form-summary-header-people').find_all('div')
        principal_lobbyist_name = lobbyists[0].div.strong.text
        principal_lobbyist_address = lobbyists[0].div.address.text
        contractual_client_name = lobbyists[1].strong.text
        contractual_client_address = lobbyists[1].address.text
        beneficial_clients_name = lobbyists[2].strong.text
        beneficial_clients_address = lobbyists[2].address.text
        
        indiv_lobbyists = soup_doc.find(class_='col-lg-10').find(class_='pad-left').tbody.find_all('tr')
        num_lobbyists = len(indiv_lobbyists)

        expenses = soup_doc.find('label', string="COMPENSATION (CURRENT PERIOD ONLY)")
        compensation = expenses.parent.span.text

        total_expenses = soup_doc.find('small', string="(total of all expense categories)").parent.parent.span.text

        bills = soup_doc.find('table', id='tbPartiesLobbied').tbody.find_all('tr')
        num_bills = len(bills)


        lobby_dict['filing_type'] = filing_type
        lobby_dict['filing_period'] = filing_period
        lobby_dict['principal_lobbyist_name'] = principal_lobbyist_name
        lobby_dict['principal_lobbyist_address'] = principal_lobbyist_address
        lobby_dict['contractual_client_name'] = contractual_client_name
        lobby_dict['contractual_client_address'] = contractual_client_address
        lobby_dict['beneficial_clients_name'] = beneficial_clients_name
        lobby_dict['beneficial_clients_address'] = beneficial_clients_address
        lobby_dict['num_lobbyists'] = num_lobbyists
        lobby_dict['compensation'] = compensation
        lobby_dict['total_expenses'] = total_expenses
        lobby_dict['num_bills'] = num_bills

        filings_list.append(lobby_dict)

        

    # if filing_type == 'CLIENT SEMI-ANNUAL AMENDMENT':
    #     await page.click('a[data-toggle="collapse"][href="#csatable"]')
    #     await page.click('a[href^="/publicquery/ViewFiling/CSA/"]')
    #     await page.click('a[href="#amendList"]')
    #     html = await page.content()
    #     soup_doc = BeautifulSoup(html)
    #     lobbyists = soup_doc.find(id='FormSummaryHeaderDetails').find(class_='form-summary-header-people').find_all('div')
    #     principal_lobbyist_name = lobbyists[0].div.strong.text
    #     principal_lobbyist_address = lobbyists[0].div.address.text
    #     contractual_client_name = lobbyists[1].strong.text
    #     contractual_client_address = lobbyists[1].address.text
    #     print(contractual_client_address)
    #     beneficial_clients_name = lobbyists[2].strong.text
    #     beneficial_clients_address = lobbyists[2].strong.next.text
        
        

    #if filing_type == 'PUBLIC CORPORATION BI-MONTHLY':




        

#filings_list

        #expenses = soup_doc.find(class_='cardbox').find_all('div', class_='pad-left')
        #print(expenses)
        #compensation = expenses.div.div.p.text
        #.span.text
        #print(compensation)

In [190]:
# Preview only the first few scraped records; dumping the whole list floods
# the cell output with long address strings.
filings_list[:3]

[{'filing_type': 'BI-MONTHLY',
  'filing_period': 'JULY/AUGUST',
  'principal_lobbyist_name': 'ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORGANIZATIONS (NYS)',
  'principal_lobbyist_address': '\n                                515 BROADWAYSUITE 402ALBANY, NY 12207\n\n                                \np.518-465-1473\n                                    x.225                                \n',
  'contractual_client_name': 'ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORGANIZATIONS (NYS)',
  'contractual_client_address': '\n                                515 BROADWAYSUITE 402ALBANY, NY 12207\n\n                                \np.518-465-1473\n                                    x.225                                \n',
  'beneficial_clients_name': 'ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORGANIZATIONS (NYS)',
  'beneficial_clients_address': '\n                                    515 BROADWAYSUITE 402ALBANY, NY 12207\n                                    \np.518-465-1473\n              

In [191]:
import pandas as pd

# Build a DataFrame from the scraped records: one row per dict in filings_list,
# with the dict keys as column names.
df = pd.DataFrame(filings_list)

In [192]:
# Display the scraped filings table.
df

Unnamed: 0,filing_type,filing_period,principal_lobbyist_name,principal_lobbyist_address,contractual_client_name,contractual_client_address,beneficial_clients_name,beneficial_clients_address,num_lobbyists,compensation,total_expenses,num_bills
0,BI-MONTHLY,JULY/AUGUST,ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORG...,\n 515 BROADWAY...,ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORG...,\n 515 BROADWAY...,ASSOCIATION OF COUNTIES AND ITS AFFILIATED ORG...,\n 515 BROA...,6,$4706,$5580,22
1,BI-MONTHLY,MARCH/APRIL,"BOLTON-ST. JOHNS, LLC",\n 146 STATE ST...,"BOLTON-ST. JOHNS, LLC",\n 146 STATE ST...,"FRIENDS OF THE EARTH (ACTION), INC.",\n 1101 15T...,43,$10000,$150,2
2,BI-MONTHLY,JULY/AUGUST,"CHARTER COMMUNICATIONS OPERATING, LLC",\n 120 EAST 23R...,"CHARTER COMMUNICATIONS OPERATING, LLC",\n 120 EAST 23R...,"CHARTER COMMUNICATIONS OPERATING, LLC",\n 120 EAST...,7,$16636,$4159,18
3,BI-MONTHLY,JULY/AUGUST,"DICKINSON & AVELLA, PLLC.",\n 111 WASHINGT...,"DICKINSON & AVELLA, PLLC.",\n 111 WASHINGT...,VERIZON,\n 140 WEST...,4,$16666,$193,12
4,BI-MONTHLY,SEPTEMBER/OCTOBER,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMI...,2,$2066,$500,73
5,BI-MONTHLY,NOVEMBER/DECEMBER,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES ACTION, INC",\n 353 HAMI...,2,$2066,$500,73
6,BI-MONTHLY,MARCH/APRIL,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMI...,8,$6886,$1496,105
7,BI-MONTHLY,MAY/JUNE,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMI...,8,$6886,$1496,105
8,BI-MONTHLY,JULY/AUGUST,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMI...,8,$6886,$1496,105
9,BI-MONTHLY,SEPTEMBER/OCTOBER,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMILTON...,"ENVIRONMENTAL ADVOCATES NY, INC",\n 353 HAMI...,8,$6886,$1496,105


In [None]:
# Leftover scratch cell: it only evaluates the literal 0.
# NOTE(review): no visible cell closes `browser` or stops `playwright`; consider
# replacing this cell with `await browser.close()` and `await playwright.stop()`.
0