In [3]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

import time

from bs4 import BeautifulSoup

#Ignore all warnings
import warnings
warnings.filterwarnings('ignore')

# Library to accessa nd work with excel
import openpyxl

def retrieve_link_list(file_path, sheet_name):
    """
    Retrieve a list of links from the second column of an Excel file, excluding the first row.

    Args:
        file_path (str): The path of the Excel file.
        sheet_name (str): The name of the sheet containing the data.

    Returns:
        list: A list of links retrieved from the second column, excluding the first row.

    Raises:
        FileNotFoundError: If the specified file path does not exist.
        openpyxl.utils.exceptions.InvalidFileException: If the specified file is not a valid Excel file.
        KeyError: If the specified sheet name does not exist in the Excel file.
    """

    # Load the workbook
    try:
        wb = openpyxl.load_workbook(file_path)
    except FileNotFoundError:
        raise FileNotFoundError(f"The file '{file_path}' does not exist.")
    except openpyxl.utils.exceptions.InvalidFileException:
        raise openpyxl.utils.exceptions.InvalidFileException(f"The file '{file_path}' is not a valid Excel file.")

    # Access the sheet
    try:
        sheet = wb[sheet_name]
    except KeyError:
        raise KeyError(f"The sheet '{sheet_name}' does not exist in the Excel file.")

    link_list = []

    # Iterate over rows and retrieve the links from the second column
    for row in sheet.iter_rows(min_row=2, values_only=True):
        proposal_link = row[1] + '/proposals'
        link_list.append(proposal_link)

    # Close the workbook
    wb.close()

    return link_list

file_path = './data_extracts/output_page_consolidated_1to8.xlsx'  # Replace with the actual file path
sheet_name = 'proposal_extract_list'  # Replace with the actual sheet name

link_list = retrieve_link_list(file_path, sheet_name)
print(link_list)


def perform_infinite_scroll_retrieve_code(link):
    """
    Open the Firefox browser, perform infinite scroll, and wait until the page loads completely.

    Args:
        link (str): The URL of the web page to scroll.

    Raises:
        TimeoutException: If the page loading times out.
    """

    # Set Firefox options
    options = Options()
    options.headless = False  # Set to True to run Firefox in headless mode

    # Create Firefox driver
    driver = webdriver.Firefox(options=options)

    try:
        # Open the web page
        driver.get(link)

        # Wait for the page to load completely
        time.sleep(5)

        # Scroll to the bottom of the page
        elements = driver.find_elements(By.CLASS_NAME,"MuiTableRow-root")
        initial_count = len(elements)
        print(initial_count)
        while True:
            last_element = elements[-1]
            driver.execute_script("arguments[0].scrollIntoView();", last_element)
            time.sleep(5)
            elements = driver.find_elements(By.CLASS_NAME,"MuiTableRow-root")
            current_count = len(elements)
            if current_count == initial_count:
                try:
                    button = driver.find_element(By.XPATH, "/html/body/div[1]/div/div/div[3]/div[2]/div[2]/div[3]/button")
                    if button.is_displayed():
                        button.click()
                        continue
                except:
                    pass
                    
                break
            initial_count = current_count

    except TimeoutException:
        print("Page loading timed out.")
    
    html_code = driver.page_source
    return BeautifulSoup(html_code, 'html.parser')

    

# Example usage
# for link in link_list[0]:
soup = perform_infinite_scroll_retrieve_code(link_list[0])



['https://messari.io/dao/aave-governance/proposals', 'https://messari.io/dao/gitcoin-governance/proposals', 'https://messari.io/dao/balancer-governance/proposals', 'https://messari.io/dao/compound-governance/proposals', 'https://messari.io/dao/synthetix-governance/proposals', 'https://messari.io/dao/uniswap-governance/proposals', 'https://messari.io/dao/arbitrum/proposals', 'https://messari.io/dao/Radicle/proposals', 'https://messari.io/dao/aragon/proposals', 'https://messari.io/dao/apecoin-governance/proposals', 'https://messari.io/dao/harvest-finance-governance/proposals', 'https://messari.io/dao/rarible-governance/proposals', 'https://messari.io/dao/audius/proposals', 'https://messari.io/dao/uma/proposals', 'https://messari.io/dao/bitdao-governance/proposals', 'https://messari.io/dao/illuvium-governance/proposals', 'https://messari.io/dao/cream-finance-governance/proposals', 'https://messari.io/dao/barnbridge/proposals', 'https://messari.io/dao/super-rare/proposals', 'https://messar

In [20]:
categories = set()
elements = soup.find_all('tr', class_="MuiTableRow-root css-93kbmv")
for element in elements:
    set_category = {element.select(".css-fv3lde")[0].text}
    categories = categories | set_category
categories

{'Active Vote',
 'Canceled',
 'Executed',
 'Failed',
 'Preliminary Discussion',
 'Queued',
 'Succeeded',
 'Upcoming Vote'}

In [33]:
try_driver = webdriver.Firefox()
try_driver.get("https://messari.io/governor/proposal/23c203bf-4282-470c-a572-f532cdc50bc1?daoSlug=aave-governance&daoTab=proposals")

button_xpath = "/html/body/div[1]/div/div/div[3]/div/div[2]/div[2]/div/div[4]/div[5]/div/div[2]/div/div[3]/button"
                
time.sleep(5)
button = try_driver.find_element("xpath", button_xpath)


# Execute JavaScript to click on the button
try_driver.execute_script("arguments[0].click();", button)
wait = WebDriverWait(try_driver, 10)

while True:
    
    load_more_button_xpath = "/html/body/div[4]/div[3]/div/div/div/button/span"
    load_more_button = wait.until(EC.visibility_of_element_located((By.XPATH, load_more_button_xpath)))
    try_driver.execute_script("arguments[0].click();", load_more_button)
    tr_items = try_driver.find_elements(By.CLASS_NAME, "MuiTableRow-root")
    print(len(tr_items))
    

print("Number of table_row_items:", len(tr_items))


25
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
45
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
65
85
85
85
85
85
85
85
85
85
85
85
85
85
85
85
85
85
85
85
105
105
105
105
105
105
105
105
105
105
105
105
105
105
105
105
105
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
145
165
165
165
165
165
165
165
165
165
165
165
165
165
165
185
185
185
185
185
185
185
185
185
185
185
185
185
205
205
205
205
205
205
205
205
205
205
205
205
225
225
225
225
225
225
225
225
225
225
225
245
245
245
245
245
245
245
245
245
245
245
245
245
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
265
285
285
285
285
285
285
285
285
285
305
305
305
305
305
305
305
305
305
325
325
325
325
325
325
325
345
345
345
3

TimeoutException: Message: 
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:183:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:395:5
element.find/</<@chrome://remote/content/marionette/element.sys.mjs:134:16
