#

## Financial Statements

In [7]:
import os
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def get_financial_statements(company_name):
    # Initialize Chrome driver
    driver = webdriver.Chrome()

    try:
        # Open the target webpage
        url = "https://investors.sgx.com/stock-screener"
        driver.get(url)

        # Wait for the page to load and accept Cookies
        time.sleep(5)
        try:
            accept_cookies_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept Cookies')]"))
            )
            accept_cookies_button.click()
            print("Clicked 'Accept Cookies'")
        except:
            print("No 'Accept Cookies' button found, continuing")

        # Wait for the page to fully load
        time.sleep(5)

        # Click on the page body to ensure it is fully loaded (if necessary)
        driver.find_element(By.TAG_NAME, "body").click()
        print("Clicked on page body")
        time.sleep(2)

        # Enter the company name in the search box and search
        search_box = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Search Keywords, eg: S68, 1.12']"))
        )
        search_box.send_keys(company_name)
        search_box.send_keys(Keys.ENTER)
        time.sleep(5)

        # Click on the first search result link matching the company name
        first_result = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.LINK_TEXT, company_name))
        )
        first_result.click()
        print(f"Clicked on the first '{company_name}'")
        time.sleep(5)

        # Click on the "Financial Statements" tab on the company page
        financial_statements_tab = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//li[contains(., 'Financial Statements')]"))
        )
        financial_statements_tab.click()
        print("Clicked 'Financial Statements' tab")
        time.sleep(5)

        # Click "Ratios" and wait for data to load
        ratios_tab = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//span[text()='Ratios']"))
        )
        ratios_tab.click()
        print("Clicked 'Ratios'")
        time.sleep(5)

        # Wait for the table to load
        table = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.XPATH, "//table"))
        )
        print("Table loaded.")

        # Extract table headers
        header_rows = driver.find_elements(By.XPATH, "//table//thead//tr")
        headers = []
        for header_row in header_rows:
            header_cells = header_row.find_elements(By.XPATH, ".//th")
            row_headers = [cell.get_attribute('innerText').strip() for cell in header_cells]
            headers.append(row_headers)

        # Transpose headers list so each sublist corresponds to a column name
        headers = list(map(list, zip(*headers)))

        # Merge multi-level headers, handling merged cells
        merged_headers = []
        for header in headers:
            merged_header = ' '.join([h for h in header if h]).strip()
            merged_headers.append(merged_header)

        print("Merged Headers:", merged_headers)

        # Extract data rows
        data = []
        rows = driver.find_elements(By.XPATH, "//table//tbody//tr")
        for row in rows:
            # Get the row header (indicator name), located in the first <th> element
            try:
                row_header = row.find_element(By.XPATH, ".//th").get_attribute('innerText').strip()
            except:
                # If no <th> element, skip the row
                continue
            # Get all data cells in the row
            cols = row.find_elements(By.XPATH, ".//td")
            row_data = [col.get_attribute('innerText').strip() for col in cols]
            # Combine the row header and data
            data.append([row_header] + row_data)

        # Ensure the number of headers matches the number of data columns
        if len(merged_headers) != len(data[0]):
            print(f"Number of headers ({len(merged_headers)}) does not match number of data columns ({len(data[0])}), adjusting headers.")
            # Trim or pad headers to match data columns
            merged_headers = merged_headers[:len(data[0])]
            if len(merged_headers) < len(data[0]):
                merged_headers += [''] * (len(data[0]) - len(merged_headers))

        # Create DataFrame
        ratios_df = pd.DataFrame(data, columns=merged_headers)

        # Rename DataFrame columns to 'Indicator', '2020', '2021', '2022', '2023', '2024'
        new_column_names = ['Indicator', '2020', '2021', '2022', '2023', '2024']
        ratios_df.columns = new_column_names[:len(ratios_df.columns)]

        print(ratios_df)

        # Ensure the directory exists
        output_dir = 'FinancialStatements'
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Save the DataFrame to a CSV file using relative path
        filename = os.path.join(output_dir, company_name + '_financial_statements.csv')
        ratios_df.to_csv(filename, index=False)
        print(f"Data saved to '{filename}'.")

    except Exception as e:
        print("An exception occurred:", e)
        # Save page source for debugging using relative path
        with open('page_source.html', 'w', encoding='utf-8') as f:
            f.write(driver.page_source)
        print("Page source saved to 'page_source.html'.")

    finally:
        driver.quit()

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=130.0.6723.117)
Stacktrace:
0   chromedriver                        0x0000000102b495dc cxxbridge1$str$ptr + 3653648
1   chromedriver                        0x0000000102b41e3c cxxbridge1$str$ptr + 3623024
2   chromedriver                        0x00000001025ac100 cxxbridge1$string$len + 88404
3   chromedriver                        0x0000000102587b78 core::str::slice_error_fail::h1cab30ac4b13c655 + 3792
4   chromedriver                        0x0000000102614664 cxxbridge1$string$len + 515768
5   chromedriver                        0x0000000102627630 cxxbridge1$string$len + 593540
6   chromedriver                        0x00000001025e30fc cxxbridge1$string$len + 313680
7   chromedriver                        0x00000001025e3d4c cxxbridge1$string$len + 316832
8   chromedriver                        0x0000000102b1464c cxxbridge1$str$ptr + 3436672
9   chromedriver                        0x0000000102b17988 cxxbridge1$str$ptr + 3449788
10  chromedriver                        0x0000000102afba5c cxxbridge1$str$ptr + 3335312
11  chromedriver                        0x0000000102b1824c cxxbridge1$str$ptr + 3452032
12  chromedriver                        0x0000000102aecce8 cxxbridge1$str$ptr + 3274524
13  chromedriver                        0x0000000102b329dc cxxbridge1$str$ptr + 3560464
14  chromedriver                        0x0000000102b32b58 cxxbridge1$str$ptr + 3560844
15  chromedriver                        0x0000000102b41ad4 cxxbridge1$str$ptr + 3622152
16  libsystem_pthread.dylib             0x000000018c5d1f94 _pthread_start + 136
17  libsystem_pthread.dylib             0x000000018c5ccd34 thread_start + 8
