In [7]:
import requests
from bs4 import BeautifulSoup
import openpyxl

In [8]:
def get_table_from_website(url, timeout=10):
  """Download ``url`` and return the first HTML table on the page as text rows.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
      one, ``requests`` waits indefinitely on an unresponsive server.

  Returns:
    A list with one entry per ``<tr>``; each entry is a list holding the
    whitespace-stripped text of that row's ``<td>``/``<th>`` cells.
    ``None`` is returned (after printing a message) when the request fails,
    the status code is not 200, or the page contains no ``<table>``.
  """
  try:
    response = requests.get(url, timeout=timeout)
  except requests.RequestException as exc:
    # Report transport failures (DNS, refused connection, timeout) the same
    # way as every other failure in this function: print and return None.
    print(f"Error getting website content: {exc}")
    return None

  if response.status_code != 200:
    print(f"Error getting website content: {response.status_code}")
    return None

  soup = BeautifulSoup(response.content, 'html.parser')
  table = soup.find('table')
  if table is None:
    print("No table found on the website.")
    return None

  # Header (<th>) and data (<td>) cells are collected together so the header
  # row comes back as an ordinary first row.
  return [
    [cell.text.strip() for cell in row.find_all(['td', 'th'])]
    for row in table.find_all('tr')
  ]

In [9]:
# Scrape the first table on the Inbank investor page and echo it row by row.
url = "https://inbank.eu/investors"
table_data = get_table_from_website(url)

# `or []` skips the loop when scraping failed (None) or the table is empty.
for row in table_data or []:
  print(row)

['', '2019', '2020', '2021', '2022', '2023']
['Net interest income', '31.7', '36', '40.0', '48.1', '53.4']
['Net rental and other income', '0.3', '−1.1', '2.6', '3.3', '6.8']
['Total net income', '32.0', '34.8', '42.6', '51.4', '60.2']
['Operating expenses', '16.0', '17.3', '24.0', '30.3', '36.9']
['Operating profit', '16.0', '17.5', '18.6', '21.0', '23.2']
['Impairment losses on loans', '6.0', '11.5', '6.7', '11.2', '13.2']
['Net profit', '10.0', '5.9', '11.0', '21.1', '10.2']
['GMV', '284.6', '300.9', '523.1', '576.9', '687.7']
['Total assets', '462.8', '490.0', '787.0', '1 020.5', '1 320.6']
['Loan and rental portfolio', '324.2', '389.2', '640.5', '815.9', '1 030.2']
['Deposit portfolio', '377.5', '391.3', '617.9', '828.9', '1 081.6']
["Owner's equity", '47.3', '61.2', '79.0', '101.9', '124.1']
['Return on equity (ROE)', '23.9%', '10.8%', '15.6%', '23.3%', '9.0%']
['Return on total assets', '2.6%', '1.2%', '1.7%', '2.3%', '0.9%']
['Net interest margin', '8.5%', '8.0%', '6.6%', '5.9%

In [10]:
def save_table_to_excel(table_data, filename):
  """Write ``table_data`` into a new workbook and save it as ``filename``.

  Args:
    table_data: Sequence of rows, each a sequence of cell values. A falsy
      value (``None`` or an empty list) means there is nothing to write.
    filename: Path handed to the workbook's ``save`` method.

  Returns:
    ``None``. A message is printed either way: a confirmation after saving,
    or a notice that there was no data.
  """
  if table_data:
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "Extracted Table Data"

    # Worksheet coordinates are 1-based, hence start=1 on both axes.
    for excel_row, cells in enumerate(table_data, start=1):
      for excel_col, text in enumerate(cells, start=1):
        sheet.cell(row=excel_row, column=excel_col).value = text

    workbook.save(filename)
    print(f"Table data saved to Excel file: {filename}")
  else:
    print("No table data to save.")

In [11]:
# filename = "inbank_financial_performance.xlsx"

# if table_data:
#   save_table_to_excel(table_data, filename)

In [12]:
def get_table_from_website(url, table_id=None, table_class=None, timeout=10):
  """Download ``url`` and return one HTML table from it as text rows.

  Table selection:
    * ``table_id`` given      -> the ``<table>`` with that ``id``.
    * else ``table_class``    -> the first ``<table>`` carrying that class.
    * neither given           -> the first ``<table>`` on the page, so a
      plain ``get_table_from_website(url)`` call keeps working.
  When both are supplied, ``table_id`` wins and ``table_class`` is ignored.

  Args:
    url: Address of the page to fetch.
    table_id: Optional ``id`` attribute of the wanted table.
    table_class: Optional CSS class of the wanted table.
    timeout: Seconds passed to ``requests.get`` as its ``timeout``. Without
      one, ``requests`` waits indefinitely on an unresponsive server.

  Returns:
    A list with one entry per ``<tr>``; each entry is a list holding the
    whitespace-stripped text of that row's ``<td>``/``<th>`` cells.
    ``None`` is returned (after printing a message) when the request fails,
    the status code is not 200, or no matching table exists.
  """
  try:
    # Send an HTTP GET request to the website
    response = requests.get(url, timeout=timeout)
  except requests.RequestException as exc:
    # Report transport failures (DNS, refused connection, timeout) the same
    # way as every other failure in this function: print and return None.
    print(f"Error getting website content: {exc}")
    return None

  # Check if request was successful
  if response.status_code != 200:
    print(f"Error getting website content: {response.status_code}")
    return None

  # Parse the HTML content
  soup = BeautifulSoup(response.content, 'html.parser')

  # Find the desired table using ID or class name, else take the first one
  if table_id:
    table = soup.find('table', id=table_id)
  elif table_class:
    table = soup.find('table', class_=table_class)
  else:
    table = soup.find('table')

  if table is None:
    print("Desired table not found on the website.")
    return None

  # Header (<th>) and data (<td>) cells are collected together so the header
  # row comes back as an ordinary first row.
  return [
    [cell.text.strip() for cell in row.find_all(['td', 'th'])]
    for row in table.find_all('tr')
  ]



In [16]:
# Same page as before, but pick the table by its CSS class instead of
# relying on it being the first <table> in the document.
table_class = "w-full"
url = "https://inbank.eu/investors"

table_data = get_table_from_website(url, table_class=table_class)

# Nothing is printed when the scrape returned None or an empty list.
for row in (table_data if table_data else []):
  print(row)

['', '2019', '2020', '2021', '2022', '2023']
['Net interest income', '31.7', '36', '40.0', '48.1', '53.4']
['Net rental and other income', '0.3', '−1.1', '2.6', '3.3', '6.8']
['Total net income', '32.0', '34.8', '42.6', '51.4', '60.2']
['Operating expenses', '16.0', '17.3', '24.0', '30.3', '36.9']
['Operating profit', '16.0', '17.5', '18.6', '21.0', '23.2']
['Impairment losses on loans', '6.0', '11.5', '6.7', '11.2', '13.2']
['Net profit', '10.0', '5.9', '11.0', '21.1', '10.2']
['GMV', '284.6', '300.9', '523.1', '576.9', '687.7']
['Total assets', '462.8', '490.0', '787.0', '1 020.5', '1 320.6']
['Loan and rental portfolio', '324.2', '389.2', '640.5', '815.9', '1 030.2']
['Deposit portfolio', '377.5', '391.3', '617.9', '828.9', '1 081.6']
["Owner's equity", '47.3', '61.2', '79.0', '101.9', '124.1']
['Return on equity (ROE)', '23.9%', '10.8%', '15.6%', '23.3%', '9.0%']
['Return on total assets', '2.6%', '1.2%', '1.7%', '2.3%', '0.9%']
['Net interest margin', '8.5%', '8.0%', '6.6%', '5.9%

In [22]:
from selenide import webdriver, open_url, click, wait_for

def get_table_from_website(url, button_selector, table_selector):
  """Open ``url`` in a browser, click a button, then scrape one table.

  NOTE(review): this cell has never run successfully - the recorded output
  for it is an ImportError on the ``selenide`` import, so ``open_url``,
  ``click`` and ``wait_for`` are unverified names. Their behaviour described
  below is what the call sites suggest, not something confirmed here.

  Args:
    url: Page address handed to ``open_url``.
    button_selector: Selector handed to ``click``.
    table_selector: Selector handed to ``wait_for`` and then to
      BeautifulSoup's ``select_one``, which interprets it as a CSS selector.

  Returns:
    A list with one entry per ``<tr>`` of the selected element; each entry is
    a list of the whitespace-stripped text of that row's ``<td>``/``<th>``
    cells. ``None`` (after printing a message) when ``select_one`` finds
    nothing.
  """
  # Open the website using Selenide
  open_url(url)

  # Click the button using the provided selector
  click(button_selector)

  # Wait for the desired table (presumably until it is present/visible -
  # TODO confirm against the library); the 10 is passed as ``timeout``
  wait_for(table_selector, timeout=10)

  # Get the HTML content of the entire page
  # NOTE(review): ``driver`` is not defined or imported anywhere in this
  # cell, so this line would raise NameError even if the import worked.
  html_content = driver.page_source

  # Parse the HTML content using BeautifulSoup
  soup = BeautifulSoup(html_content, 'html.parser')

  # Find the desired table using the provided selector (CSS syntax)
  table = soup.select_one(table_selector)

  # Check if a table was found
  if table:
    # Extract table data (rows and cells)
    table_data = []
    rows = table.find_all('tr')
    for row in rows:
      row_data = []
      # Header and data cells are collected together, in document order
      cells = row.find_all(['td', 'th'])
      for cell in cells:
        # Extract cell text and strip any leading/trailing whitespace
        cell_text = cell.text.strip()
        row_data.append(cell_text)
      table_data.append(row_data)
    return table_data
  else:
    print("Desired table not found on the website.")
  return None


ImportError: cannot import name 'webdriver' from 'selenide' (/usr/local/lib/python3.11/site-packages/selenide/__init__.py)

In [None]:
# NOTE(review): this looks like an auto-generated element id; it may differ
# between page loads - confirm, or switch to a more stable selector.
button_selector = "#headlessui-tabs-tab-623477"
# BeautifulSoup's select_one takes a CSS selector. A bare "w-full" is a type
# selector (it matches a <w-full> element), so the class needs the dot form.
table_selector = "table.w-full"

# Extract data from the table appearing after clicking the button.
# `url` is the module-level value assigned in an earlier cell.
table_data = get_table_from_website(url, button_selector, table_selector)

if table_data:
  # Print the extracted table data
  for row in table_data:
    print(row)