In [1]:
import requests
from bs4 import BeautifulSoup

url = "https://www.srnav.com/reports"
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [25]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

url = "https://www.srnav.com/reports"
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(url)
time.sleep(5)  # Wait for JS to load content

soup = BeautifulSoup(driver.page_source, "html.parser")
container = soup.find("div", class_="col-span-full row-span-1 row-start-2 grid h-fit grid-cols-subgrid")
if container:
    rows = container.find_all("div", class_="cursor-grab")
    print(f"Found {len(rows)} rows")
    for row in rows:
        print(row.get_text(strip=True))
else:
    print("Container not found")

driver.quit()

Found 554 rows
(M6) Metropole TelevisionFranceServicesMedia & Entertainment283-38097KPMG17/03/2025
74SoftwareFranceTechnology & CommunicationsSoftware & IT Services61-163102ACA Nexia & Forvis Mazars24/03/2025
AalbertsNetherlandsResource TransformationIndustrial Machinery & Goods44-9854Deloitte27/02/2025
Aareal BankGermanyFinancialsConsumer Finance63-262199KPMG27/03/2025
AB InBevBelgiumFood & BeverageAlcoholic Beverages145-18136PwC26/02/2025
Abertis InfraestructurasSpainInfrastructureEngineering & Construction Services54-268214KPMG27/02/2025
ABN AmroNetherlandsFinancialsCommercial Banks215-361146EY12/03/2025
AccionaSpainInfrastructureEngineering & Construction Services17-455438KPMG12/02/2025
AccorFranceServicesHotels & Lodging118-266148PwC28/03/2025
AceaItalyInfrastructureElectric Utilities & Power Generators75-182107PwC07/04/2025
AcerinoxSpainExtractives & Mineral ProcessingIron & Steel Producers57-184127PwC28/02/2025
Achmea HoldingNetherlandsFinancialsInsurance54-217163EY15/04/2025
Ad

In [29]:
import csv

data = []

for row in rows:
    company = row.find("span", class_="text-xs font-medium")
    company_name = company.get_text(strip=True) if company else ""

    columns = row.find_all(lambda tag: tag.name == "span" and tag.has_attr("class") and "border-primary-white-100" in tag["class"])
    col_texts = [" ".join(col.stripped_strings) for col in columns]

    pages_div = row.find("div", class_="hidden md:flex")
    page_links = []
    if pages_div:
        page_links = [a['href'] for a in pages_div.find_all("a", href=True)]

    total_pages = ""
    total_pages_div = row.find("div", class_="hidden text-sm md:flex")
    if total_pages_div:
        total_pages_span = total_pages_div.find("span")
        if total_pages_span:
            total_pages = total_pages_span.get_text(strip=True)

    # Split page links into two columns
    page_link_1 = page_links[0] if len(page_links) > 0 else ""
    page_link_2 = page_links[1] if len(page_links) > 1 else ""

    data.append({
        "Company": company_name,
        "Country": col_texts[0] if len(col_texts) > 0 else "",
        "Sector": col_texts[1] if len(col_texts) > 1 else "",
        "Industry": col_texts[2] if len(col_texts) > 2 else "",
        # "Page Link 1": page_link_1,
        # "Page Link 2": page_link_2,
        "Total Pages": total_pages,
        "Auditor": col_texts[3] if len(col_texts) > 3 else "",
        "Published": col_texts[4] if len(col_texts) > 4 else "",
    })

In [None]:
# Write to CSV
with open("srnav_reports.csv", "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=data[0].keys())
    writer.writeheader()
    writer.writerows(data)

In [34]:
import pandas as pd

# Read the CSV file into a pandas DataFrame
df = pd.read_csv("srnav_reports.csv", encoding="utf-8")

print(len(df))
# Display the first few rows
df.head()

554


Unnamed: 0,Company,Country,Sector,Industry,Total Pages,Auditor,Published
0,(M6) Metropole Television,France,Services,Media & Entertainment,97,97,KPMG
1,74Software,France,Technology & Communications,Software & IT Services,102,102,ACA Nexia & Forvis Mazars
2,Aalberts,Netherlands,Resource Transformation,Industrial Machinery & Goods,54,54,Deloitte
3,Aareal Bank,Germany,Financials,Consumer Finance,199,199,KPMG
4,AB InBev,Belgium,Food & Beverage,Alcoholic Beverages,36,36,PwC


In [None]:
industry_counts = df.groupby('Industry').filter(lambda x: len(x) > 20)
print(industry_counts['Industry'].value_counts())
display(industry_counts[(industry_counts['Industry'] == 'Commercial Banks')].head())


Industry
Commercial Banks                         71
Electric Utilities & Power Generators    31
Software & IT Services                   27
Insurance                                26
Chemicals                                24
Industrial Machinery & Goods             22
Engineering & Construction Services      22
Name: count, dtype: int64


Unnamed: 0,Company,Country,Sector,Industry,Total Pages,Auditor,Published
6,ABN Amro,Netherlands,Financials,Commercial Banks,146,146,EY
12,Addiko Bank,Austria,Financials,Commercial Banks,138,138,KPMG
27,Aktia Bank,Finland,Financials,Commercial Banks,66,66,KPMG
34,Allied Irish Banks (AIB) Group,Ireland,Financials,Commercial Banks,78,78,PwC
65,Banca Generali,Italy,Financials,Commercial Banks,196,196,KPMG


In [38]:
eupg = industry_counts[(industry_counts['Industry'] == 'Electric Utilities & Power Generators')]
display(eupg.head())
eupg.to_csv("electric_utilities_power_generators.csv", index=False, encoding="utf-8")

Unnamed: 0,Company,Country,Sector,Industry,Total Pages,Auditor,Published
9,Acea,Italy,Infrastructure,Electric Utilities & Power Generators,107,107,PwC
18,"ÄEZ, a.s.",Czech Republic,Infrastructure,Electric Utilities & Power Generators,152,152,Deloitte
32,Alliander,Netherlands,Infrastructure,Electric Utilities & Power Generators,89,89,PwC
165,E.ON,Germany,Infrastructure,Electric Utilities & Power Generators,53,53,KPMG
169,Edison,Italy,Infrastructure,Electric Utilities & Power Generators,159,159,KPMG


In [39]:
# Get all unique industries in the filtered DataFrame
industries = industry_counts['Industry'].unique()

for industry in industries:
    # Filter for each industry
    group = industry_counts[industry_counts['Industry'] == industry]
    # Create a safe filename
    safe_name = industry.replace("&", "and").replace("/", "_").replace(" ", "_")
    filename = f"{safe_name}.csv"
    # Export to CSV
    group.to_csv(filename, index=False, encoding="utf-8")
    print(f"Exported {filename}")

Exported Software_and_IT_Services.csv
Exported Industrial_Machinery_and_Goods.csv
Exported Engineering_and_Construction_Services.csv
Exported Commercial_Banks.csv
Exported Electric_Utilities_and_Power_Generators.csv
Exported Insurance.csv
Exported Chemicals.csv
