Loading Quotes

In [25]:
import pandas as pd

# Load only the Company_Name and Quote_Code of the latest Publication_Date
quotes_df = pd.read_csv('quotes_potential.csv', usecols=['Company_Name', 'Quote_Code', 'Publication_Date'])
quotes_df['Publication_Date'] = pd.to_datetime(quotes_df['Publication_Date'])
latest_date = quotes_df['Publication_Date'].max()
latest_quotes_df = quotes_df[quotes_df['Publication_Date'] == latest_date][['Quote_Code', 'Company_Name']]
print(f"✅ Loaded {len(latest_quotes_df)} quotes from the latest published date: {latest_date.strftime('%Y-%m-%d')}")

✅ Loaded 86 quotes from the latest published date: 2026-02-07


In [26]:
import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
}

shares

In [29]:
shares = pd.DataFrame([])
for quote in latest_quotes_df['Quote_Code']:
    url = f'https://www.ilboursa.com/marches/societe/{quote}'
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    lst_actionnaires = soup.find('span', {'id': 'lstActionnaires'})

    if lst_actionnaires:
        raw_data = lst_actionnaires.text.strip()
        shareholders = []
        for entry in raw_data.split(';'):
            if '*' in entry:
                name, percentage = entry.split('*')
                percentage = percentage.replace(',', '.').strip()  # Remove % and whitespace
                shareholders.append({
                    'Shareholder': name.strip().upper(),
                    'Percentage': float(percentage)
                })
        
        df = pd.DataFrame(shareholders)
        df['Quote_Code'] = quote
        shares = pd.concat([shares, df], ignore_index=True)
        print(f"\n✅ Found {len(df)} shareholders:")
        print(df)
    else:
        print("❌ No shareholder data found")
    
shares["Publication_Date"] = pd.to_datetime('today').strftime('%Y-%m-%d')
# Loading existing shares.csv if it exists
try:
    existing_shares = pd.read_csv('shares.csv')
    shares = pd.concat([existing_shares, shares], ignore_index=True)
    shares.drop_duplicates(keep='last', inplace=True)
except FileNotFoundError:
    pass
# Saving to shares.csv
shares.to_csv('shares.csv', index=False)


✅ Found 1 shareholders:
     Shareholder  Percentage Quote_Code
0  GROUPE KILANI       99.16      ADWYA

✅ Found 2 shareholders:
                    Shareholder  Percentage Quote_Code
0                ZOUBEIR CHAIEB       67.77      AETEC
1  PUBLIC SUR LA PLACE DE TUNIS       32.23      AETEC

✅ Found 4 shareholders:
                    Shareholder  Percentage Quote_Code
0     AIR LIQUIDE INTERNATIONAL       59.11         AL
1             BANQUE DE TUNISIE       16.46         AL
2  PUBLIC SUR LA PLACE DE TUNIS       13.37         AL
3                    GROUPE BNA       11.06         AL

✅ Found 4 shareholders:
             Shareholder  Percentage Quote_Code
0        ASSURANCE COMAR       30.80         AB
1            PGI HOLDING       20.85         AB
2  STE ENNAKL AUTOMOBILE        7.93         AB
3         EKUITY CAPITAL        5.00         AB

✅ Found 0 shareholders:
Empty DataFrame
Columns: [Quote_Code]
Index: []

✅ Found 3 shareholders:
                    Shareholder  Percentag

performance

In [33]:
from io import StringIO

performance = pd.DataFrame([])

for quote in latest_quotes_df['Quote_Code']:
    # FIX 1: Remove the space in URL!
    url = f'https://www.ilboursa.com/marches/societe/{quote}'
    
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Check for HTTP errors
        
        tables = pd.read_html(StringIO(response.text))
        
        # Skip if no tables found
        if len(tables) == 0:
            print(f"⚠️  No tables found for {quote}")
            continue
            
        df = tables[-1]
        
        # Skip if table is empty or doesn't have expected structure
        if df.empty or 'Unnamed: 0' not in df.columns:
            print(f"⚠️  Unexpected table structure for {quote}")
            continue
        
        # Clean up column name
        df = df.rename(columns={'Unnamed: 0': 'Item'})
        
        # Unpivot/melt
        df_melted = df.melt(
            id_vars=['Item'],
            var_name='Year',
            value_name='Value'
        )
        
        # Add quote_code
        df_melted['Quote_Code'] = quote
        
        # Reorder columns
        df_melted = df_melted[['Quote_Code', 'Year', 'Item', 'Value']]
        
        # FIX 2: Clean ONLY this batch, not entire performance dataframe
        df_melted['Value'] = (
            df_melted['Value']
            .astype(str)
            .str.replace(' ', '', regex=False)
            .str.replace(',', '.', regex=False)
            .str.replace('%', '', regex=False)
            .replace(['-', 'nan', 'None', ''], pd.NA)
        )
        df_melted['Value'] = pd.to_numeric(df_melted['Value'], errors='coerce')
        
        # Append
        performance = pd.concat([performance, df_melted], ignore_index=True)
        
        print(f"✅ Processed {quote}: {len(df_melted)} rows")
        
    except Exception as e:
        print(f"❌ Error with {quote}: {e}")
        continue
performance["Publication_Date"] = pd.to_datetime('today').strftime('%Y-%m-%d')
# Loading existing performance.csv if it exists
try:
    existing_performance = pd.read_csv('performance.csv')
    performance = pd.concat([existing_performance, performance], ignore_index=True)
    performance.drop_duplicates(keep='last', inplace=True)
except FileNotFoundError:
    pass
# Saving to performance.csv
performance.to_csv('performance.csv', index=False)

✅ Processed ADWYA: 35 rows
✅ Processed AETEC: 35 rows
✅ Processed AL: 35 rows
✅ Processed AB: 35 rows
✅ Processed AMS: 35 rows
✅ Processed ATB: 35 rows
✅ Processed ATL: 35 rows
✅ Processed ARTES: 35 rows
✅ Processed ASSAD: 35 rows
✅ Processed ASSMA: 35 rows
✅ Processed AST: 35 rows
✅ Processed TJARI: 35 rows
✅ Processed TJL: 35 rows
✅ Processed BT: 35 rows
✅ Processed BNA: 35 rows
✅ Processed BL: 35 rows
✅ Processed BHASS: 35 rows
✅ Processed BH: 35 rows
✅ Processed BHL: 35 rows
✅ Processed BIAT: 35 rows
✅ Processed BTE: 35 rows
✅ Processed CC: 35 rows
✅ Processed CELL: 35 rows
✅ Processed CREAL: 35 rows
✅ Processed CIL: 35 rows
✅ Processed SCB: 35 rows
✅ Processed CITY: 35 rows
✅ Processed DH: 35 rows
✅ Processed LSTR: 35 rows
✅ Processed NAKL: 35 rows
✅ Processed SOKNA: 35 rows
✅ Processed ECYCL: 35 rows
✅ Processed GIF: 35 rows
✅ Processed HL: 35 rows
✅ Processed XABYT: 35 rows
✅ Processed ICF: 35 rows
✅ Processed LNDOR: 35 rows
✅ Processed MAG: 35 rows
✅ Processed AMV: 35 rows
✅ Pr