In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Step 1: Read the CSV of ERP links and pull the 'ERP Link' column out as a plain list.
# The path is a raw string so the Windows backslashes are not treated as escapes.
df_links = pd.read_csv(filepath_or_buffer=r"H:\myzone\ERP Link.csv")
erp_links = df_links.loc[:, 'ERP Link'].to_list()

# Step 2: Initialize a list to hold extracted data.
# One dict per link is collected here; the list is created right before the
# loop so that re-running these steps starts from an empty list.
student_data = []

# Step 3: Loop through each ERP link and extract student info.
# Every link yields exactly one record: the scraped values, 'Not found' for
# pieces missing from the page, or 'Error' in every field when the request or
# the parsing fails.
for link in erp_links:
    try:
        response = requests.get(link, timeout=10)
        # Treat HTTP 4xx/5xx responses as failures instead of parsing an
        # error page as if it were a student page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Step 4a: Extract student name from <span> with dynamic ID
        # (the id only has a fixed prefix, so match on that prefix).
        name_tag = soup.find('span', id=lambda x: x and x.startswith('GridView4_lblName_'))
        name = name_tag.text.strip() if name_tag is not None else 'Not found'

        # Step 4b: Extract phone and email from the first table row that
        # contains an '@' in one of its cells.
        table = soup.find('table', {'id': 'GridView4'})
        # find() returns None when the table is absent; treat that as "no
        # rows" so a name found above is kept rather than discarded by the
        # except branch.
        rows = table.find_all('tr') if table is not None else []

        data_row = next(
            (row for row in rows if any('@' in td.text for td in row.find_all('td'))),
            None,
        )

        if data_row is not None:
            tds = data_row.find_all('td')
            # Fixed cell positions within the row: 4 = phone, 5 = alternate
            # phone, 6 = email. A shorter row falls back to 'Not found'.
            phone = tds[4].text.strip() if len(tds) > 4 else 'Not found'
            alt_phone = tds[5].text.strip() if len(tds) > 5 else 'Not found'
            email = tds[6].text.strip() if len(tds) > 6 else 'Not found'
        else:
            phone = alt_phone = email = 'Not found'

        # Step 5: Append extracted data
        student_data.append({
            'ERP Link': link,
            'Name': name,
            'Phone': phone,
            'Alt Phone': alt_phone,
            'Email': email
        })

    except Exception as e:
        # Deliberately broad: one bad link must not abort the whole batch.
        # The failure is reported and recorded as an 'Error' row so the output
        # keeps one row per input link.
        print(f"❌ Error while processing {link}: {e}")
        student_data.append({
            'ERP Link': link,
            'Name': 'Error',
            'Phone': 'Error',
            'Alt Phone': 'Error',
            'Email': 'Error'
        })

# Step 6: Build the result table from the collected records and write it to an
# Excel workbook (without the DataFrame index), then confirm on stdout.
df_result = pd.DataFrame(data=student_data)
df_result.to_excel(
    excel_writer=r"H:\myzone\student_contact_list.xlsx",
    index=False,
)

print("✅ Done! Extracted data saved to 'H:\\myzone\\student_contact_list.xlsx'")


✅ Done! Extracted data saved to 'H:\myzone\student_contact_list.xlsx'


In [1]:
# Show the kernel's current working directory (bare `pwd` is resolved by
# IPython's automagic; this is a sanity check, not part of the scraping steps).
pwd


'h:\\myzone'