In [None]:
import re
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Step 1: Load the page
url = "https://www.eduonix.com/gpt-tools-for-marketing-edegree"
# The timeout keeps the cell from hanging indefinitely on a stalled connection;
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the course page (which would be misreported below as "section not found").
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Step 2: Find curriculum section
curriculum_div = soup.find("div", id="curriculum")

if curriculum_div:
    # Step 3: Extract section headers and lessons
    syllabus_lines = []

    sections = curriculum_div.find_all("div", class_="course-section")
    for section in sections:
        # Section title (prefixed with a newline so sections are separated by a blank line)
        section_title_tag = section.find("div", class_="section-header")
        if section_title_tag:
            section_title = section_title_tag.get_text(strip=True)
            syllabus_lines.append(f"\n{section_title}")

        # Lessons inside this section, one bullet per non-empty item
        lessons = section.find_all("div", class_="section-item")
        for lesson in lessons:
            lesson_text = lesson.get_text(strip=True)
            if lesson_text:
                syllabus_lines.append(f"- {lesson_text}")

    # Step 4: Format and save
    # strip() drops the blank line that the first section title's "\n" prefix
    # would otherwise leave at the very top of the cell.
    syllabus_text = "\n".join(syllabus_lines).strip()
    df = pd.DataFrame([syllabus_text], columns=["Syllabus"])
    df.to_excel("C:\\Users\\taslim.siddiqui\\Downloads\\eduonix_syllabus_output.xlsx", index=False)
    print("✅ Course syllabus extracted and saved.")
else:
    print("❌ Curriculum section not found on the page.")


✅ Extraction complete. Preview:




In [16]:
from bs4 import BeautifulSoup
import pandas as pd

# Load HTML content
with open("C:\\Users\\taslim.siddiqui\\Downloads\\div id=syllabusAcc class=edegreeLiv.txt", 'r', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html, 'html.parser')

# Output fragments are collected in a list and joined once at the end rather
# than growing a string with repeated "+=".
syllabus_parts = []

# Find all module containers
module_wraps = soup.find_all('div', class_='E-degSylWrap')

for module_wrap in module_wraps:
    try:
        # A module needs both its name and its heading; skip the container otherwise.
        module_title_elem = module_wrap.find('h4', class_='curriculumName')
        module_num_elem = module_wrap.find('div', class_='moduleHeading')
        if not module_title_elem or not module_num_elem:
            continue

        module_title = module_title_elem.get_text(strip=True)
        module_num = module_num_elem.get_text(strip=True)
        full_module_title = f"{module_num}: {module_title}"

        # Module header: "<heading>: <name>" followed by the name on its own line.
        syllabus_parts.append(f"\n\n{full_module_title}\n{module_title}\n")

        # Find all sections in this module
        sections = module_wrap.find_all('div', class_='sectionHeader')

        for section in sections:
            try:
                sec_info = section.find('span', class_='secInfo')
                if not sec_info:
                    continue

                section_title_b = sec_info.find('b')
                section_title = section_title_b.get_text(strip=True) if section_title_b else "Untitled Section"
                syllabus_parts.append(f"\n{section_title}\n")

                # The header's button points at the element holding the lessons
                # through data-target="#<id>"; without it the lessons cannot be located.
                button = section.find('button')
                if not button or 'data-target' not in button.attrs:
                    continue

                section_body_id = button['data-target'].replace('#', '')
                section_body = module_wrap.find('div', id=section_body_id)
                if not section_body:
                    continue

                # Extract all lessons in this section
                for item in section_body.find_all('div', class_='syllbusMain'):
                    try:
                        lesson_num_elem = item.find('div', class_='syllbusSwq')
                        lesson_desc_elem = item.find('div', class_='syllbusDesc')
                        preview_button = item.find('button', class_='previewButton')

                        if lesson_num_elem and lesson_desc_elem:
                            lesson_num = lesson_num_elem.get_text(strip=True)
                            lesson_desc = lesson_desc_elem.get_text(strip=True)
                            preview_text = "Preview" if preview_button else ""
                            # rstrip() removes the dangling space that the empty
                            # preview marker would otherwise leave at the end of the line.
                            lesson_line = f"{lesson_num} {lesson_desc} {preview_text}".rstrip()
                            syllabus_parts.append(f"{lesson_line}\n")
                    except Exception as e:
                        # Best effort: report the bad lesson and keep going.
                        print(f"Error processing lesson: {e}")

            except Exception as e:
                # Best effort: report the bad section and keep going.
                print(f"Error processing section: {e}")

    except Exception as e:
        # Best effort: report the bad module and keep going.
        print(f"Error processing module: {e}")

# Single string holding the whole syllabus
all_syllabus_content = "".join(syllabus_parts)

# Create DataFrame with single cell
syllabus_text = all_syllabus_content.strip()
df = pd.DataFrame({'Complete Syllabus': [syllabus_text]})

# Save to Excel
output_path = "C:\\Users\\taslim.siddiqui\\Downloads\\eduonix_syllabus_single_cell.xlsx"

with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False, sheet_name='Syllabus')

    workbook = writer.book
    worksheet = writer.sheets['Syllabus']

    # Format for syllabus content: wrap long text and anchor it to the top of the cell
    content_format = workbook.add_format({
        'text_wrap': True,
        'valign': 'top'
    })

    worksheet.set_column('A:A', 100)  # Very wide column

    # to_excel() puts the column header in row 0 and the data in row 1
    # (xlsxwriter rows are zero-indexed). Targeting row 0 here would overwrite
    # the "Complete Syllabus" header with the content and leave the real data
    # cell unformatted, so both calls address row 1.
    worksheet.set_row(1, 300)  # Tall row to show more content
    worksheet.write(1, 0, syllabus_text, content_format)

print(f"Complete syllabus saved to {output_path}")
print(f"Total modules processed: {len(module_wraps)}")

Complete syllabus saved to C:\Users\taslim.siddiqui\Downloads\eduonix_syllabus_single_cell.xlsx
Total modules processed: 5


In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time

# === 1. Load Excel file with course links ===
input_excel_path = "C:\\Users\\taslim.siddiqui\\Downloads\\eduonix_links.xlsx"
df_links = pd.read_excel(input_excel_path)

# Change this if your Excel has a different column name
link_column = 'Course Link'

# Seconds to pause after navigation so the page can finish rendering before
# its source is read.
page_load_wait_seconds = 5


def _build_syllabus_text(soup):
    """Flatten one course page's module/section/lesson markup into plain text.

    Walks every ``div.E-degSylWrap`` module container in ``soup``. For each
    module that has both an ``h4.curriculumName`` and a ``div.moduleHeading``
    it emits a header, then one title line per ``div.sectionHeader`` and one
    "<number> <description>" line per lesson found in the element referenced
    by the section button's ``data-target`` attribute.

    Args:
        soup: Parsed BeautifulSoup document of the course page.

    Returns:
        The concatenated syllabus text with leading/trailing whitespace
        stripped; an empty string when no module container matches.
    """
    parts = []

    for module_wrap in soup.find_all('div', class_='E-degSylWrap'):
        module_title_elem = module_wrap.find('h4', class_='curriculumName')
        module_num_elem = module_wrap.find('div', class_='moduleHeading')

        # A module needs both its name and its heading; skip it otherwise.
        if not module_title_elem or not module_num_elem:
            continue

        module_title = module_title_elem.get_text(strip=True)
        module_num = module_num_elem.get_text(strip=True)
        full_module_title = f"{module_num}: {module_title}"
        parts.append(f"\n\n{full_module_title}\n{module_title}\n")

        for section in module_wrap.find_all('div', class_='sectionHeader'):
            sec_info = section.find('span', class_='secInfo')
            if not sec_info:
                continue

            section_title_b = sec_info.find('b')
            section_title = section_title_b.get_text(strip=True) if section_title_b else "Untitled Section"
            parts.append(f"\n{section_title}\n")

            # The button's data-target ("#<id>") identifies the element that
            # holds this section's lessons.
            button = section.find('button')
            if not button or 'data-target' not in button.attrs:
                continue

            section_body_id = button['data-target'].replace('#', '')
            section_body = module_wrap.find('div', id=section_body_id)
            if not section_body:
                continue

            for item in section_body.find_all('div', class_='syllbusMain'):
                lesson_num_elem = item.find('div', class_='syllbusSwq')
                lesson_desc_elem = item.find('div', class_='syllbusDesc')

                if lesson_num_elem and lesson_desc_elem:
                    lesson_num = lesson_num_elem.get_text(strip=True)
                    lesson_desc = lesson_desc_elem.get_text(strip=True)
                    parts.append(f"{lesson_num} {lesson_desc}\n")  # NO preview tag

    return "".join(parts).strip()


# === 2. Setup Selenium Headless Chrome ===
options = Options()
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=options)

# === 3. Extract syllabus for each course ===
results = []

# try/finally guarantees the browser is shut down even when something raises
# outside the per-URL handler (for example a missing link column or a manual
# interrupt), so no Chrome process is left running.
try:
    for idx, row in df_links.iterrows():
        url = row[link_column]
        print(f"🔗 Processing: {url}")

        try:
            driver.get(url)
            time.sleep(page_load_wait_seconds)  # wait for content to load

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results.append({
                'Course Link': url,
                'Complete Syllabus': _build_syllabus_text(soup)
            })

        except Exception as e:
            # Best effort: a failing course is reported and left out of the results.
            print(f"❌ Error processing {url}: {e}")
finally:
    # === 4. Close browser ===
    driver.quit()

# === 5. Save result to Excel ===
# NOTE: this is the same file name the first cell writes to, so running this
# cell replaces that earlier workbook.
output_path = "C:\\Users\\taslim.siddiqui\\Downloads\\eduonix_syllabus_output.xlsx"
df_result = pd.DataFrame(results)

with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
    sheet_name = 'Syllabus'
    df_result.to_excel(writer, sheet_name=sheet_name, index=False)

    # Column B gets a wide, wrapped, top-aligned layout.
    workbook = writer.book
    wrap_format = workbook.add_format({'valign': 'top', 'text_wrap': True})

    worksheet = writer.sheets[sheet_name]
    worksheet.set_column('B:B', 100, wrap_format)

print("\n✅ All syllabi saved to: " + output_path)


🔗 Processing: https://www.eduonix.com/online-course-marketing-blueprint
🔗 Processing: https://www.eduonix.com/business-data-analytics-bootcamp-master-ai-powered-insights
🔗 Processing: https://www.eduonix.com/complete-ai-for-professionals
🔗 Processing: https://www.eduonix.com/12-real-world-casestudies-for-machine-learning
🔗 Processing: https://www.eduonix.com/20-end-to-end-machine-learning-projects-amp-deployment-2021

✅ All syllabi saved to: C:\Users\taslim.siddiqui\Downloads\eduonix_syllabus_output.xlsx
