In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import pandas as pd
import time
import requests
from bs4 import BeautifulSoup
import os

def setup_driver():
    """Setup Chrome driver with proper options"""
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    # Remove headless for debugging, add it back if needed: chrome_options.add_argument("--headless")
    
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def extract_courses_selenium():
    """Extract courses using Selenium WebDriver"""
    driver = None
    try:
        print("🚀 Starting Pure Soul Academy Course Extraction")
        print("=" * 60)
        
        # Setup driver
        driver = setup_driver()
        
        # Navigate to the store page
        print("📄 Loading Pure Soul Academy store page...")
        driver.get("https://www.puresoulacademy.in/s/store")
        
        # Wait for page to load completely
        wait = WebDriverWait(driver, 20)
        
        print("⏳ Waiting for course content to load...")
        time.sleep(5)
        
        # Wait for specific elements that indicate content has loaded
        try:
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".scourse, .card, [class*='course'], a[href*='/courses/']")))
            print("✅ Course content detected")
        except:
            print("⚠️ No specific course elements found, continuing with general content...")
        
        # Scroll to load lazy content
        print("📜 Scrolling to load all content...")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        driver.execute_script("window.scrollTo(0, 0);")
        time.sleep(2)
        
        # Get the page source after JavaScript execution
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        # Extract courses using multiple strategies
        courses_data = []
        
        # Strategy 1: Find all links containing '/courses/'
        print("🔍 Searching for course links...")
        course_links = soup.find_all('a', href=lambda x: x and '/courses/' in x)
        print(f"📊 Found {len(course_links)} course links via HTML parsing")
        
        for link in course_links:
            try:
                href = link.get('href', '')
                if href.startswith('/'):
                    full_url = f"https://www.puresoulacademy.in{href}"
                else:
                    full_url = href
                
                # Extract title
                title = link.get('title', '').strip()
                if not title:
                    # Try to find title in text or parent elements
                    title = link.get_text(strip=True)
                    if not title or len(title) > 100:
                        # Look for title in parent elements
                        parent = link.find_parent(['div', 'section'])
                        if parent:
                            title_elem = parent.find(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', '.title', '.ctitle'])
                            if title_elem:
                                title = title_elem.get_text(strip=True)
                
                if title and len(title) > 100:
                    title = title[:100] + "..."
                
                if full_url and '/courses/' in full_url:
                    courses_data.append({
                        'Course Title': title if title else 'Title Not Available',
                        'Course URL': full_url,
                        'Source': 'HTML Parsing'
                    })
            except Exception as e:
                continue
        
        # Strategy 2: Use Selenium to find course elements directly
        print("🔍 Searching with Selenium selectors...")
        selenium_selectors = [
            "a[href*='/courses/']",
            ".scourse a",
            ".card a",
            "[class*='course'] a"
        ]
        
        for selector in selenium_selectors:
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
                print(f"📊 Found {len(elements)} elements with selector: {selector}")
                
                for element in elements:
                    try:
                        href = element.get_attribute('href')
                        title = element.get_attribute('title') or element.text.strip()
                        
                        if href and '/courses/' in href:
                            if title and len(title) > 100:
                                title = title[:100] + "..."
                            
                            # Check if this course already exists
                            existing_urls = [course['Course URL'] for course in courses_data]
                            if href not in existing_urls:
                                courses_data.append({
                                    'Course Title': title if title else 'Title Not Available',
                                    'Course URL': href,
                                    'Source': f'Selenium: {selector}'
                                })
                    except:
                        continue
            except Exception as e:
                print(f"❌ Selector {selector} failed: {e}")
                continue
        
        # Remove duplicates
        unique_courses = []
        seen_urls = set()
        for course in courses_data:
            if course['Course URL'] not in seen_urls:
                seen_urls.add(course['Course URL'])
                unique_courses.append(course)
        
        print(f"✅ Found {len(unique_courses)} unique courses")
        
        if unique_courses:
            # Save to Excel
            df = pd.DataFrame(unique_courses)
            output_path = r'C:\Users\taslim.siddiqui\Downloads\puresoul_courses_final.xlsx'
            df.to_excel(output_path, index=False)
            print(f"💾 Successfully saved to: {output_path}")
            
            # Display results
            print("\n📋 Extracted Courses:")
            print("=" * 80)
            for i, course in enumerate(unique_courses, 1):
                print(f"{i:2d}. {course['Course Title']}")
                print(f"    🔗 {course['Course URL']}")
                print(f"    📍 Source: {course['Source']}")
                print()
            
            return unique_courses
        else:
            print("❌ No courses found. The website structure might have changed.")
            return []
            
    except Exception as e:
        print(f"❌ Error during extraction: {e}")
        return []
    finally:
        if driver:
            driver.quit()
            print("🔚 Browser closed")

def verify_course_urls(courses_data):
    """Verify if the extracted course URLs are accessible"""
    if not courses_data:
        return []
    
    print("\n🔍 Verifying course URLs...")
    verified_courses = []
    
    for course in courses_data[:10]:  # Verify first 10 to save time
        try:
            response = requests.head(course['Course URL'], timeout=10, allow_redirects=True)
            if response.status_code == 200:
                course['Status'] = 'Accessible'
                verified_courses.append(course)
                print(f"✅ {course['Course Title']} - Accessible")
            else:
                course['Status'] = f'HTTP {response.status_code}'
                verified_courses.append(course)
                print(f"⚠️ {course['Course Title']} - HTTP {response.status_code}")
        except Exception as e:
            course['Status'] = f'Error: {str(e)}'
            verified_courses.append(course)
            print(f"❌ {course['Course Title']} - Error: {e}")
    
    return verified_courses

def main():
    """Main function to run the course extraction"""
    print("🎯 Pure Soul Academy Course Extractor")
    print("=" * 50)
    
    # Extract courses
    courses = extract_courses_selenium()
    
    if courses:
        # Verify URLs (optional)
        verified_courses = verify_course_urls(courses)
        
        # Final summary
        print("\n🎊 EXTRACTION COMPLETED SUCCESSFULLY!")
        print("=" * 50)
        print(f"📊 Total Courses Found: {len(courses)}")
        print(f"💾 File Saved: puresoul_courses_final.xlsx")
        print(f"📁 Location: C:\\Users\\taslim.siddiqui\\Downloads\\")
        
        # Display sample of courses
        print("\n📖 Sample Courses:")
        for i, course in enumerate(courses[:5], 1):
            print(f"   {i}. {course['Course Title']}")
    else:
        print("\n💔 No courses were extracted.")
        print("Possible reasons:")
        print("  1. Website requires login")
        print("  2. Website structure changed")
        print("  3. JavaScript content not loading properly")
        print("  4. Network or firewall issues")

if __name__ == "__main__":
    main()

🎯 Pure Soul Academy Course Extractor
🚀 Starting Pure Soul Academy Course Extraction
📄 Loading Pure Soul Academy store page...
⏳ Waiting for course content to load...
✅ Course content detected
📜 Scrolling to load all content...
🔍 Searching for course links...
📊 Found 16 course links via HTML parsing
🔍 Searching with Selenium selectors...
📊 Found 16 elements with selector: a[href*='/courses/']
📊 Found 11 elements with selector: .scourse a
📊 Found 0 elements with selector: .card a
📊 Found 11 elements with selector: [class*='course'] a
✅ Found 21 unique courses
💾 Successfully saved to: C:\Users\taslim.siddiqui\Downloads\puresoul_courses_final.xlsx

📋 Extracted Courses:
 1. Corporate banking
    🔗 https://www.puresoulacademy.in/s/store/courses/Corporate banking
    📍 Source: HTML Parsing

 2. corporate finance
    🔗 https://www.puresoulacademy.in/s/store/courses/corporate finance
    📍 Source: HTML Parsing

 3. Employee or Customer Training
    🔗 https://www.puresoulacademy.in/s/store/cours

In [13]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
import pandas as pd
import time
import requests
from bs4 import BeautifulSoup
import re
import os

def setup_driver():
    """Setup Chrome driver with proper options"""
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def extract_course_details(course_url):
    """Extract detailed information from a course page"""
    driver = None
    try:
        print(f"📖 Extracting details from: {course_url}")
        driver = setup_driver()
        driver.get(course_url)
        
        # Wait for page to load
        wait = WebDriverWait(driver, 15)
        time.sleep(3)
        
        # Get page source
        page_source = driver.page_source
        soup = BeautifulSoup(page_source, 'html.parser')
        
        course_data = {
            'Course URL': course_url,
            'Course Name': '',
            'Price': '',
            'About Course': '',
            'Who Should Enroll': '',
            'Course Syllabus': '',
            'Career Opportunities': ''
        }
        
        # Extract Course Name
        try:
            title_elem = soup.find('h2', class_='title') or soup.find('h1') or soup.find('title')
            if title_elem:
                course_data['Course Name'] = title_elem.get_text(strip=True)
                print(f"✅ Course Name: {course_data['Course Name']}")
        except Exception as e:
            print(f"❌ Course Name extraction failed: {e}")
            course_data['Course Name'] = 'Not Found'
        
        # Extract Price - FIXED: Look for strikethrough price
        try:
            # Look for price with strikethrough first
            price_elems = soup.find_all('span', class_='curr')
            original_price = ''
            discounted_price = ''
            
            for price_elem in price_elems:
                price_text = price_elem.get_text(strip=True)
                # Check if it has strikethrough style
                if 'text-decoration: line-through' in str(price_elem):
                    original_price = price_text
                    print(f"✅ Found original price: {original_price}")
                elif price_text and price_text != '₹0':
                    discounted_price = price_text
            
            # Use original price if available, otherwise use discounted price
            if original_price:
                course_data['Price'] = original_price
            elif discounted_price:
                course_data['Price'] = discounted_price
            else:
                course_data['Price'] = 'Free/Not Specified'
                
            print(f"✅ Final Price: {course_data['Price']}")
                
        except Exception as e:
            print(f"❌ Price extraction failed: {e}")
            course_data['Price'] = 'Not Found'
        
        # Extract About Course - FIXED: Get proper description
        try:
            about_course_text = ""
            # Look for description in multiple places
            desc_selectors = [
                'div[data-id="description"]',
                '.description',
                '.course-description',
                '.tab-content.description'
            ]
            
            for selector in desc_selectors:
                desc_div = soup.select_one(selector)
                if desc_div:
                    # Get all text content
                    all_text = desc_div.get_text(strip=True)
                    
                    # Remove "Description:" label if present
                    all_text = re.sub(r'^Description:\s*', '', all_text)
                    
                    # Take first meaningful paragraph (first 500 chars)
                    if len(all_text) > 50:
                        about_course_text = all_text[:500] + "..." if len(all_text) > 500 else all_text
                        break
            
            if not about_course_text:
                # Fallback: look for first paragraph in the page
                first_p = soup.find('p')
                if first_p:
                    about_course_text = first_p.get_text(strip=True)[:500] + "..." if len(first_p.get_text(strip=True)) > 500 else first_p.get_text(strip=True)
            
            course_data['About Course'] = about_course_text if about_course_text else 'Not Found'
            print(f"✅ About Course: {course_data['About Course'][:100]}...")
                
        except Exception as e:
            print(f"❌ About Course extraction failed: {e}")
            course_data['About Course'] = 'Not Found'
        
        # Extract Who Should Enroll - IMPROVED
        try:
            who_should_text = ""
            
            # Method 1: Look for specific patterns in the entire page
            page_text = soup.get_text()
            
            # Look for "who should" patterns
            who_patterns = [
                r'who should[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'target audience[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'ideal for[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'designed for[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)'
            ]
            
            for pattern in who_patterns:
                match = re.search(pattern, page_text, re.IGNORECASE | re.DOTALL)
                if match:
                    who_should_text = match.group(1).strip()
                    break
            
            # Method 2: Look for lists that might contain target audience
            if not who_should_text:
                # Find all lists on the page
                lists = soup.find_all(['ul', 'ol'])
                for list_elem in lists:
                    list_text = list_elem.get_text().lower()
                    # Check if this list contains target audience indicators
                    if any(keyword in list_text for keyword in ['graduate', 'fresher', 'professional', 'student', 'aspiring', 'experienced']):
                        # Check if previous elements indicate this is a target audience list
                        prev_elements = []
                        prev_elem = list_elem.find_previous_sibling()
                        for _ in range(2):
                            if prev_elem:
                                prev_elements.append(prev_elem.get_text().lower())
                                prev_elem = prev_elem.find_previous_sibling()
                        
                        prev_text = ' '.join(prev_elements)
                        if any(phrase in prev_text for phrase in ['who', 'target', 'audience', 'ideal for', 'designed for']):
                            who_should_text = list_elem.get_text(strip=True)
                            break
            
            course_data['Who Should Enroll'] = who_should_text if who_should_text else 'Not Specified'
            print(f"✅ Who Should Enroll: {course_data['Who Should Enroll']}")
            
        except Exception as e:
            print(f"❌ Who Should Enroll extraction failed: {e}")
            course_data['Who Should Enroll'] = 'Not Specified'
        
        # Extract Course Syllabus - IMPROVED
        try:
            syllabus_data = []
            
            # Method 1: Look for course contents table
            syllabus_table = soup.find('table', class_='courseContents')
            
            if syllabus_table:
                print("✅ Found course contents table")
                rows = syllabus_table.find_all('tr')
                
                for row in rows:
                    cells = row.find_all('td')
                    if len(cells) >= 3:
                        # Check if it's a main topic (has icon in first cell)
                        first_cell = cells[0]
                        if first_cell.find('i', {'data-type': 'label'}):
                            topic = cells[2].get_text(strip=True)
                            if topic:
                                syllabus_data.append(f"📚 {topic}")
                        else:
                            # Check if it's a subtopic (has icon in second cell)
                            second_cell = cells[1]
                            if second_cell.find('i'):
                                subtopic = cells[2].get_text(strip=True)
                                if subtopic:
                                    syllabus_data.append(f"   • {subtopic}")
            
            # Method 2: Look for other syllabus structures
            if not syllabus_data:
                # Look for accordion or other syllabus structures
                syllabus_sections = soup.find_all(['div', 'section'], class_=lambda x: x and any(word in str(x).lower() for word in ['syllabus', 'curriculum', 'content', 'module']))
                for section in syllabus_sections:
                    headings = section.find_all(['h2', 'h3', 'h4', 'h5', 'h6', 'strong'])
                    for heading in headings:
                        text = heading.get_text(strip=True)
                        if text and len(text) > 5:
                            syllabus_data.append(f"📚 {text}")
            
            course_data['Course Syllabus'] = '\n'.join(syllabus_data) if syllabus_data else 'Not Available'
            print(f"✅ Course Syllabus: {len(syllabus_data)} topics found")
                
        except Exception as e:
            print(f"❌ Course Syllabus extraction failed: {e}")
            course_data['Course Syllabus'] = 'Not Available'
        
        # Extract Career Opportunities - IMPROVED
        try:
            career_text = ""
            page_text = soup.get_text()
            
            # Look for career-related sections
            career_patterns = [
                r'career opportunities[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'job opportunities[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'career prospects[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)',
                r'placement opportunities[^:]*:([^.•\n]+(?:\n[^.•\n]+)*)'
            ]
            
            for pattern in career_patterns:
                match = re.search(pattern, page_text, re.IGNORECASE | re.DOTALL)
                if match:
                    career_text = match.group(1).strip()
                    break
            
            # If no specific career section found, infer from course content
            if not career_text:
                course_name_lower = course_data['Course Name'].lower()
                if 'banking' in course_name_lower or 'finance' in course_name_lower:
                    career_text = "Banking roles, Financial Analyst, Relationship Manager, Loan Officer"
                elif 'sales' in course_name_lower:
                    career_text = "Sales Officer, Business Development Executive, Sales Manager"
                elif 'marketing' in course_name_lower:
                    career_text = "Marketing Executive, Digital Marketer, Brand Manager"
                
            course_data['Career Opportunities'] = career_text if career_text else 'Not Specified'
            print(f"✅ Career Opportunities: {course_data['Career Opportunities']}")
                
        except Exception as e:
            print(f"❌ Career Opportunities extraction failed: {e}")
            course_data['Career Opportunities'] = 'Not Specified'
        
        print(f"✅ Successfully extracted all data for: {course_data['Course Name']}")
        return course_data
        
    except Exception as e:
        print(f"❌ Error extracting course details: {e}")
        return None
    finally:
        if driver:
            driver.quit()

def read_input_file(input_file_path):
    """Read input Excel file and extract course URLs"""
    try:
        print(f"📂 Reading input file: {input_file_path}")
        
        # Check if file exists
        if not os.path.exists(input_file_path):
            print(f"❌ Input file not found: {input_file_path}")
            return []
        
        # Read Excel file
        df = pd.read_excel(input_file_path)
        print(f"📊 Input file loaded with {len(df)} rows and {len(df.columns)} columns")
        print(f"📋 Columns: {list(df.columns)}")
        
        # Display first few rows for verification
        print("\n📄 First few rows of input file:")
        print(df.head())
        
        # Find column containing URLs
        url_columns = [col for col in df.columns if any(keyword in col.lower() for keyword in ['url', 'link'])]
        
        if not url_columns:
            # If no URL column found, try to find any column containing URLs
            for col in df.columns:
                if df[col].astype(str).str.contains('http').any():
                    url_columns = [col]
                    break
        
        if not url_columns:
            print("❌ No URL column found in the input file")
            print("Please ensure your Excel file has a column containing course URLs")
            return []
        
        url_column = url_columns[0]
        print(f"🔗 Using URL column: {url_column}")
        
        # Extract URLs
        urls = df[url_column].dropna().unique().tolist()
        print(f"📋 Found {len(urls)} unique URLs")
        
        # Display first few URLs for verification
        print("Sample URLs:")
        for url in urls[:3]:
            print(f"  - {url}")
        
        return urls
        
    except Exception as e:
        print(f"❌ Error reading input file: {e}")
        return []

def process_courses_from_file(input_file_path, output_file_path):
    """Main function to process courses from input Excel file"""
    print("🚀 Starting Course Details Extraction from Input File")
    print("=" * 70)
    print(f"📥 Input File: {input_file_path}")
    print(f"📤 Output File: {output_file_path}")
    print("=" * 70)
    
    # Read URLs from input file
    course_urls = read_input_file(input_file_path)
    
    if not course_urls:
        print("❌ No URLs found to process")
        return False
    
    # Extract details for each course
    all_courses_data = []
    successful_count = 0
    failed_count = 0
    
    for i, course_url in enumerate(course_urls, 1):
        print(f"\n{'='*60}")
        print(f"🔍 Processing {i}/{len(course_urls)}")
        print(f"🌐 URL: {course_url}")
        
        try:
            course_data = extract_course_details(course_url)
            
            if course_data:
                all_courses_data.append(course_data)
                successful_count += 1
                print(f"✅ SUCCESS: {course_data['Course Name']}")
                
                # Print extracted data for verification
                print("\n📊 EXTRACTED DATA:")
                print(f"   Course Name: {course_data['Course Name']}")
                print(f"   Price: {course_data['Price']}")
                print(f"   About Course: {course_data['About Course'][:100]}...")
                print(f"   Who Should Enroll: {course_data['Who Should Enroll']}")
                syllabus_lines = course_data['Course Syllabus'].count('\n') + 1 if course_data['Course Syllabus'] != 'Not Available' else 0
                print(f"   Course Syllabus: {syllabus_lines} topics")
                print(f"   Career Opportunities: {course_data['Career Opportunities']}")
                
            else:
                failed_count += 1
                print(f"❌ FAILED: {course_url}")
            
        except Exception as e:
            failed_count += 1
            print(f"❌ ERROR: {course_url} - {e}")
        
        # Add delay to be respectful to the server
        if i < len(course_urls):  # Don't wait after the last one
            print("⏳ Waiting before next request...")
            time.sleep(3)
    
    # Save results to output file
    if all_courses_data:
        save_success = save_to_excel(all_courses_data, output_file_path)
        
        if save_success:
            print("\n🎊 PROCESSING COMPLETED SUCCESSFULLY!")
            print("=" * 50)
            print(f"📊 Total URLs Processed: {len(course_urls)}")
            print(f"✅ Successful Extractions: {successful_count}")
            print(f"❌ Failed Extractions: {failed_count}")
            print(f"💾 Output File: {output_file_path}")
            
            return True
        else:
            print("❌ Failed to save output file")
            return False
    else:
        print("❌ No course data was extracted successfully")
        return False

def save_to_excel(courses_data, output_file_path):
    """Save course data to Excel file"""
    try:
        # Create DataFrame
        df = pd.DataFrame(courses_data)
        
        # Reorder columns for better readability
        column_order = [
            'Course Name', 
            'Price', 
            'About Course', 
            'Who Should Enroll',
            'Course Syllabus',
            'Career Opportunities',
            'Course URL'
        ]
        
        # Only include columns that exist in the data
        available_columns = [col for col in column_order if col in df.columns]
        df = df[available_columns]
        
        # Save to Excel
        df.to_excel(output_file_path, index=False, engine='openpyxl')
        
        print(f"💾 Successfully saved {len(courses_data)} courses to: {output_file_path}")
        
        # Display the saved data
        print("\n💾 SAVED DATA PREVIEW:")
        print("=" * 60)
        print(df[['Course Name', 'Price', 'Who Should Enroll']].head())
        
        return True
        
    except Exception as e:
        print(f"❌ Error saving to Excel: {e}")
        return False

def main():
    """Main function with predefined file paths"""
    # Define file paths
    input_file_path = r"C:\Users\taslim.siddiqui\Downloads\puresoul_courses_selenium.xlsx"
    output_file_path = r"C:\Users\taslim.siddiqui\Downloads\puresoul_courses_detailed_output.xlsx"
    
    # Check if input file exists
    if not os.path.exists(input_file_path):
        print(f"❌ Input file not found: {input_file_path}")
        print("Please ensure the input file exists at the specified location.")
        return
    
    # Process courses
    success = process_courses_from_file(input_file_path, output_file_path)
    
    if success:
        print(f"\n✅ All done! Check your output file: {output_file_path}")
    else:
        print(f"\n💔 Processing failed. Please check the errors above.")

if __name__ == "__main__":
    print("🎯 Course Details Extractor")
    print("=" * 50)
    print("This script will:")
    print("1. 📥 Read course URLs from input Excel file")
    print("2. 🔍 Extract detailed course information from each URL")
    print("3. 📤 Save all details to output Excel file")
    print("=" * 50)
    
    # Run main function with predefined paths
    main()

🎯 Course Details Extractor
This script will:
1. 📥 Read course URLs from input Excel file
2. 🔍 Extract detailed course information from each URL
3. 📤 Save all details to output Excel file
🚀 Starting Course Details Extraction from Input File
📥 Input File: C:\Users\taslim.siddiqui\Downloads\puresoul_courses_selenium.xlsx
📤 Output File: C:\Users\taslim.siddiqui\Downloads\puresoul_courses_detailed_output.xlsx
📂 Reading input file: C:\Users\taslim.siddiqui\Downloads\puresoul_courses_selenium.xlsx
📊 Input file loaded with 11 rows and 4 columns
📋 Columns: ['Course Title', 'Course URL', 'Unnamed: 2', 'sss']

📄 First few rows of input file:
                                        Course Title  \
0                     Sales Officer - Retail Banking   
1  Advanced certificate in Corporate Banking and ...   
2            Banking and finance course for freshers   
3         Job oriented course in banking and finance   
4    Skill development course in banking and finance   

                        