# CPP 3ritechnologies

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin

# Site root that is fetched first and used to build the sub-page URLs
base_url = 'https://www.3ritechnologies.com'
# Headers sent with every request (a browser-style User-Agent)
headers = {
    "User-Agent": "Mozilla/5.0",
}
# Shared accumulator: scrape_courses() appends {'name': ..., 'url': ...} dicts
course_data = []

# Href endings that identify a course *listing* page rather than one course.
_LISTING_SUFFIXES = ('/course/', '/courses/', '/training/')

# Default location of the generated spreadsheet (kept for backward compatibility).
_DEFAULT_OUTPUT_PATH = "C:\\Users\\taslim.siddiqui\\Downloads\\3RI_Courses_List.xlsx"


def _collect_courses(links, page_url, seen_urls, found_where, min_name_len=0):
    """Append every not-yet-recorded course link in *links* to ``course_data``.

    Args:
        links: Iterable of anchor tags (as returned by ``soup.select``).
        page_url: URL of the page the anchors were found on; relative hrefs
            are resolved against it.
        seen_urls: Set of course URLs already recorded. Updated in place.
        found_where: Text placed in the progress message, e.g. ``"in menu"``.
        min_name_len: Minimum length of the link text for it to be accepted.
    """
    for link in links:
        href = link.get('href', '')
        if '/course/' not in href and '/training/' not in href:
            continue
        # A listing page is an index of courses, not a course itself.
        if href.endswith(_LISTING_SUFFIXES):
            continue

        course_name = link.get_text(strip=True)
        if len(course_name) < min_name_len:
            continue

        course_url = urljoin(page_url, href)
        if course_url in seen_urls:
            continue

        seen_urls.add(course_url)
        course_data.append({
            'name': course_name,
            'url': course_url
        })
        print(f"✅ Found {found_where}: {course_name} - {course_url}")


def scrape_courses(output_path=_DEFAULT_OUTPUT_PATH):
    """Scrape course names and URLs from the site and save them to Excel.

    Three sources are scanned in order: the home page navigation menu, the
    home page course cards, and a fixed list of candidate listing pages.
    Each new course is appended to the module-level ``course_data`` list
    (the first name seen for a URL wins) and, if anything was found, the
    list is written to *output_path* and printed.

    Args:
        output_path: Destination of the ``.xlsx`` file. Defaults to the
            path the script has always used.

    Returns:
        None. Results are left in ``course_data``; progress and errors are
        printed rather than raised.
    """
    # Seed from course_data so repeated runs (e.g. re-executing a notebook
    # cell) do not record the same URL twice.
    seen_urls = {course['url'] for course in course_data}

    try:
        print(f"🌐 Connecting to {base_url}...")
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # First try: Check if courses are listed in navigation menu
        print("🔍 Checking main navigation for courses...")
        _collect_courses(
            soup.select('.menu-item a'), response.url, seen_urls, "in menu"
        )

        # Second try: Check for course cards/grids
        print("🔍 Scanning for course cards...")
        _collect_courses(
            soup.select('.elementor-widget-wrap a, .course-card a, .eael-course a'),
            response.url,
            seen_urls,
            "in cards",
            min_name_len=4,  # Filter out very short names
        )

        # Third try: Check dedicated course pages
        course_pages = ('/courses/', '/training/', '/programs/', '/it-courses/')

        for page in course_pages:
            print(f"🔍 Scanning {page} page...")
            page_url = urljoin(base_url, page)
            # Best effort: a failure on one candidate page must not stop the
            # remaining pages from being scanned.
            try:
                page_response = requests.get(page_url, headers=headers, timeout=10)
                if page_response.status_code != 200:
                    continue
                page_soup = BeautifulSoup(page_response.text, 'html.parser')
                # Resolve relative links against the URL actually served
                # (after redirects), not against the site root.
                _collect_courses(
                    page_soup.select('a[href*="/course/"], a[href*="/training/"]'),
                    page_response.url,
                    seen_urls,
                    f"on {page}",
                    min_name_len=4,
                )
            except Exception as e:
                print(f"⚠️ Error scanning {page_url}: {str(e)}")

        # Save results
        if course_data:
            df = pd.DataFrame(course_data)
            # Guards against duplicates already present in course_data
            # before this call.
            df = df.drop_duplicates(subset=['url'])
            df.to_excel(output_path, index=False, columns=['name', 'url'])

            print("\n📋 Final Course List:")
            for course in course_data:
                print(f"{course['name']}\t{course['url']}")

            print(f"\n✅ Success! Found {len(df)} courses. Saved to {output_path}")
        else:
            print("❌ No courses found. The website structure may have changed.")

    except Exception as e:
        # Top-level boundary of the script: report the failure instead of
        # raising out of the notebook cell.
        print(f"⚠️ Error: {str(e)}")

# Run the scraper when this cell/script executes. Progress is printed, and if
# any courses are found they are written to an Excel file.
scrape_courses()