In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import os

In [None]:
# Function to extract course information
def extract_course_info(html_content, url):
    """Parse a course page and collect its name, key takeaways and description.

    Args:
        html_content: Markup of the course page, passed to BeautifulSoup with
            the 'html.parser' backend.
        url: Address the page came from; returned unchanged under "website".

    Returns:
        dict with the keys "course_name", "key_takeaways" (all takeaways
        joined with ', '), "description" and "website". The first three fall
        back to a "... not found" placeholder string when the matching markup
        is missing from the page.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract course name. `.string` is None when <title> is empty or holds
    # nested markup, so read the text instead and fall back when it is blank.
    course_name = soup.title.text.strip() if soup.title else ""
    if not course_name:
        course_name = "Course name not found"

    # Extract key takeaways
    key_takeaways = []
    checklist_container = soup.find('div', class_='checklist__container')
    if checklist_container:
        takeaway_items = checklist_container.find_all('li', class_='checklist__list-item')
        for item in takeaway_items:
            p_tag = item.find('p')
            if p_tag:
                # Drop the check-mark glyph and the icon class text that can
                # end up inside the paragraph text.
                takeaway_text = p_tag.text.replace('\uf00c', '').strip()
                takeaway_text = takeaway_text.replace('fa fa-check', '').strip()
                # Skip blanks so the joined string has no empty ", , " entries.
                if takeaway_text:
                    key_takeaways.append(takeaway_text)

    # Extract course description: only the first <p> of the rich-text block.
    description = "Description not found"
    description_section = soup.find('div', class_='rich-text__container')
    if description_section:
        first_p = description_section.find('p')
        if first_p:
            description = first_p.text.strip()

    return {
        "course_name": course_name,
        "key_takeaways": ', '.join(key_takeaways) if key_takeaways else "No key takeaways found",
        "description": description,
        "website": url
    }


In [None]:
# Function to append course data to a CSV file
def append_to_csv(course_info, csv_filename="course_data.csv"):
    """Append one course record to a CSV file, writing the header row if needed.

    The header is written when the file does not exist yet or exists but is
    empty, so the first data row is always preceded by column names.

    Args:
        course_info: Mapping that provides the "course_name", "description"
            and "website" keys.
        csv_filename: Path of the CSV file to append to; created if missing.

    Raises:
        KeyError: If ``course_info`` lacks one of the required keys. The file
            is not created or modified in that case.
    """
    # NOTE(review): extract_course_info also returns "key_takeaways", which is
    # not persisted here -- confirm whether that column is intentionally left out.

    # Build the row before opening the file so a malformed record cannot leave
    # behind a header-only or half-written file.
    row = [
        course_info["course_name"],
        course_info["description"],
        course_info["website"],
    ]

    # A zero-byte file still needs a header, so check size and not just existence.
    needs_header = not os.path.isfile(csv_filename) or os.path.getsize(csv_filename) == 0

    with open(csv_filename, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        if needs_header:
            writer.writerow(["Course Name", "Description", "Website"])

        writer.writerow(row)


In [None]:
# Collect the URL of every course card shown on the paginated listing pages.
base_url = 'https://courses.analyticsvidhya.com/collections/courses?page={}'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# Listing pages 1..MAX_PAGES are requested; the loop stops early on the first
# response whose status is not 200.
MAX_PAGES = 9
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

course_urls = []
current_page = 1

while current_page <= MAX_PAGES:
    url = base_url.format(current_page)
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the dict in the query string.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

    if response.status_code != 200:
        print(f"Failed to fetch page {current_page}. Stopping the scraper.")
        break

    soup = BeautifulSoup(response.content, 'html.parser')
    for link in soup.find_all("a", attrs={'class': 'course-card course-card__public published'}):
        href = link.get("href")
        if not href:
            # An anchor without an href would make the concatenation below fail.
            continue
        course_url = 'https://courses.analyticsvidhya.com' + href
        course_urls.append(course_url)
    current_page += 1
print(len(course_urls))

78


In [None]:
# Fetch each course page, extract its details and append them to the CSV.
# Any failure is reported and skipped so one bad page does not abort the run.
for url in course_urls:
    try:
        print(f"Processing {url}...")
        # The timeout (seconds) keeps a stalled connection from hanging the
        # whole loop; a timeout error is reported by the except clause below.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Check for HTTP errors

        html = response.content
        course_info = extract_course_info(html, url)
        append_to_csv(course_info)

        print(f"Data for {url} has been successfully appended.")
    except Exception as e:
        # Deliberately broad: best-effort scraping, keep going on any error.
        print(f"Failed to process {url}: {e}")


Processing https://courses.analyticsvidhya.com/courses/frameworks-for-effective-problem-solving...
Data for https://courses.analyticsvidhya.com/courses/frameworks-for-effective-problem-solving has been successfully appended.
Processing https://courses.analyticsvidhya.com/courses/your-ultimate-guide-to-becoming-an-agentic-ai-expert-by-2025...
Data for https://courses.analyticsvidhya.com/courses/your-ultimate-guide-to-becoming-an-agentic-ai-expert-by-2025 has been successfully appended.
Processing https://courses.analyticsvidhya.com/courses/a-comprehensive-learning-path-to-become-a-data-analyst-in-2025...
Data for https://courses.analyticsvidhya.com/courses/a-comprehensive-learning-path-to-become-a-data-analyst-in-2025 has been successfully appended.
Processing https://courses.analyticsvidhya.com/courses/reimagining-genai-common-mistakes-and-best-practices-for-success...
Data for https://courses.analyticsvidhya.com/courses/reimagining-genai-common-mistakes-and-best-practices-for-success 

In [None]:
# Send the generated CSV to your local machine (uses the google.colab helper).
from google.colab import files

csv_path = "course_data.csv"
files.download(csv_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>