In [14]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

def extract_course_data(url):
    response = requests.get(url)
    content = response.content
    soup = BeautifulSoup(content, 'html.parser')
    
    # Extracting the course link
    link = url

    # Extracting the title
    h1_tags = soup.find_all('h1')
    title = [tag.get_text(strip=True) for tag in h1_tags]

    # Extracting the description
    div_tags = soup.find_all(class_='desc_less')
    description = ' '.join(para.find('p').text.strip() for para in div_tags if para.find('p'))

    # Extracting the duration
    dur=soup.find_all(class_='duration-of-course')
    duration=[]
    for i in dur:
        res=i.find_all('p')
        text=res[0].get_text(strip=True)
        duration.append(text)
    duration_text = ' | '.join(duration) if duration else None

   #Course Start date
    dur=soup.find_all(class_='duration-of-course')
    start=[]
    for i in dur:
        res=i.find_all('p')
        text=res[2].get_text(strip=True)
        start.append(text) 
    duration_text1 = ' | '.join(start) if duration else None

                
    # Print debug information

    
    # Extracting unique items for "What you will learn"
    learn = soup.find_all(class_='pl-deeper-undstnd to_flex_ul')
    unique_items = set()
    for item in learn:
        ul = item.find('ul')
        if ul:
            li_tags = ul.find_all('li')
            for li in li_tags:
                item_text = li.get_text(strip=True)
                if item_text:
                    unique_items.add(item_text)
    what_you_will_learn = ' | '.join(sorted(unique_items))

    # Extracting fee details
    fees = soup.find_all(class_='program-details-total-pay-amt-right')
    inr_fee = None
    usd_fee = None
    for div in fees:
        text = div.get_text(separator=' ', strip=True)
        if 'Due by:' in text:
            text = text.split('Due by:')[0].strip()
        text = ' '.join(text.split())
        if 'INR' in text:
            inr_fee = text
        elif 'USD' in text:
            usd_fee = text

    # Extracting the target audience
    target_tags = soup.find_all('h4', class_='cs-titlec')
    targets = [tag.get_text(strip=True) for tag in target_tags]
    target_audience = ' | '.join(targets) if targets else None

    # Extracting skills
    skills = soup.find_all(class_='key-skills-sec')
    skills_tag = []
    for skill in skills:
        ul = skill.find('ul')
        if ul:
            li_tags = ul.find_all('li')
            for li in li_tags:
                item_text = li.get_text(strip=True)
                if item_text:
                    skills_tag.append(item_text)
    formatted_skills = ' | '.join(sorted(skills_tag))

    # Extracting institute name
    institute_tags = soup.find_all('h4', class_='about-ititle')
    institute = [tag.get_text(strip=True) for tag in institute_tags]

    # Extracting prerequisites or experience
    experience = soup.find_all(class_='eligible-right-top-list')
    Working = [exp.get_text(strip=True) for exp in experience]

    # Extracting content
    glances = soup.find_all(class_='sylab-tab-ul')
    formatted_items = []
    for glance in glances:
        items = glance.get_text(strip=True).split('\n')
        for item in items:
            if item.strip():
                formatted_items.append(item.strip())
    content_text = ' | '.join(formatted_items)

    # Creating a dictionary with extracted data
    data = {
        'Course Link': link,
        'Title': title[0] if title else None,
        'Description': description,
        'Duration': duration_text,
        'Course Start':duration_text1,
        'What you will learn': what_you_will_learn,
        'INR Fee': inr_fee,
        'USD Fee': usd_fee,
        'Target Audience': target_audience,
        'Skills': formatted_skills,
        'Institute': ' | '.join(institute),
        'Prerequisites/Eligibility criteria': ' | '.join(Working),
        'Content': content_text
    }
    
    return data

# List of URLs to process
urls = [
    'https://talentedge.com/golden-gate-university/doctor-of-business-administration',
    'https://talentedge.com/iim-kozhikode/professional-certificate-programme-in-hr-management-and-analytics',
    'https://talentedge.com/opjindal-global-business-school/masters-of-business-administration-opj-global-university',
    'https://talentedge.com/iim-raipur/executive-certificate-program-in-general-management',
    'https://talentedge.com/iim-kozhikode/applied-financial-risk-management-course',
    'https://talentedge.com/goa-institute-of-management/exectuive-pg-program-in-health-care-management',
    'https://talentedge.com/iim-raipur/certificate-course-machine-learning-for-managers',
    'https://talentedge.com/iim-raipur/certificate-course-strategic-management',
    'https://talentedge.com/iiit-allahabad/big-data-analytics-machine-learning-course-iiit-allahabad',
    'https://talentedge.com/iit-delhi/certificate-programme-in-5g-iot-ai'
]

# Extract data for each URL and store it in a list
data_list = [extract_course_data(url) for url in urls]

# Create a DataFrame from the collected data
df = pd.DataFrame(data_list)

# Print the DataFrame
df


Unnamed: 0,Course Link,Title,Description,Duration,Course Start,What you will learn,INR Fee,USD Fee,Target Audience,Skills,Institute,Prerequisites/Eligibility criteria,Content
0,https://talentedge.com/golden-gate-university/...,Doctor Of Business Administration,"Go beyond the boundaries of your job, without ...",Duration: 36 Months,"15 Feb, 2024",Data collection techniques | Disruptive Innova...,INR 1226438 + GST,USD 15000,Working professionals who are looking for a tr...,Applied Research | Concept Selection | Critica...,Golden Gate University,Master's Degree (or equivalent) or Bachelors D...,Foundation (12 Credits)Concentration (12 Credi...
1,https://talentedge.com/iim-kozhikode/professio...,Professional Certificate ProgrammeIn HR Manage...,This program deals with the collection and man...,Duration: 5 Months,"15 Feb, 2024",92% Satisfaction Score | 95% Completition Rate...,INR 105932 + GST,USD 1287,Managers/ HR Professionals who need to take cr...,"Development Analytics | Employee retention, | ...",IIM Kozhikode | upGrad,- Minimum 2 years work experience if you have ...,Introduction to HR AnalyticsHR Practices and B...
2,https://talentedge.com/opjindal-global-busines...,Masters Of Business Administration,This program is a one year online MBA designed...,Duration: 12 months,"31 Mar, 2024",78% Referability | 92% Satisfaction score | 95...,INR 127119 + GST,USD 0,This programme is useful for freshers as well ...,Analytical Thinking | Decision Making in busin...,OPJindal Global Business School,Candidates must hold a graduation degree in an...,The Firm and the ConsumerGlobal EconomicsBusin...
3,https://talentedge.com/iim-raipur/executive-ce...,Executive Certificate ProgramIn General Manage...,Gain in-depth understanding of every aspect of...,,,78% Referability | 92% Satisfaction score | 95...,INR 160000 + GST,USD 3200,"Functional Managers, Entrepreneurs, Business H...",Analytical | Communication | Decision-Making |...,IIM Raipur,For Indian Participants – Graduates (10+2+3) o...,Management and Business EconomicsData Analytic...
4,https://talentedge.com/iim-kozhikode/applied-f...,Professional Certificate ProgramIn Applied Fin...,Gain the Foresight and Expertise to Identify a...,,,78% Referability | 92% Satisfaction score | 95...,INR 100000 + GST,USD 2000,"Working Professionals, Finance Professionals, ...",Decision-making | Financial risk management | ...,IIM Kozhikode,For Indian Participants – Graduates (10+2+3) f...,BasicsFinancial Instruments and Their MarketsM...
5,https://talentedge.com/goa-institute-of-manage...,Executive Post Graduate ProgramIn Health Care ...,India's healthcare industry is expected to gro...,Duration: 11 months,"31 Mar, 2024",78% Referablity | 92% Satisfaction Score | 95%...,INR 97458 + GST,USD 1189,"Freshers, Early-age Professionals, Medical Pro...",Benchmarking in Healthcare | Ethical & Legal i...,Goa Institute of Management | upGrad,Professionals/freshers holding any bachelor’s ...,Program IntroductionFundamentals of Management...
6,https://talentedge.com/iim-raipur/certificate-...,Executive Certificate ProgramIn Machine Learni...,"“Machine Learning is the new SQL for Big Data""",,,78% Referablity | 92% Satisfaction Score | 95%...,INR 75000 + GST,USD 1700,"Business Managers & Department Heads, Business...",Data Visualization & Forecasting | Decision Mo...,IIM Raipur,For Indian Participants – Graduates (10+2+3) o...,Foundation ModuleAdvanced Module
7,https://talentedge.com/iim-raipur/certificate-...,Executive Development ProgramIn Advanced Strat...,"Unveil the most powerful strategies, and be on...",,,78% Referablity | 92% Satisfaction Score | 95%...,INR 180000 + GST,USD 3800,"Leaders, Managers & Function Heads, Executive...",Decision-making | Strategic management | Strat...,IIM Raipur,For Indian Participants – Graduates (10+2+3) o...,Understanding Role of Strategy in BusinessWhy ...
8,https://talentedge.com/iiit-allahabad/big-data...,PG Certificate ProgramIn Machine Learning And ...,Organizations today operate in a world surroun...,,,78% Referablity | 92% Satisfaction Score | 95%...,INR 200000 + GST,USD 4000,,Data Manipulation | Data Visualization | Linea...,CSTCP-IIIT Allahabad,For Indian Participants – Graduates (10+2+3) f...,MODULE 1 - FUNDAMENTALS OF PYTHON (By Industry...
9,https://talentedge.com/iit-delhi/certificate-p...,Certificate Programme In 5G And Its Applicatio...,"With the onset of 5G, the dawn of overwhelming...",,,1G to 5G evolution | 5G's impact on Emerging T...,INR 100000 + GST,USD 2000,"Communication Engineers, Technical Heads, Youn...",Data Analytics | Networking | Programming skill,IIT Delhi,"For Indian Participants – B.Tech, B.E, BCA or ...",Basics of CommunicationsUnderlying Technologie...


In [16]:
pip install openpyxl





In [18]:
excel_file_path = 'course_data.xlsx'
df.to_excel(excel_file_path, index=False, engine='openpyxl')

In [20]:
print(f"Data saved to {excel_file_path}")

Data saved to course_data.xlsx
