### Here, I scrape course details from the Talentedge education platform (talentedge.com). Each of the ten course pages is scraped individually, the results are merged, and the combined table is saved as an Excel spreadsheet.

In [85]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/iim-lucknow/supply-chain-management"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
start_date_text = "05 May, 2024"
fee_marker = "218000"
output_file = "course_details.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details.xlsx


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/opjindal-global-business-school/masters-of-business-administration-opj-global-university"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# TODO: fill in the start-date text shown on this page; while it is empty the
# lookup is skipped and "Course start date not found." is recorded.
start_date_text = ""
fee_marker = "165000"
file_name = 'course_details.xlsx'

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
# An empty search text cannot identify the date paragraph, so skip the lookup.
start_tag = soup.find('p', string=start_date_text) if start_date_text else None

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data. The link column is named "Course link", like the other
# scraping cells, so the appended row lines up with the existing sheet.
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Append the row to the existing Excel file, or start a new file if none exists.
try:
    existing_df = pd.read_excel(file_name)
except FileNotFoundError:
    updated_df = df
else:
    if "Course link" in existing_df.columns:
        # Drop any earlier row for this course so re-running the cell does
        # not add the same course twice.
        existing_df = existing_df[existing_df["Course link"] != url]
    updated_df = pd.concat([existing_df, df], ignore_index=True)

updated_df.to_excel(file_name, index=False)

print(f"Data has been saved to {file_name}")


In [94]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# NOTE(review): this is the same URL that the first scraping cell writes to
# course_details.xlsx, so the merged sheet receives this course twice.
# Confirm whether a different course page was intended here.
url = "https://talentedge.com/iim-lucknow/supply-chain-management"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
start_date_text = "05 May, 2024"
fee_marker = "218000"
output_file = "course_details3.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details3.xlsx


In [101]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/goa-institute-of-management/exectuive-pg-program-in-health-care-management"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
start_date_text = "31 Mar, 2024"
fee_marker = "97458"
output_file = "course_details4.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details4.xlsx


In [113]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/iim-lucknow/advanced-program-in-strategic-management-for-business-excellence"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
start_date_text = "30 Jun, 2024"
fee_marker = "312000"
output_file = "course_details5.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details5.xlsx


In [120]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/iim-raipur/executive-certificate-program-in-general-management"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# NOTE(review): confirm that this date text is the one shown on this page.
start_date_text = "30 Jun, 2024"
fee_marker = "160000"
output_file = "course_details6.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details6.xlsx


In [118]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# NOTE(review): this is the same URL that the previous cell writes to
# course_details6.xlsx, so the merged sheet receives this course twice.
# Confirm whether a different course page was intended here.
url = "https://talentedge.com/iim-raipur/executive-certificate-program-in-general-management"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# NOTE(review): confirm that this date text is the one shown on this page.
start_date_text = "30 Jun, 2024"
fee_marker = "160000"
output_file = "course_details7.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details7.xlsx


In [124]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/iim-kozhikode/applied-financial-risk-management-course"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# NOTE(review): confirm that this date text is the one shown on this page.
start_date_text = "30 Jun, 2024"
fee_marker = "100000"
output_file = "course_details8.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details8.xlsx


In [137]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/xlri-jamshedpur/ecommerce-supply-chain-management-and-analytics"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# NOTE(review): confirm that this date text is the one shown on this page.
start_date_text = "30 Jun, 2024"
fee_marker = "165000"
output_file = "course_details9.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details9.xlsx


In [135]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://talentedge.com/iim-kozhikode/supply-chain-strategy-management-course"

# Page-specific values. The start date and the fee are located by matching
# these literal strings against the page text, so they have to be kept in
# sync with the live listing.
# NOTE(review): confirm that this date text is the one shown on this page.
start_date_text = "30 Jun, 2024"
fee_marker = "180000"
output_file = "course_details10.xlsx"

# Define headers
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9",
}

# Send GET request with headers. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed as if it were a course page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')


def text_or(tag, default):
    """Return the stripped text of `tag`, or `default` when `tag` is None."""
    return tag.get_text(strip=True) if tag is not None else default


def descend(tag, *names):
    """Apply find(name) for each name in turn; return None once a step is missing."""
    for name in names:
        if tag is None:
            return None
        tag = tag.find(name)
    return tag


def items_or(list_tag, default):
    """Join the text of every <li> under `list_tag` with ', ', or return `default` when `list_tag` is None."""
    if list_tag is None:
        return default
    return ", ".join(li.get_text(strip=True) for li in list_tag.find_all('li'))


# Each element is looked up once, so the presence check and the extracted
# value always come from the same tag. This matters for the start date: the
# check and the extraction must search for the same text.
duration_tag = soup.find('p', string=lambda s: s and 'Duration' in s)
start_tag = soup.find('p', string=start_date_text)

# Extract fee amount and GST: use the first fee block whose text contains fee_marker.
fees = soup.find_all('div', class_='program-details-total-pay-amt-right')
fee_text = None
for div in fees:
    candidate = div.get_text(strip=True, separator=' ')
    if fee_marker in candidate:
        fee_text = candidate
        break

# Extract data
data = {
    "Course link": url,
    "Title": text_or(soup.find('h1'), "Title not found."),
    "Description": text_or(descend(soup.find('div', class_="desc_less"), 'p'), "Description not found."),
    "Duration": duration_tag.get_text(strip=True).replace('Duration: ', '') if duration_tag is not None else "Duration not found.",
    "Course Start": text_or(start_tag, "Course start date not found."),
    "Key Skills": items_or(descend(soup.find('div', class_='key-skills-sec'), 'ul'), "Key skills not found."),
    "What will you learn": items_or(descend(soup.find('div', class_='pl-deeper-undstnd to_flex_ul'), 'ul'), "Understanding items not found."),
    "Target Student": text_or(soup.find('h4', class_="cs-titlec"), "Target student information not found."),
    "Eligibility Criteria": text_or(descend(soup.find('div', class_='eligible-right-top-list'), 'ul', 'li'), "Criteria not found."),
    "Faculty Names": ", ".join(h4.get_text(strip=True) for h4 in soup.find_all('h4', class_="best-fname")),
    "Institute": text_or(soup.find('h4', class_="about-ititle"), "Institute not found."),
    # Keeps the 2nd-4th whitespace-separated tokens of the fee block's text
    # (presumably skipping a leading label token; verify against the page).
    "Fee + GST": ' '.join(fee_text.split()[1:4]) if fee_text is not None else "Fee not found.",
}

# Convert the data to a DataFrame
df = pd.DataFrame([data])

# Save the DataFrame to an Excel file
df.to_excel(output_file, index=False)

print(f"Data has been saved to {output_file}")


Data has been saved to course_details10.xlsx


In [138]:
import pandas as pd

# Per-course workbooks written by the scraping cells, in the order they are merged.
# NOTE(review): course_details3.xlsx and course_details7.xlsx are produced from
# the same URLs as course_details.xlsx and course_details6.xlsx respectively,
# so the combined sheet will contain repeated courses unless those cells are
# pointed at different pages.
course_files = [
    'course_details.xlsx',
    'course_details3.xlsx',
    'course_details4.xlsx',
    'course_details5.xlsx',
    'course_details6.xlsx',
    'course_details7.xlsx',
    'course_details8.xlsx',
    'course_details9.xlsx',
    'course_details10.xlsx',
]

combined_file = 'combined_course_details.xlsx'

# Read every listed workbook. Building the list of frames from the list of
# paths guarantees no file is declared but left unread.
course_frames = [pd.read_excel(path) for path in course_files]

# Combine the DataFrames into one table with a fresh 0..n-1 index
combined_df = pd.concat(course_frames, ignore_index=True)

# Save the combined DataFrame to a new Excel file
combined_df.to_excel(combined_file, index=False)

print(f"Combined data has been saved to {combined_file}")


Combined data has been saved to combined_course_details.xlsx


In [141]:
import pandas as pd
# Reload the merged workbook from disk and show it as the cell's output.
combined_path = 'combined_course_details.xlsx'
data = pd.read_excel(combined_path)
data

SyntaxError: invalid syntax (3631966056.py, line 1)