In [None]:
import requests

# Fetch the HTML of the target page.
# The timeout keeps this cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() stops the notebook here on a 4xx/5xx reply
# instead of letting the cells below parse an error page.
res = requests.get('url', timeout=30)
res.raise_for_status()
res.text

In [None]:
from bs4 import BeautifulSoup

# Parse the downloaded HTML with Python's built-in 'html.parser' backend so the
# page can be searched by tag and attribute in the following cells.
soup = BeautifulSoup(markup=res.text, features='html.parser')

# Preparation: extract only the skills

In [None]:
# Collect every <span> whose CSS class is 'skills', anywhere in the page.
# NOTE(review): the per-course loop further down queries class 'skill'
# (singular) -- confirm against the page markup which spelling is correct.
skills = soup.find_all('span', attrs={'class': 'skills'})

# Preview the text content of each matched span.
[span.string for span in skills]

# Preparation: extract only the course names

In [None]:
# Collect every <h3> whose CSS class is 'course', anywhere in the page.
courses = soup.find_all('h3', attrs={'class': 'course'})

# Preview the text content of each matched heading.
[heading.string for heading in courses]

# Extract both course names and skill names

In [None]:
# Collect every <div> whose `attr-data` attribute equals 'course'.
# These are searched one by one for a course heading and skill spans later on.
divs = soup.find_all('div', {'attr-data': 'course'})

Verify the retrieved data

In [None]:
# Number of <div> elements matched by the query above.
len(divs)

In [None]:
# At least one matched <div> has no <h3 class="course"> inside it. For such an
# element `find()` returns None, so accessing `.string` raises AttributeError,
# e.g. (index 11 is the 12th element, counting from zero):
# divs[11].find('h3', class_='course').string

# Keep only the elements that actually contain a course heading.
# Filtering on the condition itself (rather than slicing off the last element)
# is idempotent: re-running this cell never drops a valid course, and it does
# not depend on where in the list the problematic element happens to sit.
divs = [div for div in divs if div.find('h3', class_='course') is not None]

In [None]:
# Number of <div> elements currently held in `divs`.
len(divs)

Use a for loop to extract both course names and their skills

In [None]:
# Build one row per course block: [course name, skill 1, skill 2, ...].
# Rows may differ in length because each course lists its own number of skills.
#
# The loop uses its own names (`course_name`, `skill_names`) so that it does not
# overwrite the notebook-level `skills` list of tags created in an earlier cell.
#
# NOTE(review): this cell queries class 'skill' (singular) while the earlier
# preview cell queries 'skills' -- confirm against the page markup.
data = []

for div in divs:
    course_name = div.find('h3', class_='course').string
    skill_names = [span.string for span in div.find_all('span', class_='skill')]
    data.append([course_name, *skill_names])

# Show the result as the cell's output value.
data

Normalize the number of elements in each row to convert into a DataFrame

In [None]:
# Width of the widest row. Each row is [course, skill, skill, ...], so this
# counts the course cell plus that row's skills (not the skills alone).
# `default=1` keeps an empty `data` from raising ValueError and still leaves
# room for the course column.
max_len = max((len(row) for row in data), default=1)

# Right-pad shorter rows with blank cells so every row has exactly max_len
# entries. Rows are extended in place; re-running the cell adds nothing more
# because the padding amount is then zero.
for row in data:
    row.extend([''] * (max_len - len(row)))

Convert to a DataFrame

In [None]:
import pandas as pd

# Column headers: 'Course' first, then Skill1 .. Skill{max_len - 1}, one per
# skill slot in the padded rows.
columns = ['Course', *(f'Skill{n}' for n in range(1, max_len))]
df = pd.DataFrame(data=data, columns=columns)

Export the DataFrame in Excel format

In [None]:
# Write the table to an .xlsx file in the current working directory, without
# the DataFrame's index column.
# NOTE(review): the file name reads "Couses" -- possibly a typo for "Courses";
# left unchanged in case something downstream expects this exact name.
df.to_excel(excel_writer="CousesAndSkills.xlsx", index=False)