In [22]:
pip install requests beautifulsoup4 pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip




In [23]:
from pathlib import Path # The pathlib module provides filesystem path objects; used here to create the output folder

import pandas as pd # The pandas package provides data structures and functions needed to efficiently manipulate large datasets
import requests # The requests package provides a user-friendly interface for sending HTTP requests
from bs4 import BeautifulSoup # This imports the BeautifulSoup class from the bs4 module provided by the installed package beautifulsoup4
#The BeautifulSoup class creates a parse tree for parsed pages that can be used to extract data from HTML

# Scrape the event cards from the ESCAP events page and save their names and
# places to a CSV file.

# URL of the ESCAP events page and location of the CSV file that is written
url = "https://unescap.org/events"
output_path = Path("Outputs/ESCAP-events.csv")


def _text_or_na(tag):
    """Return the stripped text of a BeautifulSoup tag, or "N/A" if the tag was not found."""
    return tag.text.strip() if tag else "N/A"


# Send a GET request to the URL.
# A timeout is set because requests waits indefinitely by default, which would
# leave the cell hanging if the server never answers. Network errors
# (connection failure, timeout) are not caught and surface as exceptions.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the event cards (use "inspect" on the web page to see the HTML structure)
    events = soup.find_all("div", class_="card-body")

    if not events:
        # Nothing matched: the page layout may have changed. The (empty) CSV
        # is still written below, but the reader is told about it.
        print("Warning: no event cards were found on the page.")

    # Create empty lists to store the data
    event_names = []
    event_places = []

    # Loop through the event cards and extract the data
    for event in events:
        # Find the first h5 tag with class "card-title".
        # Note the trailing underscore in class_: "class" is a Python reserved word.
        name_tag = event.find("h5", class_="card-title")

        # Find the first p tag carrying both the "sub-text" and "location-icon"
        # classes. A CSS selector is used because find(class_="a b") only
        # matches when the class attribute is exactly the string "a b"; the
        # selector also matches if the classes appear in another order or
        # next to additional classes.
        place_tag = event.select_one("p.sub-text.location-icon")

        # .text gives the visible text of the element (which, per the original
        # author's note, can be cut off with '...'); missing tags become "N/A".
        event_names.append(_text_or_na(name_tag))
        event_places.append(_text_or_na(place_tag))

    # Create a DataFrame from the lists using pandas (pd)
    df = pd.DataFrame({
        "Event Name": event_names,
        "Event Place": event_places
    })

    # Make sure the output folder exists; to_csv does not create missing
    # directories and would raise an OSError otherwise.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Save the DataFrame to a CSV file (comma-separated, without the index column)
    df.to_csv(output_path, index=False)

    print("Data has been written to ESCAP-events.csv")
else:
    # Include the status code so the reason for the failure is visible
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")


Data has been written to ESCAP-events.csv
