**Q1 Part1**

In [3]:
import requests
from bs4 import BeautifulSoup
import time

# Scheme and host of the site being crawled.
BASE_URL = 'https://press.un.org'
# English-language section of the site; main() builds its paginated listing URLs from this.
SEED_URL = BASE_URL + '/en'

def get_links_from_page(url, timeout=10):
    """Return absolute URLs of links on ``url`` whose anchor text mentions "crisis".

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A list of absolute URLs in page order, without duplicates. These are
        only candidates: nothing here checks that a link is a press release.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    # Function-scope import so this definition works without editing the
    # cell's import block.
    from urllib.parse import urljoin

    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    candidates = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href or 'crisis' not in anchor.get_text().lower():
            continue
        # Resolve against the page that was actually fetched: root-relative
        # hrefs give the same result as before, while absolute or
        # path-relative hrefs are no longer mangled by string concatenation.
        candidates.append(urljoin(response.url, href))

    # dict.fromkeys drops repeated URLs while keeping first-seen order, so a
    # headline linked twice on the page is only visited once.
    return list(dict.fromkeys(candidates))

def is_press_release(url, timeout=10):
    """Fetch ``url`` and return its raw body if the page is marked as a press release.

    A page is treated as a press release when it contains an anchor with
    ``hreflang='en'`` and ``href='/en/press-release'``.

    Args:
        url: Address of the candidate page.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        The response body as ``bytes`` when the marker anchor is present,
        otherwise ``None`` (including when the server answers with an error
        status).

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page is never a press release; do not parse or save it.
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # Marker anchor that identifies the page type.
    marker = soup.find('a', hreflang='en', href='/en/press-release')

    return response.content if marker is not None else None

def save_content_to_file(content, file_name):
    """Write ``content`` (text) to ``file_name`` as UTF-8, replacing any existing file."""
    with open(file_name, mode='w', encoding='utf-8') as out_file:
        out_file.write(content)

def main(target_count=10, max_pages=50, delay=2):
    """Crawl the press-release listing and save matching pages as ``1_<n>.txt``.

    Listing pages are requested as ``{SEED_URL}/press-release?page=<k>`` for
    k = 0, 1, 2, ...; every candidate link on a page is checked with
    ``is_press_release`` and, when accepted, its HTML is written to the
    current directory.

    Args:
        target_count: Number of press releases to save before stopping.
        max_pages: Upper bound on listing pages to visit. Without this cap the
            crawl would request ever-higher page numbers forever when fewer
            than ``target_count`` matches exist.
        delay: Seconds to pause after each request so the server is not
            hit with back-to-back fetches.
    """
    if target_count <= 0:
        return

    press_release_count = 0
    seen_links = set()  # avoids saving the same article twice across pages

    for page_number in range(max_pages):
        current_page_url = f"{SEED_URL}/press-release?page={page_number}"
        links = get_links_from_page(current_page_url)
        time.sleep(delay)

        for link in links:
            if link in seen_links:
                continue
            seen_links.add(link)

            content = is_press_release(link)
            if content:
                press_release_count += 1
                file_name = f"1_{press_release_count}.txt"
                # errors='replace' keeps one stray non-UTF-8 byte from
                # aborting the whole crawl.
                save_content_to_file(content.decode('utf-8', errors='replace'), file_name)
                if press_release_count >= target_count:
                    return

            time.sleep(delay)  # Avoid overwhelming the server

    print(f"Stopped after {max_pages} listing pages with {press_release_count} "
          f"of {target_count} press releases saved.")

# Entry point: start the crawl only when this code is executed directly
# rather than imported as a module.
if __name__ == '__main__':
    main()


**Q2 Part2**

In [5]:
import requests
from bs4 import BeautifulSoup
import time

# Prefix of the paginated press-room listing; get_links_from_page appends the page number to it.
BASE_URL = 'https://www.europarl.europa.eu/news/en/press-room/page/'

def get_links_from_page(page_number, timeout=10):
    """Return absolute URLs of links on a listing page whose anchor text mentions "crisis".

    Args:
        page_number: Page index appended to ``BASE_URL`` to form the listing URL.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        A list of absolute URLs in page order, without duplicates. These are
        only candidates: nothing here checks what kind of page a link leads to.

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    # Function-scope import so this definition works without editing the
    # cell's import block.
    from urllib.parse import urljoin

    response = requests.get(BASE_URL + str(page_number), timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    candidates = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href or 'crisis' not in anchor.get_text().lower():
            continue
        # Absolute hrefs pass through unchanged; relative ones are resolved
        # against the fetched page so a later requests.get() does not fail
        # on a URL without a scheme.
        candidates.append(urljoin(response.url, href))

    # dict.fromkeys drops repeated URLs while keeping first-seen order.
    return list(dict.fromkeys(candidates))

def is_valid_press_release(url, timeout=10):
    """Fetch ``url`` and return its raw body if the page is labelled "Plenary session".

    A page qualifies when it contains a ``<span class="ep_name">`` whose text,
    ignoring surrounding whitespace, is exactly ``Plenary session``.

    Args:
        url: Address of the candidate page.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error instead of blocking indefinitely.

    Returns:
        The response body as ``bytes`` when the label is present, otherwise
        ``None`` (including when the server answers with an error status).

    Raises:
        requests.RequestException: If the page cannot be fetched.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page is never a valid press release; do not parse or save it.
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    def _is_plenary_label(text):
        # ``text`` is the span's single string, or None when the span has
        # nested markup. Stripping tolerates indentation/newlines that the
        # HTML source may place around the label.
        return text is not None and text.strip() == 'Plenary session'

    plenary_session_tag = soup.find('span', class_='ep_name', string=_is_plenary_label)

    return response.content if plenary_session_tag is not None else None

def save_content_to_file(content, file_name):
    """Write ``content`` (text) to ``file_name`` as UTF-8, replacing any existing file."""
    with open(file_name, mode='w', encoding='utf-8') as out_file:
        out_file.write(content)

def main(target_count=10, max_pages=50, delay=2):
    """Crawl the press-room listing and save matching pages as ``2_<n>.txt``.

    Listing pages are requested by page number starting at 0; every candidate
    link on a page is checked with ``is_valid_press_release`` and, when
    accepted, its HTML is written to the current directory.

    Args:
        target_count: Number of press releases to save before stopping.
        max_pages: Upper bound on listing pages to visit. Without this cap the
            crawl would request ever-higher page numbers forever when fewer
            than ``target_count`` matches exist.
        delay: Seconds to pause after each request so the server is not
            hit with back-to-back fetches.
    """
    if target_count <= 0:
        return

    press_release_count = 0
    seen_links = set()  # avoids saving the same article twice across pages

    for page_number in range(max_pages):
        links = get_links_from_page(page_number)
        time.sleep(delay)

        for link in links:
            if link in seen_links:
                continue
            seen_links.add(link)

            content = is_valid_press_release(link)
            if content:
                press_release_count += 1
                file_name = f"2_{press_release_count}.txt"
                # errors='replace' keeps one stray non-UTF-8 byte from
                # aborting the whole crawl.
                save_content_to_file(content.decode('utf-8', errors='replace'), file_name)
                if press_release_count >= target_count:
                    return

            time.sleep(delay)  # Avoid overwhelming the server

    print(f"Stopped after {max_pages} listing pages with {press_release_count} "
          f"of {target_count} press releases saved.")

# Entry point: start the crawl only when this code is executed directly
# rather than imported as a module.
if __name__ == '__main__':
    main()
