In [1]:
pip install requests beautifulsoup4 pillow


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [2]:
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import os

# Step 1: Scrape a webpage
def scrape_webpage(url):
    print(f"Scraping webpage: {url}")
    response = requests.get(url)
    response.raise_for_status()
    
    soup = BeautifulSoup(response.text, 'html.parser')
    print(f"Page title: {soup.title.string}")
    
    # Retrieve and print all image URLs
    image_tags = soup.find_all('img')
    image_urls = [img['src'] for img in image_tags if 'src' in img.attrs]
    print(f"Found {len(image_urls)} image(s).")
    
    return image_urls

# Step 2: Retrieve an image over HTTP
def retrieve_image(image_url):
    print(f"Retrieving image: {image_url}")
    response = requests.get(image_url)
    response.raise_for_status()
    
    # Open the image with PIL
    img = Image.open(BytesIO(response.content))
    img.show()  # Display the image
    
    # Save the image locally
    if not os.path.exists('images'):
        os.makedirs('images')
    img.save(f"images/retrieved_image.{img.format.lower()}")
    print(f"Image saved as 'images/retrieved_image.{img.format.lower()}'.")
    
# Main program
if __name__ == "__main__":
    try:
        # Example URL to scrape (you can use any public webpage)
        webpage_url = "https://www.wikipedia.org/"
        
        # Scrape the webpage
        image_urls = scrape_webpage(webpage_url)
        
        # Retrieve the first image if available
        if image_urls:
            first_image_url = image_urls[0]
            # Handle relative URLs
            if not first_image_url.startswith("http"):
                first_image_url = webpage_url + first_image_url
            retrieve_image(first_image_url)
        else:
            print("No images found on the webpage.")
    except Exception as e:
        print(f"An error occurred: {e}")


Scraping webpage: https://www.wikipedia.org/
Page title: Wikipedia
Found 2 image(s).
Retrieving image: https://www.wikipedia.org/portal/wikipedia.org/assets/img/Wikipedia-logo-v2.png
Image saved as 'images/retrieved_image.png'.


In [1]:
pip install requests


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [4]:
import requests
import xml.etree.ElementTree as ET

def fetch_weather_data(city):
    """
    Fetch weather data from a web service returning XML responses.
    """
    # Example URL for a free weather API with XML response (replace with a valid endpoint)
    url = f"http://api.weatherapi.com/v1/current.xml?key=YOUR_API_KEY&q={city}"
    
    # Send a GET request to the API
    response = requests.get(url)
    print("Response Text:", response.text)
    print("Response Headers:", response.headers)

    
    if response.status_code == 200:
        print("Successfully fetched the XML data!")
        return response.content
    else:
        print(f"Failed to fetch data. HTTP Status Code: {response.status_code}")
        return None

def parse_xml_data(xml_data):
    """
    Parse the XML data and extract specific information.
    """
    # Parse the XML content
    root = ET.fromstring(xml_data)
    
    # Extract specific data (customize based on the XML structure)
    location = root.find("location/name").text
    region = root.find("location/region").text
    country = root.find("location/country").text
    temperature = root.find("current/temp_c").text
    condition = root.find("current/condition/text").text
    
    # Print the extracted information
    print(f"Location: {location}, {region}, {country}")
    print(f"Temperature: {temperature}°C")
    print(f"Condition: {condition}")

def main():
    # City for which we want the weather data
    city = "London"
    
    # Fetch and parse the XML data
    xml_data = fetch_weather_data(city)
    if xml_data:
        parse_xml_data(xml_data)
    

if __name__ == "__main__":
    main()


Response Text: <html><head><title>403 Forbidden</title><link href='//fonts.bunny.net/css?family=Rubik:300,400,500' rel='stylesheet' type='text/css'><style>html, body { width: 100%; margin: 0; padding: 0; text-align: center; font-family: 'Rubik'; background-image: url('