<a href="https://colab.research.google.com/github/chepkorirsheila/myproject/blob/main/Week6_Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import os
from urllib.parse import urlparse

def main():
    """Prompt for one image URL, download it, and save it under ``Fetched_Images/``.

    The file name is the last path segment of the URL, falling back to
    ``downloaded_image.jpg`` when the URL path has none. A response that
    declares a non-image ``Content-Type`` (for example the HTML of a login
    or viewer page) is rejected instead of being written to disk as if it
    were an image. A response with no ``Content-Type`` header at all is
    still saved, as before.

    Returns:
        None. Progress and failures are reported with ``print``; request
        errors and any other exception are caught here and not re-raised.
    """
    print("Welcome to the Ubuntu Image Fetcher")
    print("A tool for mindfully collecting images from the web\n")

    # Get URL from user
    url = input("Please enter the image URL: ").strip()
    if not url:
        # Nothing to fetch; say so plainly instead of surfacing a library
        # "invalid URL" error.
        print("✗ No URL provided.")
        return

    try:
        # Create directory if it doesn't exist
        os.makedirs("Fetched_Images", exist_ok=True)

        # Fetch the image with a User-Agent to avoid being blocked by some servers
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Only keep responses the server labels as an image. The header may
        # carry parameters ("image/png; charset=...") and any letter case, so
        # compare on the bare, lower-cased media type.
        content_type = response.headers.get('Content-Type', '')
        media_type = content_type.split(';', 1)[0].strip().lower()
        if media_type and not media_type.startswith('image/'):
            print(f"✗ Skipping: response is not an image (Content-Type: {content_type})")
            return

        # Extract filename from URL
        parsed_url = urlparse(url)
        filename = os.path.basename(parsed_url.path)

        if not filename:
            filename = "downloaded_image.jpg"

        filepath = os.path.join("Fetched_Images", filename)

        # Save the image
        with open(filepath, 'wb') as f:
            f.write(response.content)

        print(f"✓ Successfully fetched: {filename}")
        print(f"✓ Image saved to {filepath}")
        print("\nConnection strengthened. Community enriched.")

    except requests.exceptions.RequestException as e:
        print(f"✗ Connection error: {e}")
    except Exception as e:
        print(f"✗ An error occurred: {e}")

# Entry point: start the fetcher only when this code is executed directly,
# not when it is imported from another module.
if __name__ == "__main__":
    main()

Welcome to the Ubuntu Image Fetcher
A tool for mindfully collecting images from the web

Please enter the image URL: https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk
✗ Connection error: 401 Client Error: Unauthorized for url: https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk


In [5]:
import requests
import os
import hashlib
from urllib.parse import urlparse

# Safety limits applied to every download
MAX_FILE_SIZE = 10 * 1024 ** 2  # largest body accepted, in bytes (10 MB)
ALLOWED_TYPES = [
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
]  # Content-Type values treated as images

def get_file_hash(content):
    """Return the hexadecimal SHA-256 digest of ``content``.

    Two byte strings with the same digest are treated as the same image,
    which is how duplicate downloads are recognised.
    """
    digest = hashlib.sha256()
    digest.update(content)
    return digest.hexdigest()

def _media_type(content_type_header):
    """Return the bare, lower-cased media type of a Content-Type header value."""
    return content_type_header.split(';', 1)[0].strip().lower()


def _read_capped(response, limit):
    """Read a streamed body in chunks; return its bytes, or None once it exceeds ``limit``."""
    chunks = []
    total = 0
    for chunk in response.iter_content(chunk_size=8192):
        total += len(chunk)
        if total > limit:
            return None
        chunks.append(chunk)
    return b"".join(chunks)


def _hash_saved_images(folder):
    """Return the set of content hashes of the regular files already in ``folder``."""
    hashes = set()
    for entry in os.listdir(folder):
        path = os.path.join(folder, entry)
        if not os.path.isfile(path):
            continue
        try:
            with open(path, 'rb') as saved:
                hashes.add(get_file_hash(saved.read()))
        except OSError:
            # An unreadable file cannot be compared; leave it out of the set.
            continue
    return hashes


def _fallback_name(file_hash, media_type):
    """Build a file name from the hash for URLs whose path has no file name."""
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
        'image/webp': '.webp',
    }
    return f"img_{file_hash[:8]}{extensions.get(media_type, '.jpg')}"


def main():
    """Prompt for comma-separated image URLs and save each image under ``Fetched_Images/``.

    For every URL the response is checked before anything is written:

    * the media type (Content-Type without parameters, case-insensitive)
      must be listed in ``ALLOWED_TYPES``;
    * the body must not exceed ``MAX_FILE_SIZE`` bytes. The Content-Length
      header is used as an early hint, but the limit is enforced while the
      body is streamed, because the header can be absent or wrong;
    * the content hash must not match a file already in the folder or one
      saved earlier in this run.

    The file name is the last path segment of the URL, or
    ``img_<hash prefix><ext>`` when the path has none. If a different image
    already occupies that name, the hash prefix is appended to the stem so
    the existing file is not overwritten.

    Returns:
        None. Outcomes are reported with ``print``; exceptions raised while
        handling one URL are caught so the remaining URLs are still tried.
    """
    print("Welcome to the Ubuntu Image Fetcher 2.0")
    print("Mindfully collecting and securing your digital community.\n")

    # Handle multiple URLs
    user_entry = input("Enter image URLs separated by commas: ")
    urls = [u.strip() for u in user_entry.split(",") if u.strip()]

    os.makedirs("Fetched_Images", exist_ok=True)

    # Hashes of the files already in the folder; extended as new images are saved
    existing_hashes = _hash_saved_images("Fetched_Images")

    # Identify this tool to the server with its own User-Agent string
    headers = {'User-Agent': 'UbuntuFetcher/2.0 (Compatible; Mindful-Bot)'}

    for url in urls:
        print(f"\nAttempting to connect to: {url}")
        try:
            # stream=True defers the body so the headers can be checked first
            with requests.get(url, headers=headers, timeout=10, stream=True) as r:
                r.raise_for_status()

                # --- HEADER VALIDATION ---
                content_type = r.headers.get('Content-Type', '')
                media_type = _media_type(content_type)
                if media_type not in ALLOWED_TYPES:
                    print(f"✗ Skipping: Source is not a verified image type ({content_type})")
                    continue

                try:
                    content_length = int(r.headers.get('Content-Length', 0))
                except ValueError:
                    # Malformed header: treat the size as unknown and rely on
                    # the streaming limit below.
                    content_length = 0

                if content_length > MAX_FILE_SIZE:
                    print(f"✗ Skipping: File is too large ({content_length} bytes)")
                    continue

                # Download content, stopping as soon as the limit is passed
                content = _read_capped(r, MAX_FILE_SIZE)
                if content is None:
                    print(f"✗ Skipping: File is too large (more than {MAX_FILE_SIZE} bytes)")
                    continue

                # --- DUPLICATE PREVENTION ---
                file_hash = get_file_hash(content)
                if file_hash in existing_hashes:
                    print("✗ Skipping: This exact image has already been fetched.")
                    continue

                # --- FILENAME SAFETY ---
                parsed_url = urlparse(url)
                filename = os.path.basename(parsed_url.path) or _fallback_name(file_hash, media_type)
                filepath = os.path.join("Fetched_Images", filename)
                if os.path.exists(filepath):
                    # The name is taken by a different image (identical
                    # content was ruled out above), so make the name unique.
                    stem, suffix = os.path.splitext(filename)
                    filename = f"{stem}_{file_hash[:8]}{suffix}"
                    filepath = os.path.join("Fetched_Images", filename)

                # Save file
                with open(filepath, 'wb') as f:
                    f.write(content)

                existing_hashes.add(file_hash)
                print(f"✓ Successfully fetched: {filename}")

        except requests.exceptions.RequestException as e:
            print(f"✗ Connection error with {url}: {e}")
        except Exception as e:
            # Not a network problem (for example the file could not be written)
            print(f"✗ An error occurred with {url}: {e}")

    print("\nBatch complete. Community enriched.")

# Entry point: start the batch fetcher only when this code is executed
# directly, not when it is imported from another module.
if __name__ == "__main__":
    main()

Welcome to the Ubuntu Image Fetcher 2.0
Mindfully collecting and securing your digital community.

Enter image URLs separated by commas: https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk

Attempting to connect to: https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk
✗ Connection error with https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk: 401 Client Error: Unauthorized for url: https://drive.google.com/file/d/1Q-olG9uJd4PwARB6ELVM5Tqgm39em4mx/view?usp=drivesdk

Batch complete. Community enriched.
