# In [9]:
import requests
from bs4 import BeautifulSoup
import os
import json
from urllib.parse import urljoin
import urllib

# Function to fetch webpage content
def fetch_page(url, timeout=10):
    """Fetch a web page and return its body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` can block indefinitely on a host that
            never answers.

    Returns:
        The response body as text, or ``None`` if the request failed
        (connection error, timeout, or a 4xx/5xx status). The error is
        printed before ``None`` is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx statuses into exceptions
    except requests.exceptions.RequestException as e:
        print(f"Error fetching page: {e}")
        return None
    return response.text

# Function to extract links and images from the webpage
def extract_links_and_images(url, html_content):
    """Collect the unique link and image URLs found in an HTML document.

    Args:
        url: Address the HTML was fetched from; relative ``href`` and
            ``src`` values are resolved against it.
        html_content: HTML markup to parse.

    Returns:
        A ``(links, images)`` tuple of lists. ``links`` holds the resolved
        ``href`` of every ``<a>`` tag that has one, ``images`` the resolved
        ``src`` of every ``<img>`` tag that has one. Both lists are
        de-duplicated and sorted so the result is identical on every run;
        iterating a set of strings directly is not order-stable across
        interpreter runs.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # urljoin leaves absolute URLs untouched and resolves relative ones.
    links = {
        urljoin(url, anchor['href'])
        for anchor in soup.find_all('a', href=True)
    }
    images = {
        urljoin(url, img['src'])
        for img in soup.find_all('img', src=True)
    }

    return sorted(links), sorted(images)

# Function to save results to a file
def save_to_file(links, images, filename="scraper_results.json"):
    """Merge scraped links and images into a JSON results file.

    If ``filename`` already holds results, the new entries are appended to
    the stored ones; otherwise a new file is created. In both cases each
    list is de-duplicated while keeping first-seen order, so repeated runs
    produce a stable file. Any other keys already present in the file are
    written back unchanged.

    Args:
        links: Iterable of link URLs to store.
        images: Iterable of image URLs to store.
        filename: Path of the JSON file to create or update.

    An existing file that is empty, is not valid JSON, or does not contain
    a JSON object is treated as holding no previous results (a warning is
    printed for invalid JSON) and is overwritten.
    """
    # EAFP: open directly instead of checking os.path.exists first, which
    # avoids a race between the check and the open.
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            results = json.load(file)
    except FileNotFoundError:
        results = {}
    except json.JSONDecodeError as e:
        print(f"Ignoring unreadable results in {filename}: {e}")
        results = {}
    if not isinstance(results, dict):
        results = {}

    for key, new_items in (("links", links), ("images", images)):
        previous = results.get(key)
        if not isinstance(previous, list):
            # Missing or malformed entry: start this list from scratch.
            previous = []
        # dict.fromkeys removes duplicates but keeps insertion order.
        results[key] = list(dict.fromkeys([*previous, *new_items]))

    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(results, file, indent=4)

#Function to download images 
def download_images(images, folder="downloaded_images", timeout=10):
    """Download each image URL into ``folder``.

    Files are named ``image_<n><ext>``, where ``n`` is the 1-based position
    of the URL in ``images`` and ``ext`` is the extension found in the URL
    path when it is a recognised image extension, or ``.jpg`` otherwise.

    Args:
        images: Iterable of image URLs to download.
        folder: Directory to save into; created if it does not exist.
        timeout: Seconds to wait for each server before giving up, so one
            unresponsive host cannot stall the whole run.

    A URL that cannot be downloaded is reported with a printed message and
    skipped; the remaining URLs are still processed.
    """
    # Local import keeps this function usable regardless of which names
    # the module-level imports expose.
    from urllib.parse import urlparse

    known_extensions = {
        ".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".bmp", ".ico",
        ".avif", ".tif", ".tiff",
    }

    # exist_ok avoids the race between checking for the folder and
    # creating it.
    os.makedirs(folder, exist_ok=True)

    for i, img_url in enumerate(images, start=1):
        try:
            response = requests.get(img_url, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Failed to download {img_url}: {e}")
            continue

        # Keep the real file type instead of labelling everything .jpg.
        extension = os.path.splitext(urlparse(img_url).path)[1].lower()
        if extension not in known_extensions:
            extension = ".jpg"

        img_name = os.path.join(folder, f"image_{i}{extension}")
        with open(img_name, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded {img_url} to {img_name}")

# Main function to scrape a webpage and save links/images
def scrape_page():
    """Prompt for a URL, scrape its links and images, and save them.

    Reads the URL from standard input, downloads the page, extracts the
    link and image URLs, and merges them into the default results file.
    Progress and failures are reported with printed messages.
    """
    url = input("Enter the URL to scrape: ")
    page_source = fetch_page(url)

    # Nothing usable came back, so there is nothing to scrape.
    if not page_source:
        print("Failed to fetch the page.")
        return

    print(f"Scraping {url}...")
    found_links, found_images = extract_links_and_images(url, page_source)
    link_count = len(found_links)
    image_count = len(found_images)
    print(f"Found {link_count} links and {image_count} images.")

    # Persist the results.
    save_to_file(found_links, found_images)
    print("Results saved to scraper_results.json")

    # Image downloading is optional and disabled by default; call
    # download_images(found_images) here to enable it.

# Run the interactive scraper only when executed as a script, not on import.
if __name__ == "__main__":
    scrape_page()


# Example output:
# Scraping https://odatv.com...
# Found 163 links and 102 images.
# Results saved to scraper_results.json
