#### **1. Multi-Threaded Web Scraper**

Write a Python program to implement a multi-threaded web scraper that respects robots.txt rules.

In [1]:
import requests

In [2]:
import threading
import queue
import requests
from urllib.parse import urlparse
from urllib.robotparser import RobotFileParser
from bs4 import BeautifulSoup

class WebScraperThread(threading.Thread):
    def __init__(self, url_queue, user_agent="MyScraper"):
        threading.Thread.__init__(self)
        self.url_queue = url_queue
        self.user_agent = user_agent

    def run(self):
        while not self.url_queue.empty():
            url = self.url_queue.get()
            if self.allowed_by_robots(url):
                try:
                    response = requests.get(url, headers={"User-Agent": self.user_agent})
                    if response.status_code == 200:
                        soup = BeautifulSoup(response.text, "html.parser")
                        print(f"[{self.name}] Fetched {url} | Title: {soup.title.string if soup.title else 'N/A'}")
                except Exception as e:
                    print(f"[{self.name}] Error fetching {url}: {e}")
            else:
                print(f"[{self.name}] Skipping {url} (disallowed by robots.txt)")
            self.url_queue.task_done()

    def allowed_by_robots(self, url):
        parsed_url = urlparse(url)
        robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt"
        rp = RobotFileParser()
        rp.set_url(robots_url)
        try:
            rp.read()
            return rp.can_fetch(self.user_agent, url)
        except:
            return False

urls = [
    "https://example.com",
    "https://www.python.org",
    "https://www.wikipedia.org"
]

url_queue = queue.Queue()
for url in urls:
    url_queue.put(url)

threads = []
for _ in range(4):
    thread = WebScraperThread(url_queue)
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()


[Thread-4] Fetched https://www.python.org | Title: Welcome to Python.org
[Thread-5] Fetched https://www.wikipedia.org | Title: Wikipedia
[Thread-3] Fetched https://example.com | Title: Example Domain


#### **Class-Based Decorator for Execution Time**

Write a Python program to create a class-based decorator that logs the execution time of methods.

In [4]:
import time

class ExecutionTimer:
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        start = time.time()
        result = self.func(*args, **kwargs)
        end = time.time()
        print(f"Execution time: {end - start:.6f} seconds")
        return result

In [5]:
@ExecutionTimer
def sample_function():
    total = 0
    for i in range(1000000):
        total += i
    return total

sample_function()

Execution time: 0.169099 seconds


499999500000