In [None]:
import time
# Wall-clock reference point; taken before the remaining imports, so the
# elapsed time reported later in the script includes their load time.
start = time.time()
import requests
from bs4 import BeautifulSoup
from queue import Queue, Empty
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin, urlparse
class MultiThreadScraper:
    """Same-site web crawler that downloads pages on a thread pool.

    The crawl starts at ``base_url``. Each page fetched with HTTP status 200
    is handed to :meth:`parse_links`, which queues further URLs on the same
    scheme and host, and to :meth:`scrape_info`, a hook that does nothing by
    default.

    Args:
        base_url: URL the crawl starts from; its scheme and host define the
            site boundary (``root_url``).
        max_workers: Number of download threads in the pool.
        idle_timeout: Seconds :meth:`run_scraper` waits on an empty queue
            before it considers the crawl finished.
        request_timeout: Timeout passed to ``requests.get``; the default is
            a ``(connect, read)`` tuple in seconds.
    """

    def __init__(self, base_url, *, max_workers=10, idle_timeout=60,
                 request_timeout=(3, 30)):
        self.base_url = base_url
        parts = urlparse(self.base_url)
        self.root_url = '{}://{}'.format(parts.scheme, parts.netloc)
        self.idle_timeout = idle_timeout
        self.request_timeout = request_timeout
        self.pool = ThreadPoolExecutor(max_workers=max_workers)
        # URLs that have been handed to the pool (written by run_scraper).
        self.scraped_pages = set()
        # URLs that have ever been put on the queue by this object. Keeps the
        # queue from filling up with the same link found on many pages.
        self._queued = {self.base_url}
        self.to_crawl = Queue()
        self.to_crawl.put(self.base_url)

    def _normalize_link(self, href):
        """Return the absolute, fragment-free URL for ``href`` or ``None``.

        ``None`` is returned for links that must not be crawled: anything
        that is neither root-relative nor prefixed with ``root_url``, and
        anything that resolves to a different scheme or host.
        """
        href = href.strip()
        if not (href.startswith('/') or href.startswith(self.root_url)):
            return None
        try:
            target = urlparse(urljoin(self.root_url, href))
            root = urlparse(self.root_url)
        except ValueError:
            # Malformed href (e.g. a broken bracketed host); ignore it.
            return None
        # A prefix test alone is not enough: "//other.host/x" starts with
        # "/" and "https://site.com.evil.org" starts with "https://site.com",
        # yet both point off-site. Compare the parsed components instead.
        if target.scheme != root.scheme or target.netloc != root.netloc:
            return None
        # The fragment is never sent to the server, so "/p#a" and "/p#b"
        # are the same page.
        return target._replace(fragment='').geturl()

    def parse_links(self, html):
        """Queue every not-yet-seen same-site link found in ``html``."""
        soup = BeautifulSoup(html, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            url = self._normalize_link(anchor['href'])
            if url is None:
                continue
            # This check-then-add is not synchronised. If two callbacks race,
            # the worst outcome is a URL queued twice, and run_scraper skips
            # URLs already in scraped_pages.
            if url in self.scraped_pages or url in self._queued:
                continue
            self._queued.add(url)
            self.to_crawl.put(url)

    def scrape_info(self, html):
        """Hook for extracting data from a fetched page; does nothing here."""
        return

    def post_scrape_callback(self, res):
        """Future done-callback: process the response of a finished download.

        Args:
            res: The future returned by ``pool.submit(self.scrape_page, ...)``.
        """
        result = res.result()
        # scrape_page returns None when the request failed.
        if result is not None and result.status_code == 200:
            self.parse_links(result.text)
            self.scrape_info(result.text)

    def scrape_page(self, url):
        """Download ``url``; return the response, or ``None`` on a request error."""
        try:
            return requests.get(url, timeout=self.request_timeout)
        except requests.RequestException:
            return None

    def run_scraper(self):
        """Dispatch queued URLs to the pool until the queue stays empty.

        Returns once no URL arrives for ``idle_timeout`` seconds. The thread
        pool is shut down at that point, so the instance cannot schedule
        further downloads afterwards.
        """
        while True:
            try:
                target_url = self.to_crawl.get(timeout=self.idle_timeout)
            except Empty:
                # Nothing new for idle_timeout seconds: treat the crawl as
                # finished and release the worker threads.
                self.pool.shutdown(wait=True)
                return
            try:
                if target_url in self.scraped_pages:
                    continue
                print("Scraping URL: {}".format(target_url))
                self.scraped_pages.add(target_url)
                job = self.pool.submit(self.scrape_page, target_url)
                job.add_done_callback(self.post_scrape_callback)
            except Exception as e:
                # Best effort: report the problem and move on to the next URL.
                print(e)
                continue
if __name__ == '__main__':
    # Crawl the site starting from its home page.
    s = MultiThreadScraper("https://www.geeksforgeeks.org/")
    s.run_scraper()
    # Report the elapsed wall-clock time. This lives under the guard so that
    # merely importing the module does not print a timing for a crawl that
    # never ran.
    end = time.time()
    comptime = end - start
    print("COMPUTATION TIME: ", comptime, "seconds")

Scraping URL: https://en.wikipedia.org/wiki/Web_crawler
Scraping URL: https://en.wikipedia.org/wiki/Main_Page
Scraping URL: https://en.wikipedia.org/wiki/Wikipedia:Contents
Scraping URL: https://en.wikipedia.org/wiki/Portal:Current_events
Scraping URL: https://en.wikipedia.org/wiki/Special:Random
Scraping URL: https://en.wikipedia.org/wiki/Wikipedia:About
Scraping URL: https://en.wikipedia.org/wiki/Wikipedia:Contact_us
Scraping URL: https://en.wikipedia.org/wiki/Help:Contents
Scraping URL: https://en.wikipedia.org/wiki/Help:Introduction
Scraping URL: https://en.wikipedia.org/wiki/Wikipedia:Community_portal
Scraping URL: https://en.wikipedia.org/wiki/Special:RecentChanges
Scraping URL: https://en.wikipedia.org/wiki/Wikipedia:File_upload_wizard
Scraping URL: https://en.wikipedia.org/wiki/Special:Search
Scraping URL: https://en.wikipedia.org/w/index.php?title=Special:CreateAccount&returnto=Web+crawler
Scraping URL: https://en.wikipedia.org/w/index.php?title=Special:UserLogin&returnto=Web+

Scraping URL: https://en.wikipedia.org/wiki/Google_hacking
Scraping URL: https://en.wikipedia.org/wiki/Robots_exclusion_standard
Scraping URL: https://en.wikipedia.org/w/index.php?title=Web_crawler&action=edit&section=16
Scraping URL: https://en.wikipedia.org/wiki/User_agent
Scraping URL: https://en.wikipedia.org/wiki/Spambots
Scraping URL: https://en.wikipedia.org/w/index.php?title=Web_crawler&action=edit&section=17
Scraping URL: https://en.wikipedia.org/wiki/Deep_Web_(search_indexing)
Scraping URL: https://en.wikipedia.org/wiki/Sitemaps
Scraping URL: https://en.wikipedia.org/wiki/Mod_oai
Scraping URL: https://en.wikipedia.org/wiki/Googlebot
Scraping URL: https://en.wikipedia.org/wiki/Screen_scraping
Scraping URL: https://en.wikipedia.org/wiki/AJAX
Scraping URL: https://en.wikipedia.org/w/index.php?title=Web_crawler&action=edit&section=18
Scraping URL: https://en.wikipedia.org/w/index.php?title=Web_crawler&action=edit&section=19
Scraping URL: https://en.wikipedia.org/wiki/List_of_sear