In [1]:
import time
start=time.time()
import requests
from bs4 import BeautifulSoup
from queue import Queue, Empty
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin, urlparse
class MultiThreadScraper:
    """Breadth-first, same-site web crawler backed by a thread pool.

    The crawl starts at ``base_url``. Every fetched HTML page is scanned for
    ``<a href>`` links; links that resolve to the same scheme and host as
    ``base_url`` are queued for fetching. Override :meth:`scrape_info` to
    extract data from each page.

    Attributes:
        base_url: The seed URL passed to the constructor.
        root_url: ``scheme://netloc`` of ``base_url``; only URLs with exactly
            this origin are crawled.
        pool: The ``ThreadPoolExecutor`` that runs the HTTP requests.
        scraped_pages: Set of URLs that have already been submitted for
            fetching.
        to_crawl: ``Queue`` of URLs waiting to be fetched.
        request_timeout: Timeout passed to ``requests.get``.
        idle_timeout: Seconds :meth:`run_scraper` waits on an empty queue
            before it returns.
    """

    def __init__(self, base_url, *, max_workers=20, request_timeout=(3, 30),
                 idle_timeout=60):
        """Set up the crawl state and queue the seed URL.

        Args:
            base_url: Absolute URL at which the crawl starts.
            max_workers: Size of the thread pool used for HTTP requests.
            request_timeout: Value forwarded as ``timeout`` to
                ``requests.get`` (here a ``(connect, read)`` tuple).
            idle_timeout: Seconds to wait for a new URL before the crawl is
                considered finished.
        """
        parsed = urlparse(base_url)
        self.base_url = base_url
        self.root_url = '{}://{}'.format(parsed.scheme, parsed.netloc)
        self.pool = ThreadPoolExecutor(max_workers=max_workers)
        self.request_timeout = request_timeout
        self.idle_timeout = idle_timeout
        self.scraped_pages = set()
        # URLs that have ever been put on the queue. Without this, a link that
        # appears on every page is re-queued once per page, because
        # scraped_pages is only updated when a URL is dequeued.
        self._queued = {base_url}
        self.to_crawl = Queue()
        self.to_crawl.put(base_url)

    def _normalize_link(self, href):
        """Return the absolute same-site URL for ``href``, or ``None``.

        Only root-relative links (``/path``) and absolute links that begin
        with ``root_url`` are considered, as before. The resolved URL is then
        checked against ``root_url`` by origin rather than by string prefix,
        so protocol-relative links to other hosts (``//other.host/x``) and
        look-alike hosts (``https://site.example.evil.test``) are rejected.
        Any ``#fragment`` is dropped because it names a position inside a
        page, not a different page.
        """
        href = href.strip()
        if not href.startswith(('/', self.root_url)):
            return None
        absolute = urljoin(self.root_url, href).split('#', 1)[0]
        parts = urlparse(absolute)
        if '{}://{}'.format(parts.scheme, parts.netloc) != self.root_url:
            return None
        return absolute

    def _enqueue(self, url):
        """Queue ``url`` unless it was already queued or scraped.

        NOTE: this can run concurrently in several worker threads. The
        check-then-add below is not atomic, but the worst case is a duplicate
        queue entry, which run_scraper filters out via scraped_pages.
        """
        if url in self._queued or url in self.scraped_pages:
            return
        self._queued.add(url)
        self.to_crawl.put(url)

    def parse_links(self, html):
        """Queue every new same-site link found in ``html``.

        Args:
            html: Markup of a fetched page.
        """
        soup = BeautifulSoup(html, 'html.parser')
        for anchor in soup.find_all('a', href=True):
            url = self._normalize_link(anchor['href'])
            if url is not None:
                self._enqueue(url)

    def scrape_info(self, html):
        """Hook for extracting data from a page; does nothing by default.

        Args:
            html: Markup of a fetched page.
        """
        return

    def post_scrape_callback(self, res):
        """Handle a finished fetch: follow links and run the scrape hook.

        Args:
            res: The completed future returned by ``pool.submit``; its result
                is the value returned by :meth:`scrape_page`.
        """
        result = res.result()
        if result is None or result.status_code != 200:
            return
        # Skip PDFs, images and other non-HTML payloads: decoding them to
        # text and feeding them to the HTML parser is wasted work. A missing
        # Content-Type header is treated as HTML to stay permissive.
        content_type = result.headers.get('Content-Type', '')
        if content_type and 'html' not in content_type.lower():
            return
        html = result.text
        self.parse_links(html)
        self.scrape_info(html)

    def scrape_page(self, url):
        """Fetch ``url`` and return the response, or ``None`` on failure.

        Args:
            url: Absolute URL to request.

        Returns:
            The ``requests`` response object, or ``None`` if the request
            raised ``requests.RequestException``.
        """
        try:
            return requests.get(url, timeout=self.request_timeout)
        except requests.RequestException:
            return None

    def run_scraper(self):
        """Run the crawl until no new URL arrives for ``idle_timeout`` seconds.

        Each URL taken from the queue is printed, recorded in
        ``scraped_pages`` and submitted to the thread pool exactly once.
        """
        while True:
            try:
                target_url = self.to_crawl.get(timeout=self.idle_timeout)
            except Empty:
                # Nothing new was queued within the idle window: crawl done.
                return
            if target_url in self.scraped_pages:
                continue
            try:
                print("Scraping URL: {}".format(target_url))
                self.scraped_pages.add(target_url)
                job = self.pool.submit(self.scrape_page, target_url)
                job.add_done_callback(self.post_scrape_callback)
            except Exception as e:
                # Best effort: report the problem and keep crawling.
                print(e)
if __name__ == "__main__":
    # Crawl the seed site; run_scraper() returns once the URL queue has
    # stayed empty for its timeout.
    s = MultiThreadScraper("https://www.geeksforgeeks.org/")
    s.run_scraper()

# Report wall-clock time elapsed since `start` was recorded at the top of
# the cell (includes import time and the final idle wait of the crawler).
end = time.time()
comptime = end - start
print("COMPUTATION TIME: ", comptime, "seconds")

Scraping URL: https://vit.ac.in
Scraping URL: https://vit.ac.in/btech-induction-schedule-2023
Scraping URL: https://vit.ac.in/pg-induction-programme-schedule-2023
Scraping URL: https://vit.ac.in/applications-open-2023-24
Scraping URL: https://vit.ac.in/
Scraping URL: https://vit.ac.in/about-vit
Scraping URL: https://vit.ac.in/about/vision-mission
Scraping URL: https://vit.ac.in/vit-milestones
Scraping URL: https://vit.ac.in/about/leadership
Scraping URL: https://vit.ac.in/governance
Scraping URL: https://vit.ac.in/about/administrative-offices
Scraping URL: https://vit.ac.in/about/infrastructure
Scraping URL: https://vit.ac.in/about/sustainability
Scraping URL: https://vit.ac.in/true-green
Scraping URL: https://vit.ac.in/about/community-outreach
Scraping URL: https://vit.ac.in/about/communityradio
Scraping URL: https://vit.ac.in/all-news-archieved
Scraping URL: https://vit.ac.in/all-events
Scraping URL: https://vit.ac.in/national-institutional-ranking-framework-nirf
Scraping URL: https:

Scraping URL: https://vit.ac.in/sites/default/files/PG-Programme-Schedule/2-Year-PG-Programmes-M.Tech-MCA-MDes-MSc-and-MSW.pdf
Scraping URL: https://vit.ac.in/sites/default/files/PG-Programme-Schedule/Integrated-M.Tech-and-MSc-Programmes.pdf
Scraping URL: https://vit.ac.in/all-courses/ug/b.des-industrial-design
Scraping URL: https://vit.ac.in/school/course/v-sparc/ug
Scraping URL: https://vit.ac.in/all-courses/ug/b.sc(hons.)-agriculture
Scraping URL: https://vit.ac.in/all-courses/ug/ug-science-and-humanities-programmes
Scraping URL: https://vit.ac.in/all-courses/ug/b.sc-hospitality-hotel-administration
Scraping URL: https://vit.ac.in/all-courses/ug/b.sc.programmes
Scraping URL: https://vit.ac.in/all-courses/ug/bba(hons.)-and-b.com.(hons.)
Scraping URL: https://vit.ac.in/all-courses/ug/b.tech.fashion-technology
Scraping URL: https://vit.ac.in/all-courses/ug/ba.ll.b(hons.)-and-bball.b(hons.)
Scraping URL: https://vit.ac.in/all-courses/pg/master-of-business-administration
Scraping URL: ht

Scraping URL: https://vit.ac.in/about-us/news-letter
Scraping URL: https://vit.ac.in/system/files/July_Newsletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/June_Newsletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/May_Newsletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/April_Newsletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/MarchNewsletter_0.pdf
Scraping URL: https://vit.ac.in/system/files/Feb_Newletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/Jan_Newletter-2018.pdf
Scraping URL: https://vit.ac.in/system/files/Dec_Newletter-2017.pdf
Scraping URL: https://vit.ac.in/system/files/November_Newletter-2017.pdf
Scraping URL: https://vit.ac.in/node/7532
Scraping URL: https://vit.ac.in/schools-list
Scraping URL: https://vit.ac.in/academics/centers
Scraping URL: https://vit.ac.in/sites/default/files/Winter-semester-2021-22-Revised.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Winter-semester-2021-22(PG-Freshers).pdf
Scrap

Scraping URL: https://vit.ac.in/school/course/vitbs/ug
Scraping URL: https://vit.ac.in/school/course/ssl/ug
Scraping URL: https://vit.ac.in/schools/school-of-information-technology-and-engineering-for-pg-courses
Scraping URL: https://vit.ac.in/school/course/sbst/pg
Scraping URL: https://vit.ac.in/schools/school-of-electronics-engineering-for-pg-courses
Scraping URL: https://vit.ac.in/school/course/smec/pg
Scraping URL: https://vit.ac.in/school/course/sce/pg
Scraping URL: https://vit.ac.in/schools/school-of-electrical-engineering-for-pg-courses
Scraping URL: https://vit.ac.in/schools/school-of-computer-science-and-engineering-for-pg-courses
Scraping URL: https://vit.ac.in/school/course/vsign/pg
Scraping URL: https://vit.ac.in/school/course/vitbs/pg
Scraping URL: https://vit.ac.in/school/course/sas/pg
Scraping URL: https://vit.ac.in/schools/vitbs
Scraping URL: https://vit.ac.in/files/accreditation.pdf
Scraping URL: https://vit.ac.in/admissions/international/fee
Scraping URL: https://vit.

Scraping URL: https://vit.ac.in/sites/default/files/VIT-TEAM-SAMMARD.jpg
Scraping URL: https://vit.ac.in/sites/default/files/VIT-TEAM-UAV.jpg
Scraping URL: https://vit.ac.in/sites/default/files/RoverX.jpg
Scraping URL: https://vit.ac.in/node/10551
Scraping URL: https://vit.ac.in/policy/Equality-Diversity-and-Inclusion.pdf
Scraping URL: https://vit.ac.in/policy/Ethical-Behaviour.pdf
Scraping URL: https://vit.ac.in/policy/Anti-Corruption.pdf
Scraping URL: https://vit.ac.in/node/9688
Scraping URL: https://vit.ac.in/node/9689
Scraping URL: https://vit.ac.in/campus/Chapters/Indian Societies
Scraping URL: https://vit.ac.in/campus/Chapters/International Societies
Scraping URL: https://vit.ac.in/campus/Chapters/IEEE Chapters
Scraping URL: https://vit.ac.in/sites/default/files/innovations/National-Innovation-Startup-Policy-NISP-2019.pdf
Scraping URL: https://vit.ac.in/sites/default/files/innovations/VIT-Innovation-Startup-and-Incubation-Policy-2022.pdf
Scraping URL: https://vit.ac.in/sites/defa

Scraping URL: https://vit.ac.in/detailview/transform-institutional-practices-through-innovations-research-and-education
Scraping URL: https://vit.ac.in/detailview/welcoming-new-faculty-fop-2021-asc-office
Scraping URL: https://vit.ac.in/detailview/vit-iit-madras-teaching-learning-workshop-july-17-18-2021
Scraping URL: https://vit.ac.in/detailview/fdp-2020
Scraping URL: https://vit.ac.in/detailview/war-mind-ai-context
Scraping URL: https://vit.ac.in/detailview/cybersecurity-and-its-threats-ar-vr-platforms
Scraping URL: https://vit.ac.in/detailview/hands-recent-deep-learning-models-real-world-problems
Scraping URL: https://vit.ac.in/detailview/iot-security-testing-single-board-computers-sbcs
Scraping URL: https://vit.ac.in/detailview/non-communicable-disease-ncd
Scraping URL: https://vit.ac.in/detailview/backend-implementation-vlsi-circuits
Scraping URL: https://vit.ac.in/detailview/insight-research-tools-budding-researchers
Scraping URL: https://vit.ac.in/detailview/blockchain-research-

Scraping URL: https://vit.ac.in/detailview/optimization-and-partial-differential-equation-avenue-new-dimension
Scraping URL: https://vit.ac.in/detailview/emerging-trends-data-science-artificial-intelligence-and-machine-learning-0
Scraping URL: https://vit.ac.in/detailview/building-sustainable-tomorrow-role-energy-and-transportation-green-future
Scraping URL: https://vit.ac.in/detailview/recent-advancement-and-challenges-rf-microwave-and-photonics
Scraping URL: https://vit.ac.in/detailview/optimization-soft-computing-and-numerical-modeling-advanced-manufacturing
Scraping URL: https://vit.ac.in/detailview/computational-fluid-dynamics-beginners-modeling-and-applications
Scraping URL: https://vit.ac.in/detailview/real-world-problem-analysis-using-deep-learning-and-federated-learning
Scraping URL: https://vit.ac.in/detailview/recent-trends-mathematics-applications-sciences-engineering-and-technology
Scraping URL: https://vit.ac.in/detailview/unwind-mind-campaign-mental-wellness
Scraping URL

Scraping URL: https://vit.ac.in/node/4678
Scraping URL: https://vit.ac.in/node/6672
Scraping URL: https://vit.ac.in/node/5858
Scraping URL: https://vit.ac.in/detailview/book-donation-drive
Scraping URL: https://vit.ac.in/system/files/1_19.jpg
Scraping URL: https://vit.ac.in/system/files/2-Hands_on_R-Final-2023_0.pdf
Scraping URL: https://vit.ac.in/system/files/Value-added-Program.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Introduction-Soft-Skills.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Introduction-Business-Communication1.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Introduction-Soft-Skills2.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Introduction-Personal-Skills.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Reasoning-Skill-Enhancement.pdf
Scraping URL: https://vit.ac.in/sites/default/files/Capability/Introduction-Etiquette.pdf
Scraping URL: https://vit.ac.in/sites/defau

Scraping URL: https://vit.ac.in/system/files/11-06-23%20%28VIT%29%20Chancellor%20Dr.%20G.%20Viswanathan%20and%20Vice%20President%20Dr.%20G.%20V.%20Selvam%20met%20%20Shri.%20Amit%20sha%20%286%29_0.jpg
Scraping URL: https://vit.ac.in/about/ranking-and-accreditation
Scraping URL: https://vit.ac.in/mhrdugc
Scraping URL: https://vit.ac.in/school/overview/sas
Scraping URL: https://vit.ac.in/school/course/sas/ug
Scraping URL: https://vit.ac.in/school/course/sas/ph.d
Scraping URL: https://vit.ac.in/school/overview/vaial
Scraping URL: https://vit.ac.in/school/overview/v-sparc
Scraping URL: https://vit.ac.in/school/overview/sbst
Scraping URL: https://vit.ac.in/school/vitbs
Scraping URL: https://vit.ac.in/school/overview/vitbs
Scraping URL: https://vit.ac.in/school/course/VitBS/ph.d
Scraping URL: https://vit.ac.in/school/overview/sce
Scraping URL: https://vit.ac.in/school/course/sce/ph.d
Scraping URL: https://vit.ac.in/schools/overview/scope
Scraping URL: https://vit.ac.in/schools/school-of-compu