
Commit beedbf2

Updated file for leetcode scrapping
1 parent 068d694 commit beedbf2

2 files changed (+32, -39 lines)


LeetCode-Scrapper/README.md

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 # LeetCode Scraper
-This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all) ,as provided by the user. The functionality of the script is to gain the information regarding particular codechef problem in different PDFs.
+This python script lets the user scrape 'n' LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all), as provided by the user. The script gathers the information for each LeetCode problem into a separate PDF.
 
 ## Prerequisites:
 Download the required packages from the following command in you terminal.(Make sure you're in the same project directory)
@@ -10,7 +10,7 @@ To run this script, you need to have selenium installed and configure webdriver
 
 ` driver = webdriver.Chrome('/path/to/chromedriver') `
 
-## To Run the script:
+## Running the script:
 After installing all the requirements,run this command in your terminal.
 
 ` python3 ques.py `
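The `driver = webdriver.Chrome('/path/to/chromedriver')` line above follows the Selenium 3 API, where the chromedriver path is passed positionally; on Selenium 4 that positional argument was removed and the path goes through a `Service` object instead. A minimal sketch of both forms, reusing the README's placeholder path `/path/to/chromedriver` (adjust it to wherever your chromedriver actually lives):

```python
from selenium import webdriver

# Selenium 3.x style (what the README and ques.py use): pass the chromedriver path directly.
# driver = webdriver.Chrome("/path/to/chromedriver")

# Selenium 4.x style: wrap the path in a Service object.
from selenium.webdriver.chrome.service import Service

service = Service("/path/to/chromedriver")  # placeholder path from the README
driver = webdriver.Chrome(service=service)

driver.get("https://leetcode.com/problemset/all")
print(driver.title)
driver.quit()
```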

LeetCode-Scrapper/ques.py

Lines changed: 30 additions & 37 deletions
@@ -1,52 +1,53 @@
 from selenium import webdriver
-import os
-options = webdriver.ChromeOptions()
-options.add_argument("--headless")
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
 from selenium.common.exceptions import NoSuchElementException
 from selenium.common.exceptions import TimeoutException
+import os
 from fpdf import FPDF
 
+options = webdriver.ChromeOptions()
+options.add_argument("--headless")
+
 
 capa = DesiredCapabilities.CHROME
 capa["pageLoadStrategy"] = "none"
 
-driver = webdriver.Chrome("C:/Softwares/chromedriver_win32/chromedriver")
-#driver = webdriver.Chrome(desired_capabilities=capa,options=options)
+print("Enter Chrome Driver path")
+inp = input()
+driver = webdriver.Chrome(inp)
+# the base url of the leetcode problem set page
 baseurl="https://leetcode.com/problemset/all"
 wait = WebDriverWait(driver, 15)
 
-# map to get url from its problem difficulty
+# the difficulty levels of all the problems
 problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=hard"}
 
-# get_problems returns the name and links of the problems
-def get_problems(category, no_of_problems):
+def get_problem(category, no_of_problems):
 
-    # A map to store problem name and problem url
-    problem_info = {}
+    prblm_info = {}
     try:
+        # checking if there is no network or any other issue
         driver.get(baseurl + '/' + category)
-        # wait till the first element is loaded
         wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
     except TimeoutException as exception:
         print("Couldn't fetch problem. Network issue or page slow to render. Try again")
         os._exit(-1)
-
-
 
     for problem_index in range(1, no_of_problems + 1):
+        # set problem name
         problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
+        # set problem url
         problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
         print(problem_name," ",problem_url)
-        problem_info[problem_name] = problem_url
-    return problem_info
+        prblm_info[problem_name] = problem_url
+    return prblm_info
 
-# get_problem_desciption returns content of the problem
-def get_problem_description(problem_url,problem_name):
+def get_description(problem_url,problem_name):
     try:
+        # check if the element is found, and located in the correct format
        driver.get(problem_url)
         wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
         problem_title= problem_name
@@ -59,60 +60,52 @@ def get_problem_description(problem_url,problem_name):
             problem_test_cases+="\nOutput\n"
             problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
 
-
-        else:
-
-
+        else:
             driver.execute_script("window.stop();")
         problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}
         return problem
 
-    #Handling exceptions
     except NoSuchElementException as e:
         print("Couldn't scrap the element, Unable to locate it")
         problem=None
     except TimeoutException as exception:
         print("Couldn't scrap the element, Unable to locate it")
         problem=None
 
-
-
-
-
-#storing the information in the pdf
-def convert_to_pdf(problem):
+def to_pdf(problem):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", size = 15)
-    # Replace character that aren't in latin-1 character set
+    # set title
     title=problem["title"].encode('latin-1', 'replace').decode('latin-1')
+    # set statement
     statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')
+    # set test cases
     test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
+    # set url
     url=problem["url"]
-    # add sections to pdf
     pdf.cell(200, 10, txt =title, ln = 1, align = 'C')
     pdf.multi_cell(200, 10, txt =statement, align = 'L')
     pdf.multi_cell(200, 10, txt =test_case, align = 'L')
     pdf.write(5, 'Problem_Link: ')
     pdf.write(5,url,url)
-
     title = title.rstrip()
     pdf.output(title+".pdf")
 
 
-#main function
 def main():
-    category=input("Enter the difficulty level from the following \n Easy \n Medium \n Hard \n\n")
-    no_of_problems=int(input("\n Enter the number of problems to be scrapped: \n"))
-    info = get_problems(problem_difficulty[category],no_of_problems)
+    category=input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
+    no_of_problems=int(input("Enter the number of problems to be scrapped : "))
+    info = get_problem(problem_difficulty[category], no_of_problems)
     for name, url in info.items():
-        problem=get_problem_description(url,name)
+        problem=get_description(url,name)
         if(problem is not None ):
-            convert_to_pdf(problem)
+            to_pdf(problem)
         else:
             pass
 
 if __name__ == '__main__':
     main()
 
+# close the driver
 driver.close()
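One detail in the new version of ques.py: `options` (with `--headless`) and `capa` (with `pageLoadStrategy = "none"`) are still defined, but `driver = webdriver.Chrome(inp)` no longer passes them, so the browser opens with default settings. A hedged sketch of how they could be wired back in using the same Selenium 3 style constructor the script already relies on; the keyword arguments mirror the commented-out line this commit removed, and are not something the current script actually does:

```python
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

options = webdriver.ChromeOptions()
options.add_argument("--headless")   # run Chrome without a visible window

capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"    # return control before the page finishes loading

print("Enter Chrome Driver path")
inp = input()

# Same constructor ques.py uses, but with the options/capabilities actually passed in
# (Selenium 3 style; on Selenium 4 the path would go through a Service object instead).
driver = webdriver.Chrome(inp, desired_capabilities=capa, options=options)
```

With `pageLoadStrategy` set to `"none"`, the explicit `WebDriverWait`/`expected_conditions` calls in the script are what keep it from reading the page too early.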

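The `.encode('latin-1', 'replace').decode('latin-1')` calls in `to_pdf` are there because FPDF's built-in core fonts (the Arial set here) only cover the latin-1 character set, so anything outside it has to be replaced before writing. A small standalone sketch of the same idea; the helper name `to_latin1`, the sample strings, and the output filename are made up for illustration:

```python
from fpdf import FPDF

def to_latin1(text):
    # FPDF core fonts only handle latin-1; unsupported characters become '?'
    return text.encode('latin-1', 'replace').decode('latin-1')

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=15)
pdf.cell(200, 10, txt=to_latin1("Two Sum (sample title)"), ln=1, align='C')
pdf.multi_cell(200, 10, txt=to_latin1("Sample statement with non-latin-1 characters: ≤ ≥ →"), align='L')
pdf.output("sample.pdf")
```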