
Commit beedbf2

Updated file for leetcode scrapping
1 parent 068d694 commit beedbf2

2 files changed (+32, -39 lines)


LeetCode-Scrapper/README.md

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 # LeetCode Scraper
-This python script will let the user to scrape 'n' number of LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all) ,as provided by the user. The functionality of the script is to gain the information regarding particular codechef problem in different PDFs.
+This python script lets the user scrape 'n' LeetCode problems from any category/difficulty in [Leetcode](https://leetcode.com/problemset/all), as provided by the user. The script gathers the information for each LeetCode problem into a separate PDF.
 
 ## Prerequisites:
 Download the required packages from the following command in you terminal.(Make sure you're in the same project directory)
@@ -10,7 +10,7 @@ To run this script, you need to have selenium installed and configure webdriver
 
 ` driver = webdriver.Chrome('/path/to/chromedriver') `
 
-## To Run the script:
+## Running the script:
 After installing all the requirements,run this command in your terminal.
 
 ` python3 ques.py `
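The `driver = webdriver.Chrome('/path/to/chromedriver')` line above follows the Selenium 3 API, where the chromedriver path is passed positionally; on Selenium 4 that positional argument was removed and the path goes through a `Service` object instead. A minimal sketch of both forms, reusing the README's placeholder path `/path/to/chromedriver` (adjust it to wherever your chromedriver actually lives):

```python
from selenium import webdriver

# Selenium 3.x style (what the README and ques.py use): pass the chromedriver path directly.
# driver = webdriver.Chrome("/path/to/chromedriver")

# Selenium 4.x style: wrap the path in a Service object.
from selenium.webdriver.chrome.service import Service

service = Service("/path/to/chromedriver")  # placeholder path from the README
driver = webdriver.Chrome(service=service)

driver.get("https://leetcode.com/problemset/all")
print(driver.title)
driver.quit()
```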

LeetCode-Scrapper/ques.py

Lines changed: 30 additions & 37 deletions
@@ -1,52 +1,53 @@
 from selenium import webdriver
-import os
-options = webdriver.ChromeOptions()
-options.add_argument("--headless")
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.by import By
 from selenium.common.exceptions import NoSuchElementException
 from selenium.common.exceptions import TimeoutException
+import os
 from fpdf import FPDF
 
+options = webdriver.ChromeOptions()
+options.add_argument("--headless")
+
 
 capa = DesiredCapabilities.CHROME
 capa["pageLoadStrategy"] = "none"
 
-driver = webdriver.Chrome("C:/Softwares/chromedriver_win32/chromedriver")
-#driver = webdriver.Chrome(desired_capabilities=capa,options=options)
+print("Enter Chrome Driver path")
+inp = input()
+driver = webdriver.Chrome(inp)
+# the base url of the leetcode problem set page
 baseurl="https://leetcode.com/problemset/all"
 wait = WebDriverWait(driver, 15)
 
-# map to get url from its problem difficulty
+# the difficulty levels of all the problems
 problem_difficulty = {"Easy": "?difficulty=Easy", "Medium": "?difficulty=Medium", "Hard": "?difficulty=hard"}
 
-# get_problems returns the name and links of the problems
-def get_problems(category, no_of_problems):
+def get_problem(category, no_of_problems):
 
-    # A map to store problem name and problem url
-    problem_info = {}
+    prblm_info = {}
     try:
+        # checking if there is no network or any other issue
         driver.get(baseurl + '/' + category)
-        # wait till the first element is loaded
         wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[1]")))
     except TimeoutException as exception:
         print("Couldn't fetch problem. Network issue or page slow to render. Try again")
         os._exit(-1)
-
-
 
     for problem_index in range(1, no_of_problems + 1):
+        # set problem name
         problem_name = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]".format(problem_index)).text
+        # set problem url
         problem_url = driver.find_element_by_xpath("//*[@id='question-app']/div/div[2]/div[2]/div[2]/table/tbody[1]/tr[{}]/td[3]/div/a".format(problem_index)).get_attribute('href')
         print(problem_name," ",problem_url)
-        problem_info[problem_name] = problem_url
-    return problem_info
+        prblm_info[problem_name] = problem_url
+    return prblm_info
 
-# get_problem_desciption returns content of the problem
-def get_problem_description(problem_url,problem_name):
+def get_description(problem_url,problem_name):
     try:
+        # check if the element is found, and located in the correct format
        driver.get(problem_url)
         wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='app']/div/div[2]/div/div/div[1]/div/div[1]/div[1]/div/div[2]/div/div[2]/div/p[1]")))
         problem_title= problem_name
@@ -59,60 +60,52 @@ def get_problem_description(problem_url,problem_name):
             problem_test_cases+="\nOutput\n"
             problem_test_cases += driver.find_element_by_xpath("//*[@id='problem-statement']/pre[2]").text
 
-
-        else:
-
-
+        else:
             driver.execute_script("window.stop();")
         problem={'title':problem_title,'statement':problem_statement,'test_case':problem_test_cases,'url':problem_url}
         return problem
 
-    #Handling exceptions
     except NoSuchElementException as e:
         print("Couldn't scrap the element, Unable to locate it")
         problem=None
     except TimeoutException as exception:
         print("Couldn't scrap the element, Unable to locate it")
         problem=None
 
-
-
-
-
-#storing the information in the pdf
-def convert_to_pdf(problem):
+def to_pdf(problem):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", size = 15)
-    # Replace character that aren't in latin-1 character set
+    # set title
     title=problem["title"].encode('latin-1', 'replace').decode('latin-1')
+    # set statement
     statement=problem["statement"].encode('latin-1', 'replace').decode('latin-1')
+    # set test cases
     test_case=problem["test_case"].encode('latin-1', 'replace').decode('latin-1')
+    # set url
     url=problem["url"]
-    # add sections to pdf
     pdf.cell(200, 10, txt =title, ln = 1, align = 'C')
     pdf.multi_cell(200, 10, txt =statement, align = 'L')
     pdf.multi_cell(200, 10, txt =test_case, align = 'L')
     pdf.write(5, 'Problem_Link: ')
     pdf.write(5,url,url)
-
     title = title.rstrip()
     pdf.output(title+".pdf")
 
 
-#main function
 def main():
-    category=input("Enter the difficulty level from the following \n Easy \n Medium \n Hard \n\n")
-    no_of_problems=int(input("\n Enter the number of problems to be scrapped: \n"))
-    info = get_problems(problem_difficulty[category],no_of_problems)
+    category=input("Choose difficulty level from \n Easy \n Medium \n Hard \n\n : ")
+    no_of_problems=int(input("Enter the number of problems to be scrapped : "))
+    info = get_problem(problem_difficulty[category], no_of_problems)
     for name, url in info.items():
-        problem=get_problem_description(url,name)
+        problem=get_description(url,name)
         if(problem is not None ):
-            convert_to_pdf(problem)
+            to_pdf(problem)
         else:
             pass
 
 if __name__ == '__main__':
     main()
 
+# close the driver
 driver.close()
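One detail in the new version of ques.py: `options` (with `--headless`) and `capa` (with `pageLoadStrategy = "none"`) are still defined, but `driver = webdriver.Chrome(inp)` no longer passes them, so the browser opens with default settings. A hedged sketch of how they could be wired back in using the same Selenium 3 style constructor the script already relies on; the keyword arguments mirror the commented-out line this commit removed, and are not something the current script actually does:

```python
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

options = webdriver.ChromeOptions()
options.add_argument("--headless")   # run Chrome without a visible window

capa = DesiredCapabilities.CHROME
capa["pageLoadStrategy"] = "none"    # return control before the page finishes loading

print("Enter Chrome Driver path")
inp = input()

# Same constructor ques.py uses, but with the options/capabilities actually passed in
# (Selenium 3 style; on Selenium 4 the path would go through a Service object instead).
driver = webdriver.Chrome(inp, desired_capabilities=capa, options=options)
```

With `pageLoadStrategy` set to `"none"`, the explicit `WebDriverWait`/`expected_conditions` calls in the script are what keep it from reading the page too early.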

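The `.encode('latin-1', 'replace').decode('latin-1')` calls in `to_pdf` are there because FPDF's built-in core fonts (the Arial set here) only cover the latin-1 character set, so anything outside it has to be replaced before writing. A small standalone sketch of the same idea; the helper name `to_latin1`, the sample strings, and the output filename are made up for illustration:

```python
from fpdf import FPDF

def to_latin1(text):
    # FPDF core fonts only handle latin-1; unsupported characters become '?'
    return text.encode('latin-1', 'replace').decode('latin-1')

pdf = FPDF()
pdf.add_page()
pdf.set_font("Arial", size=15)
pdf.cell(200, 10, txt=to_latin1("Two Sum (sample title)"), ln=1, align='C')
pdf.multi_cell(200, 10, txt=to_latin1("Sample statement with non-latin-1 characters: ≤ ≥ →"), align='L')
pdf.output("sample.pdf")
```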