-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapper.py
33 lines (25 loc) · 899 Bytes
/
scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from bs4 import BeautifulSoup
import requests
def scrape_problems(url="https://sih.gov.in/sih2022PS", timeout=30):
    """Download the problem-statement listing page and return its rows as dicts.

    The page is expected to contain a ``<table id="dataTablePS">`` whose
    ``<tbody>`` holds one ``<tr>`` per problem statement. Each of those rows
    is expected to embed a nested ``<table id="settings">`` whose first cell
    carries the problem description.

    Args:
        url: Page to fetch. Defaults to the SIH 2022 problem-statement list.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        A list of dicts, one per parsed row, with the keys ``"id"``,
        ``"code"``, ``"institution"`` and ``"problem_statement"``.

    Raises:
        requests.RequestException: The request failed, timed out, or the
            server answered with a 4xx/5xx status.
        RuntimeError: The response does not contain the expected table.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    listing = soup.find("table", {"id": "dataTablePS"})
    body = listing.find("tbody") if listing is not None else None
    if body is None:
        raise RuntimeError(
            f"table 'dataTablePS' with a <tbody> was not found at {url}"
        )

    problems = []
    # recursive=False: only the listing's own rows, not rows of nested tables.
    for row in body.find_all("tr", recursive=False):
        # find_all("td") searches all descendants, so cells of the nested
        # table are part of this list and of the indexes used below.
        # All newlines and spaces are removed from each value (including
        # spaces inside names), exactly as the original code did.
        cells = [
            cell.text.strip().replace("\n", "").replace(" ", "")
            for cell in row.find_all("td")
        ]

        details = row.find("table", {"id": "settings"})
        first_row = details.find("tr") if details is not None else None
        first_cell = first_row.find("td") if first_row is not None else None

        # Skip rows that do not have the expected shape (e.g. a placeholder
        # row) rather than aborting the whole scrape on one bad row.
        if first_cell is None or len(cells) <= 10:
            continue

        problems.append(
            {
                "id": cells[0],
                "code": cells[10],
                "institution": cells[1],
                "problem_statement": first_cell.text.strip(),
            }
        )
    return problems