Update scraper.py: re-enable total-job parsing and the scraping calls

aloviza · May 19, 2019 · 68d0907
1 parent 77c3a3f
commit 68d0907
Showing 1 changed file with 5 additions and 5 deletions.
diff --git a/scraper.py b/scraper.py
@@ -126,8 +126,8 @@ def get_start_range(what = WHAT, where = WHERE):
 		search_count = content.text.lstrip()
 	#problem area:
 	# Extract total number of jobs found from the search_count string.
-	# pattern = r'\s[0-9]+$'
-	# total_jobs = int(re.findall(pattern, search_count)[0].lstrip())
+	pattern = r'\s[0-9]+$'
+	total_jobs = int(re.findall(pattern, search_count)[0].lstrip())
 
 	# Set start range.
 	start_range = list(np.arange(0, total_jobs, 10))
@@ -139,7 +139,7 @@ def get_start_range(what = WHAT, where = WHERE):
 def scrape_all_pages(what = WHAT, where = WHERE):
 	# TODO: docstring
 # problem line:
-	# start_range = get_start_range(what, where)
+	start_range = get_start_range(what, where)
 
 	all_job_data = defaultdict(str)
 
@@ -168,7 +168,7 @@ def scrape_indeed(what=WHAT, where=WHERE, record_csv=RECORD_CSV, record_db=RECOR
 
 	print("Starting to scrape Indeed: {0} in {1}.".format(what, where))
 #problem line:
-	# data = scrape_all_pages(what, where)
+	data = scrape_all_pages(what, where)
 	scrape_date = EXECTIME.strftime("%Y%m%d")
 
 	if record_csv:
@@ -184,6 +184,6 @@ def scrape_indeed(what=WHAT, where=WHERE, record_csv=RECORD_CSV, record_db=RECOR
 
 	print("Scraping completed.")
 
-
+# problem line 189
 if START_SCRAPING:
 	scrape_indeed()