Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
aloviza committed May 19, 2019
1 parent 77c3a3f commit 68d0907
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions scraper.py
Expand Up @@ -126,8 +126,8 @@ def get_start_range(what = WHAT, where = WHERE):
search_count = content.text.lstrip()
#problem area:
# Extract total number of jobs found from the search_count string.
# pattern = r'\s[0-9]+$'
# total_jobs = int(re.findall(pattern, search_count)[0].lstrip())
pattern = r'\s[0-9]+$'
total_jobs = int(re.findall(pattern, search_count)[0].lstrip())

# Set start range.
start_range = list(np.arange(0, total_jobs, 10))
Expand All @@ -139,7 +139,7 @@ def get_start_range(what = WHAT, where = WHERE):
def scrape_all_pages(what = WHAT, where = WHERE):
# TODO: docstring
# problem line:
# start_range = get_start_range(what, where)
start_range = get_start_range(what, where)

all_job_data = defaultdict(str)

Expand Down Expand Up @@ -168,7 +168,7 @@ def scrape_indeed(what=WHAT, where=WHERE, record_csv=RECORD_CSV, record_db=RECOR

print("Starting to scrape Indeed: {0} in {1}.".format(what, where))
#problem line:
# data = scrape_all_pages(what, where)
data = scrape_all_pages(what, where)
scrape_date = EXECTIME.strftime("%Y%m%d")

if record_csv:
Expand All @@ -184,6 +184,6 @@ def scrape_indeed(what=WHAT, where=WHERE, record_csv=RECORD_CSV, record_db=RECOR

print("Scraping completed.")


# Module-level entry point: start a scrape when the START_SCRAPING flag is
# truthy. scrape_indeed() is called with no arguments, so it runs with its
# declared defaults.
# NOTE(review): the author flagged this call as "problem line 189" while
# debugging; the root cause is not visible in this excerpt.
if START_SCRAPING:
    scrape_indeed()

0 comments on commit 68d0907

Please sign in to comment.