In [1]:
import re
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from time import sleep
from functools import reduce

accounts: 
* rosette.scraper@gmail.com 15k
* jeunesse.scraper@gmail.com 20k
* krizia.scraper@gmail.com 25k
* kated.scraper@gmail.com 30k
* celina.scraper@gmail.com 35k

passwords:
* gmail: datascience
* jobstreet Datascience1

In [9]:
def totalpg(sesh, url):
    '''Scrape the first page of the search index for the total page count.
    
    Parameters:
        url: url of the page to scrape from.
        
    Returns: 
        pg_cnt: total page count.
    '''
    
#     url = '''https://www.jobstreet.com.ph/en/job-search/job-vacancy.php?area=1&option=1&location=60100%2C60200%2C60300%2C60400%2C61400%2C60700%2C60500%2C60800%2C61300%2C60900%2C61000%2C61100%2C61200%2C60600%2C61600%2C61500&specialization=192%2C193%2C191&job-source=1%2C64&classified=0&job-posted=0&sort=1&order=0&pg=1&src=16&srcr=1&ojs=4'''
    jobs = sesh.get(url)
    print(jobs)
    jobs_sp = BeautifulSoup(jobs.text, 'lxml')
    pg_cnt = jobs_sp.find('span',{'id':'job_count_range'}).text
    pg_cnt = pg_cnt.replace(',','')
    pg_cnt = int(np.ceil(int(re.findall('.*of (\d+) jobs', pg_cnt)[0])/20))
    print('Initializing Scrape in 3 seconds...')
    sleep(3)
    
    return pg_cnt

def job_scraper(head, url, salreq, start=1, t=5):
    '''Scrape and parse through each page of the job index and
    return a summary dataframe for all the jobs that met the 
    requirement (all IT jobs).
    
    The summary dataframe is also saved in the subdirectory named
    after the specified salary requirement.
    
    Parameters:
        sesh: session object with necessary headers for the scrape
        url: original url to scrape from
        salreq: salary requirement of the account
        t: sleep time
        
    Returns:
        df_sum: summary dataframe.
    '''
    sesh = requests.Session()
    sesh.headers.update(head)
    # Initialize summary list and end of page.
    summary_list = []
    end = totalpg(sesh, url)+1

    # Loop through the pages from start specified to x given by totalpg function.
    for pg in range(start, end):
        
        pg = str(pg)
        print(f'Scraping page: {pg} of {end}')
        
        # Create new url from the old url
        tmp = url.split('pg=1')
        n_url = f'pg={pg}'.join(tmp)
        
        # Scrape the new url and convert to soup.
        jobs = sesh.get(n_url)
        print(jobs)
        jobs_sp = BeautifulSoup(jobs.text, 'lxml')
        
        # Parse through soup using parse_i then append.
        summary_list.append(parse_i(jobs_sp, salreq, pg))
        sleep(t)
    
    # Aggregate into 1 summary dataframe
    df_sum = pd.DataFrame(reduce(lambda x, y: x.append(y), summary_list))
    df_sum.to_csv(str(salreq)+r'/summary')

    return df_sum

def salary_asked(df, salary):
    '''Appends the salary requirement of each account in the dataframe.
    
    Parameters:
        df: dataframe to append salary information on.
        salary: minimum salary requirement of the account.
    
    Returns:
        dataframe with salary requirement information.
    '''
    
    tmp_fill = (str(salary)+' ')*len(df)
    fill = tmp_fill.strip(' ').split(' ')
    df_sal = pd.DataFrame(fill, columns=['acc_salary'])

    return pd.concat([df, df_sal], axis=1)

def parse_i(jobs_sp, salreq, page):
    '''Parse through the index page for the general job details of
    each job_ad panel and return them as a dataframe.
    
    Each page is converted to a dataframe of job details. Which are
    also saved for backup purposes. Sub directories must be made and
    named after the salary requirement.
    
    Parameters:
        jobs_sp: soup element of the page to scrape.
        salreq: salary requirement set on the account.
        page: page number of the scraped page

    Returns:
        df_jobs: dataframe of job details for the given page.

    '''

    # Initialize storage, set default value for missing values.
    job_tit = []
    job_url = []
    job_com = []
    job_loc = []
    job_sal = []
    default = 'Not Specified'

    # Get panels with job_ads
    panels = jobs_sp.findAll('div',{'class':'panel-body'})
    panels = [item for item in panels
             if (len(item.attrs.values()) == 2) and
             (re.match('job_ad_\d+$',list(item.attrs.values())[1]))]

    # Iterate through the panel list and retrieve general job information.
    for panel in panels:
        titleurl = panel.find('a',{'class':"position-title-link"})
        company = panel.find('a',{'class':'company-name'})
        location = panel.find('li',{'class':'job-location'})
        salary = panel.find('font',{'class':''})
        
        # Append job information to lists.
        job_tit.append(titleurl.text)
        job_url.append(titleurl['href'])

        # Fill blank values with default value
        if company is None:
            job_com.append(default)
        else:
            job_com.append(company.text)
        if location is None:
            job_loc.append(default)
        else:
            job_loc.append(location.text)
        if salary is None:
            job_sal.append(default)
        else:
            job_sal.append(salary.text)
    
    # Concatenate into 1 dataframe
    df_jobs = pd.DataFrame(list(zip(job_tit, job_com, job_loc, job_sal, job_url)),
                           columns=['title', 'url', 'company', 'location', 'salary'])
    
    # Append the salary requirement using salary_asked function
    df_jobs = salary_asked(df_jobs, salreq)
    df_jobs.to_csv(str(salreq)+r'/'+str(page))

    return df_jobs

In [10]:
# Always get the headers for the account before scraping:

# 15000
# rosette_h = {x[0]:x[1] for x in re.findall('(.*?): (.*?)$',
# '''accept-language: en-US,en;q=0.9
# cookie: hfsc=ncp; __cfduid=d631955d99c1feebd976ef8d09f37ea991552360459; _fbp=fb.2.1552360461894.1126984972; s_fid=61C99ED1D8B4EEC2-0F4DBFCD8C800D60; _gcl_au=1.1.531132442.1552360462; s_vi=[CS]v1|2E439206852A06F6-60000104A00013E7[CE]; __gads=ID=70ffd9433aec64cb:T=1553156077:S=ALNI_MZA-XH_iL3lYNUzv7SJCTBJevHmyQ; D_SID=119.92.176.131:HY+GmwIq8CEp92W1nUdjktKtSW+Y1lRJy9i5AA4ER2A; sol_id=8fb83ff9-7502-4a1e-ae1d-1896c5f2255c; _ga=GA1.3.752998390.1553238686; ins-mig-done=1; D_IID=B16AF4B7-52D2-32B5-9EC8-0C365AF443DF; D_UID=4EDF59C4-1D9D-3B8B-B0FF-592D04DFEC43; gatcmr=PHLOGGED; scs=%7B%22t%22%3A1%7D; fbHide=true; _gid=GA1.3.1637734095.1559930026; current-currency=; ___utmvm=###########; s_cc=true; __utmc=1; __cfruid=6381b2d8d4092e77dee718671e56cd7c67358501-1559964167; LDSESSIONID=ddhf4rrtd0nnkcei9grklfsns4; __utmz=1.1559978227.12.7.utmcsr=myjobstreet.jobstreet.com.ph|utmccn=(referral)|utmcmd=referral|utmcct=/home/index.php; YROTSIH=h%3Au%3A%7Bm%3AD%3A%22SFUa_bU9P%22%3Bm%3Au0%3A%223zmlnnl.mj3h1l3%40WxhYw.jzx%22%3Bm%3AuE%3A%22QMGU9FP7J_U9UUKML_K8%22%3Bm%3AuA%3A%22kkXV633nkEyyvjlYDW3vwVmym6%22%3B%7D; EMUSER=h%3Atu%3A%7Bm%3AD%3A%223lmoxl_Yk%22%3Bm%3AC%3A%226uBtCuBE%22%3Bm%3AtA%3A%223lmoxl_n41l_jzkl%22%3Bm%3Au%3A%2250%22%3Bm%3Au6%3A%223lmoxl_jzx1wlnlylmm_jzkl%22%3BY%3AtA0tD%3Bm%3Au6%3A%223lmoxl_pl3YVYjhnYzy_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3Aut%3A%22lr1l3Ylyjl_wlplw_jzkl%22%3Bm%3At%3A%22u%22%3Bm%3Au0%3A%223lmoxl_Yyjzx1wlnl_mljnYzy%22%3Bh%3A5%3A%7Bm%3At0%3A%22hkkYnYzyhw_YyVz%22%3Bm%3A6%3A%22n3ol%22%3Bm%3A0%3A%22mvYww%22%3Bm%3A6%3A%22n3ol%22%3Bm%3AC%3A%22whyWohWl%22%3Bm%3A6%3A%22n3ol%22%3B%7Dm%3At0%3A%223lmoxl_jzx1wlnl%22%3Bi%3At%3Bm%3Au0%3A%223lmoxl_mlh3jX_mnhnom_jzkl%22%3Bm%3At%3A%22u%22%3Bm%3Atu%3A%223lmoxl_1Xznz%22%3Bi%3AE%3Bm%3AtC%3A%223lmoxl_whmn_o1khnl%22%3Bm%3At%3A%22E%22%3Bm%3At6%3A%22phwYkhnl_lxhYw%22%3Bm%3At%3A%22t%22%3Bm%3Auu%3A%223lmoxl_zywYyl_jzx1wlnl%22%3Bi%3AE%3B%7D; OGLAETATSTRATSBA=B; GCI=53826956; NOITACILPPA=m%3AE%3A%22%22%3B; MOTSUC=rosette; LASPXE=%7B%22ccid%22%3A3%2C%22ccname%22%3A%22PHP%22%2C%22csval%22%3A%2215000.00%22%2C%22uval%22%3A%22288.19%22%2C%22cid%22%3A9440738619%7D; SDNERTBEW=100_9440738619%7CPhilippines%7CFresh+%2F+Entry+Level%7CIT%2FComputer+-+Hardware%7COWAInfoStart%7CStudent%7C0%7CBachelor%27s%2FCollege+Degree+%7CIT%2FComputer+-+Hardware%7C6%7C170%7C335.26; solUID=3c001b4a-89cb-11e9-9a8e-0050568048f8; D_ZID=E3E5B850-A92F-3C25-9C5D-5457F3CDD4C1; D_ZUID=803BE668-060B-306D-87A6-6A374641A0FB; D_HID=4E8EEC7A-6A2A-3359-8A76-F253247B9647; XXX=m%3AuA%3A%22nZABtkCWjwnXiVDCiZv5Xlh0Vu%22%3B; HTUA=m%3Au66%3A%22ylvzn%3D%3D%3DXMBUKiDhrcoW3PUzNlJ1fLvhWehOLY3WjuEDPGwNj6w%2Bd6zp%2Frpb8QkUXYxGVCVYOk6pPgT5l10Upw%2FznfN%2BkxbmZW6Ukd8U1mFKcOXF%2BQswIuGqAjnO3EFDMgNYsdBC9qvVIiSBkDi%2BJLYwY%2F%2BanSWogyIa9ekQiaSmyLy9M8MgG06EreKhOFKNC6quqiZnqKaGYTfB8s9Ief3KWPqNcKFSh50dg1w84lrzzZfjpcW%3D%22%3B; TNETSISREP=h%3At%3A%7Bm%3AuB%3A%22SFUa_bU9P_N9PUKUa9La_7MMRK9%22%3Bh%3A0%3A%7Bm%3AB%3A%22oml3_Yk%22%3Bm%3AtE%3A%22D66EB5CAtD%22%3Bm%3AD%3A%22ml3pl3_Yk%22%3BY%3AtEE%3Bm%3A0%3A%22nzvly%22%3Bm%3A5A%3A%22ij0CEVt0-500C-6BCl-h5V5-hu6jjjhl6BhB%22%3Bm%3AD%3A%22ml3Ylm_Yk%22%3Bm%3A5A%3A%22lV5u6Ekl-A6E5-6hjA-i5Dk-6uAEBBt5CEC6%22%3Bm%3AC%3A%223lxlxil3%22%3Bm%3At%3A%22t%22%3B%7D%7D; NIGOL_LAICOS=h%3A5%3A%7Bm%3AtC%3A%22F7aKc9_UM7KFS_bU9P%22%3Bm%3At%3A%22E%22%3Bm%3At0%3A%22F7aKc9_HG_aMR9L%22%3Bm%3At%3A%22E%22%3Bm%3Att%3A%22P9H9P9P_bPS%22%3Bm%3At6%3A%22Xzxl%2FYyklr.1X1%22%3B%7D; REFERP=h%3At5%3A%7Bm%3AtB%3A%22moimj3Y1nYzy_jzkl%22%3Bm%3A6%3A%22t66t%22%3Bm%3AtB%3A%22x4Zm_Vlhno3l_jzkl%22%3Bm%3A5%3A%22t5E%22%3Bm%3Au6%3A%22hkkYnYzyhw_Vlhno3l_jzklt%22%3Bm%3At%3A%22E%22%3Bm%3AtA%3A%22mnhnYj_mYnl_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3AtC%3A%22mYnl_whyWohWl_jzkl%22%3Bm%3At%3A%225%22%3Bm%3A6%3A%22mYnl%22%3Bm%3Au%3A%221X%22%3Bm%3AtA%3A%22xYmj_mnhnom_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3Atu%3A%22whykYyW_mYnl%22%3Bm%3Au%3A%221X%22%3Bm%3AC%3A%22whyWohWl%22%3Bm%3A0%3A%22ly_bU%22%3Bm%3Atu%3A%22Wlyl3Yj_mYnl%22%3Bm%3AE%3A%22%22%3Bm%3AD%3A%22yz_3lmoxl%22%3Bi%3AE%3Bm%3AuE%3A%221lhv_13zVYwl_lr1Y3lk%22%3Bm%3A6%3A%22N9eN%22%3Bm%3AtC%3A%22hwwzqlk_qz3v_4lh3m%22%3BY%3AtA%3B%7D; SEARCH_SUMMARY=+Across+Philippines%2C+Computer%2FInformation+Technology; __utma=1.752998390.1553238686.1560007851.1560010628.19; ins-gaSSId=4aaae472-84dd-ffd4-4c8a-39dae8306407_1560010629; HCRSEABOJ=h%3At%3A%7Bm%3AA%3A%22mhplk2%22%3Bm%3AuE0%3A%22++%2B%28x4Zm_qz3v_wzjhnYzy_jzkl_jmp%3A%28AEtEE+AEuEE+AE5EE+AE6EE+At6EE+AEBEE+AE0EE+AECEE+At5EE+AEDEE+AtEEE+AttEE+AtuEE+AEAEE+AtAEE+At0EE%29%29+%2B%28m1ljYhwYshnYzy%3A%28tDu+tD5+tDt%29%29+%2B%28Zzi_mzo3jl_jzkl%3A%28t%29%29+%2BZzi_1X_VwhW%3A1XVwWt%22%3B%7D; TBMCookie_15303015096444475054=439381001560013074/qxSP7WvVqPL8a7apVG5A+7LXpo=; ins-product-id=9178836; __utmt=1; __utmb=1.17.9.1560013755612; _gat_UA-82223804-8=1; insdrSV=173; inslastVisitedUrlit=https%3A%2F%2Fwww.jobstreet.com.ph%2Fen%2Fjob-search%2Fjob-vacancy.php%3Farea%3D1%26option%3D1%26location%3D60100%252C60200%252C60300%252C60400%252C61400%252C60700%252C60500%252C60800%252C61300%252C60900%252C61000%252C61100%252C61200%252C60600%252C61600%252C61500%26specialization%3D192%252C193%252C191%26job-source%3D1%252C64%26classified%3D0%26job-posted%3D0%26sort%3D1%26order%3D0%26pg%3D3%26src%3D16%26srcr%3D1%26ojs%3D4
# upgrade-insecure-requests: 1
# user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36
# ''', re.M)}

jeunesse_h = {x[0]:x[1] for x in re.findall('(.*?): (.*?)$',
'''cookie: hfsc=ncp; __cfduid=d631955d99c1feebd976ef8d09f37ea991552360459; _fbp=fb.2.1552360461894.1126984972; s_fid=61C99ED1D8B4EEC2-0F4DBFCD8C800D60; _gcl_au=1.1.531132442.1552360462; s_vi=[CS]v1|2E439206852A06F6-60000104A00013E7[CE]; __gads=ID=70ffd9433aec64cb:T=1553156077:S=ALNI_MZA-XH_iL3lYNUzv7SJCTBJevHmyQ; D_SID=119.92.176.131:HY+GmwIq8CEp92W1nUdjktKtSW+Y1lRJy9i5AA4ER2A; sol_id=8fb83ff9-7502-4a1e-ae1d-1896c5f2255c; _ga=GA1.3.752998390.1553238686; ins-mig-done=1; D_IID=B16AF4B7-52D2-32B5-9EC8-0C365AF443DF; D_UID=4EDF59C4-1D9D-3B8B-B0FF-592D04DFEC43; gatcmr=PHLOGGED; scs=%7B%22t%22%3A1%7D; fbHide=true; _gid=GA1.3.1637734095.1559930026; current-currency=; D_ZID=E3E5B850-A92F-3C25-9C5D-5457F3CDD4C1; D_ZUID=803BE668-060B-306D-87A6-6A374641A0FB; D_HID=4E8EEC7A-6A2A-3359-8A76-F253247B9647; ins-product-id=9178836; s_cc=true; ___utmvm=###########; __cfruid=2821d58f174fb9564b1e4d20f35b71b5f8609fb5-1560046372; __utmc=1; __utmz=1.1560046374.20.8.utmcsr=myjobstreet.jobstreet.com.ph|utmccn=(referral)|utmcmd=referral|utmcct=/home/index.php; LDSESSIONID=i3jdc8rkv4fcuh1fbqe8rsfk30; __utma=1.752998390.1553238686.1560046374.1560050105.21; ins-gaSSId=2ff9a0aa-25ab-d3f7-e27c-806f7749a353_1560050106; PHPSESSID=n0nf16jo0pjfpvn60bn7dcn396; TawkConnectionTime=0; __tawkuuid=e::jobstreet.com.ph::FfgL/bTBiVwrsrfbF9Y5jehLn6ScsOb0/6Ah2Ow01fn2yh8ZCCexzYOMfEdlvADB::2; _hjIncludedInSample=1; EMUSER=h%3Atu%3A%7Bm%3AD%3A%223lmoxl_Yk%22%3Bm%3AC%3A%226uBtCAtu%22%3Bm%3AtA%3A%223lmoxl_n41l_jzkl%22%3Bm%3Au%3A%2250%22%3Bm%3Au6%3A%223lmoxl_jzx1wlnlylmm_jzkl%22%3BY%3AtA0tD%3Bm%3Au6%3A%223lmoxl_pl3YVYjhnYzy_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3Aut%3A%22lr1l3Ylyjl_wlplw_jzkl%22%3Bm%3At%3A%22u%22%3Bm%3Au0%3A%223lmoxl_Yyjzx1wlnl_mljnYzy%22%3Bh%3A5%3A%7Bm%3At0%3A%22hkkYnYzyhw_YyVz%22%3Bm%3A6%3A%22n3ol%22%3Bm%3A0%3A%22mvYww%22%3Bm%3A6%3A%22n3ol%22%3Bm%3AC%3A%22whyWohWl%22%3Bm%3A6%3A%22n3ol%22%3B%7Dm%3At0%3A%223lmoxl_jzx1wlnl%22%3Bi%3At%3Bm%3Au0%3A%223lmoxl_mlh3jX_mnhnom_jzkl%22%3Bm%3At%3A%22u%22%3Bm%3Atu%3A%223lmoxl_1Xznz%22%3Bi%3AE%3Bm%3AtC%3A%223lmoxl_whmn_o1khnl%22%3Bm%3At%3A%22t%22%3Bm%3At6%3A%22phwYkhnl_lxhYw%22%3Bm%3At%3A%22t%22%3Bm%3Auu%3A%223lmoxl_zywYyl_jzx1wlnl%22%3Bi%3AE%3B%7D; MOTSUC=jeunesse; LASPXE=%7B%22ccid%22%3A3%2C%22ccname%22%3A%22PHP%22%2C%22csval%22%3A%2220000.00%22%2C%22uval%22%3A%22384.26%22%2C%22cid%22%3A9440814201%7D; SDNERTBEW=100_9440814201%7CPhilippines%7CFresh+%2F+Entry+Level%7CProcess+Design+%26+Control%2FInstrumentation%7COWAInfoStart%7Cprogrammer%7C0%7CBachelor%27s%2FCollege+Degree+%7CProcess+Design+%26+Control%2FInstrumentation%7C6%7C170%7C447.01; OGLAETATSTRATSBA=B; GCI=53827325; YROTSIH=h%3Au%3A%7Bm%3AD%3A%22SFUa_bU9P%22%3Bm%3AuA%3A%22Zloylmml.mj3h1l3%40WxhYw.jzx%22%3Bm%3AuE%3A%22QMGU9FP7J_U9UUKML_K8%22%3Bm%3AuA%3A%22Y5ZkjC3vp6VjoXtVi2lC3mVv5E%22%3B%7D; solUID=72f9feae-89d0-11e9-8906-0050568048f8; XXX=m%3AuA%3A%22ou1XmZlWlYlEmXnCjnnj2uB1BB%22%3B; HTUA=m%3Au0A%3A%22ylvzn%3D%3D%3DU7IKzQxxJph3j0mXE9bdIt5EEnF4%2FLPTQ%2B0wSxQjRzSbmF1Va2Y7TO%2B7Z6mBjrwhK1EodzbkmRGnGVfEa5juNVhphY8MgwDQIt4wAstrhRSgXbJzLsPSd6SlJynOvzOQHMToHmRbTvPreZF6ee0hRltOgBYK764SMyt0wdsH6iWwPW%2Bvydns8HCmP4HiVN8JwRSCOAwTb%2FehemqyEoeTdjVzbjzw3%2FNMiHemUtWOx4lJBwI6iZO0RF%3D%3D%22%3B; TNETSISREP=h%3At%3A%7Bm%3AuB%3A%22SFUa_bU9P_N9PUKUa9La_7MMRK9%22%3Bh%3A0%3A%7Bm%3AB%3A%22oml3_Yk%22%3Bm%3AtE%3A%22D66ECt6uEt%22%3Bm%3AD%3A%22ml3pl3_Yk%22%3BY%3AtEE%3Bm%3A0%3A%22nzvly%22%3Bm%3A5A%3A%22EA6Bh6C6-uEju-6kkk-iEtl-00k5i0EjCi5E%22%3Bm%3AD%3A%22ml3Ylm_Yk%22%3Bm%3A5A%3A%22tEBE0DDV-AEVi-6ikj-hj06-0BE0uCDhV00E%22%3Bm%3AC%3A%223lxlxil3%22%3Bm%3At%3A%22t%22%3B%7D%7D; NIGOL_LAICOS=h%3A5%3A%7Bm%3AtC%3A%22F7aKc9_UM7KFS_bU9P%22%3Bm%3At%3A%22E%22%3Bm%3At0%3A%22F7aKc9_HG_aMR9L%22%3Bm%3At%3A%22E%22%3Bm%3Att%3A%22P9H9P9P_bPS%22%3Bm%3At6%3A%22Xzxl%2FYyklr.1X1%22%3B%7D; REFERP=h%3At5%3A%7Bm%3AtB%3A%22moimj3Y1nYzy_jzkl%22%3Bm%3A6%3A%22t66t%22%3Bm%3AtB%3A%22x4Zm_Vlhno3l_jzkl%22%3Bm%3A5%3A%22t5E%22%3Bm%3Au6%3A%22hkkYnYzyhw_Vlhno3l_jzklt%22%3Bm%3At%3A%22E%22%3Bm%3AtA%3A%22mnhnYj_mYnl_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3AtC%3A%22mYnl_whyWohWl_jzkl%22%3Bm%3At%3A%225%22%3Bm%3A6%3A%22mYnl%22%3Bm%3Au%3A%221X%22%3Bm%3AtA%3A%22xYmj_mnhnom_jzkl%22%3Bm%3At%3A%22E%22%3Bm%3Atu%3A%22whykYyW_mYnl%22%3Bm%3Au%3A%221X%22%3Bm%3AC%3A%22whyWohWl%22%3Bm%3A0%3A%22ly_bU%22%3Bm%3Atu%3A%22Wlyl3Yj_mYnl%22%3Bm%3AE%3A%22%22%3Bm%3AD%3A%22yz_3lmoxl%22%3Bi%3AE%3Bm%3AuE%3A%221lhv_13zVYwl_lr1Y3lk%22%3Bm%3A6%3A%22N9eN%22%3Bm%3AtC%3A%22hwwzqlk_qz3v_4lh3m%22%3BY%3AtA%3B%7D; HCRSEABOJ=h%3At%3A%7Bm%3AA%3A%22mhplk2%22%3Bm%3A0t%3A%22++%2B%28Zzi_mzo3jl_jzkl%3A%28t+A6+tuC%29%29+%2BZzi_1X_VwhW%3A1XVwWt%22%3B%7D; TBMCookie_15303015096444475054=128310001560055034EFzz2014QgEd0fOo1Idffv6N6I4=; __utmt=1; _gat_UA-82223804-8=1; insdrSV=225; inslastVisitedUrlit=https%3A%2F%2Fwww.jobstreet.com.ph%2Fen%2Fjob-search%2Fjob-vacancy.php%3Fojs%3D1; __utmb=1.53.9.1560055062927
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36
''', re.M)}

krizia_h = {x[0]:x[1] for x in re.findall('(.*?): (.*?)$',
'''''', re.M)}
kated_h = {x[0]:x[1] for x in re.findall('(.*?): (.*?)$',
'''''', re.M)}
celina_h = {x[0]:x[1] for x in re.findall('(.*?): (.*?)$',
'''''', re.M)}

# Generate URL List:


In [11]:
# Initiate Session. Note: cookies change per user.

url = '''https://www.jobstreet.com.ph/en/job-search/job-vacancy.php?area=1&option=1&location=60100%2C60200%2C60300%2C60400%2C61400%2C60700%2C60500%2C60800%2C61300%2C60900%2C61000%2C61100%2C61200%2C60600%2C61600%2C61500&specialization=192%2C193%2C191&job-source=1%2C64&classified=0&job-posted=0&sort=1&order=0&pg=1&src=16&srcr=1&ojs=4'''
job_scraper(sesh, url, salreq=20000, start=1, t=5)


<Response [200]>
Initializing Scrape in 3 seconds...
Scraping page: 1 of 418
<Response [200]>
Scraping page: 2 of 418
<Response [200]>
Scraping page: 3 of 418
<Response [200]>
Scraping page: 4 of 418
<Response [200]>
Scraping page: 5 of 418
<Response [200]>
Scraping page: 6 of 418
<Response [200]>
Scraping page: 7 of 418
<Response [200]>
Scraping page: 8 of 418
<Response [200]>
Scraping page: 9 of 418
<Response [200]>
Scraping page: 10 of 418
<Response [200]>
Scraping page: 11 of 418
<Response [200]>
Scraping page: 12 of 418
<Response [200]>
Scraping page: 13 of 418
<Response [200]>
Scraping page: 14 of 418
<Response [200]>
Scraping page: 15 of 418
<Response [200]>
Scraping page: 16 of 418
<Response [200]>
Scraping page: 17 of 418
<Response [200]>
Scraping page: 18 of 418
<Response [200]>
Scraping page: 19 of 418
<Response [200]>
Scraping page: 20 of 418
<Response [200]>
Scraping page: 21 of 418
<Response [200]>
Scraping page: 22 of 418
<Response [200]>
Scraping page: 23 of 418
<Respon

Scraping page: 193 of 418
<Response [200]>
Scraping page: 194 of 418
<Response [200]>
Scraping page: 195 of 418
<Response [200]>
Scraping page: 196 of 418
<Response [200]>
Scraping page: 197 of 418
<Response [200]>
Scraping page: 198 of 418
<Response [200]>
Scraping page: 199 of 418
<Response [200]>
Scraping page: 200 of 418
<Response [200]>
Scraping page: 201 of 418
<Response [200]>
Scraping page: 202 of 418
<Response [200]>
Scraping page: 203 of 418
<Response [200]>
Scraping page: 204 of 418
<Response [200]>
Scraping page: 205 of 418
<Response [200]>
Scraping page: 206 of 418
<Response [200]>
Scraping page: 207 of 418
<Response [200]>
Scraping page: 208 of 418
<Response [200]>
Scraping page: 209 of 418
<Response [200]>
Scraping page: 210 of 418
<Response [200]>
Scraping page: 211 of 418
<Response [200]>
Scraping page: 212 of 418
<Response [200]>
Scraping page: 213 of 418
<Response [200]>
Scraping page: 214 of 418
<Response [200]>
Scraping page: 215 of 418
<Response [200]>
Scraping pa

Scraping page: 384 of 418
<Response [200]>
Scraping page: 385 of 418
<Response [200]>
Scraping page: 386 of 418
<Response [200]>
Scraping page: 387 of 418
<Response [200]>
Scraping page: 388 of 418
<Response [200]>
Scraping page: 389 of 418
<Response [200]>
Scraping page: 390 of 418
<Response [200]>
Scraping page: 391 of 418
<Response [200]>
Scraping page: 392 of 418
<Response [200]>
Scraping page: 393 of 418
<Response [200]>
Scraping page: 394 of 418
<Response [200]>
Scraping page: 395 of 418
<Response [200]>
Scraping page: 396 of 418
<Response [200]>
Scraping page: 397 of 418
<Response [200]>
Scraping page: 398 of 418
<Response [200]>
Scraping page: 399 of 418
<Response [200]>
Scraping page: 400 of 418
<Response [200]>
Scraping page: 401 of 418
<Response [200]>
Scraping page: 402 of 418
<Response [200]>
Scraping page: 403 of 418
<Response [200]>
Scraping page: 404 of 418
<Response [200]>
Scraping page: 405 of 418
<Response [200]>
Scraping page: 406 of 418
<Response [200]>
Scraping pa

Unnamed: 0,title,url,company,location,salary,acc_salary
0,ENTRY-LEVEL SOFTWARE DEVELOPER ASSOCIATE JOB |...,"Siegen HR Solutions, Inc.",National Capital Reg,"PHP 20,000 - 25,000",https://www.jobstreet.com.ph/en/job/entry-leve...,20000
1,IT Staff,Bank of the Philippine Islands (BPI),Makati City (National Capital Reg),Below Expected Salary,https://www.jobstreet.com.ph/en/job/it-staff-9...,20000
2,UI / UX -Developer,Asticom Technology Inc,National Capital Reg - Makati City,Above Expected Salary,https://www.jobstreet.com.ph/en/job/uiux-devel...,20000
3,Senior Software Developer,Asticom Technology Inc,National Capital Reg - Makati City,Above Expected Salary,https://www.jobstreet.com.ph/en/job/senior-sof...,20000
4,Network Troubleshooting - IT Helpdesk | Shaw,"Sykes Asia, Inc.","National Capital Reg - Ayala, Makati",Around Expected Salary,https://www.jobstreet.com.ph/en/job/network-tr...,20000
5,Hardware Troubleshooting - IT Helpdesk | Shaw,"Sykes Asia, Inc.",National Capital Reg,Around Expected Salary,https://www.jobstreet.com.ph/en/job/hardware-t...,20000
6,Associate Software Engineer | Alabang Recruitm...,Accenture,"Calabarzon & Mimaropa, Central Luzon, National...",Around Expected Salary,https://www.jobstreet.com.ph/en/job/associate-...,20000
7,Information Officer II,Intellectual Property Office of the Philippine...,National Capital Reg,Above Expected Salary,https://www.jobstreet.com.ph/en/job/informatio...,20000
8,IT STAFF,CHAVESNET ENTERPRISES,"C.A.R, National Capital Reg","PHP 13,500 - 17,600",https://www.jobstreet.com.ph/en/job/it-staff-9...,20000
9,Business Analyst (Insurance background),Accenture,"Calabarzon & Mimaropa, Central Luzon, National...",Above Expected Salary,https://www.jobstreet.com.ph/en/job/business-a...,20000


In [None]:
pg = 100
o_url = '''https://www.jobstreet.com.ph/en/job-search/job-vacancy.php?area=1&option=1&location=60100%2C60200%2C60300%2C60400%2C61400%2C60700%2C60500%2C60800%2C61300%2C60900%2C61000%2C61100%2C61200%2C60600%2C61600%2C61500&specialization=192%2C193%2C191&job-source=1%2C64&classified=0&job-posted=0&sort=1&order=0&pg=1&src=16&srcr=1&ojs=4'''
tmp = o_url.split('pg=1')
n_url = f'pg={pg}'.join(tmp)
print(n_url)

In [None]:
test = sesh.get('https://www.jobstreet.com.ph/en/job/php-developer-9178746?fr=J&src=16&srcr=1&searchRequestToken=665e04a8-5887-4fbb-f75e-f4b948c7ac33&sectionRank=29')
pg = BeautifulSoup(test.text,'lxml')

In [None]:
pg.findAll('div',{'class':'unselectable wrap-text','id':'job_description'})

In [25]:
df = pd.read_csv('15000/4')
display(df)

Unnamed: 0.1,Unnamed: 0,title,url,company,location,salary,acc_salary
0,0,,,,,,
