# Web Scraping (Static Website)

In [1]:
from bs4 import BeautifulSoup
import requests

# Download the TimesJobs search-results page for the keyword "Python".
# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as results.
response = requests.get('https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=Python&txtLocation=', timeout=30)
response.raise_for_status()
html_text = response.text

soup = BeautifulSoup(html_text, 'lxml')

# Every job posting is looked up as an <li> carrying exactly this class string.
jobs = soup.find_all('li', class_="clearfix job-bx wht-shd-bx")
for job in jobs:
    try:
        published_date = job.find('span', class_="sim-posted").span.text.strip()
        # Keep only postings whose date text contains 'few'
        # (presumably "Posted few days ago" -- confirm against the live page).
        if 'few' not in published_date:
            continue
        company_name = job.find('h3', class_="joblist-comp-name").text.replace(' ', '').strip()
        skills_needed = job.find('span', class_="srp-skills").text.replace(' ', '').strip()
        # the link to the full-description of the job is under header-->h2-->a tag
        # with the link being under the "href"
        more_info = job.header.h2.a['href']
    except (AttributeError, TypeError, KeyError):
        # find() / tag navigation yields None when an element is missing, and
        # a tag without "href" raises KeyError: skip that one malformed
        # listing instead of aborting the whole loop.
        continue

    print(f"Company Name: {company_name}")
    print(f"Required Skills: {skills_needed}")
    print(f"More Information: {more_info}")

    print('')

Company Name: PureTechCodexPrivateLimited
Required Skills: rest,python,database,django,debugging,mongodb
More Information: https://www.timesjobs.com/job-detail/python-pure-tech-codex-private-limited-pune-2-to-3-yrs-jobid-OHwfF0d6EhNzpSvf__PLUS__uAgZw==&source=srp

Company Name: GeminiSolutions
Required Skills: python,mobile,svn,nosql,pythonscripting,git,api,sqldatabase
More Information: https://www.timesjobs.com/job-detail/qa-python-python-sdet-gemini-solutions-gurgaon-4-to-7-yrs-jobid-eGMLzwOk2QlzpSvf__PLUS__uAgZw==&source=srp

Company Name: TandAHRSolutions
Required Skills: Djangoframework,PythonDeveloper,corepython
More Information: https://www.timesjobs.com/job-detail/python-developer-tanda-hr-solutions-mohali-3-to-5-yrs-jobid-GTT0grHZP1tzpSvf__PLUS__uAgZw==&source=srp

Company Name: eastindiasecuritiesltd.
Required Skills: python,hadoop,machinelearning
More Information: https://www.timesjobs.com/job-detail/python-engineer-east-india-securities-ltd-kolkata-2-to-5-yrs-jobid-KEkE19Wq

## Filtering Data
- Filter out jobs that require skills you are not familiar with

In [3]:
from bs4 import BeautifulSoup
import requests

# Collect the skills to exclude, one per prompt, until an empty line is entered.
print('What are the skills you\'re not familiar with?')
print('When you\'re done, press ENTER:')
unfamiliar_skills = []
skill = input('>>')
while skill != '':
    unfamiliar_skills.append(skill)
    skill = input('>>')

print(f'Filtering out {unfamiliar_skills} from job search')

# The scraped skill tokens have their spaces removed and keep the site's own
# capitalisation, so normalise the typed skills the same way (no whitespace,
# lower case). Otherwise "Django" or "machine learning" would never match.
excluded_skills = {''.join(entry.split()).lower() for entry in unfamiliar_skills}
excluded_skills.discard('')  # a whitespace-only entry excludes nothing

# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() stops an HTTP error page from being parsed as results.
response = requests.get('https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=Python&txtLocation=', timeout=30)
response.raise_for_status()
html_text = response.text

soup = BeautifulSoup(html_text, 'lxml')

# Every job posting is looked up as an <li> carrying exactly this class string.
jobs = soup.find_all('li', class_="clearfix job-bx wht-shd-bx")
for job in jobs:
    try:
        published_date = job.find('span', class_="sim-posted").span.text.strip()
        # Keep only postings whose date text contains 'few'
        # (presumably "Posted few days ago" -- confirm against the live page).
        if 'few' not in published_date:
            continue
        company_name = job.find('h3', class_="joblist-comp-name").text.replace(' ', '').strip()
        skills_needed = job.find('span', class_="srp-skills").text.replace(' ', '').strip()
        # the link to the full-description of the job is under header-->h2-->a tag
        # with the link being under the "href"
        more_info = job.header.h2.a['href']
    except (AttributeError, TypeError, KeyError):
        # find() / tag navigation yields None when an element is missing, and
        # a tag without "href" raises KeyError: skip that one malformed
        # listing instead of aborting the whole loop.
        continue

    # Exact-token comparison: a listing is dropped only when one of its
    # comma-separated skills equals an excluded skill after normalisation
    # (so "Djangoframework" is not excluded by "django").
    skills = {''.join(token.split()).lower() for token in skills_needed.split(',')}
    if skills.isdisjoint(excluded_skills):
        print(f"Company Name: {company_name}")
        print(f"Required Skills: {skills_needed}")
        print(f"More Information: {more_info}")

        print('')

What are the skills you're not familiar with?
When you're done, press ENTER:
>>django
>>javascript
>>
Filtering out ['django', 'javascript'] from job search
Company Name: GeminiSolutions
Required Skills: python,mobile,svn,nosql,pythonscripting,git,api,sqldatabase
More Information: https://www.timesjobs.com/job-detail/qa-python-python-sdet-gemini-solutions-gurgaon-4-to-7-yrs-jobid-eGMLzwOk2QlzpSvf__PLUS__uAgZw==&source=srp

Company Name: TandAHRSolutions
Required Skills: Djangoframework,PythonDeveloper,corepython
More Information: https://www.timesjobs.com/job-detail/python-developer-tanda-hr-solutions-mohali-3-to-5-yrs-jobid-GTT0grHZP1tzpSvf__PLUS__uAgZw==&source=srp

Company Name: eastindiasecuritiesltd.
Required Skills: python,hadoop,machinelearning
More Information: https://www.timesjobs.com/job-detail/python-engineer-east-india-securities-ltd-kolkata-2-to-5-yrs-jobid-KEkE19WqPbFzpSvf__PLUS__uAgZw==&source=srp

Company Name: YMGlobalTechnologiesPteLtd
Required Skills: python,apache,

## Automating 

In [None]:
from bs4 import BeautifulSoup
import requests
import time

# Ask for the skills to exclude, one per prompt; an empty line ends the list.
print("What are the skills you're not familiar with?")
print("When you're done, press ENTER:")

unfamiliar_skills = []
while True:
    skill = input('>>')
    if skill == '':
        # Bare ENTER: the user is done entering skills.
        break
    unfamiliar_skills.append(skill)

print(f'Filtering out {unfamiliar_skills} from job search')


def find_jobs(f, version):
    """Scrape recent Python job postings from TimesJobs and write them to ``f``.

    Downloads the search-results page, keeps the postings whose posted-date
    text contains 'few', drops every posting that lists a skill present in
    the module-level ``unfamiliar_skills`` list, and writes the remaining
    postings (company, skills, detail link) to ``f``.

    Skill matching is an exact comparison of comma-separated tokens after
    removing whitespace and lower-casing both sides, so "Django" excludes a
    posting listing "django" but not one listing "Djangoframework".

    Args:
        f: Writable text file object that receives the job entries.
        version: Number used only in the "File Saved" confirmation message.

    Raises:
        requests.RequestException: If the request times out, cannot connect,
            or the server answers with an HTTP error status.
    """
    # The timeout keeps the poller from hanging forever on an unresponsive
    # server; raise_for_status() stops an HTTP error page from being parsed
    # (and an empty result file from being reported as saved).
    response = requests.get('https://www.timesjobs.com/candidate/job-search.html?searchType=personalizedSearch&from=submit&txtKeywords=Python&txtLocation=', timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Normalise the user's skills once per call, the same way the scraped
    # tokens are normalised below (no whitespace, lower case).
    excluded_skills = {''.join(entry.split()).lower() for entry in unfamiliar_skills}
    excluded_skills.discard('')  # a whitespace-only entry excludes nothing

    # Every job posting is looked up as an <li> carrying exactly this class string.
    jobs = soup.find_all('li', class_="clearfix job-bx wht-shd-bx")
    for job in jobs:
        try:
            published_date = job.find('span', class_="sim-posted").span.text.strip()
            # Keep only postings whose date text contains 'few'
            # (presumably "Posted few days ago" -- confirm against the live page).
            if 'few' not in published_date:
                continue
            company_name = job.find('h3', class_="joblist-comp-name").text.replace(' ', '').strip()
            skills_needed = job.find('span', class_="srp-skills").text.replace(' ', '').strip()
            # the link to the full-description of the job is under header-->h2-->a tag
            # with the link being under the "href"
            more_info = job.header.h2.a['href']
        except (AttributeError, TypeError, KeyError):
            # find() / tag navigation yields None when an element is missing,
            # and a tag without "href" raises KeyError: skip that one
            # malformed listing instead of aborting the whole run.
            continue

        skills = {''.join(token.split()).lower() for token in skills_needed.split(',')}
        if skills.isdisjoint(excluded_skills):
            # put the job information every 30 min in a .txt file
            # formatted as jobs_version{version}.txt
            f.write(f"Company Name: {company_name} \n")
            f.write(f"Required Skills: {skills_needed} \n")
            f.write(f"More Information: {more_info} \n")
            f.write('\n')

    print(f'File Saved: jobs_version{version}.txt')


if __name__ == "__main__":
    # Local import: only this entry-point block needs it.
    import os

    # open(..., 'w') does not create missing directories, so make sure the
    # output folder exists before the first write.
    os.makedirs('jobs_pool', exist_ok=True)

    # run every 30 minutes
    sleep_time = 30
    version = 1
    while True:
        try:
            # Explicit UTF-8 so non-ASCII company names do not fail on
            # platforms whose default text encoding is narrower.
            with open(f'jobs_pool/jobs_version{version}.txt', 'w', encoding='utf-8') as f:
                find_jobs(f, version)
        except requests.RequestException as err:
            # A transient network/HTTP failure must not kill the endless
            # poller; the same version number is retried on the next cycle
            # and overwrites the incomplete file.
            print(f"Job search failed ({err}); will retry on the next cycle.")
        else:
            version += 1
        print(f"Waiting for {sleep_time} minutes ...")
        time.sleep(sleep_time * 60)