# Scraping job data from Indeed

In [4]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime
import csv

In [7]:
# Function to get the url for a particular job in a particular location
def url_builder(position, location):
    """Return the Indeed India search URL for a position in a location.

    Both values are percent-encoded, so spaces, commas, ``&`` and other
    reserved characters cannot corrupt the query string.

    Args:
        position: Job title or keywords, as plain (un-encoded) text.
        location: Location text, as plain (un-encoded) text.

    Returns:
        The search URL as a string, with ``q`` and ``l`` query parameters.
    """
    # Imported locally so the function does not depend on the import cell.
    from urllib.parse import urlencode

    query = urlencode({'q': position, 'l': location})
    return 'https://in.indeed.com/jobs?' + query

# Helper: text of the first element matching tag/class inside a card, or '' if absent.
def _card_text(card, tag_name, css_class, strip=True):
    node = card.find(tag_name, css_class)
    if node is None:
        return ''
    return node.text.strip() if strip else node.text


# Function to extract the necessary information from a job posting
def job_information_extractor(card):
    """Extract one CSV row of details from a job-posting card.

    Args:
        card: A parsed element for one job posting. It must expose
            ``card.h2.span`` (carrying a ``title`` attribute), ``card.h2.a``
            (carrying an ``href`` attribute) and a ``find(tag, css_class)``
            method, as a BeautifulSoup tag does.

    Returns:
        A list ``[job_title, company, job_location, job_summary, post_date,
        job_salary, job_url]``. Any field looked up with ``find`` that is
        missing from the card is returned as an empty string.

    Raises:
        AttributeError: If the card has no ``h2`` heading, or the heading
            lacks the ``span``/``a`` children.
    """
    # Imported locally so the function does not depend on the import cell.
    from urllib.parse import urljoin

    heading = card.h2
    job_title = heading.span.get('title')

    # urljoin copes with both '/path' and 'path' hrefs; plain concatenation
    # produced 'https://in.indeed.com//path' for root-relative links.
    href = heading.a.get('href')
    job_url = urljoin('https://in.indeed.com/', href) if href else ''

    company = _card_text(card, 'span', 'companyName')
    job_location = _card_text(card, 'div', 'companyLocation')
    job_summary = _card_text(card, 'div', 'job-snippet')
    # The posting date is deliberately kept unstripped, as before.
    post_date = _card_text(card, 'span', 'date', strip=False)
    job_salary = _card_text(card, 'div', 'attribute_snippet')

    return [job_title, company, job_location, job_summary, post_date, job_salary, job_url]

# Function to be called for generating a CSV file with all the relevant information
def main_job_postings_information(position, location, output_path='jobs.csv'):
    """Scrape every result page for a search and write the rows to a CSV file.

    Starting from the URL built by ``url_builder``, each page is fetched and
    parsed, one record is extracted per job card, and the page's "Next" link
    is followed until there is none.

    Args:
        position: Job title or keywords to search for.
        location: Location to search in.
        output_path: Path of the CSV file to write. Defaults to ``'jobs.csv'``,
            the file name that was previously hard-coded.

    Raises:
        requests.RequestException: If a page cannot be fetched in time or the
            server answers with an HTTP error status.
    """
    # Imported locally so the function does not depend on the import cell.
    from urllib.parse import urljoin

    records = []
    url = url_builder(position, location)
    visited = set()

    # Walk the result pages; the visited set stops a "Next" link that points
    # back to an already-seen page from looping forever.
    while url not in visited:
        visited.add(url)

        # A timeout keeps a stalled connection from hanging the scrape, and
        # raise_for_status surfaces blocked/error responses instead of
        # silently producing an empty CSV.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # NOTE(review): this class string looks auto-generated by the site and
        # may change; if no rows are found, check this selector first.
        cards = soup.find_all('div', 'slider_container css-11g4k3a eu4oa1w0')

        # One record per job posting on the current page.
        records.extend(job_information_extractor(card) for card in cards)

        # Follow the "Next" link if the page has one; otherwise we are done.
        next_link = soup.find('a', {'aria-label': 'Next'})
        next_href = next_link.get('href') if next_link is not None else None
        if not next_href:
            break
        # urljoin avoids the '//' that plain concatenation gave for '/path' hrefs.
        url = urljoin('https://in.indeed.com/', next_href)

    # Write the header row followed by every collected record.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Job Title', 'Company', 'Location', 'Summary', 'Post Date', 'Salary', 'Job Link'])
        writer.writerows(records)

In [6]:
# Running this cell writes a CSV file of Data Analyst jobs in Chennai, India.
# Either argument can be changed, provided the location is in India.
# A possible extension is a country argument, because Indeed serves each
# country from a different URL.
main_job_postings_information(position='Data Analyst', location='chennai, tn')