In [124]:
import sqlite3
import requests
import csv
from bs4 import BeautifulSoup

In [82]:
# Download the fake-jobs listing page and parse it into a BeautifulSoup tree.
# A timeout (seconds) keeps the cell from hanging forever on a stalled connection.
file = requests.get('https://realpython.github.io/fake-jobs', timeout=30)
# Fail fast on an HTTP error instead of silently parsing an error page.
file.raise_for_status()
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed (and to avoid bs4's parser warning).
data = BeautifulSoup(file.content, 'html.parser')

In [111]:
# 1. Scraping requirements: build one
# (title, company, location, description, application_link) tuple per job card.
job_listings = []
jobs = data.find_all('div', class_="card-content")

# One session is reused for all detail-page requests so they can share a connection.
with requests.Session() as session:
    for job in jobs:
        title = job.find('h2').text.strip()
        company = job.find('h3').text.strip()
        location = job.find(class_="location").text.strip()

        # The description is not on the listing page: follow the second
        # footer link of the card (the job's application link) and read it there.
        application_link = job.find_all('a', class_="card-footer-item")[1]['href']
        response = session.get(application_link, timeout=30)
        # Stop on an HTTP error rather than scraping an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        description = soup.find(class_='content').find('p').text.strip()

        job_listings.append((title, company, location, description, application_link))

In [115]:
# 2. Data storage: (re)create the SQLite table that holds the scraped jobs.
db_name = 'jobs.db'

# Schema of the jobs table; the UNIQUE constraint makes
# (title, company, location) identify a single posting.
create_jobs_table_sql = """
    CREATE TABLE jobs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        company TEXT NOT NULL,
        location TEXT NOT NULL,
        description TEXT,
        link TEXT,
        UNIQUE(title, company, location)
    );
    """

# `conn` and `cursor` are deliberately left open: the following cell uses them.
# The `with` block only commits (or rolls back) the transaction on exit.
conn = sqlite3.connect(db_name)
with conn:
    cursor = conn.cursor()
    # Note: the table is dropped first, so every run starts from an empty table.
    for statement in ('DROP TABLE IF EXISTS jobs', create_jobs_table_sql):
        cursor.execute(statement)

print("Table created successfully!")

Table created successfully!


In [None]:
# 3. Store the scraped data in the SQLite database:
# insert new jobs, and update existing ones when their details changed.
# Uses `cursor`/`conn` from the table-creation cell and `job_listings`
# from the scraping cell.
for title, company, location, description, link in job_listings:
    cursor.execute(
        "SELECT description, link FROM jobs WHERE title = ? AND company = ? AND location = ?",
        (title, company, location),
    )
    existing_job = cursor.fetchone()
    if existing_job is None:
        # No row for this (title, company, location) yet: insert it.
        cursor.execute(
            "INSERT INTO jobs (title,company,location,description,link) Values(?,?,?,?,?);",
            (title, company, location, description, link),
        )
    elif existing_job[0] != description or existing_job[1] != link:
        # 4. The description or application link changed: refresh the stored row.
        # SET assignments must be comma-separated; `description = ? AND link = ?`
        # would store the boolean result of the AND in `description` and leave
        # `link` untouched.
        cursor.execute(
            "UPDATE jobs SET description = ?, link = ? WHERE title = ? AND company = ? AND location = ?",
            (description, link, title, company, location),
        )
conn.commit()

In [144]:
#5. **Filtering and Exporting**:
def filter_export(location=None,company=None):
    with sqlite3.connect(db_name) as conn:
        cursor = conn.cursor()
        query = "SELECT title, company, location, description, link FROM jobs WHERE 1=1"
        params = []
    
        if location:
            query += " AND location LIKE ?"
            params.append(f"%{location}%")
        if company:
            query += " AND company LIKE ?"
            params.append(f"%{company}%")

        cursor.execute(query, params)
        results = cursor.fetchall()
        filename = 'filtered_results.csv'
        with open(filename, mode="w", newline="", encoding="utf-8") as file:
            writer = csv.writer(file)
            writer.writerow(["Title", "Company", "Location", "Description", "Application Link"])
            writer.writerows(results)
    return results

In [146]:
# Ask the user whether to filter jobs by location or by company name,
# then run the filter and export the matches to CSV via filter_export().
print("Do you want to filter jobs by location or company name?")
try:
    choice = int(input("Please choose 1 for location or 2 for company name: "))
except ValueError:
    # Non-numeric input is routed to the "Invalid choice" branch below
    # instead of crashing the cell with a traceback.
    choice = None

if choice == 1:
    loc = input("Please enter location: ")
    filtered_jobs = filter_export(location=loc)
    if filtered_jobs:
        print("Jobs found successfully! Please check the new CSV file.")
    else:
        print("There are no jobs with the given location.")

elif choice == 2:
    com = input("Please enter company name: ")
    filtered_jobs = filter_export(company=com)
    if filtered_jobs:
        print("Jobs found successfully! Please check the new CSV file!")
    else:
        print("There is no job with the given company name!")

else:
    print("Invalid choice! Please enter 1 or 2.")


Do you want to filter jobs by location or company name?
Jobs found successfully! Please check the new CSV file!
