In [1]:
import requests
from bs4 import BeautifulSoup
import json
import os
from datetime import datetime

In [2]:
company_urls = {
    "APC": "https://approcess.com/careers",
    "Abbvie": "https://careers.abbvie.com/en/jobs?q=&options=&page=1&la=53.3498053&lo=-6.2603097&ln=Dublin,%20Ireland&lr=100",
    "Astrazeneca": "https://careers.astrazeneca.com/location/ireland-jobs/7684/2963597/2",
    "Pfizer": "https://pfizer.wd1.myworkdayjobs.com/en-US/PfizerCareers?Location_Country=04a05835925f45b3a59406a2a6b72c8a&locations=e2d3979e3af101cb6c9c1a59076c3890",
    "BMS": "https://jobs.bms.com/careers?location=ireland",
    "MSD": "https://jobs.msd.com/gb/en/ireland-job-search?utm_source=google&utm_medium=sea&utm_campaign=emea-ie&utm_content=branded&gclid=CjwKCAjwodC2BhAHEiwAE67hJBtaWGHg5w7tWTOeXFukL141m02EHQ2NEu7zg4139IxtTg1M7wxPsRoC9JcQAvD_BwE",
    "Takeda": "https://jobs.takeda.com/search-jobs/Ireland/1113/2/2963597/53/-8/50/2",
    "Amgen": "https://www.amgen.jobs/irl/jobs/",
    "Icon plc": "https://careers.iconplc.com/jobs?options=1469&page=1",
    "Vle therapeutics": "https://www.vletherapeutics.com/careers",
    "Astellas": "https://astellas.avature.net/en_GB/careers/SearchJobs/?1329=%5B180801%5D&1329_format=1348&listFilterMode=1"
}

In [3]:
def APC():
    try:
        response = requests.get(company_urls["APC"])
        response.raise_for_status()
        soup = BeautifulSoup(response.content,'lxml')
        table = soup.find('table')
        rows = table.find_all('tr')[1:]
        job_details = []
        for row in rows:
            title = row.find('td',class_='title title--quaternary').text.strip()
            closing_date = row.find('td',class_='title title--senary').text.strip()
            link = row.find('a')['href']
            job_details.append({
                'company':'APC',
                'title':title,
                'application link':link,
                'closing_date':closing_date,
                'job portal link':company_urls['APC']
            })
        return job_details
    except requests.exceptions.RequestException as e:
        print(f"Error fetching job details: {e}")
        return []
        

In [4]:
def Abbvie():
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(company_urls['Abbvie'],headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        job_tiles = soup.find_all('a', class_='attrax-vacancy-tile__title')
        jobs = []
        for tile in job_tiles:
            job_title = tile.get_text(strip=True)
            job_url = tile['href']
            jobs.append({
                'company':'Abbvie',
                'title': job_title,
                'application link': 'https://careers.abbvie.com'+job_url,
                'job portal link':company_urls['Abbvie']
                })
        return jobs
    except requests.exceptions.RequestException as e:
        print(f"Error fetching job details: {e}")
        return []

In [5]:
def Astrazeneca():
    try:   
        base_url = company_urls['Astrazeneca']
        jobs = []
        page=1
        while True:
            url=f'{base_url}/{page}'
            response = requests.get(url)
            response.raise_for_status()
            if response.status_code != 200:
                break
            soup = BeautifulSoup(response.content,'lxml')
            job_tiles = soup.find_all('a',class_='search-results-link')
            if not job_tiles:
                break
            for tile in job_tiles:
                job_title = tile.text.strip().split('\n')[0]
                job_url = tile['href']
                jobs.append({
                    'company':'Astrazeneca',
                    'title':job_title,
                    'application url':'https://careers.astrazeneca.com/'+job_url,
                    'job portal link': company_urls['Astrazeneca']
                })
            page+=1
        return jobs
    except requests.exceptions.RequestException as e:
        print(f"Error fetching job details: {e}")
        return []

In [6]:
response = requests.get(company_urls['MSD'])
response.raise_for_status()
soup = BeautifulSoup(response.content, 'lxml')
jobs = soup.find_all('h3', class_='job-card-title')
soup

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=6.0" name="viewport"/>
<meta content="strict-origin-when-cross-origin" name="referrer"/>
<meta content="ImZjOGY0ZDMyMWJhM2Y4ZWM2YjY4ZTA3NThhOWZjMTg3NzQ2NzQ0YmMi.GbpjYA.6utTWXINEPebda3G30u_nEQxLEE" name="_csrf"/>
<link href="https://www.bms.com/favicon.ico" rel="icon" sizes="32x32"/>
<meta content="IE=edge, chrome=1" http-equiv="X-UA-Compatible"/>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=6, minimum-scale=1" name="viewport"/>
<meta content="Careers @BMS" name="description"/>
<meta content="https://static.vscdn.net/images/careers/demo/bms/1719339615::PCS-Header-GPS.jpg" name="og:image" property="og:image"/>
<meta content="Careers @BMS" name="og:description" property="og:description"/>
<meta content="Careers @BMS" name="og:title" property="og:title"/>
<meta content="http://jobs.bms.com/careers?location=ireland" 

In [7]:
def get_all_job_postings():
    all_jobs = {}

    all_jobs["APC"] = APC()
    all_jobs["Abbvie"] = Abbvie()
    all_jobs["Astrazeneca"] = Astrazeneca()

    return all_jobs

def load_previous_jobs(filename):
    if os.path.exists(filename):
        with open(filename, 'r') as f:
            return json.load(f)
    else:
        return {}

def find_new_jobs(previous_jobs, current_jobs):
    new_jobs = {}
    
    for company, jobs in current_jobs.items():
        if company not in previous_jobs:
            new_jobs[company] = jobs
        else:
            previous_titles = {job["title"] for job in previous_jobs[company]}
            new_jobs_for_company = [job for job in jobs if job["title"] not in previous_titles]
            
            if new_jobs_for_company:
                new_jobs[company] = new_jobs_for_company
    
    return new_jobs

def update_json_file(filename, current_jobs):
    with open(filename, 'w') as f:
        json.dump(current_jobs, f, indent=4)

In [8]:
def main():
    # Define the JSON file path
    json_file = "jobs.json"
    
    # Get the current job postings from all companies
    current_jobs = get_all_job_postings()
    
    # Load the previous job postings from the JSON file
    previous_jobs = load_previous_jobs(json_file)
    
    # Identify new job postings
    new_jobs = find_new_jobs(previous_jobs, current_jobs)
    
    # Print new jobs
    if new_jobs:
        print("New job postings found:")
        for company, jobs in new_jobs.items():
            print(f"\n{company}:")
            for job in jobs:
                print(job)
    else:
        print("No new job postings found.")
    
    # Update the JSON file with the current job postings
    update_json_file(json_file, current_jobs)

In [9]:
main()

New job postings found:

Abbvie:
{'company': 'Abbvie', 'title': 'Associate Director of Technical Operations', 'application link': 'https://careers.abbvie.com/en/job/associate-director-of-technical-operations-in-dublin-county-dublin-jid-7601', 'job portal link': 'https://careers.abbvie.com/en/jobs?q=&options=&page=1&la=53.3498053&lo=-6.2603097&ln=Dublin,%20Ireland&lr=100'}


In [10]:
main()

No new job postings found.
