In [1]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Silence urllib3's InsecureRequestWarning: the requests.get calls in this
# notebook pass verify=False, so this warning would otherwise be emitted.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

class UMichJob:
    """One job posting on careers.umich.edu, identified by its job id.

    Only ``url`` and ``job_id`` are known when the object is created; every
    other attribute starts out as an empty string.
    """

    def __init__(self, job_id):
        """Build the detail-page URL for ``job_id`` and blank out the other fields."""
        self.url = f'https://careers.umich.edu/job_detail/{job_id}/'
        self.job_id = job_id

        # Assigned in this fixed order so that ``vars(job)`` always lists the
        # attributes in the same sequence.
        blank_fields = (
            'title',
            'location',
            'reg_temp',
            'dept',
            'start_dt',
            'end_dt',
            'salary_low',
            'salary_high',
            'career_interest',
        )
        for field_name in blank_fields:
            setattr(self, field_name, '')

    def __str__(self):
        """A job prints as its detail-page URL."""
        return self.url

    def __repr__(self):
        """Same text as ``str(job)``."""
        return str(self)


def reached_end(soup):
    """Report whether a parsed search page is the "no more jobs" page.

    Args:
        soup: Parsed page. Only ``soup.find_all('p')`` and the ``.text`` of
            each returned element are used.

    Returns:
        True if the text of any ``<p>`` element contains the "no posted jobs"
        notice, False otherwise.
    """
    sentinel = 'There are currently no posted jobs fitting the criteria you selected'
    return any(
        sentinel in paragraph.text
        for paragraph in soup.find_all('p')
        if paragraph is not None
    )

def _job_id_from_href(href):
    """Return the path segment that follows ``job_detail`` in ``href``, or None.

    Works for both relative (``/job_detail/123/...``) and absolute
    (``https://host/job_detail/123/``) links. Links that have no
    ``job_detail`` path segment, or nothing after it, yield None.
    """
    if not href:
        return None
    segments = [segment for segment in href.split('/') if segment]
    if 'job_detail' not in segments:
        return None
    id_position = segments.index('job_detail') + 1
    return segments[id_position] if id_position < len(segments) else None


def get_jobs(career_interest='All', page_limit=50, job_limit=None, timeout=30):
    """Collect job postings from the careers.umich.edu search results.

    Result pages are requested one at a time, starting with page 0, until the
    site reports that there are no more postings, ``page_limit`` pages have
    been scanned, or ``job_limit`` jobs have been collected.

    Args:
        career_interest: Value sent as the ``career_interest`` query
            parameter: ``'All'`` or a category code (the notebook uses 210
            for IT).
        page_limit: Maximum number of result pages to request.
        job_limit: Maximum number of jobs to return. ``None`` means no cap.
            The returned list never holds more than this many jobs.
        timeout: Seconds to wait for each HTTP response before requests
            gives up, so a stalled server cannot hang the scan forever.

    Returns:
        A list of ``UMichJob`` objects, one per job-detail link found, in the
        order the links appear. Only ``url`` and ``job_id`` are filled in.
    """
    jobs = []

    # A non-positive cap can never be satisfied by adding jobs; skip the scan.
    if job_limit is not None and job_limit <= 0:
        return jobs

    for pageNum in range(page_limit):
        print(f'Scanning page {pageNum+1}...')

        response = requests.get(
            'https://careers.umich.edu/search-jobs',
            params={'career_interest': career_interest, 'page': pageNum},
            verify=False,  # TODO: remove this once their site starts working again
            timeout=timeout,
        )
        soup = BeautifulSoup(response.text, 'html.parser')

        if reached_end(soup):
            print(f'\nReached the end on page number {pageNum+1}')
            break

        for a in soup.find_all('a'):
            job_id = _job_id_from_href(a.get('href'))
            if job_id is None:
                continue
            jobs.append(UMichJob(job_id))
            # Check after every job, not once per page, so the cap is exact.
            if job_limit is not None and len(jobs) >= job_limit:
                return jobs

    return jobs

def _split_range(text):
    """Split ``'low - high'`` into ``(low, high)``.

    ``high`` is None when ``text`` contains no ``' - '`` separator.
    """
    parts = text.split(' - ')
    return parts[0], (parts[1] if len(parts) > 1 else None)


def get_job_info(job, timeout=30):
    """Download a job's detail page and fill in the job's attributes in place.

    Every ``<div>`` that contains both an ``<h3>`` and a ``<p>`` is treated as
    a labelled field: the lower-cased heading text selects which attribute to
    set, and the text of the div's first ``<p>`` supplies the value. Date and
    salary values are split on ``' - '`` into start/end and low/high parts.
    Attributes whose heading is not found are left unchanged.

    Calling this more than once on the same job is safe: every matched
    attribute is overwritten, never appended to.

    Args:
        job: Object with a ``url`` attribute plus the ``title``, ``location``,
            ``reg_temp``, ``dept``, ``start_dt``, ``end_dt``, ``salary_low``,
            ``salary_high`` and ``career_interest`` attributes to fill in.
        timeout: Seconds to wait for the HTTP response before requests gives up.

    Returns:
        None. ``job`` is modified in place.
    """
    response = requests.get(job.url, verify=False, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    for div in soup.find_all('div'):
        h3 = div.find('h3')
        p = div.find('p')
        if h3 is None or p is None:
            continue

        h3_text = h3.text.lower()
        p_text = p.text

        if 'working title' in h3_text:
            job.title = p_text
        elif 'work location' in h3_text:
            job.location = p_text
        elif 'regular/temporary' in h3_text:
            job.reg_temp = p_text
        elif 'department' in h3_text:
            job.dept = p_text
        elif 'date' in h3_text:
            start_dt, end_dt = _split_range(p_text)
            job.start_dt = start_dt
            if end_dt is None:
                # No ' - ' separator: keep the part we have and report it.
                print('\tError scraping dates')
            else:
                job.end_dt = end_dt
        elif 'salary' in h3_text:
            salary_low, salary_high = _split_range(p_text)
            job.salary_low = salary_low
            if salary_high is None:
                print('\tError scraping salary')
            else:
                job.salary_high = salary_high
        elif 'interest' in h3_text:
            # Assign rather than append, so a repeated call (or a second
            # matching div) cannot pile onto and corrupt an earlier value.
            job.career_interest = ';'.join(i.text for i in div.find_all('p'))
                       

In [7]:
# Career-interest code 210 selects the IT category.
jobs = get_jobs(career_interest=210)

print(f'{len(jobs)} jobs found')
print('Scraping job info...')
count = 0  # stays 0 when no jobs were found
for count, job in enumerate(jobs, start=1):
    print(f'{count}: {job}')
    get_job_info(job)

# One record per job, taken straight from each object's attribute dict.
job_dicts = [vars(job) for job in jobs]

df = pd.DataFrame(job_dicts)
df

Scanning page 1...
Scanning page 2...

Reached the end on page number 2
21 jobs found
Scraping job info...
1: https://careers.umich.edu/job_detail/237148/
2: https://careers.umich.edu/job_detail/237266/
3: https://careers.umich.edu/job_detail/237215/
4: https://careers.umich.edu/job_detail/237182/
5: https://careers.umich.edu/job_detail/237155/
6: https://careers.umich.edu/job_detail/236058/
7: https://careers.umich.edu/job_detail/237143/
8: https://careers.umich.edu/job_detail/237152/
9: https://careers.umich.edu/job_detail/237078/
10: https://careers.umich.edu/job_detail/237121/
11: https://careers.umich.edu/job_detail/237003/
12: https://careers.umich.edu/job_detail/236894/
13: https://careers.umich.edu/job_detail/236903/
14: https://careers.umich.edu/job_detail/236815/
15: https://careers.umich.edu/job_detail/236840/
16: https://careers.umich.edu/job_detail/236747/
17: https://careers.umich.edu/job_detail/236758/
18: https://careers.umich.edu/job_detail/233405/
19: https://careers.

Unnamed: 0,url,job_id,title,location,reg_temp,dept,start_dt,end_dt,salary_low,salary_high,career_interest
0,https://careers.umich.edu/job_detail/237148/,237148,ERP Business Systems Analyst,Ann Arbor Campus,Regular,ITS EAS HR Admin,7/24/2023,8/07/2023,"$88,000.00","$100,000.00",Information Technology
1,https://careers.umich.edu/job_detail/237266/,237266,Programmer Senior/Intermediate,Ann Arbor Campus,Regular,Biostatistics Department,7/21/2023,8/18/2023,"$87,242.00","$107,770.00",Information Technology
2,https://careers.umich.edu/job_detail/237215/,237215,Senior Full Stack and Systems Programmer,Ann Arbor Campus,Regular,MM Pathology Department,7/20/2023,7/27/2023,,,Information Technology
3,https://careers.umich.edu/job_detail/237182/,237182,Service Center Technician Associate,Ann Arbor Campus,Regular,DENT Informatics,7/20/2023,8/03/2023,"$43,000.00","$50,000.00",Information Technology
4,https://careers.umich.edu/job_detail/237155/,237155,Application Developer Intermediate,Ann Arbor Campus,Regular,ITS T&L Teaching and Learning,7/20/2023,8/03/2023,"$70,916.00","$78,795.00",Information Technology
5,https://careers.umich.edu/job_detail/236058/,236058,Data Integration Specialist for BIM,Ann Arbor Campus,Regular,Facilities & Operations - IS,7/20/2023,8/03/2023,"$90,000.00","$105,000.00",Information Technology
6,https://careers.umich.edu/job_detail/237143/,237143,Business Systems Analyst Associate,Ann Arbor Campus,Regular,ITS SS Support Services,7/19/2023,8/02/2023,"$55,800.00","$62,000.00",Information Technology
7,https://careers.umich.edu/job_detail/237152/,237152,Research Cloud Solution Designer,Ann Arbor Campus,Regular,ITS ARC Advanced Research Comp,7/19/2023,8/16/2023,"$70,000.00","$80,000.00",Information Technology
8,https://careers.umich.edu/job_detail/237078/,237078,Database Analyst/Programmer Sr,Ann Arbor Campus,Regular,MM Ophthal. & Visual Science,7/19/2023,8/18/2023,,,Information Technology
9,https://careers.umich.edu/job_detail/237121/,237121,Drupal Developer Intermediate,Dearborn Campus,Regular,Dbn External Relations,7/18/2023,8/01/2023,"$80,000.00","$90,000.00",Information Technology


In [8]:
# Save the scraped table to umich_jobs.csv in the working directory;
# index=False leaves the DataFrame's row index out of the file.
df.to_csv('umich_jobs.csv', index=False)