In [1]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Silence urllib3's InsecureRequestWarning: the requests issued later in this
# notebook pass verify=False, which would otherwise emit this warning.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)



In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

class UMichJob:
    """A single job posting on careers.umich.edu.

    Only ``url`` and ``job_id`` are known at construction time; every other
    attribute starts as an empty string and is meant to be filled in later.
    """

    def __init__(self, job_id):
        self.url = f'https://careers.umich.edu/job_detail/{job_id}/'
        self.job_id = job_id
        # Detail fields start out blank. The order here is the attribute
        # order reported by vars(), so it is kept stable.
        for blank_field in (
            'title',
            'location',
            'reg_temp',
            'dept',
            'start_dt',
            'end_dt',
            'salary_low',
            'salary_high',
            'career_interest',
        ):
            setattr(self, blank_field, '')

    def __str__(self):
        # A posting is identified by its detail-page URL.
        return self.url

    def __repr__(self):
        return str(self)


def reached_end(soup):
    """Return True if the page carries the "no posted jobs" notice.

    Looks at the text of every <p> element found in ``soup`` and reports
    whether any of them contains the notice; returns False otherwise.
    """
    notice = 'There are currently no posted jobs fitting the criteria you selected'
    return any(
        paragraph is not None and notice in paragraph.text
        for paragraph in soup.find_all('p')
    )

def _job_id_from_href(href):
    """Return the path segment that follows 'job_detail' in ``href``, or None.

    Works for root-relative links ('/job_detail/123/'), links without a
    leading slash ('job_detail/123') and absolute URLs alike.
    """
    if href is None:
        return None
    parts = href.split('/')
    if 'job_detail' not in parts:
        return None
    tail = parts[parts.index('job_detail') + 1:]
    return tail[0] if tail and tail[0] else None


def get_jobs(career_interest='All', max_pages=100):
    """Collect the job postings listed on the careers.umich.edu search pages.

    Pages are requested one after another starting at page 0 until either a
    page shows the "no posted jobs" notice or ``max_pages`` pages have been
    requested.

    Args:
        career_interest: Value sent as the ``career_interest`` query parameter.
        max_pages: Upper bound on the number of result pages to request.

    Returns:
        A list of UMichJob objects, one per distinct job id, in the order the
        links were first encountered. Only ``url`` and ``job_id`` are set.
    """
    jobs = []
    seen_ids = set()
    for page_num in range(max_pages):
        print(f'Scanning page {page_num}...')

        response = requests.get(
            'https://careers.umich.edu/search-jobs',
            params={'career_interest': career_interest, 'page': page_num},
            verify=False,  # TODO: remove this once their site starts working again
            timeout=30,  # without a timeout a stalled connection blocks forever
        )
        soup = BeautifulSoup(response.text, 'html.parser')

        if reached_end(soup):
            print(f'\nReached the end on page number {page_num}')
            break

        for a in soup.find_all('a'):
            job_id = _job_id_from_href(a.get('href'))
            # Skip non-job links and postings that were already collected
            # (the same posting can be linked more than once).
            if job_id is None or job_id in seen_ids:
                continue
            seen_ids.add(job_id)
            jobs.append(UMichJob(job_id))
    return jobs

def _split_range(text):
    """Split a 'low - high' string into a (low, high) pair.

    When the ' - ' separator is absent the whole text is returned as the
    first element and the second element is an empty string.
    """
    parts = text.split(' - ')
    return parts[0], (parts[1] if len(parts) > 1 else '')


def get_job_info(job):
    """Download the detail page at ``job.url`` and fill in ``job`` in place.

    Every <div> that contains both an <h3> and a <p> is treated as a
    heading/value pair; the lower-cased heading text decides which attribute
    of ``job`` receives the value. Attributes whose heading is not found on
    the page are left untouched.

    Args:
        job: Object with a ``url`` attribute; the detail attributes (title,
            location, reg_temp, dept, start_dt, end_dt, salary_low,
            salary_high, career_interest) are assigned on it.

    Returns:
        None. ``job`` is mutated.
    """
    response = requests.get(
        job.url,
        verify=False,
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    soup = BeautifulSoup(response.text, 'html.parser')

    for div in soup.find_all('div'):
        h3 = div.find('h3')
        p = div.find('p')
        if h3 is None or p is None:
            continue

        heading = h3.text.lower()
        value = p.text

        if 'working title' in heading:
            job.title = value
        elif 'work location' in heading:
            job.location = value
        elif 'regular/temporary' in heading:
            job.reg_temp = value
        elif 'department' in heading:
            job.dept = value
        elif 'date' in heading:
            # A value without ' - ' no longer raises IndexError.
            job.start_dt, job.end_dt = _split_range(value)
        elif 'salary' in heading:
            job.salary_low, job.salary_high = _split_range(value)
        elif 'interest' in heading:
            # Assign rather than append: appending and then trimming the
            # leading ';' corrupted the value whenever this branch ran more
            # than once for the same job (e.g. when re-running the cell).
            job.career_interest = ';'.join(
                entry.text for entry in div.find_all('p')
            )
                       

# Career-interest code 210 is IT.
jobs = get_jobs(210)
print(f'{len(jobs)} jobs found')

# Fetch the detail page of at most the first 100 postings; any postings past
# that point keep their blank detail fields.
print('Scraping job info...')
for count, job in enumerate(jobs[:100], start=1):
    print(f'{count}: {job}')
    get_job_info(job)

# One row per posting, one column per instance attribute.
job_dicts = [vars(job) for job in jobs]
df = pd.DataFrame(job_dicts)
df

Scanning page 0...
Scanning page 1...

Reached the end on page number 1
20 jobs found
Scraping job info...
1: https://careers.umich.edu/job_detail/236980/
2: https://careers.umich.edu/job_detail/236894/
3: https://careers.umich.edu/job_detail/236903/
4: https://careers.umich.edu/job_detail/236815/
5: https://careers.umich.edu/job_detail/236840/
6: https://careers.umich.edu/job_detail/233693/
7: https://careers.umich.edu/job_detail/236758/
8: https://careers.umich.edu/job_detail/236749/
9: https://careers.umich.edu/job_detail/236710/
10: https://careers.umich.edu/job_detail/236747/
11: https://careers.umich.edu/job_detail/236668/
12: https://careers.umich.edu/job_detail/236642/
13: https://careers.umich.edu/job_detail/236560/
14: https://careers.umich.edu/job_detail/233405/
15: https://careers.umich.edu/job_detail/232686/
16: https://careers.umich.edu/job_detail/236436/
17: https://careers.umich.edu/job_detail/231143/
18: https://careers.umich.edu/job_detail/229396/
19: https://careers.

Unnamed: 0,url,job_id,title,location,reg_temp,dept,start_dt,end_dt,salary_low,salary_high,career_interest
0,https://careers.umich.edu/job_detail/236980/,236980,OS Programmer Intermediate,Ann Arbor Campus,Regular,ITS ARC Advanced Research Comp,7/17/2023,8/14/2023,"$70,000.00","$80,000.00",Information Technology
1,https://careers.umich.edu/job_detail/236894/,236894,MiSOC Technical Program Manager,Ann Arbor Campus,Regular,Merit Network,7/13/2023,7/27/2023,"$110,000.00","$120,000.00",Information Technology
2,https://careers.umich.edu/job_detail/236903/,236903,Systems Administrator Intermediate,Ann Arbor Campus,Regular,"LSA Dean: TS Res,Comp&InfrSvcs",7/13/2023,7/31/2023,"$69,000.00","$75,000.00",Information Technology
3,https://careers.umich.edu/job_detail/236815/,236815,Epic Technical Specialist,Michigan Medicine - Ann Arbor,Regular,MM HITS ETS InfraSvcs-MiChart,7/13/2023,7/30/2023,,,Information Technology
4,https://careers.umich.edu/job_detail/236840/,236840,Merit Support Center (MSC) Network Engineer,Ann Arbor Campus,Regular,Merit Network,7/12/2023,8/02/2023,"$65,000.00","$75,000.00",Information Technology
5,https://careers.umich.edu/job_detail/233693/,233693,MLabs Connectivity Team Business Systems Analyst,Michigan Medicine - Ann Arbor,Regular,MM Path MLabs Division,7/12/2023,7/19/2023,,,Information Technology;Pathology/Laboratory Se...
6,https://careers.umich.edu/job_detail/236758/,236758,DevOps Engineer,Ann Arbor Campus,Regular,ICPSR,7/10/2023,8/06/2023,"$75,000.00","$95,000.00",Information Technology
7,https://careers.umich.edu/job_detail/236749/,236749,Business Process Analyst,Ann Arbor Campus,Regular,Procurement Services,7/10/2023,7/24/2023,"$75,000.00","$85,000.00",Information Technology
8,https://careers.umich.edu/job_detail/236710/,236710,Software Engineer Integrations,Ann Arbor Campus,Regular,SL Technology Solutions,7/10/2023,7/24/2023,"$77,100.00","$90,000.00",Information Technology
9,https://careers.umich.edu/job_detail/236747/,236747,Software Programmer Intermediate,Ann Arbor Campus,Regular,"LSA Dean: TS Res,Comp&InfrSvcs",7/10/2023,8/07/2023,"$70,000.00","$77,000.00",Information Technology


In [3]:
# Write the scraped postings to umich_jobs.csv without the DataFrame index column.
df.to_csv('umich_jobs.csv', index=False)