In [1]:
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

class UMichJob:
    def __init__(self, job_id):
        self.url = f'https://careers.umich.edu/job_detail/{job_id}/'
        self.job_id = job_id
        self.title = ''
        self.location = ''
        self.reg_temp = ''
        self.dept = ''
        self.start_dt = ''
        self.end_dt = ''
        self.salary_low = ''
        self.salary_high = ''
        self.career_interest = ''
    
    def __str__(self):
        return self.url
    
    def __repr__(self):
        return self.__str__()


def reached_end(soup):
    end_text = 'There are currently no posted jobs fitting the criteria you selected'
    p_tags = soup.find_all('p')
    for p in p_tags:
        if p != None and end_text in p.text:
            return True
    return False

def get_jobs(career_interest = 'All', page_limit = 50, job_limit = None):
    jobs = []
    
    for pageNum in range(0,page_limit):
        print(f'Scanning page {pageNum+1}...')
        
        url = f'https://careers.umich.edu/search-jobs?career_interest={career_interest}&page={pageNum}'
        response = requests.get(url, verify=False) # TODO: remove this once their site starts working again
        soup = BeautifulSoup(response.text, 'html.parser')

        if reached_end(soup):
            print(f'\nReached the end on page number {pageNum+1}')
            break

        a_tags = soup.find_all('a')
        for a in a_tags:
            href = a.get('href')
            if href != None and 'job_detail' in href:
                job_id = href.split('/')[2]
                jobs.append(UMichJob(job_id))
        
        if job_limit != None and len(jobs) >= job_limit:
            break

    return jobs

def get_job_info(job):
        response = requests.get(job.url, verify=False)
        soup = BeautifulSoup(response.text, 'html.parser')
        
        div_tags = soup.find_all('div')
        
        for div in div_tags:
            h3 = div.find('h3')
            if h3 == None:
                continue
                
            p = div.find('p')
            if p == None:
                continue
                
            h3_text = h3.text.lower()
            p_text = p.text
                    
            if 'working title' in h3_text:
                job.title = p_text
            elif 'work location' in h3_text:
                job.location = p_text
            elif 'regular/temporary' in h3_text:
                job.reg_temp = p_text
            elif 'department' in h3_text:
                job.dept = p_text
            elif 'date' in h3_text:
                date_range = p_text.split(' - ')
                try:
                    job.start_dt = date_range[0]
                    job.end_dt = date_range[1]
                except:
                    print('\tError scraping dates')
            elif 'salary' in h3_text:
                salary_range = p_text.split(' - ')
                try:
                    job.salary_low = salary_range[0]
                    job.salary_high = salary_range[1]
                except:
                    print('\tError scraping salary')
            elif 'interest' in h3_text:
                interests = div.find_all('p')
                for i in interests:
                    job.career_interest += ';' + i.text
                job.career_interest = job.career_interest[1:]
                       

In [5]:
# IT = 210
jobs = get_jobs(career_interest=210)

print(f'{len(jobs)} jobs found')
print('Scraping job info...')
count = 0
for job in jobs:
    count += 1
    print(f'{count}: {job}')
    get_job_info(job)

job_dicts = []

for job in jobs:
    job_dicts.append(vars(job))
    
df = pd.DataFrame(job_dicts)
df

Scanning page 1...
Scanning page 2...
Scanning page 3...
Scanning page 4...
Scanning page 5...
Scanning page 6...
Scanning page 7...
Scanning page 8...
Scanning page 9...
Scanning page 10...
Scanning page 11...
Scanning page 12...
Scanning page 13...
Scanning page 14...
Scanning page 15...
Scanning page 16...
Scanning page 17...
Scanning page 18...
Scanning page 19...
Scanning page 20...
Scanning page 21...
Scanning page 22...
Scanning page 23...
Scanning page 24...
Scanning page 25...
Scanning page 26...
Scanning page 27...
Scanning page 28...
Scanning page 29...
Scanning page 30...
Scanning page 31...
Scanning page 32...
Scanning page 33...
Scanning page 34...
Scanning page 35...
Scanning page 36...
Scanning page 37...
Scanning page 38...
Scanning page 39...
Scanning page 40...

Reached the end on page number 40
973 jobs found
Scraping job info...
1: https://careers.umich.edu/job_detail/237410/
2: https://careers.umich.edu/job_detail/237411/
3: https://careers.umich.edu/job_detail/23

149: https://careers.umich.edu/job_detail/233562/
150: https://careers.umich.edu/job_detail/230837/
151: https://careers.umich.edu/job_detail/231120/
152: https://careers.umich.edu/job_detail/230837/
153: https://careers.umich.edu/job_detail/230365/
154: https://careers.umich.edu/job_detail/230367/
155: https://careers.umich.edu/job_detail/230117/
156: https://careers.umich.edu/job_detail/229533/
157: https://careers.umich.edu/job_detail/228866/
158: https://careers.umich.edu/job_detail/229206/
159: https://careers.umich.edu/job_detail/228218/
160: https://careers.umich.edu/job_detail/227324/
161: https://careers.umich.edu/job_detail/223992/
162: https://careers.umich.edu/job_detail/213584/
163: https://careers.umich.edu/job_detail/215603/
164: https://careers.umich.edu/job_detail/205758/
165: https://careers.umich.edu/job_detail/237101/
166: https://careers.umich.edu/job_detail/237291/
167: https://careers.umich.edu/job_detail/237283/
168: https://careers.umich.edu/job_detail/237286/


313: https://careers.umich.edu/job_detail/237116/
314: https://careers.umich.edu/job_detail/237097/
315: https://careers.umich.edu/job_detail/237083/
316: https://careers.umich.edu/job_detail/237071/
317: https://careers.umich.edu/job_detail/237086/
318: https://careers.umich.edu/job_detail/237072/
	Error scraping salary
319: https://careers.umich.edu/job_detail/237073/
320: https://careers.umich.edu/job_detail/237089/
321: https://careers.umich.edu/job_detail/237074/
322: https://careers.umich.edu/job_detail/237058/
323: https://careers.umich.edu/job_detail/237077/
324: https://careers.umich.edu/job_detail/237079/
325: https://careers.umich.edu/job_detail/237080/
326: https://careers.umich.edu/job_detail/237045/
327: https://careers.umich.edu/job_detail/237047/
328: https://careers.umich.edu/job_detail/237008/
329: https://careers.umich.edu/job_detail/236949/
330: https://careers.umich.edu/job_detail/236830/
331: https://careers.umich.edu/job_detail/236561/
332: https://careers.umich.

477: https://careers.umich.edu/job_detail/233319/
478: https://careers.umich.edu/job_detail/231112/
479: https://careers.umich.edu/job_detail/229608/
480: https://careers.umich.edu/job_detail/227702/
481: https://careers.umich.edu/job_detail/225766/
482: https://careers.umich.edu/job_detail/224779/
483: https://careers.umich.edu/job_detail/213460/
484: https://careers.umich.edu/job_detail/236890/
485: https://careers.umich.edu/job_detail/236886/
486: https://careers.umich.edu/job_detail/236842/
487: https://careers.umich.edu/job_detail/236872/
488: https://careers.umich.edu/job_detail/236873/
489: https://careers.umich.edu/job_detail/236874/
490: https://careers.umich.edu/job_detail/236849/
491: https://careers.umich.edu/job_detail/236879/
492: https://careers.umich.edu/job_detail/236860/
493: https://careers.umich.edu/job_detail/236861/
494: https://careers.umich.edu/job_detail/236884/
495: https://careers.umich.edu/job_detail/236864/
496: https://careers.umich.edu/job_detail/236840/


640: https://careers.umich.edu/job_detail/236396/
641: https://careers.umich.edu/job_detail/236165/
642: https://careers.umich.edu/job_detail/236069/
643: https://careers.umich.edu/job_detail/235650/
644: https://careers.umich.edu/job_detail/234513/
645: https://careers.umich.edu/job_detail/233405/
646: https://careers.umich.edu/job_detail/206785/
647: https://careers.umich.edu/job_detail/236596/
648: https://careers.umich.edu/job_detail/236605/
649: https://careers.umich.edu/job_detail/236574/
650: https://careers.umich.edu/job_detail/236584/
651: https://careers.umich.edu/job_detail/236570/
652: https://careers.umich.edu/job_detail/236555/
653: https://careers.umich.edu/job_detail/236556/
654: https://careers.umich.edu/job_detail/236515/
655: https://careers.umich.edu/job_detail/236559/
656: https://careers.umich.edu/job_detail/236564/
657: https://careers.umich.edu/job_detail/236526/
658: https://careers.umich.edu/job_detail/236446/
659: https://careers.umich.edu/job_detail/236390/


804: https://careers.umich.edu/job_detail/235760/
805: https://careers.umich.edu/job_detail/231943/
806: https://careers.umich.edu/job_detail/231494/
807: https://careers.umich.edu/job_detail/228001/
808: https://careers.umich.edu/job_detail/235673/
809: https://careers.umich.edu/job_detail/235718/
810: https://careers.umich.edu/job_detail/235731/
811: https://careers.umich.edu/job_detail/233728/
812: https://careers.umich.edu/job_detail/227733/
813: https://careers.umich.edu/job_detail/227736/
814: https://careers.umich.edu/job_detail/225892/
815: https://careers.umich.edu/job_detail/225446/
816: https://careers.umich.edu/job_detail/225445/
817: https://careers.umich.edu/job_detail/235669/
818: https://careers.umich.edu/job_detail/235678/
819: https://careers.umich.edu/job_detail/235658/
820: https://careers.umich.edu/job_detail/235061/
821: https://careers.umich.edu/job_detail/219515/
822: https://careers.umich.edu/job_detail/219769/
823: https://careers.umich.edu/job_detail/235127/


	Error scraping dates
963: https://careers.umich.edu/job_detail/229378/
964: https://careers.umich.edu/job_detail/229410/
965: https://careers.umich.edu/job_detail/227529/
	Error scraping dates
966: https://careers.umich.edu/job_detail/228850/
967: https://careers.umich.edu/job_detail/228264/
968: https://careers.umich.edu/job_detail/207966/
969: https://careers.umich.edu/job_detail/227471/
970: https://careers.umich.edu/job_detail/227095/
	Error scraping dates
971: https://careers.umich.edu/job_detail/205316/
972: https://careers.umich.edu/job_detail/196632/
973: https://careers.umich.edu/job_detail/221046/
	Error scraping dates


Unnamed: 0,url,job_id,title,location,reg_temp,dept,start_dt,end_dt,salary_low,salary_high,career_interest
0,https://careers.umich.edu/job_detail/237410/,237410,Patron Services Assistant,Ann Arbor Campus,Temporary,UMS Patron Services,7/25/2023,8/15/2023,,,Temporary Job Opening
1,https://careers.umich.edu/job_detail/237411/,237411,Patient Care Tech Associate,Michigan Medicine - Ann Arbor,Regular,MM UH CVC 7C,7/25/2023,8/08/2023,,,"Patient Care Clinical Support (MA, Tech);Patie..."
2,https://careers.umich.edu/job_detail/237412/,237412,Patient Care Tech Associate (Temp),Michigan Medicine - Ann Arbor,Temporary,MM UH CVC 7C,7/25/2023,8/08/2023,,,"Patient Care Clinical Support (MA, Tech);Patie..."
3,https://careers.umich.edu/job_detail/237419/,237419,CLERK B (TEMP),Michigan Medicine - Ann Arbor,Temporary,MM Occupational Hlth Services,7/25/2023,8/04/2023,,,Temporary Job Opening
4,https://careers.umich.edu/job_detail/237389/,237389,RESEARCH FELLOW,Ann Arbor Campus,Regular,MM Michigan Neuroscience Inst,7/25/2023,8/11/2023,,,Research Fellows
...,...,...,...,...,...,...,...,...,...,...,...
968,https://careers.umich.edu/job_detail/227471/,227471,Spanish On-Call Interpreter,Michigan Medicine - Ann Arbor,Regular,MM Interpreters Program,12/01/2022,12/01/2023,$19.20,$28.79,Patient Care Services
969,https://careers.umich.edu/job_detail/227095/,227095,ASST IN RESEARCH (TEMP),Ann Arbor Campus,Temporary,MM Family Medicine,11/20/2022 -,,,,Temporary Job Opening
970,https://careers.umich.edu/job_detail/205316/,205316,IGCB Postdoctoral Fellow,Ann Arbor Campus,Regular,SEAS Institute of Global Chg,11/17/2022,11/16/2023,,,Research Fellows
971,https://careers.umich.edu/job_detail/196632/,196632,RESEARCH ASST I (TEMP),Ann Arbor Campus,Temporary,MM Anesthesiology Department,9/16/2022,7/29/2023,,,Temporary Job Opening


In [6]:
df.to_csv('umich_jobs.csv', index=False)