In [36]:
import re
from urllib.parse import urlencode, urljoin

import requests
from bs4 import BeautifulSoup

In [5]:
# CareerBuilder

def get_careerbuilder_url(posted='1', radius_tuple_index=2, cb_apply='false', keywords='engineer', location_city='', location_state='', pay='60', emp_tuple_index=0, cb_veterans='false', cb_workhome_tuple_index=0):
    """Build a CareerBuilder job-search URL from the given search filters.

    Args:
        posted: Value for the ``posted`` query parameter (the notebook passes
            a number of days as a string).
        radius_tuple_index: Index into the radius options
            ``('5', '10', '30', '50')`` (miles).
        cb_apply: Value for the ``cb_apply`` query parameter.
        keywords: Free-text search keywords; URL-encoded by this function.
        location_city: City name; URL-encoded by this function. May be blank.
        location_state: Two-letter, upper-case U.S. state abbreviation. May be
            blank only when ``location_city`` is blank too, in which case the
            ``location`` parameter is left out of the URL.
        pay: Value for the ``pay`` query parameter (the notebook passes
            thousands of USD as a string).
        emp_tuple_index: Index into the six employment-type code groups.
        cb_veterans: Value for the ``cb_veterans`` query parameter.
        cb_workhome_tuple_index: Index into
            ``('all', 'onsite', 'remote', 'hybrid')``.

    Returns:
        The search URL as a string. Parameters whose value is falsy are omitted.

    Raises:
        ValueError: If a location is given with an unknown state abbreviation,
            or if any of the index arguments is out of range.
    """
    base_url = 'https://www.careerbuilder.com/jobs?'

    # Set of all valid U.S. state abbreviations
    valid_states = {'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
                    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
                    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
                    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
                    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'}

    # Option tables are stored un-encoded; urlencode() below does all escaping.
    radius_tuple = ('5', '10', '30', '50')  # miles
    # Each entry is a comma-separated group of employment-type codes.
    emp_tuple = ('jtft,jtfp', 'jtpt,jtfp', 'jtct,jtc2,jtcc', 'jtch', 'jtse,jttf,jttp', 'jtfl')
    cb_workhome_tuple = ('all', 'onsite', 'remote', 'hybrid')

    # The location is optional (both parts default to ''), so only validate
    # the state when the caller actually supplied some location.
    has_location = bool(location_city or location_state)
    if has_location and location_state not in valid_states:
        raise ValueError(f"Invalid state: {location_state}. Please provide a valid state abbreviation (e.g., NY, PA, NC).")

    # Bounds are derived from the tuples so the checks cannot drift from the data.
    for index_name, index_value, options in (
        ('radius_tuple_index', radius_tuple_index, radius_tuple),
        ('emp_tuple_index', emp_tuple_index, emp_tuple),
        ('cb_workhome_tuple_index', cb_workhome_tuple_index, cb_workhome_tuple),
    ):
        if not (0 <= index_value < len(options)):
            raise ValueError(f"{index_name} must be between 0 and {len(options) - 1}.")

    # "City, ST" when both parts are present, just the state when the city is blank.
    location = ', '.join(part for part in (location_city, location_state) if part)

    extension_dict = {
        'posted': posted,
        'radius': radius_tuple[radius_tuple_index],
        'cb_apply': cb_apply,
        'keywords': keywords,
        'location': location,
        'pay': pay,
        'emp': emp_tuple[emp_tuple_index],
        'cb_veterans': cb_veterans,
        'cb_workhome': cb_workhome_tuple[cb_workhome_tuple_index],
    }

    # urlencode() turns spaces into '+' and percent-encodes reserved characters
    # ('+', '&', '#', ',', ...), which the previous manual replace() did not.
    query = {key: value for key, value in extension_dict.items() if value}
    return base_url + urlencode(query)

# Search parameters for the CareerBuilder query

keywords = 'engineer'
location_city, location_state = 'New York', 'NY'
posted = '1'  # days
pay = '60'  # thousands of USD

# Indexes into the option tuples defined inside get_careerbuilder_url
radius_tuple_index = 2
emp_tuple_index = 0
cb_workhome_tuple_index = 0

# Boolean-like flags, passed as strings
cb_apply = 'false'
cb_veterans = 'false'

# Last expression of the cell, so the notebook displays the resulting URL.
get_careerbuilder_url(
    posted=posted,
    radius_tuple_index=radius_tuple_index,
    cb_apply=cb_apply,
    keywords=keywords,
    location_city=location_city,
    location_state=location_state,
    pay=pay,
    emp_tuple_index=emp_tuple_index,
    cb_veterans=cb_veterans,
    cb_workhome_tuple_index=cb_workhome_tuple_index,
)

'https://www.careerbuilder.com/jobs?posted=1&radius=30&cb_apply=false&keywords=engineer&location=New+York%2C+NY&pay=20&emp=jtft%2Cjtfp&cb_veterans=false&cb_workhome=all'

In [46]:
def get_url(search_value='', location_city='', location_state='', company='', refine_by_location_index=0, radius='', days='', refine_by_salary='', refine_by_employment_index=0):
    """Build a ZipRecruiter job-search URL from the given search filters.

    Args:
        search_value: Free-text search terms; URL-encoded by this function.
        location_city: City name; URL-encoded by this function. May be blank.
        location_state: Two-letter, upper-case U.S. state abbreviation. May be
            blank only when ``location_city`` is blank too, in which case the
            ``location`` parameter is left out of the URL.
        company: Company name filter; URL-encoded by this function.
        refine_by_location_index: Index into ``('', 'no_remote', 'only_remote')``.
        radius: Value for the ``radius`` parameter (the notebook passes miles).
        days: Value for the ``days`` parameter (the notebook passes
            "jobs posted in the last x days").
        refine_by_salary: Value for the ``refine_by_salary`` parameter
            (the notebook passes a salary in USD).
        refine_by_employment_index: Index into the employment types
            ``('all', 'full_time', 'work_from_home')``.

    Returns:
        The search URL as a string. Parameters whose value is falsy are omitted.

    Raises:
        ValueError: If a location is given with an unknown state abbreviation,
            or if either index argument is out of range.
    """
    base_url = 'https://www.ziprecruiter.com/jobs-search?form=jobs-landing&'

    # Set of all valid U.S. state abbreviations
    valid_states = {'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
                    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
                    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
                    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
                    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'}

    # Option tables are stored un-encoded; urlencode() below does all escaping.
    refine_by_location_type_tuple = ('', 'no_remote', 'only_remote')
    refine_by_employment_tuple = tuple('employment_type:' + item for item in ('all', 'full_time', 'work_from_home'))

    # The location is optional (both parts default to ''), so only validate
    # the state when the caller actually supplied some location.
    has_location = bool(location_city or location_state)
    if has_location and location_state not in valid_states:
        raise ValueError(f"Invalid state: {location_state}. Please provide a valid state abbreviation (e.g., NY, PA, NC).")

    # Check that the provided indexes select an existing option
    if not (0 <= refine_by_location_index < len(refine_by_location_type_tuple)):
        raise ValueError("refine_by_location_index must be 0, 1, or 2.")

    if not (0 <= refine_by_employment_index < len(refine_by_employment_tuple)):
        raise ValueError("refine_by_employment_index must be 0, 1, or 2.")

    # "City, ST" when both parts are present, just the state when the city is blank.
    location = ', '.join(part for part in (location_city, location_state) if part)

    extension_dict = {
        'search': search_value,
        'location': location,
        'company': company,
        'refine_by_location_type': refine_by_location_type_tuple[refine_by_location_index],
        'radius': radius,
        'days': days,
        'refine_by_salary': refine_by_salary,
        'refine_by_employment': refine_by_employment_tuple[refine_by_employment_index],
    }

    # urlencode() turns spaces into '+' and percent-encodes reserved characters;
    # previously `company` was inserted raw and '+', '&', '#' were never escaped.
    query = {key: value for key, value in extension_dict.items() if value}
    return base_url + urlencode(query)

# ZipRecruiter search inputs
search_value = 'engineer'
location_city, location_state = 'New York', 'NY'
company = ''
radius = '5'                 # 5 miles
days = '30'                  # jobs posted in the last x days
refine_by_salary = '100000'  # salary in USD

# Build the search URL (both refine indexes left at 0) and print it
url = get_url(
    search_value=search_value,
    location_city=location_city,
    location_state=location_state,
    company=company,
    refine_by_location_index=0,
    radius=radius,
    days=days,
    refine_by_salary=refine_by_salary,
    refine_by_employment_index=0,
)
print(url)


https://www.ziprecruiter.com/jobs-search?form=jobs-landing&search=engineer&location=New+York%2C+NY&radius=5&days=30&refine_by_salary=100000&refine_by_employment=employment_type%3Aall


In [11]:
import undetected_chromedriver as uc

# Launch a visible (non-headless) Chrome session through undetected_chromedriver.
options = uc.ChromeOptions()
options.headless = False

driver = uc.Chrome(use_subprocess=True, options=options)


search_value = 'engineer'
location_city = 'New York'
location_state = 'NY'
# Hand-encoded "City, ST": spaces become '+', the comma becomes %2C.
location = f'{location_city.replace(" ", "+")}%2C+{location_state}'

# NOTE: the values from here down to `extension_dict` are defined but are not
# used when building `url` below; they are kept so the cell still defines the
# same names as before.
company = ''
refine_by_location_type_tuple = ('', 'no_remote', 'only_remote')
refine_by_location_type = refine_by_location_type_tuple[0]

radius = '5' # 5 miles

extension_dict = {
    'search': search_value,
    'location': location,
    'company': company,

}

url = f'https://www.ziprecruiter.com/jobs-search?form=jobs-landing&search={search_value}&location={location}'

# Fetch the rendered search page. The browser is closed in `finally` so it is
# released exactly once on every path, including interrupts that are not
# subclasses of Exception (e.g. KeyboardInterrupt during a slow page load).
try:
    driver.get(url)
    html_content = driver.page_source
except Exception as e:
    # Best-effort: report the failure; `html_content` is not (re)assigned here.
    print(f"An error occurred: {e}")
finally:
    driver.quit()

<html lang="en" style="overflow: hidden; position: fixed; height: 100%; width: 100%;"><head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <title>38,521 Engineer Jobs (NOW HIRING) in New York, NY - ZipRecruiter</title>
    <meta name="description" content="Browse 38,521 NEW YORK, NY ENGINEER job listings hiring now from companies with openings. Find your next job opportunity near you &amp; 1-Click Apply!">
    <meta name="robots" content="follow,noindex">
    <link rel="canonical" href="https://www.ziprecruiter.com/Jobs/Engineer/-in-New-York,NY">
    <link rel="manifest" href="/manifest.json">

    

    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">
    <meta name="apple-mobile-web-app-title" content="

In [12]:
# Save a snapshot of the search-results page.
# Mode 'w' keeps the cell idempotent: re-running it replaces the snapshot
# instead of appending another copy of the page to the same file.
# The encoding is explicit so non-ASCII characters in the page do not depend
# on the platform's default encoding.
with open("ziprecruiter.txt", 'w', encoding='utf-8') as file:
    file.write(html_content)

In [15]:
# Parse the fetched page with Python's built-in HTML parser.
soup = BeautifulSoup(markup=html_content, features='html.parser')

In [21]:
# Collected (job_title, job_link) pairs
job_listings = []

# Each job posting is assumed to live in its own <article> tag;
# only the first 20 articles on the page are inspected.
articles = soup.find_all('article', limit=20)

for article in articles:
    # The <h2> holds the job title and wraps the link to the posting
    h2_tag = article.find('h2')
    if not h2_tag:
        continue

    # Skip headings without a usable link
    link_tag = h2_tag.find('a')
    if not (link_tag and link_tag.has_attr('href')):
        continue

    job_title = h2_tag.get_text(strip=True)

    # urljoin leaves absolute links untouched and resolves every relative form
    # against the site root, including protocol-relative ('//host/...') and
    # slash-less ('jobs/...') hrefs that a plain startswith('/') check mishandles.
    job_link = urljoin("https://www.ziprecruiter.com", link_tag['href'])
    job_listings.append((job_title, job_link))

# Print job titles and links
for title, link in job_listings:
    print(f"Job Title: {title}\nJob Link: {link}\n")

In [28]:
# Take the link (second element of the (title, link) pair) of the first
# scraped listing; raises IndexError if no listings were collected.
job_url = job_listings[0][1]

In [29]:
# The browser session opened for the search page has already been quit by the
# time this cell runs, so start a fresh session for the job-detail page and
# always close it again, even if the page load fails.
job_page_options = uc.ChromeOptions()
job_page_options.headless = False
driver = uc.Chrome(use_subprocess=True, options=job_page_options)
try:
    driver.get(job_url)
    html_content = driver.page_source
finally:
    driver.quit()

In [31]:
# Save a snapshot of the job-detail page.
# Mode 'w' keeps the cell idempotent: re-running it replaces the snapshot
# instead of appending another copy of the page to the same file.
# The encoding is explicit so non-ASCII characters in the page do not depend
# on the platform's default encoding.
with open("ziprecruiter2.txt", 'w', encoding='utf-8') as file:
    file.write(html_content)

In [33]:
# Re-parse: `html_content` now holds the job-detail page, so `soup` is rebuilt from it.
soup = BeautifulSoup(markup=html_content, features='html.parser')

In [34]:
# Locate the first <div> carrying the "job_description" class and display it.
job_description_div = soup.find('div', attrs={'class': 'job_description'})
job_description_div

<div class="job_description">
<div><div><div><p>We offer a hybrid work environment. Most US-based positions can also<strong> </strong>be<strong> </strong>performed remotely (any exceptions will be noted in the Minimum Qualifications below.)</p></div><p><strong>Our Mission:</strong> </p><p>To actively connect people to their next great opportunity. </p><p><strong>Who We Are: </strong></p><p>ZipRecruiter is a leading online employment marketplace. Powered by AI-driven intelligent matching technology, the company actively connects job seekers with millions of businesses of all sizes through innovative mobile app, web, and email services, as well as partnerships with the best job sites on the web. ZipRecruiter has the #1 rated job search app on iOS &amp; Android. </p><p><strong>Summary</strong>:</p><p>As a BI Engineer II, you will redefine data consumption at ZipRecruiter. You will work cross functionally with Decision Science, Product, and Engineering teams to change the way we use data a

In [40]:
# Extract the job description as a single line of plain text and save it.
job_description_div = soup.find('div', class_='job_description')
if job_description_div is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("No <div class='job_description'> found in the fetched page.")

# Replace any literal '&nbsp;' text first, then collapse every run of
# whitespace to one space in a single pass (split() with no arguments splits
# on any whitespace and drops leading/trailing blanks).
raw_description = job_description_div.get_text().replace('&nbsp;', ' ')
job_description_text = ' '.join(raw_description.split())

# Mode 'w' keeps the cell idempotent (re-running does not append the same
# description again); the encoding is explicit so non-ASCII characters do not
# depend on the platform default.
with open('test.txt', 'w', encoding='utf-8') as file:
    file.write(job_description_text)