In [11]:
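# Not needed when using the local jobspy checkout imported further down;
# to install the published package instead, run:
# pip install -U python-jobspy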

In [1]:
# Install the third-party packages required by the local jobspy module.
#
# `--user` avoids permission errors on a system-wide interpreter, but pip
# refuses `--user` inside a virtual environment, so it is only added when the
# kernel is NOT running in one. pip's stderr is captured so that a real
# failure is reported with its cause instead of being silently discarded.
import subprocess
import sys

# Packages that might be missing from the kernel's environment
packages = ['markdownify', 'tls-client', 'regex']

# venv sets sys.prefix != sys.base_prefix; legacy virtualenv sets sys.real_prefix
in_virtualenv = hasattr(sys, 'real_prefix') or sys.prefix != sys.base_prefix

pip_install_cmd = [sys.executable, '-m', 'pip', 'install']
if not in_virtualenv:
    pip_install_cmd.append('--user')

for pkg in packages:
    # universal_newlines=True decodes stderr to str (works on Python 3.6+)
    result = subprocess.run(
        pip_install_cmd + [pkg],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if result.returncode == 0:
        # pip also exits 0 when the requirement is already satisfied
        print(f"✓ {pkg} installed/updated")
    else:
        # Surface the last line of pip's error output as the failure reason
        error_lines = (result.stderr or '').strip().splitlines()
        reason = error_lines[-1] if error_lines else f"pip exited with code {result.returncode}"
        print(f"⚠ {pkg} installation failed: {reason}")
print("Dependencies check complete.")

✓ markdownify installed/updated
✓ tls-client installed/updated
✓ regex installed/updated
Dependencies check complete.


In [None]:
import csv
import sys
import os
import subprocess
import site
import markdownify

# Locate the workspace root: the parent of the current working directory
# (the notebook is expected to be run from Scrapers/).
workspace_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
jobspy_dir = os.path.join(workspace_dir, 'jobspy')

# Fail fast when the local jobspy folder is missing
if not os.path.exists(jobspy_dir):
    raise ImportError(f"jobspy folder not found at: {jobspy_dir}")

# Ensure the workspace is the first sys.path entry so the local package is
# found before any installed copy.
if sys.path[:1] != [workspace_dir]:
    if workspace_dir in sys.path:
        sys.path.remove(workspace_dir)
    sys.path.insert(0, workspace_dir)

# Evict any cached jobspy modules (e.g. from an installed package) so the
# import below is resolved again against the updated sys.path.
modules_to_remove = [name for name in tuple(sys.modules) if name.startswith('jobspy')]
for module_name in modules_to_remove:
    sys.modules.pop(module_name)

# Import the package from workspace/jobspy
import jobspy
from jobspy import scrape_jobs

# Report whether the imported package really lives in the expected folder
local_jobspy_path = os.path.abspath(os.path.dirname(jobspy.__file__))
expected_path = os.path.abspath(jobspy_dir)
if local_jobspy_path != expected_path:
    print(f"⚠ Warning: jobspy loaded from {local_jobspy_path}, expected {expected_path}")
else:
    print(f"✓ Successfully loaded jobspy from: {local_jobspy_path}")


# Search parameters passed to scrape_jobs.
# Other site names listed in the original note: "glassdoor", "bayt", "naukri", "bdjobs"
search_options = {
    "site_name": ["indeed", "linkedin", "zip_recruiter", "google"],
    "search_term": "software engineer",
    "google_search_term": "software engineer jobs near San Francisco, CA since yesterday",
    "location": "San Francisco, CA",
    "results_wanted": 20,
    "hours_old": 72,
    "country_indeed": 'USA',
    # Optional settings, disabled by default:
    # "linkedin_fetch_description": True,  # gets more info such as description, direct job url (slower)
    # "proxies": ["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
}

jobs = scrape_jobs(**search_options)

# Summarise the result and preview the first rows
job_count = len(jobs)
print(f"Found {job_count} jobs")
print(jobs.head())

# Persist the results as CSV (the original note mentions to_excel as an alternative)
jobs.to_csv(
    "jobs.csv",
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
    escapechar="\\",
)

