# In [1]:
import os
import json
import pandas as pd
import logging
from datetime import datetime
from jobspy import scrape_jobs

# Route INFO-and-above records to job_scraper.log, one timestamped line each
logging.basicConfig(
    level=logging.INFO,
    filename="job_scraper.log",
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Load search parameters from config file
def load_config(path="config.json"):
    """Read the scraper's search parameters from a JSON file.

    Args:
        path: Location of the JSON config file. Defaults to ``config.json``
            in the current working directory.

    Returns:
        The parsed JSON document (a dict for the config this script uses).

    Raises:
        FileNotFoundError: If ``path`` does not exist. The absolute path that
            was tried is logged first so the failure is easy to diagnose.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    try:
        # JSON is UTF-8 by convention; do not depend on the platform's locale
        # encoding, which breaks non-ASCII search terms on some systems.
        with open(path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        logging.error("Config file not found: %s", os.path.abspath(path))
        raise

# Remove duplicates based on job title and company
def remove_duplicates(new_jobs, history_file="job_history.csv"):
    """Return only the jobs not seen in earlier runs and update the history.

    Two jobs are considered the same when both their ``title`` and
    ``company`` match.

    Args:
        new_jobs: DataFrame of freshly scraped jobs; must contain ``title``
            and ``company`` columns.
        history_file: CSV file accumulating every job seen so far. It is
            created on the first run and rewritten on every call.

    Returns:
        A DataFrame with the columns of ``new_jobs`` holding the rows whose
        (title, company) pair is absent from the history file, with repeats
        inside the batch collapsed (the last occurrence is kept).
    """
    key_cols = ["title", "company"]

    # Collapse repeats inside this batch first, so the first run (no history
    # file yet) is de-duplicated as well.
    unique_new = new_jobs.drop_duplicates(subset=key_cols, keep="last")

    if os.path.exists(history_file):
        old_jobs = pd.read_csv(history_file)
        combined = pd.concat([old_jobs, unique_new], ignore_index=True)

        # History rows come first, so with keep="first" a new row is flagged
        # exactly when its key already exists in the history.
        already_seen = combined.duplicated(subset=key_cols, keep="first")
        is_fresh = ~already_seen.iloc[len(old_jobs):].to_numpy()
        fresh = unique_new[is_fresh]

        # keep="last" lets the newest scrape refresh a known job's row.
        history = combined.drop_duplicates(subset=key_cols, keep="last")
    else:
        fresh = unique_new
        history = unique_new

    history.to_csv(history_file, index=False)  # Update history file
    return fresh.reset_index(drop=True)  # New unique jobs only

# Run job scraper
def scrape_and_save_jobs():
    """Scrape jobs using the configured search and save them to Excel.

    Reads ``site_name``, ``search_term``, ``google_search_term``,
    ``location``, ``results_wanted``, ``hours_old`` and ``country_indeed``
    from the config, passes them to ``scrape_jobs``, filters the result
    through ``remove_duplicates`` and writes what remains to
    ``job_leads_<YYYY-MM-DD>.xlsx`` in the working directory.

    Nothing is written when the scrape, or the de-duplicated result, is
    empty. A second run on the same day overwrites that day's workbook.

    Raises:
        KeyError: If a required key is missing from the config.
    """
    config = load_config()
    jobs = scrape_jobs(
        site_name=config["site_name"],
        search_term=config["search_term"],
        # Was misspelled ``google_search_trm``, so the Google query was
        # most likely never applied.
        google_search_term=config["google_search_term"],
        location=config["location"],
        results_wanted=config["results_wanted"],
        hours_old=config["hours_old"],
        country_indeed=config["country_indeed"],
    )

    if jobs.empty:
        logging.info("No new jobs found.")
        return

    jobs = remove_duplicates(jobs)
    if jobs.empty:
        # Everything scraped was already known; skip writing an empty file.
        logging.info("No new jobs found.")
        return

    # Save with timestamp
    filename = f"job_leads_{datetime.now().strftime('%Y-%m-%d')}.xlsx"
    jobs.to_excel(filename, index=False)

    logging.info("Saved %d new job leads to %s", len(jobs), filename)
    print(f"Saved {len(jobs)} new job leads to {filename}")

# Entry point: run one scrape-and-save pass when this file is executed
# directly (skipped when it is imported as a module).
if __name__ == "__main__":
    scrape_and_save_jobs()

# Captured notebook output (not code):
#   from pandas.core import (
#
#
# FileNotFoundError: [Errno 2] No such file or directory: 'config.json'