<a href="https://colab.research.google.com/github/Vasanthnaik11/Movie-Recommendation/blob/main/AI_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
#!/usr/bin/env python3
"""
job_bot_all_in_one.py

Single-file semi-automated job application assistant + sample dataset.

What it does:
- Creates a sample dataset 'jobs_dataset.csv' (if not already present).
- Reads job listings from that CSV.
- For each job, generates a tailored cover letter and resume bullets using OpenAI.
- Saves outputs in outputs/<job_id>/ (cover_letter.txt, resume_bullets.txt, job_meta.json).
- Optionally opens the job URL in a Chrome window and attempts to prefill common fields using Selenium (stops before final submit).
- Stores job status in a local SQLite DB 'job_bot.sqlite'.

Configuration:
- Edit CONFIG dictionary below (or create config.json and it will be read if present).
- Provide OPENAI_API_KEY via env var or .env.

Important:
- This script is semi-automated and will NOT click final "Submit".
- Respect job site TOS. Use automation responsibly.
"""

import os
import json
import csv
import time
import hashlib
import sqlite3
import traceback
from pathlib import Path
from typing import List, Dict, Optional, Any

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Selenium imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.common.exceptions import WebDriverException

# OpenAI
import openai

# Load .env if present
load_dotenv()
# Used to securely store your API key
from google.colab import userdata

# -----------------------
# Configuration (edit these settings)
# -----------------------
CONFIG: Dict[str, Any] = {
    # If chromedriver is not on PATH, set absolute path here, e.g. "C:/chromedriver/chromedriver.exe" or "/usr/local/bin/chromedriver"
    "CHROME_DRIVER_PATH": None,
    "OPENAI_MODEL": "gpt-4o-mini",
    # If you have a resume PDF you want uploaded when a file input is found, set the path; else leave empty.
    "RESUME_PDF_PATH": "", # <-- Replace "" with the actual path to your resume PDF, e.g., "/content/my_resume.pdf"
    # If true, the script will attempt to open each job URL in Chrome and prefill fields (requires chromedriver).
    "ENABLE_PREFILL": True,
    # How many seconds to keep the browser open for manual review after prefill (set to 0 to close immediately)
    "PAUSE_AFTER_PREFILL_SECONDS": 45,
    # Output directory
    "OUTPUT_DIR": "outputs",
    # Dataset filename (CSV)
    "DATASET_CSV": "jobs_dataset.csv",
    # Max jobs to process from dataset (0 or None means all)
    "MAX_JOBS": 0,
    # Headless mode for Selenium (False = visible browser; recommended visible so you can review)
    "SELENIUM_HEADLESS": False,
    # Optional Chrome user-data-dir (useful if you want to reuse a logged in session for LinkedIn, etc.)
    "CHROME_USER_DATA_DIR": None,
}

# If there's a config.json in the folder, load & merge (so user can edit a file instead of code)
CONFIG_PATH = Path("config.json")
if CONFIG_PATH.exists():
    try:
        with CONFIG_PATH.open("r", encoding="utf-8") as f:
            user_cfg = json.load(f)
            # Remove CHROME_USER_DATA_DIR if present in config.json to avoid conflict
            if "CHROME_USER_DATA_DIR" in user_cfg:
                del user_cfg["CHROME_USER_DATA_DIR"]
            CONFIG.update(user_cfg)
            print("[config] Loaded config.json")
    except Exception as e:
        print("[config] Failed to read config.json — continuing with built-in CONFIG. Error:", e)

# -----------------------
# Environment / OpenAI setup
# -----------------------
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_API")
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

if not OPENAI_API_KEY:
    print("WARNING: OPENAI_API_KEY not found in environment. You must set it for OpenAI generation to work.")
else:
    openai.api_key = OPENAI_API_KEY
OPENAI_MODEL = CONFIG.get("OPENAI_MODEL", "gpt-4o-mini")

# -----------------------
# Utilities
# -----------------------
def ensure_dir(path: Path):
    path.mkdir(parents=True, exist_ok=True)

def job_id_from_string(s: str) -> str:
    if not s:
        s = "no-url"
    return hashlib.sha1(s.encode("utf-8")).hexdigest()[:12]

def save_text(path: Path, text: str):
    ensure_dir(path.parent)
    path.write_text(text, encoding="utf-8")

def print_banner(msg: str):
    print("\n" + "="*8 + " " + msg + " " + "="*8 + "\n")

# -----------------------
# Sample dataset creation
# -----------------------
SAMPLE_JOBS = [
    # Each record: title, company, location, url (optional), snippet
    {
        "title": "Software Engineer (Backend)",
        "company": "Acme Tech",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample1",  # sample URL (not necessarily live)
        "snippet": "Build and maintain scalable APIs in Python. Experience with Docker and AWS preferred."
    },
    {
        "title": "Data Analyst",
        "company": "DataSense Analytics",
        "location": "Remote",
        "url": "",
        "snippet": "Analyze large datasets using SQL and Python. Experience with visualization tools."
    },
    {
        "title": "AI Engineer",
        "company": "NeuroLabs",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample2",
        "snippet": "Develop and deploy ML models in production. Familiarity with PyTorch/TensorFlow."
    },
    {
        "title": "Frontend Developer",
        "company": "BrightUI",
        "location": "Pune, India",
        "url": "",
        "snippet": "Create responsive React interfaces and collaborate with designers."
    },
    {
        "title": "DevOps Engineer",
        "company": "CloudWorks",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample3",
        "snippet": "Implement CI/CD, manage Kubernetes clusters, and automate infrastructure."
    }
]

def create_dataset_if_missing(csv_path: Path):
    if csv_path.exists():
        print(f"[dataset] Found existing dataset at {csv_path}")
        return
    print(f"[dataset] Creating sample dataset at {csv_path} ...")
    ensure_dir(csv_path.parent)
    with csv_path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title","company","location","url","snippet"])
        writer.writeheader()
        for r in SAMPLE_JOBS:
            writer.writerow(r)
    print("[dataset] Sample dataset created. You can edit jobs_dataset.csv to add/remove jobs.")

# -----------------------
# Storage: SQLite
# -----------------------
DB_PATH = Path("job_bot.sqlite")

def init_db():
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute("""
    CREATE TABLE IF NOT EXISTS jobs (
        id TEXT PRIMARY KEY,
        title TEXT,
        company TEXT,
        location TEXT,
        url TEXT,
        snippet TEXT,
        status TEXT,
        out_dir TEXT,
        generated_at TEXT,
        meta TEXT
    )
    """)
    conn.commit()
    conn.close()

def upsert_job(job: Dict[str, Any], status: str, out_dir: Optional[Path] = None):
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    job_id = job["job_id"]
    meta_json = json.dumps(job, ensure_ascii=False)
    cur.execute("""
    INSERT OR REPLACE INTO jobs (id, title, company, location, url, snippet, status, out_dir, generated_at, meta)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), ?)
    """, (job_id, job.get("title"), job.get("company"), job.get("location"), job.get("url"), job.get("snippet"), status, str(out_dir or ""), meta_json))
    conn.commit()
    conn.close()

# -----------------------
# OpenAI generator
# -----------------------
def generate_documents(profile: Dict[str, Any], job: Dict[str, Any], model: str = OPENAI_MODEL):
    """
    Returns (cover_text, bullets_text)
    """
    if not OPENAI_API_KEY:
        print("[openai] Skipping generation because OPENAI_API_KEY is not set.")
        return ("", "")

    job_title = job.get("title", "")
    company = job.get("company", "")
    snippet = job.get("snippet", "")

    prompt_cover = (
        f"You are a professional career assistant. Write a one-page cover letter (200-350 words) "
        f"for {profile.get('full_name')} applying to the role '{job_title}' at '{company}'. "
        f"Use the following profile:\n{json.dumps(profile, indent=2)}\n\n"
        f"Job snippet:\n{snippet}\n\n"
        "Make the cover letter tailored, professional, and include a one-sentence call-to-action at the end."
    )

    prompt_bullets = (
        f"Generate 6 concise, achievement-oriented resume bullet points for {profile.get('full_name')} tailored to the '{job_title}' role at '{company}'. "
        f"Use the profile:\n{json.dumps(profile, indent=2)}\n\n"
        "Bullets should be 8-20 words, use strong action verbs, and include plausible metrics where appropriate."
    )

    cover_text, bullets_text = "", ""
    try:
        print("[openai] Generating cover letter...")
        resp = openai.ChatCompletion.create(
            model=model,
            messages=[{"role":"system","content":"You are a helpful assistant."},
                      {"role":"user","content":prompt_cover}],
            temperature=0.2,
            max_tokens=700
        )
        cover_text = resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[openai] Cover generation error:", e)
        cover_text = f"[error generating cover] {e}"

    try:
        print("[openai] Generating resume bullets...")
        resp2 = openai.ChatCompletion.create(
            model=model,
            messages=[{"role":"system","content":"You are a helpful assistant."},
                      {"role":"user","content":prompt_bullets}],
            temperature=0.2,
            max_tokens=400
        )
        bullets_text = resp2["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[openai] Bullets generation error:", e)
        bullets_text = f"[error generating bullets] {e}"

    return cover_text, bullets_text

# -----------------------
# Selenium autofill utilities
# -----------------------
def init_driver(chrome_driver_path: Optional[str] = None, headless: bool = False, user_data_dir: Optional[str] = None):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless=new")
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    if user_data_dir:
        options.add_argument(f"--user-data-dir={user_data_dir}")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    try:
        if chrome_driver_path:
            service = ChromeService(chrome_driver_path)
            driver = webdriver.Chrome(service=service, options=options)
        else:
            driver = webdriver.Chrome(options=options)
    except WebDriverException as e:
        print("[selenium] Could not start Chrome WebDriver. Ensure chromedriver is installed & matching your Chrome version.")
        raise e
    return driver

def fill_inputs_heuristic(driver, profile: Dict[str, Any]) -> int:
    """
    Heuristic fill: attempts to fill many common input fields using label/placeholder/name attributes.
    Returns number of fields filled (approx).
    """
    filled = 0
    try:
        elems = driver.find_elements(By.TAG_NAME, "input")
        for el in elems:
            try:
                typ = (el.get_attribute("type") or "").lower()
                if typ in ["hidden","submit","button","checkbox","radio","file"]:
                    continue
                name = (el.get_attribute("name") or "").lower()
                placeholder = (el.get_attribute("placeholder") or "").lower()
                aria = (el.get_attribute("aria-label") or "").lower()
                id_attr = el.get_attribute("id") or ""
                label_text = ""
                if id_attr:
                    try:
                        lbl = driver.find_element(By.CSS_SELECTOR, f"label[for='{id_attr}']")
                        label_text = (lbl.text or "").lower()
                    except Exception:
                        label_text = ""
                if not label_text:
                    # try parent label
                    try:
                        parent_lbl = el.find_element(By.XPATH, "ancestor::label")
                        label_text = (parent_lbl.text or "").lower()
                    except Exception:
                        label_text = label_text or ""
                # determine value
                value = None
                if "email" in name or "email" in placeholder or "email" in aria or "email" in label_text:
                    value = profile.get("email")
                elif "first" in name or "first" in placeholder or "first" in label_text:
                    value = profile.get("first_name")
                elif "last" in name or "last" in placeholder or "last" in label_text:
                    value = profile.get("last_name")
                elif "phone" in name or "phone" in placeholder or "phone" in label_text:
                    value = profile.get("phone")
                elif ("full" in label_text and "name" in label_text) or ("fullname" in name) or ("name" in name and "first" not in name and "last" not in name):
                    value = profile.get("full_name")
                elif "location" in name or "location" in placeholder or "city" in name:
                    value = profile.get("location")
                # fill if we have a value
                if value:
                    try:
                        el.clear()
                        el.send_keys(value)
                    except Exception:
                        driver.execute_script("arguments[0].value = arguments[1];", el, value)
                    filled += 1
            except Exception:
                continue
    except Exception:
        pass
    return filled

def upload_resume_if_found(driver, resume_pdf_path: Optional[str]) -> bool:
    if not resume_pdf_path:
        return False
    try:
        file_inputs = driver.find_elements(By.XPATH, "//input[@type='file']")
        if file_inputs:
            file_inputs[0].send_keys(str(Path(resume_pdf_path).resolve()))
            return True
    except Exception as e:
        print("[autofill] resume upload error:", e)
    return False

def prefill_with_selenium(job_url: str, profile: Dict[str, Any], resume_pdf_path: Optional[str], chrome_driver_path: Optional[str], headless: bool, pause_seconds: int, user_data_dir: Optional[str], out_dir: Path):
    print(f"[autofill] Opening job URL: {job_url}")
    driver = init_driver(chrome_driver_path=chrome_driver_path, headless=headless, user_data_dir=user_data_dir)
    try:
        driver.get(job_url)
        time.sleep(3)  # allow JS to run
        filled = fill_inputs_heuristic(driver, profile)
        uploaded = upload_resume_if_found(driver, resume_pdf_path)
        # save screenshot
        ensure_dir(out_dir)
        screenshot_path = out_dir / "prefill_screenshot.png"
        driver.save_screenshot(str(screenshot_path))
        print(f"[autofill] Prefilled ~{filled} fields. Resume uploaded: {uploaded}. Screenshot: {screenshot_path}")
        if pause_seconds > 0:
            print(f"[autofill] Browser will stay open for {pause_seconds} seconds for manual review (please manually submit if ready).")
            time.sleep(pause_seconds)
    except Exception as e:
        print("[autofill] Error during prefill:", e)
        traceback.print_exc()
    finally:
        try:
            driver.quit()
        except Exception:
            pass

# -----------------------
# Main flow
# -----------------------
def read_dataset(csv_path: Path) -> List[Dict[str, str]]:
    jobs = []
    if not csv_path.exists():
        print("[dataset] No dataset CSV found.")
        return jobs
    with csv_path.open("r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            jobs.append({
                "title": row.get("title","").strip(),
                "company": row.get("company","").strip(),
                "location": row.get("location","").strip(),
                "url": row.get("url","").strip(),
                "snippet": row.get("snippet","").strip()
            })
    return jobs

def main():
    print_banner("Semi-automated Job Bot (All-in-one)")
    csv_path = Path(CONFIG.get("DATASET_CSV", "jobs_dataset.csv"))
    create_dataset_if_missing(csv_path)
    init_db()

    jobs = read_dataset(csv_path)
    if not jobs:
        print("[main] No jobs found in dataset. Edit jobs_dataset.csv and re-run.")
        return

    max_jobs = int(CONFIG.get("MAX_JOBS") or 0)
    if max_jobs > 0:
        jobs = jobs[:max_jobs]

    profile = {
        # --- FILL IN YOUR DETAILS BELOW ---
        "first_name": "Your First Name",
        "last_name": "Your Last Name",
        "full_name": "Your Full Name",
        "email": "your.email@example.com",
        "phone": "Your Phone Number",
        "location": "Your City, Your Country",
        "current_title": "Your Current Job Title",
        "years_experience": 5, # <-- Replace with your years of experience
        "skills": ["Skill 1","Skill 2","Skill 3","Skill 4","Skill 5","Skill 6","Skill 7"], # <-- Replace with your key skills
        "education": "Your Highest Degree, University Name",
        "resume_text": "A brief summary of your experience and qualifications." # <-- You can also paste a summary of your resume here
        # -----------------------------------
    }

    output_root = Path(CONFIG.get("OUTPUT_DIR","outputs"))
    ensure_dir(output_root)

    for idx, job in enumerate(jobs, start=1):
        print_banner(f"Job {idx}/{len(jobs)}: {job['title']} @ {job['company']}")
        # Build job_id and out_dir
        job_hash = job_id_from_string(job.get("url") or (job.get("title","") + job.get("company","")))
        job["job_id"] = job_hash
        out_dir = output_root / job_hash
        ensure_dir(out_dir)

        # Save job meta
        save_text(out_dir / "job_meta.json", json.dumps(job, indent=2, ensure_ascii=False))

        # Upsert in DB as found
        upsert_job(job, status="found", out_dir=out_dir)

        # Generate docs via OpenAI (if key present)
        cover_text, bullets_text = generate_documents(profile, job)
        save_text(out_dir / "cover_letter.txt", cover_text)
        save_text(out_dir / "resume_bullets.txt", bullets_text)

        # Copy resume PDF if provided (into out_dir)
        resume_pdf = CONFIG.get("RESUME_PDF_PATH") or None
        if resume_pdf:
            try:
                from shutil import copyfile
                rp = Path(resume_pdf)
                if rp.exists():
                    copyfile(str(rp), str(out_dir / rp.name))
                    print(f"[main] Copied resume PDF into {out_dir}")
                else:
                    print("[main] RESUME_PDF_PATH set but file not found:", rp)
            except Exception as e:
                print("[main] Could not copy resume PDF:", e)

        # Optionally prefill application page
        job_url = job.get("url") or ""
        if CONFIG.get("ENABLE_PREFILL") and job_url:
            try:
                prefill_with_selenium(
                    job_url,
                    profile,
                    resume_pdf,
                    CONFIG.get("CHROME_DRIVER_PATH"),
                    CONFIG.get("SELENIUM_HEADLESS", False),
                    int(CONFIG.get("PAUSE_AFTER_PREFILL_SECONDS", 45)),
                    CONFIG.get("CHROME_USER_DATA_DIR"),
                    out_dir
                )
                upsert_job(job, status="pending_review", out_dir=out_dir)
            except Exception as e:
                print("[main] Prefill failed:", e)
                upsert_job(job, status="prefill_error", out_dir=out_dir)
        else:
            if not job_url:
                print("[main] No URL for this job — skipping prefill.")
            else:
                print("[main] PREFILL disabled in config — skipping prefill.")
            upsert_job(job, status="docs_generated", out_dir=out_dir)

        print(f"[main] Done with job {job['title']} @ {job['company']}. Outputs in {out_dir}")

    print_banner("All jobs processed. Check outputs/ and job_bot.sqlite")

if __name__ == "__main__":
    main()



[dataset] Found existing dataset at jobs_dataset.csv


[openai] Generating cover letter...
[openai] Cover generation error: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

[openai] Generating resume bullets...
[openai] Bullets generation error: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g

In [1]:
!python -m pip install --upgrade requests beautifulsoup4 selenium openai python-dotenv

Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.14.2-py3-none-any.whl.metadata (3.8 kB)
Collecting selenium
  Downloading selenium-4.36.0-py3-none-any.whl.metadata (7.5 kB)
Collecting openai
  Downloading openai-2.3.0-py3-none-any.whl.metadata (29 kB)
Collecting trio<1.0,>=0.30.0 (from selenium)
  Downloading trio-0.31.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio<1.0,>=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading requests-2.32.5-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m6.1 MB/s[0m eta [36m0:0

In [2]:
!export OPENAI_API_KEY="sk-..."   # macOS / Linux
# or set in Windows PowerShell: $env:OPENAI_API_KEY = "sk-..."

In [3]:
#!/usr/bin/env python3
"""
job_bot_all_in_one.py

Single-file semi-automated job application assistant + sample dataset.

What it does:
- Creates a sample dataset 'jobs_dataset.csv' (if not already present).
- Reads job listings from that CSV.
- For each job, generates a tailored cover letter and resume bullets using OpenAI.
- Saves outputs in outputs/<job_id>/ (cover_letter.txt, resume_bullets.txt, job_meta.json).
- Optionally opens the job URL in a Chrome window and attempts to prefill common fields using Selenium (stops before final submit).
- Stores job status in a local SQLite DB 'job_bot.sqlite'.

Configuration:
- Edit CONFIG dictionary below (or create config.json and it will be read if present).
- Provide OPENAI_API_KEY via env var or .env.

Important:
- This script is semi-automated and will NOT click final "Submit".
- Respect job site TOS. Use automation responsibly.
"""

import os
import json
import csv
import time
import hashlib
import sqlite3
import traceback
from pathlib import Path
from typing import List, Dict, Optional, Any

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Selenium imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.common.exceptions import WebDriverException

# OpenAI
import openai

# Load .env if present
load_dotenv()

# -----------------------
# Configuration (edit these settings)
# -----------------------
CONFIG: Dict[str, Any] = {
    # If chromedriver is not on PATH, set absolute path here, e.g. "C:/chromedriver/chromedriver.exe" or "/usr/local/bin/chromedriver"
    "CHROME_DRIVER_PATH": None,
    "OPENAI_MODEL": "gpt-4o-mini",
    # If you have a resume PDF you want uploaded when a file input is found, set the path; else leave empty.
    "RESUME_PDF_PATH": "",
    # If true, the script will attempt to open each job URL in Chrome and prefill fields (requires chromedriver).
    "ENABLE_PREFILL": True,
    # How many seconds to keep the browser open for manual review after prefill (set to 0 to close immediately)
    "PAUSE_AFTER_PREFILL_SECONDS": 45,
    # Output directory
    "OUTPUT_DIR": "outputs",
    # Dataset filename (CSV)
    "DATASET_CSV": "jobs_dataset.csv",
    # Max jobs to process from dataset (0 or None means all)
    "MAX_JOBS": 0,
    # Headless mode for Selenium (False = visible browser; recommended visible so you can review)
    "SELENIUM_HEADLESS": False,
    # Optional Chrome user-data-dir (useful if you want to reuse a logged in session for LinkedIn, etc.)
    "CHROME_USER_DATA_DIR": None, # Removed this line
}

# If there's a config.json in the folder, load & merge (so user can edit a file instead of code)
CONFIG_PATH = Path("config.json")
if CONFIG_PATH.exists():
    try:
        with CONFIG_PATH.open("r", encoding="utf-8") as f:
            user_cfg = json.load(f)
            # Remove CHROME_USER_DATA_DIR if present in config.json to avoid conflict
            if "CHROME_USER_DATA_DIR" in user_cfg:
                del user_cfg["CHROME_USER_DATA_DIR"]
            CONFIG.update(user_cfg)
            print("[config] Loaded config.json")
    except Exception as e:
        print("[config] Failed to read config.json — continuing with built-in CONFIG. Error:", e)

# -----------------------
# Environment / OpenAI setup
# -----------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_API")
if not OPENAI_API_KEY:
    print("WARNING: OPENAI_API_KEY not found in environment. You must set it for OpenAI generation to work.")
else:
    openai.api_key = OPENAI_API_KEY
OPENAI_MODEL = CONFIG.get("OPENAI_MODEL", "gpt-4o-mini")

# -----------------------
# Utilities
# -----------------------
def ensure_dir(path: Path):
    path.mkdir(parents=True, exist_ok=True)

def job_id_from_string(s: str) -> str:
    if not s:
        s = "no-url"
    return hashlib.sha1(s.encode("utf-8")).hexdigest()[:12]

def save_text(path: Path, text: str):
    ensure_dir(path.parent)
    path.write_text(text, encoding="utf-8")

def print_banner(msg: str):
    print("\n" + "="*8 + " " + msg + " " + "="*8 + "\n")

# -----------------------
# Sample dataset creation
# -----------------------
SAMPLE_JOBS = [
    # Each record: title, company, location, url (optional), snippet
    {
        "title": "Software Engineer (Backend)",
        "company": "Acme Tech",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample1",  # sample URL (not necessarily live)
        "snippet": "Build and maintain scalable APIs in Python. Experience with Docker and AWS preferred."
    },
    {
        "title": "Data Analyst",
        "company": "DataSense Analytics",
        "location": "Remote",
        "url": "",
        "snippet": "Analyze large datasets using SQL and Python. Experience with visualization tools."
    },
    {
        "title": "AI Engineer",
        "company": "NeuroLabs",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample2",
        "snippet": "Develop and deploy ML models in production. Familiarity with PyTorch/TensorFlow."
    },
    {
        "title": "Frontend Developer",
        "company": "BrightUI",
        "location": "Pune, India",
        "url": "",
        "snippet": "Create responsive React interfaces and collaborate with designers."
    },
    {
        "title": "DevOps Engineer",
        "company": "CloudWorks",
        "location": "Bengaluru, India",
        "url": "https://in.indeed.com/viewjob?jk=sample3",
        "snippet": "Implement CI/CD, manage Kubernetes clusters, and automate infrastructure."
    }
]

def create_dataset_if_missing(csv_path: Path):
    if csv_path.exists():
        print(f"[dataset] Found existing dataset at {csv_path}")
        return
    print(f"[dataset] Creating sample dataset at {csv_path} ...")
    ensure_dir(csv_path.parent)
    with csv_path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title","company","location","url","snippet"])
        writer.writeheader()
        for r in SAMPLE_JOBS:
            writer.writerow(r)
    print("[dataset] Sample dataset created. You can edit jobs_dataset.csv to add/remove jobs.")

# -----------------------
# Storage: SQLite
# -----------------------
DB_PATH = Path("job_bot.sqlite")

def init_db():
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute("""
    CREATE TABLE IF NOT EXISTS jobs (
        id TEXT PRIMARY KEY,
        title TEXT,
        company TEXT,
        location TEXT,
        url TEXT,
        snippet TEXT,
        status TEXT,
        out_dir TEXT,
        generated_at TEXT,
        meta TEXT
    )
    """)
    conn.commit()
    conn.close()

def upsert_job(job: Dict[str, Any], status: str, out_dir: Optional[Path] = None):
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    job_id = job["job_id"]
    meta_json = json.dumps(job, ensure_ascii=False)
    cur.execute("""
    INSERT OR REPLACE INTO jobs (id, title, company, location, url, snippet, status, out_dir, generated_at, meta)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, datetime('now'), ?)
    """, (job_id, job.get("title"), job.get("company"), job.get("location"), job.get("url"), job.get("snippet"), status, str(out_dir or ""), meta_json))
    conn.commit()
    conn.close()

# -----------------------
# OpenAI generator
# -----------------------
def generate_documents(profile: Dict[str, Any], job: Dict[str, Any], model: str = OPENAI_MODEL):
    """
    Returns (cover_text, bullets_text)
    """
    if not OPENAI_API_KEY:
        print("[openai] Skipping generation because OPENAI_API_KEY is not set.")
        return ("", "")

    job_title = job.get("title", "")
    company = job.get("company", "")
    snippet = job.get("snippet", "")

    prompt_cover = (
        f"You are a professional career assistant. Write a one-page cover letter (200-350 words) "
        f"for {profile.get('full_name')} applying to the role '{job_title}' at '{company}'. "
        f"Use the following profile:\n{json.dumps(profile, indent=2)}\n\n"
        f"Job snippet:\n{snippet}\n\n"
        "Make the cover letter tailored, professional, and include a one-sentence call-to-action at the end."
    )

    prompt_bullets = (
        f"Generate 6 concise, achievement-oriented resume bullet points for {profile.get('full_name')} tailored to the '{job_title}' role at '{company}'. "
        f"Use the profile:\n{json.dumps(profile, indent=2)}\n\n"
        "Bullets should be 8-20 words, use strong action verbs, and include plausible metrics where appropriate."
    )

    cover_text, bullets_text = "", ""
    try:
        print("[openai] Generating cover letter...")
        resp = openai.ChatCompletion.create(
            model=model,
            messages=[{"role":"system","content":"You are a helpful assistant."},
                      {"role":"user","content":prompt_cover}],
            temperature=0.2,
            max_tokens=700
        )
        cover_text = resp["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[openai] Cover generation error:", e)
        cover_text = f"[error generating cover] {e}"

    try:
        print("[openai] Generating resume bullets...")
        resp2 = openai.ChatCompletion.create(
            model=model,
            messages=[{"role":"system","content":"You are a helpful assistant."},
                      {"role":"user","content":prompt_bullets}],
            temperature=0.2,
            max_tokens=400
        )
        bullets_text = resp2["choices"][0]["message"]["content"].strip()
    except Exception as e:
        print("[openai] Bullets generation error:", e)
        bullets_text = f"[error generating bullets] {e}"

    return cover_text, bullets_text

# -----------------------
# Selenium autofill utilities
# -----------------------
def init_driver(chrome_driver_path: Optional[str] = None, headless: bool = False, user_data_dir: Optional[str] = None):
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument("--headless=new")
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    if user_data_dir:
        options.add_argument(f"--user-data-dir={user_data_dir}")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    try:
        if chrome_driver_path:
            service = ChromeService(chrome_driver_path)
            driver = webdriver.Chrome(service=service, options=options)
        else:
            driver = webdriver.Chrome(options=options)
    except WebDriverException as e:
        print("[selenium] Could not start Chrome WebDriver. Ensure chromedriver is installed & matching your Chrome version.")
        raise e
    return driver

def fill_inputs_heuristic(driver, profile: Dict[str, Any]) -> int:
    """
    Heuristic fill: attempts to fill many common input fields using label/placeholder/name attributes.
    Returns number of fields filled (approx).
    """
    filled = 0
    try:
        elems = driver.find_elements(By.TAG_NAME, "input")
        for el in elems:
            try:
                typ = (el.get_attribute("type") or "").lower()
                if typ in ["hidden","submit","button","checkbox","radio","file"]:
                    continue
                name = (el.get_attribute("name") or "").lower()
                placeholder = (el.get_attribute("placeholder") or "").lower()
                aria = (el.get_attribute("aria-label") or "").lower()
                id_attr = el.get_attribute("id") or ""
                label_text = ""
                if id_attr:
                    try:
                        lbl = driver.find_element(By.CSS_SELECTOR, f"label[for='{id_attr}']")
                        label_text = (lbl.text or "").lower()
                    except Exception:
                        label_text = ""
                if not label_text:
                    # try parent label
                    try:
                        parent_lbl = el.find_element(By.XPATH, "ancestor::label")
                        label_text = (parent_lbl.text or "").lower()
                    except Exception:
                        label_text = label_text or ""
                # determine value
                value = None
                if "email" in name or "email" in placeholder or "email" in aria or "email" in label_text:
                    value = profile.get("email")
                elif "first" in name or "first" in placeholder or "first" in label_text:
                    value = profile.get("first_name")
                elif "last" in name or "last" in placeholder or "last" in label_text:
                    value = profile.get("last_name")
                elif "phone" in name or "phone" in placeholder or "phone" in label_text:
                    value = profile.get("phone")
                elif ("full" in label_text and "name" in label_text) or ("fullname" in name) or ("name" in name and "first" not in name and "last" not in name):
                    value = profile.get("full_name")
                elif "location" in name or "location" in placeholder or "city" in name:
                    value = profile.get("location")
                # fill if we have a value
                if value:
                    try:
                        el.clear()
                        el.send_keys(value)
                    except Exception:
                        driver.execute_script("arguments[0].value = arguments[1];", el, value)
                    filled += 1
            except Exception:
                continue
    except Exception:
        pass
    return filled

def upload_resume_if_found(driver, resume_pdf_path: Optional[str]) -> bool:
    if not resume_pdf_path:
        return False
    try:
        file_inputs = driver.find_elements(By.XPATH, "//input[@type='file']")
        if file_inputs:
            file_inputs[0].send_keys(str(Path(resume_pdf_path).resolve()))
            return True
    except Exception as e:
        print("[autofill] resume upload error:", e)
    return False

def prefill_with_selenium(job_url: str, profile: Dict[str, Any], resume_pdf_path: Optional[str], chrome_driver_path: Optional[str], headless: bool, pause_seconds: int, user_data_dir: Optional[str], out_dir: Path):
    print(f"[autofill] Opening job URL: {job_url}")
    driver = init_driver(chrome_driver_path=chrome_driver_path, headless=headless, user_data_dir=user_data_dir)
    try:
        driver.get(job_url)
        time.sleep(3)  # allow JS to run
        filled = fill_inputs_heuristic(driver, profile)
        uploaded = upload_resume_if_found(driver, resume_pdf_path)
        # save screenshot
        ensure_dir(out_dir)
        screenshot_path = out_dir / "prefill_screenshot.png"
        driver.save_screenshot(str(screenshot_path))
        print(f"[autofill] Prefilled ~{filled} fields. Resume uploaded: {uploaded}. Screenshot: {screenshot_path}")
        if pause_seconds > 0:
            print(f"[autofill] Browser will stay open for {pause_seconds} seconds for manual review (please manually submit if ready).")
            time.sleep(pause_seconds)
    except Exception as e:
        print("[autofill] Error during prefill:", e)
        traceback.print_exc()
    finally:
        try:
            driver.quit()
        except Exception:
            pass

# -----------------------
# Main flow
# -----------------------
def read_dataset(csv_path: Path) -> List[Dict[str, str]]:
    jobs = []
    if not csv_path.exists():
        print("[dataset] No dataset CSV found.")
        return jobs
    with csv_path.open("r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            jobs.append({
                "title": row.get("title","").strip(),
                "company": row.get("company","").strip(),
                "location": row.get("location","").strip(),
                "url": row.get("url","").strip(),
                "snippet": row.get("snippet","").strip()
            })
    return jobs

def main():
    print_banner("Semi-automated Job Bot (All-in-one)")
    csv_path = Path(CONFIG.get("DATASET_CSV", "jobs_dataset.csv"))
    create_dataset_if_missing(csv_path)
    init_db()

    jobs = read_dataset(csv_path)
    if not jobs:
        print("[main] No jobs found in dataset. Edit jobs_dataset.csv and re-run.")
        return

    max_jobs = int(CONFIG.get("MAX_JOBS") or 0)
    if max_jobs > 0:
        jobs = jobs[:max_jobs]

    profile = {
        # You can edit the profile here or in config.json (the script does not currently read profile from config.json
        # to keep this single-file simple). If you'd like to supply your profile via config.json, add code accordingly.
        "first_name": "John",
        "last_name": "Doe",
        "full_name": "John Doe",
        "email": "john.doe@example.com",
        "phone": "+91-98765-43210",
        "location": "Bengaluru, India",
        "current_title": "Software Engineer",
        "years_experience": 5,
        "skills": ["Python","Django","REST APIs","SQL","Docker","AWS","Unit Testing"],
        "education": "B.Tech Computer Science, XYZ University",
        "resume_text": "Experienced Software Engineer with strong backend and cloud skills."
    }

    output_root = Path(CONFIG.get("OUTPUT_DIR","outputs"))
    ensure_dir(output_root)

    for idx, job in enumerate(jobs, start=1):
        print_banner(f"Job {idx}/{len(jobs)}: {job['title']} @ {job['company']}")
        # Build job_id and out_dir
        job_hash = job_id_from_string(job.get("url") or (job.get("title","") + job.get("company","")))
        job["job_id"] = job_hash
        out_dir = output_root / job_hash
        ensure_dir(out_dir)

        # Save job meta
        save_text(out_dir / "job_meta.json", json.dumps(job, indent=2, ensure_ascii=False))

        # Upsert in DB as found
        upsert_job(job, status="found", out_dir=out_dir)

        # Generate docs via OpenAI (if key present)
        cover_text, bullets_text = generate_documents(profile, job)
        save_text(out_dir / "cover_letter.txt", cover_text)
        save_text(out_dir / "resume_bullets.txt", bullets_text)

        # Copy resume PDF if provided (into out_dir)
        resume_pdf = CONFIG.get("RESUME_PDF_PATH") or None
        if resume_pdf:
            try:
                from shutil import copyfile
                rp = Path(resume_pdf)
                if rp.exists():
                    copyfile(str(rp), str(out_dir / rp.name))
                    print(f"[main] Copied resume PDF into {out_dir}")
                else:
                    print("[main] RESUME_PDF_PATH set but file not found:", rp)
            except Exception as e:
                print("[main] Could not copy resume PDF:", e)

        # Optionally prefill application page
        job_url = job.get("url") or ""
        if CONFIG.get("ENABLE_PREFILL") and job_url:
            try:
                prefill_with_selenium(
                    job_url,
                    profile,
                    resume_pdf,
                    CONFIG.get("CHROME_DRIVER_PATH"),
                    CONFIG.get("SELENIUM_HEADLESS", False),
                    int(CONFIG.get("PAUSE_AFTER_PREFILL_SECONDS", 45)),
                    CONFIG.get("CHROME_USER_DATA_DIR"),
                    out_dir
                )
                upsert_job(job, status="pending_review", out_dir=out_dir)
            except Exception as e:
                print("[main] Prefill failed:", e)
                upsert_job(job, status="prefill_error", out_dir=out_dir)
        else:
            if not job_url:
                print("[main] No URL for this job — skipping prefill.")
            else:
                print("[main] PREFILL disabled in config — skipping prefill.")
            upsert_job(job, status="docs_generated", out_dir=out_dir)

        print(f"[main] Done with job {job['title']} @ {job['company']}. Outputs in {out_dir}")

    print_banner("All jobs processed. Check outputs/ and job_bot.sqlite")

if __name__ == "__main__":
    main()



[dataset] Creating sample dataset at jobs_dataset.csv ...
[dataset] Sample dataset created. You can edit jobs_dataset.csv to add/remove jobs.


[openai] Skipping generation because OPENAI_API_KEY is not set.
[autofill] Opening job URL: https://in.indeed.com/viewjob?jk=sample1
[selenium] Could not start Chrome WebDriver. Ensure chromedriver is installed & matching your Chrome version.
[main] Prefill failed: Message: session not created: probably user data directory is already in use, please specify a unique value for --user-data-dir argument, or don't use --user-data-dir; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
#0 0x57142259a4ca <unknown>
#1 0x571422019566 <unknown>
#2 0x571422054b4c <unknown>
#3 0x5714220507c4 <unknown>
#4 0x57142209fbd3 <unknown>
#5 0x57142209f286 <unknown>
#6 0x571422091403 <unknown>
#7 0x57142205db02 <unknown>
#8 0x57142205e7c1 <unknown>
#9 0x57142