In [4]:
!pip install -q transformers sentence-transformers accelerate scikit-learn


In [9]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Identifier of the seq2seq checkpoint passed to from_pretrained below.
t5_model_name = "google/flan-t5-small"

# Load the tokenizer and model once at cell level; job_fitting_prompt_t5 reads
# these two module-level names on every call.
t5_tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
t5_model = AutoModelForSeq2SeqLM.from_pretrained(t5_model_name)

def job_fitting_prompt_t5(job_description, resume_text):
    """Ask the T5 model for a short recruiter-style fit explanation.

    Builds a fixed instruction prompt around the two texts, tokenizes it
    (truncated to at most 1024 tokens), runs beam-search generation with the
    module-level ``t5_model`` and decodes the first returned sequence.

    Args:
        job_description: Text of the job posting, inserted verbatim.
        resume_text: Text of the candidate resume, inserted verbatim.

    Returns:
        The decoded model output as a string, with special tokens removed.
    """
    # Assembled line by line; the result is the same text the prompt template
    # would produce once its surrounding newlines are stripped.
    prompt_lines = [
        "You are a recruiter. In 2–4 full sentences, explain why this candidate is or is not a good fit for the job.",
        "Mention specific skills, experience, and technologies when possible.",
        "",
        "Job Description:",
        f"{job_description}",
        "",
        "Candidate Resume:",
        f"{resume_text}",
        "",
        "Answer (2–4 sentences):",
    ]
    prompt = "\n".join(prompt_lines)

    encoded = t5_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)

    # Beam search over 4 beams, at most 120 newly generated tokens.
    generation_options = {
        "max_new_tokens": 120,
        "num_beams": 4,
        "early_stopping": True,
    }
    generated = t5_model.generate(**encoded, **generation_options)

    first_sequence = generated[0]
    return t5_tokenizer.decode(first_sequence, skip_special_tokens=True)


# Printed once the tokenizer/model are loaded and the helper above is defined.
print("T5 model ready.")


T5 model ready.


In [6]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Sentence-embedding model, loaded once at cell level and read by
# job_fitting_prompt_bert on every call.
bert_model = SentenceTransformer("all-MiniLM-L6-v2")

def job_fitting_prompt_bert(job_description, resume_text):
    """Score how similar a resume is to a job description.

    Each text is embedded on its own with the module-level ``bert_model`` and
    the two embeddings are compared with cosine similarity.

    Args:
        job_description: Text of the job posting.
        resume_text: Text of the candidate resume.

    Returns:
        The cosine similarity of the two embeddings as a plain Python float.
    """
    # Encode each text as its own single-item batch, job description first.
    job_vec, resume_vec = (
        bert_model.encode([document])
        for document in (job_description, resume_text)
    )
    # cosine_similarity yields a 1x1 matrix for two single-row inputs.
    score_matrix = cosine_similarity(job_vec, resume_vec)
    return float(score_matrix[0][0])

# Printed once the embedding model is loaded and the helper above is defined.
print("BERT model ready.")


BERT model ready.


In [11]:
# Sample job posting used to smoke-test both scoring helpers.
job_desc = """
We are looking for a backend software engineer with strong experience in Python,
REST APIs, SQL databases, and cloud platforms such as AWS or GCP.
"""

# Sample candidate resume paired with the posting above.
resume_text = """
Computer Engineering student with experience building backend services in Python and Flask,
working with PostgreSQL databases, and deploying small projects to AWS.
Strong problem-solving skills and experience with Git and Linux.
"""

# Free-text justification generated by the T5 helper.
print("=== T5 job-fit justification ===")
print(job_fitting_prompt_t5(job_desc, resume_text))

# Cosine-similarity score returned by the sentence-embedding helper.
print("\n=== BERT similarity score ===")
print(job_fitting_prompt_bert(job_desc, resume_text))


=== T5 job-fit justification ===
a computer engineer with strong experience in Python, REST APIs, SQL databases, and cloud platforms

=== BERT similarity score ===
0.766248881816864


In [14]:
import csv
import os
import re
from datetime import datetime, timedelta, timezone

import requests

# ===============================
# CONFIG
# ===============================
# The Jooble API key is a secret, so it is read from the JOOBLE_API_KEY
# environment variable instead of being stored in the notebook. Set it before
# starting Jupyter, e.g. `export JOOBLE_API_KEY=...`. An empty value is
# rejected by the key check that runs before any request is made.
# NOTE(review): the key that was previously committed here should be treated
# as leaked and rotated.
JOOBLE_API_KEY = os.environ.get("JOOBLE_API_KEY", "")

# Search query and location sent with every page request.
KEYWORDS = "software engineer OR developer OR backend"
LOCATION = "United States"

RESULTS_PER_PAGE = 50   # Jooble default page size
MAX_PAGES = 10          # safety limit, adjust if needed

# Destination file for the collected listings.
OUTPUT_CSV = "job_listings_last_week_jooble.csv"

# Simple tech keyword list for tagging
TECH_KEYWORDS = [
    "python", "java", "javascript", "typescript", "c++", "c#", "go", "golang",
    "rust", "sql", "nosql", "aws", "azure", "gcp", "docker", "kubernetes",
    "react", "angular", "vue", "node", "linux", "bash", "shell", "git",
    "ci/cd", "tensorflow", "pytorch", "spark", "hadoop"
]

# ===============================
# Helper functions
# ===============================

def parse_updated_date(updated_str):
    """Parse a Jooble 'updated' timestamp into a datetime.

    Jooble 'updated' looks like '2023-09-15T12:55:35.3870000' (seven
    fractional digits). The fraction is normalised to exactly six digits so
    ``datetime.fromisoformat`` accepts it on every Python 3 version, and a
    trailing 'Z' or numeric UTC offset is preserved instead of being cut off.

    Args:
        updated_str: Timestamp string from the API; may be None or empty.

    Returns:
        A datetime (timezone-aware only when the input carried 'Z' or an
        offset, otherwise naive), or None when the value is missing, not a
        string, or cannot be parsed.
    """
    if not updated_str or not isinstance(updated_str, str):
        return None

    text = updated_str.strip()
    candidates = []

    match = re.fullmatch(
        r"(\d{4}-\d{2}-\d{2})[Tt ](\d{2}:\d{2}:\d{2})(?:\.(\d+))?\s*([Zz]|[+-]\d{2}:?\d{2})?",
        text,
    )
    if match:
        date_part, time_part, fraction, offset = match.groups()
        # fromisoformat on older Pythons only accepts 3 or 6 fractional digits.
        micros = (fraction or "")[:6].ljust(6, "0")
        if offset in ("Z", "z"):
            offset = "+00:00"
        elif offset and ":" not in offset:
            # "+0200" -> "+02:00"
            offset = f"{offset[:3]}:{offset[3:]}"
        candidates.append(f"{date_part}T{time_part}.{micros}{offset or ''}")

    # Fallbacks: the plain trimmed string (covers e.g. 'YYYY-MM-DDTHH:MM'),
    # then the date portion alone.
    candidates.append(text[:26])
    candidates.append(text.split("T")[0])

    for candidate in candidates:
        try:
            return datetime.fromisoformat(candidate)
        except ValueError:
            continue
    return None

def is_within_last_week(dt):
    """Tell whether a datetime is no older than seven days before now (UTC).

    Args:
        dt: A datetime or None. A naive datetime is interpreted as UTC.

    Returns:
        False for None; otherwise True when ``dt`` is at or after the moment
        exactly seven days before the current UTC time.
    """
    if dt is None:
        return False

    cutoff = datetime.now(timezone.utc) - timedelta(days=7)
    # Attach UTC to naive values so the comparison is between aware datetimes.
    aware_dt = dt if dt.tzinfo is not None else dt.replace(tzinfo=timezone.utc)
    return aware_dt >= cutoff

def detect_is_remote(title, location, description):
    """Guess whether a listing is remote from its text fields.

    The three fields (None treated as empty) are joined with spaces,
    lower-cased, and searched for the substrings "remote", "work from home"
    and "wfh".

    Returns:
        True when any of those substrings occurs, otherwise False.
    """
    haystack = " ".join(field or "" for field in (title, location, description)).lower()
    remote_markers = ("remote", "work from home", "wfh")
    return any(marker in haystack for marker in remote_markers)

def extract_tech_keywords(text, keywords=None):
    """Return the technology keywords mentioned in ``text``.

    Keywords are matched case-insensitively as whole tokens: a keyword only
    counts when it is not directly preceded or followed by a letter or digit.
    This avoids the false positives of plain substring search ("go" inside
    "google", "java" inside "javascript", "git" inside "digital") while still
    matching symbol-bearing keywords such as "c++", "c#" and "ci/cd".

    Note that compound names therefore no longer tag their suffix: for
    example "postgresql" does not produce "sql".

    Args:
        text: Text to scan; None or empty yields "".
        keywords: Optional iterable of keywords to look for. Defaults to the
            module-level ``TECH_KEYWORDS`` list.

    Returns:
        The matched keywords, lower-cased, de-duplicated, sorted and joined
        with ";" (an empty string when nothing matches).
    """
    if not text:
        return ""
    if keywords is None:
        keywords = TECH_KEYWORDS

    lowered = text.lower()
    found = set()
    for keyword in keywords:
        needle = keyword.lower()
        if not needle:
            continue
        # Lookarounds instead of \b: \b does not work next to "+", "#" or "/".
        pattern = r"(?<![a-z0-9])" + re.escape(needle) + r"(?![a-z0-9])"
        if re.search(pattern, lowered):
            found.add(needle)
    return ";".join(sorted(found))

def fetch_jobs_page(page, timeout=30):
    """Fetch one page of search results from the Jooble API.

    Sends a POST request with the configured keywords and location and
    returns the decoded JSON body.

    Args:
        page: Page number to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely and a stalled connection
            would hang the notebook.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # The API key is part of the URL path, so avoid logging this value.
    endpoint = f"https://jooble.org/api/{JOOBLE_API_KEY}"
    payload = {
        "keywords": KEYWORDS,
        "location": LOCATION,
        "page": page,
        # NOTE(review): confirm this field name against the Jooble API docs;
        # an unrecognised field would silently leave the page size at its default.
        "result_on_page": RESULTS_PER_PAGE
    }
    resp = requests.post(endpoint, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()

# ===============================
# Main fetching logic
# ===============================

# Fail fast, before any request is sent, if the key is empty or is still the
# template placeholder.
if not JOOBLE_API_KEY or JOOBLE_API_KEY == "YOUR_JOOBLE_API_KEY_HERE":
    raise ValueError("Please set JOOBLE_API_KEY to your actual key before running this cell.")

# One dict per kept job; the keys match the CSV columns written afterwards.
all_rows = []

# Walk result pages 1..MAX_PAGES, stopping early on an empty page or once a
# page contained a job that failed the date filter.
for page in range(1, MAX_PAGES + 1):
    data = fetch_jobs_page(page)
    jobs = data.get("jobs", [])

    # No "jobs" key or an empty list: nothing more to read.
    if not jobs:
        break

    stop_due_to_date = False

    for job in jobs:
        updated_str = job.get("updated")
        updated_dt = parse_updated_date(updated_str)

        # Only keep jobs from the last 7 days
        # A missing or unparseable date yields None, which also fails this
        # check, so such jobs are skipped and set the stop flag as well.
        # NOTE(review): stopping after this page presumably relies on results
        # arriving newest-first — confirm against the API's ordering.
        if not is_within_last_week(updated_dt):
            stop_due_to_date = True
            continue

        title = job.get("title", "")
        company = job.get("company", "")
        location = job.get("location", "")
        # Prefer the "snippet" field; use "description" when it is empty.
        description = job.get("snippet", "") or job.get("description", "")
        url = job.get("link", "")

        is_remote = detect_is_remote(title, location, description)
        tech_kw = extract_tech_keywords(description)

        # updated_dt cannot be None here (None fails the date check above);
        # the else branch is purely defensive.
        date_posted = updated_dt.date().isoformat() if updated_dt else ""

        all_rows.append({
            "job_title": title,
            "company": company,
            "location": location,
            "date_posted": date_posted,
            "description": description,
            "url": url,
            "is_remote": is_remote,
            "tech_keywords": tech_kw
        })

    # The rest of the current page is still processed; only later pages are skipped.
    if stop_due_to_date:
        break

# ===============================
# Write CSV
# ===============================
# Nothing collected: report it and leave the output file untouched.
if not all_rows:
    print("No jobs from the last 7 days matched the criteria.")
else:
    # Column order of the CSV; the names match the keys of each collected row.
    fieldnames = [
        "job_title", "company", "location", "date_posted",
        "description", "url", "is_remote", "tech_keywords",
    ]

    # newline="" lets the csv module control line endings itself.
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"Wrote {len(all_rows)} jobs to {OUTPUT_CSV}")


Wrote 10 jobs to job_listings_last_week_jooble.csv
