### Serp API

In [23]:
# Loading keys
from dotenv import load_dotenv
import os
import pandas as pd
from datetime import datetime
import json
from openai import OpenAI

# Read the local .env file so the keys below are available as environment variables
load_dotenv()

# SerpApi key used by the job-search requests
serp_key = os.environ.get("SERP_KEY")

# OpenAI client; the key is passed explicitly even though reading
# OPENAI_API_KEY is the client's default and the argument could be omitted
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [4]:
from serpapi import GoogleSearch

In [10]:
# Fetch up to SERP_PAGE_LIMIT pages of Google Jobs results through SerpApi and
# keep only the postings that offer a "LinkedIn" apply option. Kept postings are
# accumulated column-wise in `data` (one list per column, all the same length),
# ready to be turned into a DataFrame.
SERP_PAGE_LIMIT = 2
data = {
    "title": [],
    "company_name": [],
    "location": [],
    "via": [],
    "description": [],
    "salary": [],
    "linkedin_link": []
}
next_page_token = None
for page_index in range(SERP_PAGE_LIMIT):
    params = {
        "engine": "google_jobs",
        "q": "senior data scientist since yesterday in united states full time",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "api_key": serp_key,
    }
    # The first request has no page token; only send one once a previous
    # response has supplied it.
    if next_page_token:
        params["next_page_token"] = next_page_token

    search = GoogleSearch(params)
    results = search.get_dict()

    jobs = results.get("jobs_results")
    if not jobs:
        # Nothing to process on this page (error response or no more results).
        # NOTE(review): assumes failures are reported under an "error" key — confirm.
        print(f"No jobs_results on page {page_index + 1}: {results.get('error', 'no error message')}")
        break

    for job in jobs:
        # First apply option titled exactly "LinkedIn", if any.
        linkedin_link = next(
            (
                option.get("link")
                for option in job.get("apply_options", [])
                if option.get("title") == "LinkedIn"
            ),
            None,
        )
        if linkedin_link is None:
            continue

        extensions = job.get("detected_extensions", {})

        data["title"].append(job.get("title"))
        data["company_name"].append(job.get("company_name"))
        data["location"].append(job.get("location"))
        data["via"].append(job.get("via"))
        data["linkedin_link"].append(linkedin_link)

        # Description: the detected extensions as a one-line header, then the body
        header = " | ".join(f"{key}: {value}" for key, value in extensions.items())
        data["description"].append(header + "\n" + (job.get("description") or ""))

        # Salary is None when the posting does not expose one
        data["salary"].append(extensions.get("salary"))

    # Set next page token once per page; stop when the response offers no further page
    next_page_token = results.get("serpapi_pagination", {}).get("next_page_token")
    if not next_page_token:
        break

In [50]:
# Persist today's filtered listings as serp_extract/serp_<YYYY-MM-DD>.csv
today = datetime.today().strftime("%Y-%m-%d")

# Create filename
serp_filename = f"serp_{today}.csv"
SERP_FOLDER = "serp_extract"

# to_csv does not create missing directories, so make sure the folder exists
os.makedirs(SERP_FOLDER, exist_ok=True)

# Save DataFrame
df = pd.DataFrame(data)
print(len(df))
df.to_csv(SERP_FOLDER + "/" + serp_filename, index=False)

8


### OpenAI

In [55]:
# Reload the listings saved for today, then read the resume text that is
# substituted into every prompt
serp_csv_path = f"{SERP_FOLDER}/{serp_filename}"
df = pd.read_csv(serp_csv_path)

with open('resume.txt', 'r') as resume_file:
    resume_input = resume_file.read()

In [30]:
# Base prompt template for the job-matching request.
# It is meant to be filled with str.format(): {resume_input} and
# {job_description} are the only substitution fields. The doubled braces
# ({{ and }}) in the JSON example are str.format escapes and come out as single
# literal braces in the final prompt.
# The weights listed under "Evaluation Criteria" are repeated in the JSON
# example, so the two lists must be edited together.
# NOTE(review): the example output is wrapped in a ```json fence, yet the last
# instruction forbids code fences/backticks — confirm the fenced example is
# intentional, because a fenced reply cannot be passed to json.loads as-is.
prompt_template = """
You are an expert job-matching assistant. Your task is to evaluate how well a given job description matches a candidate's resume using weighted criteria, reflecting what matters most to the candidate.

The candidate is targeting **Senior Data Scientist roles**, has **2 years of experience**, and is looking for **individual contributor (IC) roles only**, with a **total compensation of $240,000 or more**. Analyze the match between the resume and job description using the following weighted criteria:

---

🔍 Evaluation Criteria (with Weights):
1. Role Fit (Responsibilities & Scope) – Weight: 10  
2. Experience Requirement (Must require at most 2 years experience with a Master's degree) – Weight: 10  
3. Growth & Learning Opportunities – Weight: 9  
4. Role Level (Must be IC; not managerial or senior managerial) – Weight: 9  
5. Team & Manager Quality (if mentioned) – Weight: 8  
6. Company Stability & Mission Fit – Weight: 8  
7. Compensation (Target: $240K+, estimate based on role and company if not present) – Weight: 8  
8. Work-Life Balance & Culture – Weight: 7  
9. Technical Stack Relevance – Weight: 6  
10. Location / Remote Flexibility – Weight: 5  
11. Perks & Benefits – Weight: 3  
12. Job Description Quality – Weight: 2
13. Distance from current location, less is better (Jersey City, NJ) - Weight: 7

All ratings must be in the range of **1 to 10**, where 1 is very poor alignment and 10 is perfect alignment.

---

📥 Inputs:
- Resume: {resume_input}  
- Job Description: {job_description}  

---

📤 Output Format:
Respond **only with a valid JSON object** in the format below:

```json
{{
  "criteria_ratings": {{
    "role_fit": {{"rating": X, "weight": 10}},
    "experience_requirement": {{"rating": X, "weight": 10}},
    "growth_opportunities": {{"rating": X, "weight": 9}},
    "role_level": {{"rating": X, "weight": 9}},
    "team_quality": {{"rating": X, "weight": 8}},
    "company_stability_mission": {{"rating": X, "weight": 8}},
    "compensation": {{"rating": X, "weight": 8}},
    "work_life_balance": {{"rating": X, "weight": 7}},
    "tech_stack": {{"rating": X, "weight": 6}},
    "location_remote": {{"rating": X, "weight": 5}},
    "benefits": {{"rating": X, "weight": 3}},
    "jd_quality": {{"rating": X, "weight": 2}},
    "distance_jc": {{"rating": X, "weight": 7}}
  }},
  "reasoning": "2-3 lines, at most 150 words, summarizing the overall fit and justification for the final score."
}}
```

Respond only with a valid JSON object. Do not include any text, Markdown code block, or backticks.
"""

In [56]:
def _parse_json_response(response_text):
    """Parse the model reply as JSON, tolerating a surrounding Markdown code fence.

    Args:
        response_text: Raw message content returned by the model (may be None).

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the (unfenced) text is empty or not valid JSON.
    """
    cleaned = (response_text or "").strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (``` or ```json) and the closing fence.
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        cleaned = cleaned.rsplit("```", 1)[0]
    return json.loads(cleaned)


def _numeric_or_zero(value):
    """Return `value` when it is an int/float, otherwise 0 (e.g. None or a placeholder string)."""
    return value if isinstance(value, (int, float)) else 0


# Score every listing in `df` against the resume. For each row the model's
# per-criterion rating/weight pairs are written to `<criterion>_rating` and
# `<criterion>_weight` columns, together with `reasoning` and the
# weight-averaged `final_score`. Rows whose reply cannot be used are reported
# and left unscored.
#
# Set SCORE_ROW_LIMIT to a small int to cap API calls while experimenting
# (1 reproduces the old "stop after the first scored row" behaviour);
# None scores every row.
SCORE_ROW_LIMIT = None
rows_scored = 0

for i, row in df.iterrows():
    if SCORE_ROW_LIMIT is not None and rows_scored >= SCORE_ROW_LIMIT:
        break

    # Fill in the prompt
    filled_prompt = prompt_template.format(resume_input=resume_input, job_description=row['description'])

    # Call the API
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": filled_prompt}
        ],
        temperature=0.1  # lower temperature for more consistent structure
    )

    # Extract and parse the JSON output
    response_text = response.choices[0].message.content
    try:
        result = _parse_json_response(response_text)
    except json.JSONDecodeError:
        print("Could not parse JSON. Here's the raw output:")
        print(response_text)
        continue  # skip this row

    criteria_ratings = result.get('criteria_ratings') if isinstance(result, dict) else None
    if not isinstance(criteria_ratings, dict):
        print("Response JSON has no 'criteria_ratings' object. Here's the raw output:")
        print(response_text)
        continue  # skip this row

    # Flatten and add ratings and weights
    total_score = 0
    total_weight = 0

    for criterion, values in criteria_ratings.items():
        if not isinstance(values, dict):
            continue  # malformed entry; ignore it rather than abort the row

        rating = _numeric_or_zero(values.get('rating', 0))
        weight = _numeric_or_zero(values.get('weight', 0))

        # Store rating and weight in DataFrame
        df.at[i, f'{criterion}_rating'] = rating
        df.at[i, f'{criterion}_weight'] = weight

        # Accumulate weighted score
        total_score += rating * weight
        total_weight += weight

    # Add reasoning
    df.at[i, 'reasoning'] = result.get('reasoning')

    # Compute and add final weighted average score (0 when no usable weights)
    final_score = total_score / total_weight if total_weight else 0
    df.at[i, 'final_score'] = round(final_score, 2)

    rows_scored += 1

In [57]:
# Export the scored listings to outputs/output_<YYYY-MM-DD>.xlsx, best match
# first, with the LinkedIn link as the last column rendered as a clickable
# Excel hyperlink.
OUTPUT_DIR = 'outputs'
output_filename = f"output_{today}.xlsx"

# Longest string literal used inside a formula here; longer URLs are written
# as plain text instead.
# NOTE(review): 255 is Excel's documented limit for text inside a formula — confirm.
EXCEL_FORMULA_STRING_LIMIT = 255


def _as_excel_hyperlink(url):
    """Return an Excel HYPERLINK formula for `url`, or a safe fallback.

    Missing values are returned unchanged (empty cell), values that are already
    a HYPERLINK formula are returned as-is so re-running the cell does not nest
    formulas, and URLs too long for a formula string are returned as plain text.
    Double quotes are doubled, which is how Excel escapes them inside a string.
    """
    if pd.isna(url):
        return url
    url = str(url)
    if url.startswith('=HYPERLINK('):
        return url
    escaped = url.replace('"', '""')
    if len(escaped) > EXCEL_FORMULA_STRING_LIMIT:
        return url
    return f'=HYPERLINK("{escaped}", "{escaped}")'


# Current columns, with the link column moved to the end
cols = list(df.columns)

cols.remove('linkedin_link')
cols.append('linkedin_link')
# Explicit copy so the column assignment below works on an independent frame
df = df[cols].copy()

df['linkedin_link'] = df['linkedin_link'].apply(_as_excel_hyperlink)

# final_score only exists once at least one row has been scored
if 'final_score' in df.columns:
    df = df.sort_values(by='final_score', ascending=False, na_position='last')

# to_excel does not create missing directories
os.makedirs(OUTPUT_DIR, exist_ok=True)
df.to_excel(OUTPUT_DIR + '/' + output_filename, sheet_name='Job_Match_Results', index=False)