In [25]:
from langchain_ollama import OllamaLLM
import json
import sys
import os
import pprint
import re

In [3]:
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

In [4]:
def check_project_root():
    project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
    if project_root not in sys.path:
        raise ImportError(f"Project root '{project_root}' not found in Python path")
    print(f"Project root '{project_root}' successfully added to Python path")

try:
    check_project_root()
except ImportError as e:

    print(f"Error: {e}")

Project root 'c:\Users\yq198\Desktop\DANNY_AI\CV_Agent' successfully added to Python path


In [6]:
llm_qwen_model = "qwen3:latest"

# Initialize the Ollama model
llm_qwen = OllamaLLM(model=llm_qwen_model) 

In [21]:
# Loading both json files from data folder to test prompt
def load_json(file_path):
    with open(file_path, "r") as f:
        return json.load(f)
    
cv_json = load_json("../data/cv_parsed_qwen.json") 
jd_json = load_json("../data/jd_parsed_qwen.json")
print(pprint.pprint(cv_json))
print("---------------------------")
print(pprint.pprint(jd_json))


{'education': [{'degree': 'Bachelorâ€™s Degree in Banking and Finance',
                'end_date': '',
                'institution': 'University of London',
                'start_date': ''}],
 'experience': [{'company': 'AI Singapore',
                 'description': 'Enhanced clinician efficiency by 15% through '
                                'an end-to-end ML pipeline (20 data sources, 4 '
                                'engineers) that enabled prioritization of '
                                'low-confidence daily predictions. Eliminated '
                                'the need for clinicians to manually extract '
                                'symptoms from medical reports, enhancing '
                                'efficiency by collaborating on fine-tuning a '
                                'biomedical BERT LLM that improved feature F1 '
                                'score by 25%. Developed a customized date '
                                'aggregation module

In [28]:
industry = "Technology"

CD_vs_JD_prompt = f"""
You are a highly experienced human resource assistant who is proficient in the {industry} domain.

Your task is to compare a candidate's CV against a given Job Description (JD). 
Return ONLY valid JSON inside a fenced code block (```json ... ```). 
Do not output anything else.

Schema:
```json
{{
  "ats_score": 0,
  "ats_feedback": "Brief pros and cons summary of the CV for this role",
  "matched_skills": ["skill1", "skill2"],
  "missing_skills": ["skillX", "skillY"],
  "recommendations": ["action1", "action2"],
  "gap_analysis": ["gap1", "gap2"]
}}

Guidelines:

1. ats_score: A number from 0–10 representing overall alignment between CV and JD.
2. ats_feedback: Short feedback about why the score was given (strengths + weaknesses).
3. matched_skills: List of overlapping skills between CV and JD.
4. missing_skills: Skills required by JD but not present in CV.
5. recommendations: Concrete steps the candidate can take to improve the CV for this role.
6. gap_analysis: High-level gaps between the CV and JD (e.g., missing degree, limited experience, domain mismatch).
7. If a field has no data, return an empty list [] or empty string "".
8. Do not use placeholders, ellipses, or extra commentary.

Here is the candidate CV JSON:
{cv_json}

Here is the job description JSON:
{jd_json}
"""

In [29]:
cv_jd_comparision = llm_qwen.invoke(CD_vs_JD_prompt)

# Inspect the parsed CV
pprint.pprint(cv_jd_comparision)


('<think>\n'
 "Okay, let's tackle this comparison between the candidate's CV and the job "
 'description. First, I need to understand what the job is asking for. The '
 'position is a Machine Learning Engineer focused on real estate price '
 'prediction and investment potential. The responsibilities include developing '
 'models, handling heterogeneous data, NLP analysis, data management, '
 'dashboard creation, and collaboration with experts.\n'
 '\n'
 "Looking at the candidate's skills, they have Python, which is a must. They "
 'also list Scikit-learn, Pandas, NumPy, PyTorch, and TensorFlow, which are '
 'all in the required skills. However, the job mentions Dash, Plotly, and '
 'Streamlit for dashboards, and the candidate has experience with Power BI and '
 'FastAPI. Wait, the skills section mentions Dash? Let me check again. The '
 "skills_required in the JD include Dash, Plotly, Streamlit. The candidate's "
 'skills list includes "Data Tools & Databases: Power BI, FAISS, Chroma, 

In [30]:
def clean_text_into_json(raw_input):

    # 1. Remove <think> blocks if they exist
    cleaned_text = re.sub(r"<think>.*?</think>", "", raw_input, flags=re.DOTALL)

    # 2. Extract the JSON inside ```json ... ```
    match = re.search(r"```json\s*(\{.*?\})\s*```", cleaned_text, flags=re.DOTALL)
    if match:
        json_text = match.group(1)
    else:
        # fallback: extract from first { to last }
        start = cleaned_text.find("{")
        end = cleaned_text.rfind("}")
        json_text = cleaned_text[start:end+1]

    # 3. Load JSON
    try:
        cv_json = json.loads(json_text)
    except json.JSONDecodeError as e:
        print("Failed to parse JSON:", e)
        print("Raw JSON text:")
        print(json_text)
        cv_json = None
        
    return cv_json


In [31]:
clean_text_into_json(cv_jd_comparision)

{'ats_score': 6,
 'ats_feedback': 'The candidate demonstrates strong ML and Python skills, with experience in NLP and model deployment. However, lacks proficiency in dashboard tools (Dash/Plotly/Streamlit) and geospatial tools required for real estate analysis.',
 'matched_skills': ['Python',
  'Scikit-learn',
  'Pandas',
  'NumPy',
  'PyTorch',
  'TensorFlow'],
 'missing_skills': ['Dash',
  'Plotly',
  'Streamlit',
  'GeoPandas',
  'Shapely',
  'Kepler.gl',
  'PostgreSQL/PostGIS',
  'Elasticsearch'],
 'recommendations': ['Add specific projects using Dash/Plotly/Streamlit for dashboard creation',
  'Highlight geospatial data analysis experience or tools',
  'Mention familiarity with PostgreSQL/PostGIS or Elasticsearch'],
 'gap_analysis': ["Bachelor's in Banking and Finance (not Computer Science/Data Science)",
  'No experience with geospatial tools or real estate data analysis',
  'Lack of dashboard development expertise']}