### 1. Define Your Keyword/Phrase List
First, collect the terms you care about into two categories: **required** keywords and **optional** keywords.

In [1]:
import re
import spacy
from spacy.matcher import PhraseMatcher

import scoring_processor as spr

# Read the whole resume (Markdown source) into a single string. The encoding is
# given explicitly so the result does not depend on the platform default.
resume_path = "KANDACE_LOUDOR_new.md"
with open(resume_path, encoding="utf-8") as resume_file:
    raw_md = resume_file.read()

### 2. Preprocess the Resume Text

In [2]:
# 2. Normalize the raw Markdown with the project's scoring_processor helper;
#    the keyword matching and scoring cells below all operate on `clean_text`.
clean_text = spr.normalize(raw_md)


In [3]:
# Load the two keyword collections from the project's config; they are used
# below as the `required` and `optional` term lists for matching and scoring.
required, optional = spr.load_keywords_from_config()

In [4]:
# Sanity check: report any term that appears in both keyword lists so the
# config can be corrected before scoring.
overlap = set(required).intersection(optional)
if overlap:
    print("WARNING: overlap between words in required/optional:", overlap)

In [5]:
# Collect, per category, the keywords that occur in the cleaned resume text.
# Building sets means each keyword is reported at most once.
# NOTE(review): if clean_text is a str, `in` is a substring test, so a short
# term can match inside a longer word — confirm this is intended.
req_found = set(keyword for keyword in required if keyword in clean_text)
opt_found = set(keyword for keyword in optional if keyword in clean_text)

### 3. Scoring
- Find the "raw score" out of 5

In [11]:
# Score the resume against both keyword lists. calculate_score returns the raw
# score together with the collection of terms it matched.
raw_score, terms_that_were_found = spr.calculate_score(clean_text, required, optional)

# Report `raw_score`, the name bound above. No visible cell defines
# `final_score`, so printing it fails with NameError under Restart & Run All.
# NOTE(review): "out of 5" does not match the maximum computed in the
# percentage cell (len(required) * 1 + len(optional) * 0.5) — confirm wording.
print(f"Raw score: {raw_score} out of 5")
print(f"Found keywords: {terms_that_were_found}")

Raw score: 3.5 out of 5
Found keywords: {'data scientist', 'pandas', 'keras', 'python', 'sql'}


- Now compute percentage and report

In [14]:
# Maximum attainable score: every required keyword counts 1 point and every
# optional keyword counts half a point.
max_points = len(required) + 0.5 * len(optional)

# Express the raw score as a share of the maximum; with no keywords configured
# the maximum is zero, so fall back to 0 instead of dividing by zero.
if max_points:
    percent = (raw_score / max_points) * 100
else:
    percent = 0

# Report: matched terms per category (sorted for stable output), then totals.
print("Required matched:", sorted(req_found))
print("Optional matched:", sorted(opt_found))
print(f"Raw score: {raw_score} / {max_points}")
print(f"Percentage match: {percent:.1f}%")

Required matched: ['data scientist', 'python']
Optional matched: ['keras', 'pandas', 'sql']
Raw score: 3.5 / 13.0
Percentage match: 26.9%
