In [3]:
import spacy
import re
from nltk.tokenize import word_tokenize

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Define custom tech skills list
tech_skills = [
    'Python', 'Java', 'C++', 'C', 'HTML', 'CSS', 'JavaScript', 'SQL', 'MongoDB',
    'TensorFlow', 'Keras', 'PyTorch', 'AWS', 'Docker', 'Machine Learning',
    'Deep Learning', 'Data Science', 'NLP', 'Flask', 'Django'
]

def extract_skills(text):
    """Extract tech skills from resume text"""
    tokens = word_tokenize(text)
    cleaned_tokens = [t.strip().capitalize() for t in tokens]
    detected = [skill for skill in tech_skills if skill.lower() in text.lower()]
    return list(set(detected))

def extract_entities(text):
    """Extract name, location, and organization"""
    doc = nlp(text)
    name, location, org = None, None, None

    # Try to capture full name if both first and last appear separately
    persons = [ent.text for ent in doc.ents if ent.label_ == "PERSON"]
    if len(persons) >= 2:
        # Join if both are separate
        name = " ".join(persons[:2])
    elif len(persons) == 1:
        name = persons[0]

    # Try to capture location and organization
    for ent in doc.ents:
        if ent.label_ == "GPE" and location is None:
            location = ent.text
        elif ent.label_ == "ORG" and org is None:
            org = ent.text

    # Backup: try regex for location if spaCy misses it
    if location is None:
        loc_match = re.search(r"(Bangalore|Hyderabad|Mumbai|Delhi|Pune|Chennai|Kolkata)", text, re.I)
        if loc_match:
            location = loc_match.group(0)

    return name, location, org

# ---- Example Resume Text ----
resume_text = """
Akash Kumar is a skilled software engineer living in Bangalore.
He has previously worked at Google as a Machine Learning Engineer.
His technical expertise includes Python, TensorFlow, AWS, and Docker.
"""

# ---- Extract Entities and Skills ----
name, location, org = extract_entities(resume_text)
skills = extract_skills(resume_text)

# ---- Print Output ----
print("Name:", name)
print("Location:", location)
print("Organization:", org)
print("Detected Skills:", skills)


Name: Kumar Bangalore
Location: Bangalore
Organization: Google
Detected Skills: ['Python', 'TensorFlow', 'C', 'Docker', 'Machine Learning', 'AWS']
