In [454]:
import pandas as pd
import numpy as np
from openai import OpenAI
import pdfminer
import ast
from pdfminer.high_level import extract_text
from collections import defaultdict
import re
from pyresparser import ResumeParser
import spacy
from spacy.matcher import Matcher
from rapidfuzz import fuzz, process
from spacy.matcher import PhraseMatcher
from dateutil import parser
from datetime import datetime
from skillNer.general_params import SKILL_DB
from skillNer.skill_extractor_class import SkillExtractor
from nltk.corpus import wordnet
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer
import math

In [25]:
print(pdfminer.__version__)


20231228


1. Find a PDF scraper to scrape CV of student
2. Obtain metadata (Student Profile) of the student from the csv and store it in a dataframe
3. Obtain data of jobs


## Create a database of jobs (Create using synthetic dataset) --> Input this into a VectorDB

Job ID, Job Title, Industry, Required Education, Required Degree Field, Years of Experience, Hard Skills, Soft Skills


## Create for Student (Use a PDFreader on a resume, then use a NER model to fill the table)
Name, Education Level, Degree Field, Name of University, GPA, Work Experience, Hard Skill, Soft Skills, Learning Speed (Categories like Fast Medium Slow)

### Next Steps:
Explore User-Item Matrix Creation (k-NearestNeighbour)
Explore LLM with RAG
Explore Matrix Factorisation (Collaborative Filteriing)
Explore GNNs (Graph-Based Recommendation Systems)

Explore Clustering-Based Recommendation (Unsupervised ML)
Explore Semantic Similarity with Embeddings (Content-Based Filtering) # DONE
Explore K-Nearest Neighbours

### Create the list of jobs and the skill sets required for each job

In [None]:
list_jobs = [
    "Immigration Officer", "Judge", "Paralegal", "Legal Assistant", "Public Defender", "Urban Policy Planner",
    "Supply Chain Manager", "Logistics Coordinator", "Transportation Planner", "Warehouse Manager", "Freight Broker",
    "Truck Driver", "Airline Operations Manager", "Port Manager", "Inventory Manager", "Procurement Officer",
    "AI Product Manager", "Prompt Engineer (for AI)", "Data Privacy Officer", "Robotics Engineer", "VR/AR Developer",
    "Quantum Computing Researcher", "Drone Operator", "Space Tourism Guide", "Smart Home Technician", "Climate Tech Entrepreneur",
    "Blockchain Product Manager", "Autonomous Vehicle Engineer", "IoT (Internet of Things) Engineer", "Digital Twin Engineer", "Ethical Hacker"
]

def job_skill_creator(list_jobs):
    prompt = f"""
    You are a highly experienced headhunting specialist helping university students find suitable graduate jobs.

    You are tasked to create a synthetic dataset for 200 unique jobs. 
    Use the following list of jobs exactly as provided:
    {list_jobs}

    For each job, generate the following fields:
    - Job ID: A unique integer starting from 1.
    - Job Title: Exactly as given from the list (no changes).
    - Industry: Choose appropriately from these: ["Technology", "Finance", "Healthcare", "Arts and Media", "Education", "Human Resources and Operations", "Construction and Engineering", "Environmental and Sustainability", "Travel, Hospitality and Tourism", "Science and Research", "Retail and Sales", "Law and Government", "Transportation and Logistics", "Emerging Technologies"].
    - Required Education: Choose one of ["Bachelor", "Master", "None"].
    - Required Degree Field: Choose one of ["Computing", "Information Systems", "Arts and Social Sciences", "Science", "Engineering", "Design", "Business", "Medicine (Medicine and Nursing Students)"].
    - Years of Experience: Generate a realistic integer between 0 and 5.
    - Hard Skills: Provide a Python list of 2-5 relevant technical skills as strings (e.g., ["Python", "Data Analysis", "Cloud Computing"]).
    - Soft Skills: Provide a Python list of 2-5 soft skills as strings (e.g., ["Problem-Solving", "Communication", "Adaptability"]).

    **Important formatting rules**:
    - Output only the result as a **Python list of dictionaries**.
    - Each dictionary represents one job.
    - Do not add any explanation text. Only output the Python list object.
    - Make sure the skills and degree fields make logical sense based on the job.

    Example format for one job:

    {{
    "Job ID": 1,
    "Job Title": "Software Engineer",
    "Industry": "Technology",
    "Required Education": "Bachelor",
    "Required Degree Field": "Computing",
    "Years of Experience": 2,
    "Hard Skills": ["Python", "Software Development", "Version Control"],
    "Soft Skills": ["Problem-Solving", "Teamwork"]
    }}

    You must Start from Job ID 170 and must continue until Job ID 200.
    """



    client = OpenAI(
        api_key = "sk-06cb346d24fe4b36bd7b8b9fe91eaff0",
        base_url="https://api.deepseek.com")

    response = client.chat.completions.create(
        model = "deepseek-chat",
        messages = [
            {"role": "system", "content": "You are an extremely experienced head hunting specialist."},
            {"role": "user", "content": prompt},
        ],
        stream = False
    )

    return response.choices[0].message.content


In [19]:
job_data = [
    {
        "Job ID": 1,
        "Job Title": "Software Engineer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Python", "Software Development", "Version Control"],
        "Soft Skills": ["Problem-Solving", "Teamwork"]
    },
    {
        "Job ID": 2,
        "Job Title": "Frontend Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 1,
        "Hard Skills": ["JavaScript", "HTML/CSS", "React"],
        "Soft Skills": ["Creativity", "Attention to Detail"]
    },
    {
        "Job ID": 3,
        "Job Title": "Backend Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Java", "SQL", "API Development"],
        "Soft Skills": ["Logical Thinking", "Collaboration"]
    },
    {
        "Job ID": 4,
        "Job Title": "Full Stack Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 3,
        "Hard Skills": ["JavaScript", "Python", "Node.js"],
        "Soft Skills": ["Adaptability", "Communication"]
    },
    {
        "Job ID": 5,
        "Job Title": "DevOps Engineer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 3,
        "Hard Skills": ["Docker", "Kubernetes", "CI/CD"],
        "Soft Skills": ["Problem-Solving", "Teamwork"]
    },
    {
        "Job ID": 6,
        "Job Title": "Machine Learning Engineer",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Python", "TensorFlow", "Data Modeling"],
        "Soft Skills": ["Analytical Thinking", "Creativity"]
    },
    {
        "Job ID": 7,
        "Job Title": "Data Scientist",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Python", "Machine Learning", "Data Visualization"],
        "Soft Skills": ["Critical Thinking", "Communication"]
    },
    {
        "Job ID": 8,
        "Job Title": "Data Analyst",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 1,
        "Hard Skills": ["SQL", "Excel", "Tableau"],
        "Soft Skills": ["Attention to Detail", "Problem-Solving"]
    },
    {
        "Job ID": 9,
        "Job Title": "Data Engineer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["SQL", "ETL", "Big Data"],
        "Soft Skills": ["Collaboration", "Logical Thinking"]
    },
    {
        "Job ID": 10,
        "Job Title": "Cloud Architect",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 4,
        "Hard Skills": ["AWS", "Azure", "Cloud Security"],
        "Soft Skills": ["Strategic Thinking", "Leadership"]
    },
    {
        "Job ID": 11,
        "Job Title": "Cybersecurity Analyst",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Network Security", "Ethical Hacking", "SIEM"],
        "Soft Skills": ["Attention to Detail", "Problem-Solving"]
    },
    {
        "Job ID": 12,
        "Job Title": "AI Researcher",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 3,
        "Hard Skills": ["Python", "Deep Learning", "Natural Language Processing"],
        "Soft Skills": ["Innovation", "Critical Thinking"]
    },
    {
        "Job ID": 13,
        "Job Title": "Blockchain Developer",
        "Industry": "Emerging Technologies",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Solidity", "Smart Contracts", "Ethereum"],
        "Soft Skills": ["Problem-Solving", "Collaboration"]
    },
    {
        "Job ID": 14,
        "Job Title": "Web Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 1,
        "Hard Skills": ["HTML/CSS", "JavaScript", "PHP"],
        "Soft Skills": ["Creativity", "Adaptability"]
    },
    {
        "Job ID": 15,
        "Job Title": "Game Developer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Unity", "C#", "3D Modeling"],
        "Soft Skills": ["Creativity", "Teamwork"]
    },
    {
        "Job ID": 16,
        "Job Title": "UX Designer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["Figma", "User Research", "Wireframing"],
        "Soft Skills": ["Empathy", "Communication"]
    },
    {
        "Job ID": 17,
        "Job Title": "UI Designer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 1,
        "Hard Skills": ["Adobe XD", "Typography", "Color Theory"],
        "Soft Skills": ["Creativity", "Attention to Detail"]
    },
    {
        "Job ID": 18,
        "Job Title": "Product Manager (Tech)",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Agile Methodology", "Product Roadmapping", "Market Research"],
        "Soft Skills": ["Leadership", "Communication"]
    },
    {
        "Job ID": 19,
        "Job Title": "Mobile App Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Swift", "Kotlin", "React Native"],
        "Soft Skills": ["Problem-Solving", "Adaptability"]
    },
    {
        "Job ID": 20,
        "Job Title": "Embedded Systems Engineer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["C/C++", "Microcontrollers", "RTOS"],
        "Soft Skills": ["Attention to Detail", "Analytical Thinking"]
    },
    {
        "Job ID": 21,
        "Job Title": "Financial Analyst",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Excel", "Financial Modeling", "Data Analysis"],
        "Soft Skills": ["Analytical Thinking", "Communication"]
    },
    {
        "Job ID": 22,
        "Job Title": "Investment Banker",
        "Industry": "Finance",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Financial Modeling", "Valuation", "M&A"],
        "Soft Skills": ["Negotiation", "Attention to Detail"]
    },
    {
        "Job ID": 23,
        "Job Title": "Management Consultant",
        "Industry": "Finance",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Strategic Planning", "Market Analysis", "Project Management"],
        "Soft Skills": ["Problem-Solving", "Communication"]
    },
    {
        "Job ID": 24,
        "Job Title": "Auditor",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Accounting", "Risk Assessment", "Audit Software"],
        "Soft Skills": ["Attention to Detail", "Integrity"]
    },
    {
        "Job ID": 25,
        "Job Title": "Accountant",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Bookkeeping", "Tax Preparation", "Financial Reporting"],
        "Soft Skills": ["Accuracy", "Organization"]
    },
    {
        "Job ID": 26,
        "Job Title": "Risk Analyst",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Risk Modeling", "Data Analysis", "Statistics"],
        "Soft Skills": ["Critical Thinking", "Attention to Detail"]
    },
    {
        "Job ID": 27,
        "Job Title": "Actuary",
        "Industry": "Finance",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Statistical Modeling", "Probability", "Financial Mathematics"],
        "Soft Skills": ["Analytical Thinking", "Problem-Solving"]
    },
    {
        "Job ID": 28,
        "Job Title": "Tax Consultant",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Tax Law", "Financial Planning", "Compliance"],
        "Soft Skills": ["Communication", "Attention to Detail"]
    },
    {
        "Job ID": 29,
        "Job Title": "Corporate Lawyer",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Contract Law", "Corporate Governance", "Legal Research"],
        "Soft Skills": ["Negotiation", "Critical Thinking"]
    },
    {
        "Job ID": 30,
        "Job Title": "Compliance Officer",
        "Industry": "Law and Government",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Regulatory Compliance", "Risk Management", "Auditing"],
        "Soft Skills": ["Attention to Detail", "Integrity"]
    },
    {
        "Job ID": 31,
        "Job Title": "Business Development Manager",
        "Industry": "Business",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Market Research", "Sales Strategies", "CRM"],
        "Soft Skills": ["Negotiation", "Communication"]
    },
    {
        "Job ID": 32,
        "Job Title": "Marketing Analyst",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Data Analysis", "SEO", "Google Analytics"],
        "Soft Skills": ["Creativity", "Communication"]
    },
    {
        "Job ID": 33,
        "Job Title": "Project Manager",
        "Industry": "Business",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Agile Methodology", "Budgeting", "Stakeholder Management"],
        "Soft Skills": ["Leadership", "Organization"]
    },
    {
        "Job ID": 34,
        "Job Title": "Venture Capital Analyst",
        "Industry": "Finance",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Financial Modeling", "Due Diligence", "Market Analysis"],
        "Soft Skills": ["Analytical Thinking", "Communication"]
    },
    {
        "Job ID": 35,
        "Job Title": "Fund Manager",
        "Industry": "Finance",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Portfolio Management", "Investment Strategies", "Risk Assessment"],
        "Soft Skills": ["Decision-Making", "Leadership"]
    },
    {
        "Job ID": 36,
        "Job Title": "Sales Executive",
        "Industry": "Retail and Sales",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["CRM", "Negotiation", "Market Research"],
        "Soft Skills": ["Communication", "Persuasion"]
    },
    {
        "Job ID": 37,
        "Job Title": "Entrepreneur / Startup Founder",
        "Industry": "Business",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Business Planning", "Fundraising", "Product Development"],
        "Soft Skills": ["Leadership", "Risk-Taking"]
    },
    {
        "Job ID": 38,
        "Job Title": "Strategy Consultant",
        "Industry": "Business",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Market Analysis", "Competitive Research", "Business Modeling"],
        "Soft Skills": ["Critical Thinking", "Communication"]
    },
    {
        "Job ID": 39,
        "Job Title": "Business Analyst",
        "Industry": "Business",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Data Analysis", "Process Improvement", "Requirement Gathering"],
        "Soft Skills": ["Problem-Solving", "Communication"]
    },
    {
        "Job ID": 40,
        "Job Title": "Real Estate Analyst",
        "Industry": "Finance",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Market Research", "Financial Modeling", "Property Valuation"],
        "Soft Skills": ["Analytical Thinking", "Attention to Detail"]
    },
    {
        "Job ID": 41,
        "Job Title": "Doctor (General Practitioner)",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 3,
        "Hard Skills": ["Diagnosis", "Patient Care", "Medical Knowledge"],
        "Soft Skills": ["Empathy", "Communication"]
    },
    {
        "Job ID": 42,
        "Job Title": "Surgeon",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 5,
        "Hard Skills": ["Surgical Procedures", "Anatomy", "Patient Management"],
        "Soft Skills": ["Precision", "Stress Management"]
    },
    {
        "Job ID": 43,
        "Job Title": "Nurse",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 2,
        "Hard Skills": ["Patient Care", "Medication Administration", "First Aid"],
        "Soft Skills": ["Compassion", "Teamwork"]
    },
    {
        "Job ID": 44,
        "Job Title": "Dentist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 3,
        "Hard Skills": ["Dental Procedures", "Oral Health", "Patient Education"],
        "Soft Skills": ["Manual Dexterity", "Communication"]
    },
    {
        "Job ID": 45,
        "Job Title": "Pharmacist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 2,
        "Hard Skills": ["Medication Management", "Pharmaceutical Knowledge", "Dosage Calculation"],
        "Soft Skills": ["Attention to Detail", "Customer Service"]
    },
    {
        "Job ID": 46,
        "Job Title": "Physiotherapist",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 2,
        "Hard Skills": ["Rehabilitation Techniques", "Exercise Prescription", "Patient Assessment"],
        "Soft Skills": ["Empathy", "Patience"]
    },
    {
        "Job ID": 47,
        "Job Title": "Radiologist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 4,
        "Hard Skills": ["Medical Imaging", "Diagnosis", "Radiation Safety"],
        "Soft Skills": ["Analytical Thinking", "Attention to Detail"]
    },
    {
        "Job ID": 48,
        "Job Title": "Occupational Therapist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 3,
        "Hard Skills": ["Patient Assessment", "Rehabilitation Techniques", "Therapeutic Equipment"],
        "Soft Skills": ["Empathy", "Communication", "Patience"]
    },
    {
        "Job ID": 49,
        "Job Title": "Clinical Research Associate",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Clinical Trials", "Data Collection", "Regulatory Compliance"],
        "Soft Skills": ["Attention to Detail", "Organization", "Teamwork"]
    },
    {
        "Job ID": 50,
        "Job Title": "Medical Laboratory Technologist",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 1,
        "Hard Skills": ["Lab Testing", "Microscopy", "Data Analysis"],
        "Soft Skills": ["Precision", "Time Management", "Collaboration"]
    },
    {
        "Job ID": 51,
        "Job Title": "Paramedic",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 2,
        "Hard Skills": ["Emergency Care", "CPR", "Medical Equipment"],
        "Soft Skills": ["Calm Under Pressure", "Quick Decision-Making", "Compassion"]
    },
    {
        "Job ID": 52,
        "Job Title": "Psychiatrist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 5,
        "Hard Skills": ["Diagnosis", "Psychotherapy", "Medication Management"],
        "Soft Skills": ["Active Listening", "Empathy", "Patience"]
    },
    {
        "Job ID": 53,
        "Job Title": "Psychologist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Psychological Testing", "Counseling", "Behavioral Analysis"],
        "Soft Skills": ["Empathy", "Communication", "Critical Thinking"]
    },
    {
        "Job ID": 54,
        "Job Title": "Medical Sales Representative",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Sales Techniques", "Product Knowledge", "CRM Software"],
        "Soft Skills": ["Negotiation", "Communication", "Persuasion"]
    },
    {
        "Job ID": 55,
        "Job Title": "Healthcare Administrator",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Healthcare Regulations", "Budget Management", "Staff Scheduling"],
        "Soft Skills": ["Leadership", "Organization", "Problem-Solving"]
    },
    {
        "Job ID": 56,
        "Job Title": "Veterinary Surgeon",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 3,
        "Hard Skills": ["Surgical Procedures", "Animal Diagnosis", "Anesthesia"],
        "Soft Skills": ["Compassion", "Attention to Detail", "Patience"]
    },
    {
        "Job ID": 57,
        "Job Title": "Nutritionist",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 1,
        "Hard Skills": ["Diet Planning", "Nutritional Assessment", "Food Science"],
        "Soft Skills": ["Communication", "Empathy", "Motivation"]
    },
    {
        "Job ID": 58,
        "Job Title": "Optometrist",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Medicine (Medicine and Nursing Students)",
        "Years of Experience": 2,
        "Hard Skills": ["Eye Examinations", "Contact Lens Fitting", "Vision Therapy"],
        "Soft Skills": ["Patient Care", "Attention to Detail", "Communication"]
    },
    {
        "Job ID": 59,
        "Job Title": "Public Health Officer",
        "Industry": "Healthcare",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Epidemiology", "Health Policy", "Disease Prevention"],
        "Soft Skills": ["Leadership", "Communication", "Critical Thinking"]
    },
    {
        "Job ID": 60,
        "Job Title": "Medical Technologist",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Lab Testing", "Diagnostic Equipment", "Data Analysis"],
        "Soft Skills": ["Attention to Detail", "Teamwork", "Problem-Solving"]
    },
    {
        "Job ID": 61,
        "Job Title": "Graphic Designer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 1,
        "Hard Skills": ["Adobe Creative Suite", "Typography", "Branding"],
        "Soft Skills": ["Creativity", "Communication", "Time Management"]
    },
    {
        "Job ID": 62,
        "Job Title": "Animator",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["2D/3D Animation", "Storyboarding", "Motion Graphics"],
        "Soft Skills": ["Creativity", "Collaboration", "Attention to Detail"]
    },
    {
        "Job ID": 63,
        "Job Title": "Illustrator",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 1,
        "Hard Skills": ["Digital Illustration", "Sketching", "Concept Art"],
        "Soft Skills": ["Creativity", "Adaptability", "Attention to Detail"]
    },
    {
        "Job ID": 64,
        "Job Title": "Video Editor",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["Adobe Premiere", "Color Grading", "Sound Editing"],
        "Soft Skills": ["Creativity", "Time Management", "Collaboration"]
    },
    {
        "Job ID": 65,
        "Job Title": "Film Director",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 5,
        "Hard Skills": ["Storytelling", "Cinematography", "Script Analysis"],
        "Soft Skills": ["Leadership", "Creativity", "Communication"]
    },
    {
        "Job ID": 66,
        "Job Title": "Music Producer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Audio Mixing", "MIDI Programming", "Sound Design"],
        "Soft Skills": ["Creativity", "Collaboration", "Attention to Detail"]
    },
    {
        "Job ID": 67,
        "Job Title": "Sound Engineer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 2,
        "Hard Skills": ["Audio Editing", "Live Sound Mixing", "Acoustics"],
        "Soft Skills": ["Attention to Detail", "Problem-Solving", "Teamwork"]
    },
    {
        "Job ID": 68,
        "Job Title": "Photographer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 1,
        "Hard Skills": ["Photo Editing", "Lighting Techniques", "Composition"],
        "Soft Skills": ["Creativity", "Attention to Detail", "Adaptability"]
    },
    {
        "Job ID": 69,
        "Job Title": "Content Creator",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 1,
        "Hard Skills": ["Video Production", "Social Media Management", "Copywriting"],
        "Soft Skills": ["Creativity", "Communication", "Adaptability"]
    },
    {
        "Job ID": 70,
        "Job Title": "Copywriter",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["SEO Writing", "Brand Messaging", "Content Strategy"],
        "Soft Skills": ["Creativity", "Communication", "Research"]
    },
    {
        "Job ID": 71,
        "Job Title": "Journalist",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Investigative Reporting", "Interviewing", "Fact-Checking"],
        "Soft Skills": ["Communication", "Curiosity", "Adaptability"]
    },
    {
        "Job ID": 72,
        "Job Title": "Screenwriter",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Scriptwriting", "Story Development", "Dialogue Writing"],
        "Soft Skills": ["Creativity", "Persistence", "Collaboration"]
    },
    {
        "Job ID": 73,
        "Job Title": "Social Media Manager",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Content Scheduling", "Analytics", "Campaign Management"],
        "Soft Skills": ["Creativity", "Communication", "Adaptability"]
    },
    {
        "Job ID": 74,
        "Job Title": "Brand Strategist",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Market Research", "Brand Positioning", "Consumer Insights"],
        "Soft Skills": ["Strategic Thinking", "Communication", "Creativity"]
    },
    {
        "Job ID": 75,
        "Job Title": "Art Director",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 4,
        "Hard Skills": ["Visual Storytelling", "Creative Direction", "Branding"],
        "Soft Skills": ["Leadership", "Creativity", "Collaboration"]
    },
    {
        "Job ID": 76,
        "Job Title": "Creative Director",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 5,
        "Hard Skills": ["Campaign Development", "Creative Strategy", "Team Leadership"],
        "Soft Skills": ["Visionary Thinking", "Leadership", "Communication"]
    },
    {
        "Job ID": 77,
        "Job Title": "Game Designer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["Game Mechanics", "Level Design", "Prototyping"],
        "Soft Skills": ["Creativity", "Problem-Solving", "Collaboration"]
    },
    {
        "Job ID": 78,
        "Job Title": "Fashion Designer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 3,
        "Hard Skills": ["Pattern Making", "Textile Knowledge", "Fashion Illustration"],
        "Soft Skills": ["Creativity", "Attention to Detail", "Trend Awareness"]
    },
    {
        "Job ID": 79,
        "Job Title": "Interior Designer",
        "Industry": "Arts and Media",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["Space Planning", "3D Rendering", "Material Selection"],
        "Soft Skills": ["Creativity", "Communication", "Problem-Solving"]
    },
    {
        "Job ID": 80,
        "Job Title": "Web Content Manager",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Content Management Systems", "SEO", "HTML/CSS"],
        "Soft Skills": ["Communication", "Organization", "Attention to Detail"]
    },
    {
        "Job ID": 81,
        "Job Title": "University Lecturer",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Curriculum Development", "Academic Research", "Lecture Delivery"],
        "Soft Skills": ["Communication", "Mentorship", "Public Speaking"]
    },
    {
        "Job ID": 82,
        "Job Title": "Primary School Teacher",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Lesson Planning", "Classroom Management", "Child Development"],
        "Soft Skills": ["Patience", "Communication", "Empathy"]
    },
    {
        "Job ID": 83,
        "Job Title": "Secondary School Teacher",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Subject Expertise", "Assessment Design", "Classroom Management"],
        "Soft Skills": ["Communication", "Patience", "Leadership"]
    },
    {
        "Job ID": 84,
        "Job Title": "Curriculum Developer",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Educational Standards", "Instructional Design", "Assessment Strategies"],
        "Soft Skills": ["Collaboration", "Creativity", "Problem-Solving"]
    },
    {
        "Job ID": 85,
        "Job Title": "Research Scientist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Experimental Design", "Data Analysis", "Scientific Writing"],
        "Soft Skills": ["Critical Thinking", "Curiosity", "Persistence"]
    },
    {
        "Job ID": 86,
        "Job Title": "Librarian",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Cataloging", "Information Retrieval", "Digital Archives"],
        "Soft Skills": ["Organization", "Customer Service", "Attention to Detail"]
    },
    {
        "Job ID": 87,
        "Job Title": "Instructional Designer",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["E-Learning Development", "Learning Management Systems", "Assessment Design"],
        "Soft Skills": ["Collaboration", "Creativity", "Problem-Solving"]
    },
    {
        "Job ID": 88,
        "Job Title": "Special Education Teacher",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Classroom Management", "Individualized Education Plans", "Behavioral Interventions"],
        "Soft Skills": ["Patience", "Empathy", "Communication"]
    },
    {
        "Job ID": 89,
        "Job Title": "Academic Advisor",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Academic Planning", "Student Counseling", "Degree Audits"],
        "Soft Skills": ["Active Listening", "Interpersonal Skills", "Problem-Solving"]
    },
    {
        "Job ID": 90,
        "Job Title": "Training Specialist",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Curriculum Development", "Learning Management Systems", "Training Delivery"],
        "Soft Skills": ["Presentation Skills", "Adaptability", "Collaboration"]
    },
    {
        "Job ID": 91,
        "Job Title": "Learning and Development Manager",
        "Industry": "Human Resources and Operations",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Training Strategy", "Performance Metrics", "E-Learning Tools"],
        "Soft Skills": ["Leadership", "Strategic Thinking", "Communication"]
    },
    {
        "Job ID": 92,
        "Job Title": "E-learning Specialist",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Information Systems",
        "Years of Experience": 3,
        "Hard Skills": ["Learning Management Systems", "Instructional Design", "Multimedia Development"],
        "Soft Skills": ["Creativity", "Attention to Detail", "Problem-Solving"]
    },
    {
        "Job ID": 93,
        "Job Title": "Test Developer",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Assessment Design", "Psychometrics", "Data Analysis"],
        "Soft Skills": ["Analytical Thinking", "Attention to Detail", "Collaboration"]
    },
    {
        "Job ID": 94,
        "Job Title": "Science Educator",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 1,
        "Hard Skills": ["Lesson Planning", "Laboratory Management", "STEM Education"],
        "Soft Skills": ["Enthusiasm", "Communication", "Patience"]
    },
    {
        "Job ID": 95,
        "Job Title": "Language Instructor",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Language Proficiency", "Curriculum Development", "Cultural Knowledge"],
        "Soft Skills": ["Patience", "Communication", "Adaptability"]
    },
    {
        "Job ID": 96,
        "Job Title": "College Admissions Officer",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Application Review", "Recruitment Strategies", "CRM Software"],
        "Soft Skills": ["Interpersonal Skills", "Decision-Making", "Organization"]
    },
    {
        "Job ID": 97,
        "Job Title": "Early Childhood Educator",
        "Industry": "Education",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Child Development", "Classroom Management", "Early Literacy"],
        "Soft Skills": ["Patience", "Creativity", "Empathy"]
    },
    {
        "Job ID": 98,
        "Job Title": "Education Policy Analyst",
        "Industry": "Education",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Policy Research", "Data Analysis", "Report Writing"],
        "Soft Skills": ["Critical Thinking", "Communication", "Problem-Solving"]
    },
    {
        "Job ID": 99,
        "Job Title": "Tutor",
        "Industry": "Education",
        "Required Education": "None",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 1,
        "Hard Skills": ["Subject Matter Expertise", "Teaching Techniques", "Assessment"],
        "Soft Skills": ["Patience", "Communication", "Adaptability"]
    },
    {
        "Job ID": 100,
        "Job Title": "Career Counselor",
        "Industry": "Human Resources and Operations",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Career Assessments", "Resume Writing", "Interview Coaching"],
        "Soft Skills": ["Empathy", "Active Listening", "Interpersonal Skills"]
    },
    {
        "Job ID": 101,
        "Job Title": "Human Resources Manager",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Employee Relations", "HR Policies", "Performance Management"],
        "Soft Skills": ["Leadership", "Conflict Resolution", "Communication"]
    },
    {
        "Job ID": 102,
        "Job Title": "Recruiter",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Talent Sourcing", "Applicant Tracking Systems", "Interviewing"],
        "Soft Skills": ["Networking", "Communication", "Persuasion"]
    },
    {
        "Job ID": 103,
        "Job Title": "Talent Acquisition Specialist",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Recruitment Strategies", "Employer Branding", "Candidate Assessment"],
        "Soft Skills": ["Relationship Building", "Negotiation", "Communication"]
    },
    {
        "Job ID": 104,
        "Job Title": "Compensation and Benefits Manager",
        "Industry": "Human Resources and Operations",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Salary Benchmarking", "Benefits Administration", "Payroll Systems"],
        "Soft Skills": ["Analytical Thinking", "Attention to Detail", "Communication"]
    },
    {
        "Job ID": 105,
        "Job Title": "Organizational Development Consultant",
        "Industry": "Human Resources and Operations",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Change Management", "Leadership Training", "Employee Engagement"],
        "Soft Skills": ["Strategic Thinking", "Problem-Solving", "Interpersonal Skills"]
    },
    {
        "Job ID": 106,
        "Job Title": "Office Manager",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Office Administration", "Budget Management", "Scheduling"],
        "Soft Skills": ["Organization", "Communication", "Multitasking"]
    },
    {
        "Job ID": 107,
        "Job Title": "Executive Assistant",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Calendar Management", "Travel Coordination", "Document Preparation"],
        "Soft Skills": ["Discretion", "Time Management", "Communication"]
    },
    {
        "Job ID": 108,
        "Job Title": "Administrative Assistant",
        "Industry": "Human Resources and Operations",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Data Entry", "Filing Systems", "Office Software"],
        "Soft Skills": ["Organization", "Communication", "Attention to Detail"]
    },
    {
        "Job ID": 109,
        "Job Title": "Facilities Manager",
        "Industry": "Human Resources and Operations",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Maintenance Coordination", "Space Planning", "Safety Compliance"],
        "Soft Skills": ["Problem-Solving", "Leadership", "Communication"]
    },
    {
        "Job ID": 110,
        "Job Title": "Event Coordinator",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Event Planning", "Vendor Management", "Budgeting"],
        "Soft Skills": ["Organization", "Creativity", "Interpersonal Skills"]
    },
    {
        "Job ID": 111,
        "Job Title": "Civil Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Structural Design", "AutoCAD", "Project Management"],
        "Soft Skills": ["Problem-Solving", "Teamwork", "Attention to Detail"]
    },
    {
        "Job ID": 112,
        "Job Title": "Electrical Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 2,
        "Hard Skills": ["Circuit Design", "Power Systems", "MATLAB"],
        "Soft Skills": ["Analytical Thinking", "Collaboration", "Creativity"]
    },
    {
        "Job ID": 113,
        "Job Title": "Mechanical Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["CAD Modeling", "Thermodynamics", "Prototyping"],
        "Soft Skills": ["Problem-Solving", "Innovation", "Teamwork"]
    },
    {
        "Job ID": 114,
        "Job Title": "Structural Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Master",
        "Required Degree Field": "Engineering",
        "Years of Experience": 4,
        "Hard Skills": ["Finite Element Analysis", "Seismic Design", "Steel and Concrete Design"],
        "Soft Skills": ["Attention to Detail", "Critical Thinking", "Collaboration"]
    },
    {
        "Job ID": 115,
        "Job Title": "Architect",
        "Industry": "Construction and Engineering",
        "Required Education": "Master",
        "Required Degree Field": "Design",
        "Years of Experience": 5,
        "Hard Skills": ["Architectural Design", "Revit", "Building Codes"],
        "Soft Skills": ["Creativity", "Visualization", "Communication"]
    },
    {
        "Job ID": 116,
        "Job Title": "Construction Project Manager",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 5,
        "Hard Skills": ["Project Scheduling", "Cost Estimation", "Contract Management"],
        "Soft Skills": ["Leadership", "Negotiation", "Problem-Solving"]
    },
    {
        "Job ID": 117,
        "Job Title": "Urban Planner",
        "Industry": "Construction and Engineering",
        "Required Education": "Master",
        "Required Degree Field": "Design",
        "Years of Experience": 3,
        "Hard Skills": ["GIS Software", "Zoning Regulations", "Community Engagement"],
        "Soft Skills": ["Analytical Thinking", "Communication", "Collaboration"]
    },
    {
        "Job ID": 118,
        "Job Title": "Carpenter",
        "Industry": "Construction and Engineering",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 2,
        "Hard Skills": ["Woodworking", "Blueprint Reading", "Framing"],
        "Soft Skills": ["Attention to Detail", "Physical Stamina", "Teamwork"]
    },
    {
        "Job ID": 119,
        "Job Title": "Electrician",
        "Industry": "Construction and Engineering",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 3,
        "Hard Skills": ["Electrical Wiring", "Troubleshooting", "National Electrical Code"],
        "Soft Skills": ["Problem-Solving", "Safety Awareness", "Manual Dexterity"]
    },
    {
        "Job ID": 120,
        "Job Title": "Plumber",
        "Industry": "Construction and Engineering",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 2,
        "Hard Skills": ["Pipe Installation", "Leak Repair", "Plumbing Codes"],
        "Soft Skills": ["Problem-Solving", "Physical Stamina", "Customer Service"]
    },
    {
        "Job ID": 121,
        "Job Title": "Welder",
        "Industry": "Construction and Engineering",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 1,
        "Hard Skills": ["Arc Welding", "Metal Fabrication", "Blueprint Reading"],
        "Soft Skills": ["Attention to Detail", "Physical Stamina", "Teamwork"]
    },
    {
        "Job ID": 122,
        "Job Title": "Heavy Equipment Operator",
        "Industry": "Construction and Engineering",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 2,
        "Hard Skills": ["Equipment Maintenance", "Safety Protocols", "Site Preparation"],
        "Soft Skills": ["Coordination", "Attention to Detail", "Reliability"]
    },
    {
        "Job ID": 123,
        "Job Title": "Manufacturing Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Process Improvement", "Lean Manufacturing", "CAD/CAM"],
        "Soft Skills": ["Problem-Solving", "Innovation", "Teamwork"]
    },
    {
        "Job ID": 124,
        "Job Title": "Quality Assurance Engineer",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 2,
        "Hard Skills": ["Testing Protocols", "Statistical Analysis", "Quality Standards"],
        "Soft Skills": ["Attention to Detail", "Analytical Thinking", "Communication"]
    },
    {
        "Job ID": 125,
        "Job Title": "Factory Supervisor",
        "Industry": "Construction and Engineering",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Production Scheduling", "Inventory Management", "Safety Compliance"],
        "Soft Skills": ["Leadership", "Decision-Making", "Conflict Resolution"]
    },
    {
        "Job ID": 126,
        "Job Title": "Environmental Scientist",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Environmental Impact Assessment", "GIS", "Data Analysis"],
        "Soft Skills": ["Critical Thinking", "Communication", "Attention to Detail"]
    },
    {
        "Job ID": 127,
        "Job Title": "Sustainability Specialist",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Sustainability Reporting", "Life Cycle Assessment", "Carbon Footprinting"],
        "Soft Skills": ["Strategic Planning", "Stakeholder Engagement", "Problem-Solving"]
    },
    {
        "Job ID": 128,
        "Job Title": "Renewable Energy Engineer",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 2,
        "Hard Skills": ["Solar Energy Systems", "Wind Turbine Design", "Energy Modeling"],
        "Soft Skills": ["Innovation", "Teamwork", "Project Management"]
    },
    {
        "Job ID": 129,
        "Job Title": "Marine Biologist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Marine Ecosystems Analysis", "Field Sampling", "Statistical Modeling"],
        "Soft Skills": ["Patience", "Observational Skills", "Collaboration"]
    },
    {
        "Job ID": 130,
        "Job Title": "Climate Change Analyst",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Climate Modeling", "Policy Analysis", "Data Visualization"],
        "Soft Skills": ["Analytical Thinking", "Communication", "Adaptability"]
    },
    {
        "Job ID": 131,
        "Job Title": "Water Resource Specialist",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Hydrological Modeling", "Water Quality Analysis", "GIS"],
        "Soft Skills": ["Problem-Solving", "Teamwork", "Attention to Detail"]
    },
    {
        "Job ID": 132,
        "Job Title": "Ecologist",
        "Industry": "Science and Research",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Ecosystem Assessment", "Field Research", "Species Identification"],
        "Soft Skills": ["Patience", "Observation", "Communication"]
    },
    {
        "Job ID": 133,
        "Job Title": "Agricultural Scientist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Crop Science", "Soil Analysis", "Agricultural Biotechnology"],
        "Soft Skills": ["Research Skills", "Problem-Solving", "Collaboration"]
    },
    {
        "Job ID": 134,
        "Job Title": "Wildlife Conservationist",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Wildlife Tracking", "Habitat Restoration", "Conservation Planning"],
        "Soft Skills": ["Passion for Wildlife", "Patience", "Teamwork"]
    },
    {
        "Job ID": 135,
        "Job Title": "Environmental Policy Analyst",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Policy Analysis", "Regulatory Compliance", "Stakeholder Consultation"],
        "Soft Skills": ["Critical Thinking", "Communication", "Negotiation"]
    },
    {
        "Job ID": 136,
        "Job Title": "Hotel Manager",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Hospitality Management", "Revenue Management", "Customer Service"],
        "Soft Skills": ["Leadership", "Communication", "Problem-Solving"]
    },
    {
        "Job ID": 137,
        "Job Title": "Travel Agent",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Travel Booking Systems", "Itinerary Planning", "Customer Service"],
        "Soft Skills": ["Communication", "Sales Skills", "Attention to Detail"]
    },
    {
        "Job ID": 138,
        "Job Title": "Tour Guide",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "None",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 0,
        "Hard Skills": ["Local Knowledge", "Multilingual Skills", "First Aid"],
        "Soft Skills": ["Public Speaking", "Enthusiasm", "Interpersonal Skills"]
    },
    {
        "Job ID": 139,
        "Job Title": "Event Planner",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Event Coordination", "Vendor Management", "Budgeting"],
        "Soft Skills": ["Organization", "Creativity", "Time Management"]
    },
    {
        "Job ID": 140,
        "Job Title": "Flight Attendant",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Safety Procedures", "Customer Service", "First Aid"],
        "Soft Skills": ["Communication", "Patience", "Teamwork"]
    },
    {
        "Job ID": 141,
        "Job Title": "Pilot",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 5,
        "Hard Skills": ["Flight Operations", "Navigation Systems", "Aircraft Systems"],
        "Soft Skills": ["Decision-Making", "Leadership", "Calm Under Pressure"]
    },
    {
        "Job ID": 142,
        "Job Title": "Cruise Director",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Entertainment Coordination", "Guest Relations", "Event Management"],
        "Soft Skills": ["Leadership", "Communication", "Creativity"]
    },
    {
        "Job ID": 143,
        "Job Title": "Resort Manager",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Resort Operations", "Hospitality Management", "Revenue Optimization"],
        "Soft Skills": ["Leadership", "Customer Service", "Problem-Solving"]
    },
    {
        "Job ID": 144,
        "Job Title": "Concierge",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Guest Services", "Local Knowledge", "Reservation Systems"],
        "Soft Skills": ["Communication", "Problem-Solving", "Attention to Detail"]
    },
    {
        "Job ID": 145,
        "Job Title": "Restaurant Manager",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Restaurant Operations", "Inventory Management", "Staff Scheduling"],
        "Soft Skills": ["Leadership", "Customer Service", "Teamwork"]
    },
    {
        "Job ID": 146,
        "Job Title": "Chemist",
        "Industry": "Science and Research",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Chemical Analysis", "Laboratory Techniques", "Data Interpretation"],
        "Soft Skills": ["Attention to Detail", "Analytical Thinking", "Problem-Solving"]
    },
    {
        "Job ID": 147,
        "Job Title": "Physicist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Theoretical Modeling", "Data Analysis", "Computational Physics"],
        "Soft Skills": ["Critical Thinking", "Curiosity", "Patience"]
    },
    {
        "Job ID": 148,
        "Job Title": "Biologist",
        "Industry": "Science and Research",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 2,
        "Hard Skills": ["Microscopy", "DNA Analysis", "Statistical Methods"],
        "Soft Skills": ["Observation", "Research Skills", "Collaboration"]
    },
    {
        "Job ID": 149,
        "Job Title": "Astronomer",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Telescope Operation", "Astrophysical Modeling", "Data Analysis"],
        "Soft Skills": ["Patience", "Analytical Thinking", "Problem-Solving"]
    },
    {
        "Job ID": 150,
        "Job Title": "Mathematician",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Mathematical Modeling", "Statistical Analysis", "Algorithm Development"],
        "Soft Skills": ["Logical Thinking", "Creativity", "Attention to Detail"]
    },
    {
        "Job ID": 151,
        "Job Title": "Statistician",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["Statistical Software", "Data Analysis", "Experimental Design"],
        "Soft Skills": ["Analytical Thinking", "Problem-Solving", "Communication"]
    },
    {
        "Job ID": 152,
        "Job Title": "Geneticist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Genomic Sequencing", "PCR", "Bioinformatics"],
        "Soft Skills": ["Research Skills", "Attention to Detail", "Collaboration"]
    },
    {
        "Job ID": 153,
        "Job Title": "Biomedical Engineer",
        "Industry": "Healthcare",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Medical Device Design", "Biomechanics", "Signal Processing"],
        "Soft Skills": ["Problem-Solving", "Teamwork", "Innovation"]
    },
    {
        "Job ID": 154,
        "Job Title": "Materials Scientist",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Materials Characterization", "Polymer Science", "Nanotechnology"],
        "Soft Skills": ["Research Skills", "Analytical Thinking", "Creativity"]
    },
    {
        "Job ID": 155,
        "Job Title": "Forensic Scientist",
        "Industry": "Science and Research",
        "Required Education": "Bachelor",
        "Required Degree Field": "Science",
        "Years of Experience": 3,
        "Hard Skills": ["DNA Analysis", "Toxicology", "Crime Scene Investigation"],
        "Soft Skills": ["Attention to Detail", "Critical Thinking", "Communication"]
    },
    {
        "Job ID": 156,
        "Job Title": "Retail Store Manager",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Inventory Management", "Sales Analysis", "Staff Training"],
        "Soft Skills": ["Leadership", "Customer Service", "Problem-Solving"]
    },
    {
        "Job ID": 157,
        "Job Title": "Cashier",
        "Industry": "Retail and Sales",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 0,
        "Hard Skills": ["Point of Sale Systems", "Cash Handling", "Product Knowledge"],
        "Soft Skills": ["Customer Service", "Attention to Detail", "Friendliness"]
    },
    {
        "Job ID": 158,
        "Job Title": "Visual Merchandiser",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Design",
        "Years of Experience": 2,
        "Hard Skills": ["Store Layout Design", "Display Creation", "Trend Analysis"],
        "Soft Skills": ["Creativity", "Attention to Detail", "Communication"]
    },
    {
        "Job ID": 159,
        "Job Title": "Customer Service Representative",
        "Industry": "Retail and Sales",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["CRM Software", "Order Processing", "Product Knowledge"],
        "Soft Skills": ["Communication", "Empathy", "Problem-Solving"]
    },
    {
        "Job ID": 160,
        "Job Title": "E-commerce Manager",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Online Marketing", "SEO", "Web Analytics"],
        "Soft Skills": ["Strategic Thinking", "Communication", "Adaptability"]
    },
    {
        "Job ID": 161,
        "Job Title": "Sales Associate",
        "Industry": "Retail and Sales",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Product Knowledge", "Sales Techniques", "Customer Relationship Management"],
        "Soft Skills": ["Communication", "Persuasion", "Friendliness"]
    },
    {
        "Job ID": 162,
        "Job Title": "Account Manager",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Client Relationship Management", "Sales Forecasting", "Contract Negotiation"],
        "Soft Skills": ["Communication", "Negotiation", "Problem-Solving"]
    },
    {
        "Job ID": 163,
        "Job Title": "Key Account Executive",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Account Strategy", "Revenue Growth", "Market Analysis"],
        "Soft Skills": ["Relationship Building", "Negotiation", "Strategic Thinking"]
    },
    {
        "Job ID": 164,
        "Job Title": "Retail Buyer",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Vendor Negotiation", "Inventory Planning", "Trend Forecasting"],
        "Soft Skills": ["Analytical Thinking", "Decision-Making", "Communication"]
    },
    {
        "Job ID": 165,
        "Job Title": "Merchandise Planner",
        "Industry": "Retail and Sales",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Demand Forecasting", "Inventory Optimization", "Sales Analysis"],
        "Soft Skills": ["Analytical Skills", "Attention to Detail", "Problem-Solving"]
    },
    {
        "Job ID": 166,
        "Job Title": "Public Policy Analyst",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Policy Research", "Legislative Analysis", "Stakeholder Engagement"],
        "Soft Skills": ["Critical Thinking", "Communication", "Negotiation"]
    },
    {
        "Job ID": 167,
        "Job Title": "Police Officer",
        "Industry": "Law and Government",
        "Required Education": "None",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Law Enforcement", "Criminal Investigation", "First Aid"],
        "Soft Skills": ["Communication", "Decision-Making", "Physical Fitness"]
    },
    {
        "Job ID": 168,
        "Job Title": "Firefighter",
        "Industry": "Law and Government",
        "Required Education": "None",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Fire Suppression", "Emergency Medical Response", "Hazardous Materials Handling"],
        "Soft Skills": ["Bravery", "Teamwork", "Physical Fitness"]
    },
    {
        "Job ID": 169,
        "Job Title": "Diplomat",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences"
    },
    {
        "Job ID": 170,
        "Job Title": "Immigration Officer",
        "Industry": "Law and Government",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Legal Documentation", "Immigration Laws", "Case Management"],
        "Soft Skills": ["Communication", "Empathy", "Attention to Detail"]
    },
    {
        "Job ID": 171,
        "Job Title": "Judge",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 5,
        "Hard Skills": ["Legal Research", "Judicial Procedures", "Courtroom Management"],
        "Soft Skills": ["Decision-Making", "Integrity", "Leadership"]
    },
    {
        "Job ID": 172,
        "Job Title": "Paralegal",
        "Industry": "Law and Government",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Legal Writing", "Document Drafting", "Case Preparation"],
        "Soft Skills": ["Organization", "Time Management", "Teamwork"]
    },
    {
        "Job ID": 173,
        "Job Title": "Legal Assistant",
        "Industry": "Law and Government",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 1,
        "Hard Skills": ["Legal Research", "Document Management", "Scheduling"],
        "Soft Skills": ["Communication", "Attention to Detail", "Adaptability"]
    },
    {
        "Job ID": 174,
        "Job Title": "Public Defender",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 4,
        "Hard Skills": ["Criminal Law", "Trial Advocacy", "Client Representation"],
        "Soft Skills": ["Empathy", "Public Speaking", "Resilience"]
    },
    {
        "Job ID": 175,
        "Job Title": "Urban Policy Planner",
        "Industry": "Law and Government",
        "Required Education": "Master",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 3,
        "Hard Skills": ["Policy Analysis", "Urban Development", "GIS Mapping"],
        "Soft Skills": ["Critical Thinking", "Collaboration", "Negotiation"]
    },
    {
        "Job ID": 176,
        "Job Title": "Supply Chain Manager",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Inventory Management", "Logistics Software", "Supplier Negotiation"],
        "Soft Skills": ["Leadership", "Problem-Solving", "Strategic Thinking"]
    },
    {
        "Job ID": 177,
        "Job Title": "Logistics Coordinator",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Shipping Coordination", "Route Planning", "ERP Systems"],
        "Soft Skills": ["Organization", "Communication", "Multitasking"]
    },
    {
        "Job ID": 178,
        "Job Title": "Transportation Planner",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Traffic Analysis", "GIS Software", "Transport Modeling"],
        "Soft Skills": ["Analytical Thinking", "Teamwork", "Project Management"]
    },
    {
        "Job ID": 179,
        "Job Title": "Warehouse Manager",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Inventory Control", "Warehouse Software", "Safety Compliance"],
        "Soft Skills": ["Leadership", "Time Management", "Problem-Solving"]
    },
    {
        "Job ID": 180,
        "Job Title": "Freight Broker",
        "Industry": "Transportation and Logistics",
        "Required Education": "None",
        "Required Degree Field": "Business",
        "Years of Experience": 1,
        "Hard Skills": ["Freight Negotiation", "Load Board Management", "Shipping Regulations"],
        "Soft Skills": ["Communication", "Networking", "Persuasion"]
    },
    {
        "Job ID": 181,
        "Job Title": "Truck Driver",
        "Industry": "Transportation and Logistics",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 0,
        "Hard Skills": ["Vehicle Operation", "Route Navigation", "Cargo Handling"],
        "Soft Skills": ["Time Management", "Patience", "Safety Awareness"]
    },
    {
        "Job ID": 182,
        "Job Title": "Airline Operations Manager",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 4,
        "Hard Skills": ["Flight Scheduling", "Regulatory Compliance", "Operations Software"],
        "Soft Skills": ["Leadership", "Decision-Making", "Crisis Management"]
    },
    {
        "Job ID": 183,
        "Job Title": "Port Manager",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Port Operations", "Maritime Regulations", "Cargo Handling"],
        "Soft Skills": ["Leadership", "Strategic Planning", "Stakeholder Management"]
    },
    {
        "Job ID": 184,
        "Job Title": "Inventory Manager",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 3,
        "Hard Skills": ["Stock Control", "Demand Forecasting", "ERP Systems"],
        "Soft Skills": ["Attention to Detail", "Analytical Thinking", "Organization"]
    },
    {
        "Job ID": 185,
        "Job Title": "Procurement Officer",
        "Industry": "Transportation and Logistics",
        "Required Education": "Bachelor",
        "Required Degree Field": "Business",
        "Years of Experience": 2,
        "Hard Skills": ["Vendor Management", "Contract Negotiation", "Purchase Order Systems"],
        "Soft Skills": ["Negotiation", "Communication", "Cost Awareness"]
    },
    {
        "Job ID": 186,
        "Job Title": "AI Product Manager",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 4,
        "Hard Skills": ["Machine Learning", "Product Roadmapping", "Agile Development"],
        "Soft Skills": ["Leadership", "Strategic Thinking", "Stakeholder Management"]
    },
    {
        "Job ID": 187,
        "Job Title": "Prompt Engineer (for AI)",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Natural Language Processing", "AI Model Fine-Tuning", "Python"],
        "Soft Skills": ["Creativity", "Problem-Solving", "Attention to Detail"]
    },
    {
        "Job ID": 188,
        "Job Title": "Data Privacy Officer",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Information Systems",
        "Years of Experience": 5,
        "Hard Skills": ["GDPR Compliance", "Risk Assessment", "Data Encryption"],
        "Soft Skills": ["Ethical Judgment", "Communication", "Policy Development"]
    },
    {
        "Job ID": 189,
        "Job Title": "Robotics Engineer",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Robotics Programming", "CAD Design", "Control Systems"],
        "Soft Skills": ["Innovation", "Collaboration", "Problem-Solving"]
    },
    {
        "Job ID": 190,
        "Job Title": "VR/AR Developer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 2,
        "Hard Skills": ["Unity 3D", "3D Modeling", "C# Programming"],
        "Soft Skills": ["Creativity", "Teamwork", "User-Centric Thinking"]
    },
    {
        "Job ID": 191,
        "Job Title": "Quantum Computing Researcher",
        "Industry": "Science and Research",
        "Required Education": "Master",
        "Required Degree Field": "Science",
        "Years of Experience": 4,
        "Hard Skills": ["Quantum Algorithms", "Linear Algebra", "Qiskit"],
        "Soft Skills": ["Analytical Thinking", "Curiosity", "Persistence"]
    },
    {
        "Job ID": 192,
        "Job Title": "Drone Operator",
        "Industry": "Technology",
        "Required Education": "None",
        "Required Degree Field": "None",
        "Years of Experience": 1,
        "Hard Skills": ["Drone Piloting", "Aerial Photography", "FAA Regulations"],
        "Soft Skills": ["Precision", "Safety Awareness", "Adaptability"]
    },
    {
        "Job ID": 193,
        "Job Title": "Space Tourism Guide",
        "Industry": "Travel, Hospitality and Tourism",
        "Required Education": "Bachelor",
        "Required Degree Field": "Arts and Social Sciences",
        "Years of Experience": 2,
        "Hard Skills": ["Customer Service", "Safety Protocols", "Tour Coordination"],
        "Soft Skills": ["Communication", "Enthusiasm", "Public Speaking"]
    },
    {
        "Job ID": 194,
        "Job Title": "Smart Home Technician",
        "Industry": "Technology",
        "Required Education": "None",
        "Required Degree Field": "Engineering",
        "Years of Experience": 1,
        "Hard Skills": ["IoT Device Installation", "Network Configuration", "Troubleshooting"],
        "Soft Skills": ["Customer Service", "Problem-Solving", "Technical Aptitude"]
    },
    {
        "Job ID": 195,
        "Job Title": "Climate Tech Entrepreneur",
        "Industry": "Environmental and Sustainability",
        "Required Education": "Master",
        "Required Degree Field": "Business",
        "Years of Experience": 5,
        "Hard Skills": ["Sustainable Technologies", "Business Development", "Fundraising"],
        "Soft Skills": ["Innovation", "Risk-Taking", "Leadership"]
    },
    {
        "Job ID": 196,
        "Job Title": "Blockchain Product Manager",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Computing",
        "Years of Experience": 4,
        "Hard Skills": ["Blockchain Protocols", "Smart Contracts", "Decentralized Applications"],
        "Soft Skills": ["Strategic Vision", "Collaboration", "Market Analysis"]
    },
    {
        "Job ID": 197,
        "Job Title": "Autonomous Vehicle Engineer",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["Machine Learning", "Sensor Fusion", "ROS (Robot Operating System)"],
        "Soft Skills": ["Problem-Solving", "Teamwork", "Innovation"]
    },
    {
        "Job ID": 198,
        "Job Title": "IoT (Internet of Things) Engineer",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Engineering",
        "Years of Experience": 2,
        "Hard Skills": ["Embedded Systems", "Wireless Protocols", "Cloud Integration"],
        "Soft Skills": ["Analytical Thinking", "Collaboration", "Creativity"]
    },
    {
        "Job ID": 199,
        "Job Title": "Digital Twin Engineer",
        "Industry": "Technology",
        "Required Education": "Master",
        "Required Degree Field": "Engineering",
        "Years of Experience": 3,
        "Hard Skills": ["3D Modeling", "Simulation Software", "Data Analytics"],
        "Soft Skills": ["Attention to Detail", "Problem-Solving", "Interdisciplinary Collaboration"]
    },
    {
        "Job ID": 200,
        "Job Title": "Ethical Hacker",
        "Industry": "Technology",
        "Required Education": "Bachelor",
        "Required Degree Field": "Computing",
        "Years of Experience": 3,
        "Hard Skills": ["Penetration Testing", "Network Security", "Vulnerability Assessment"],
        "Soft Skills": ["Curiosity", "Ethical Judgment", "Problem-Solving"]
    }
]

In [491]:
df = pd.read_csv('job_data.csv')

df

Unnamed: 0,Job ID,Job Title,Industry,Required Education,Required Degree Field,Years of Experience,Hard Skills,Soft Skills
0,1,Software Engineer,Technology,Bachelor,Computing,2,"[""Python"", ""Java"", ""C++"", ""Git"", ""Version Cont...","['Problem-Solving', 'Teamwork']"
1,2,Frontend Developer,Technology,Bachelor,Computing,1,"[""HTML"", ""CSS"", ""JavaScript"", ""React"", ""TypeSc...","['Creativity', 'Attention to Detail']"
2,3,Backend Developer,Technology,Bachelor,Computing,2,"[""Java"", ""Python"", ""Node.js"", ""Spring Boot"", ""...","['Logical Thinking', 'Collaboration']"
3,4,Full Stack Developer,Technology,Bachelor,Computing,3,"[""JavaScript"", ""React"", ""Node.js"", ""MongoDB"", ...","['Adaptability', 'Communication']"
4,5,DevOps Engineer,Technology,Bachelor,Computing,3,"[""Docker"", ""Kubernetes"", ""CI/CD"", ""AWS"", ""Terr...","['Problem-Solving', 'Teamwork']"
...,...,...,...,...,...,...,...,...
195,196,Blockchain Product Manager,Technology,Master,Computing,4,"['Blockchain Protocols Ethereum, Solana', 'Sma...","['Strategic Vision', 'Collaboration', 'Market ..."
196,197,Autonomous Vehicle Engineer,Technology,Master,Engineering,3,"['Machine Learning', 'Sensor Fusion LIDAR, RAD...","['Problem-Solving', 'Teamwork', 'Innovation']"
197,198,IoT (Internet of Things) Engineer,Technology,Bachelor,Engineering,2,"['Embedded Systems Programming', 'Wireless Pro...","['Analytical Thinking', 'Collaboration', 'Crea..."
198,199,Digital Twin Engineer,Technology,Master,Engineering,3,"['3D Modeling Software SolidWorks, Revit', 'Si...","['Attention to Detail', 'Problem-Solving', 'In..."


### Create a resume parser that converts student resume into a dataframe

In [481]:
# Using pdfminer.six to extract text from resume
text = extract_text("Dione Tay's Resume.pdf")
print(text)



DIONE TAY JIA WEN 

Blk 7 Bedok Reservoir View #09-03 

Telephone: (+65) 9755 3295 

Email: Dionetay00@gmail.com 

LinkedIn: https://www.linkedin.com/in/dione-tay-320b371ab 

EDUCATION 

Nanyang Technological University 

Aug 2021 – April 2024 

Bachelor of Business Specialising in Marketing with Honours (DISTINCTION) 

Groupe KEDGE Business School (Semester Exchange in France)                     Aug 2023 – Dec 2023 

Singapore Polytechnic 

Apr 2018 – April 2021 

Diploma in Business Administration (Marketing Management) 

ACADEMIC PROJECT  

Singapore Polytechnic, Business Administration (Marketing) 
ScanLiving Brand Strategy Competition 
•  Gained insights into marketing visualization and planning strategy 
•  Directed a team to conduct market research and analyse data 

Mar 2019 – Oct 2019 

P&G Philippines Healthcare – Branding Strategy and Marketing Plan 
•  Collaborated with students from De La Salle University students in Manila 
•  Built good rapport with counterparts and acq

## Using Spacy and Regex to capture student's information from Resume

In [482]:
class StudentInfoExtractor:
    def __init__(self, text):
        self.nlp = spacy.load("en_core_web_lg")
        self.skill_extractor = SkillExtractor(self.nlp, SKILL_DB, PhraseMatcher)
        self.soft_skills_list = self.load_soft_skills_list()
        self.text = text
    
    # Rule-based Heuristics: Assumes that the name is in the first line, contains at least 2 words, and it starts with a captial letter
    def extract_name_from_top_line(self, text):
        lines = text.strip().splitlines()
        if not lines:
            return None

        first_line = lines[0].strip()  # remove leading/trailing spaces

        # Check if it's likely to be a name: 2+ words, all start with uppercase
        if len(first_line.split()) >= 2 and all(w[0].isupper() for w in first_line.split() if w[0].isalpha()):
            return first_line.strip()  # just in case, double-strip

        return None
    
    # Regular Expression
    def extract_emails(self, text):
        pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[A-Za-z]{2,}"
        matches = re.findall(pattern, text)
        return matches if matches else None
    
    # Regular Expressions
    def extract_contact_information(self, text):
        contact_number = None

        # Updated pattern to match international numbers with + sign and spacing
        pattern = r'(?:\+?\d{1,3}[-.\s]?)?(?:\d{3,4}[-.\s]?){2,3}\d{2,4}'

        match = re.search(pattern, text)
        if match:
            contact_number = match.group().strip()
        return contact_number
    
    def capture_education_level(self, text):
        text = text.lower()

        if "phd" in text or "doctor of philosophy" in text:
            return "PhD"
        elif "master" in text or "masters" in text or "master's" in text or "msc" in text:
            return "Master"
        elif "bachelor" in text or "bachelor's" in text or "bsc" in text:
            return "Bachelor"
        elif "polytechnic" in text or "poly" in text:
            return "Polytechnic"
        elif "diploma" in text:
            return "Diploma"
        elif "high school" in text or "junior college" in text or "jc" in text:
            return "High School"
        else:
            return None
        
    def capture_degree_field(self, text):
        pattern = r'(?:Bachelor|Master|PhD|Bachelors|Bachelor\'s|Master\'s|Diploma|Polytechnic|Higher Diploma|Advanced Diploma)[^.\n]*? in ([A-Za-z &\-\/]+)'

        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            degree_field = match.group(1).strip()
            return degree_field

        return None
    
    def capture_university_name(self, text):
        top_290_universities = [
        "Massachusetts Institute of Technology (MIT)",
        "Imperial College London",
        "University of Oxford",
        "Harvard University",
        "University of Cambridge",
        "Stanford University",
        "ETH Zurich – Swiss Federal Institute of Technology",
        "National University of Singapore (NUS)",
        "University College London (UCL)",
        "California Institute of Technology (Caltech)",
        "University of Pennsylvania",
        "University of California, Berkeley (UCB)",
        "The University of Melbourne",
        "Peking University",
        "Nanyang Technological University, Singapore (NTU Singapore)",
        "Cornell University",
        "The University of Hong Kong",
        "The University of Sydney",
        "The University of New South Wales (UNSW Sydney)",
        "Tsinghua University",
        "University of Chicago",
        "Princeton University",
        "Yale University",
        "Université PSL",
        "University of Toronto",
        "EPFL – École polytechnique fédérale de Lausanne",
        "The University of Edinburgh",
        "Technical University of Munich",
        "McGill University",
        "Australian National University (ANU)",
        "Seoul National University",
        "Johns Hopkins University",
        "The University of Tokyo",
        "Columbia University",
        "The University of Manchester",
        "The Chinese University of Hong Kong (CUHK)",
        "Monash University",
        "University of British Columbia",
        "Fudan University",
        "King's College London",
        "The University of Queensland",
        "University of California, Los Angeles (UCLA)",
        "New York University (NYU)",
        "University of Michigan-Ann Arbor",
        "Shanghai Jiao Tong University",
        "Institut Polytechnique de Paris",
        "The Hong Kong University of Science and Technology",
        "Zhejiang University",
        "Delft University of Technology",
        "Kyoto University",
        "Northwestern University",
        "The London School of Economics and Political Science (LSE)",
        "KAIST - Korea Advanced Institute of Science & Technology",
        "University of Bristol",
        "University of Amsterdam",
        "Yonsei University",
        "The Hong Kong Polytechnic University",
        "Carnegie Mellon University",
        "Ludwig-Maximilians-Universität München",
        "Universiti Malaya (UM)",
        "Duke University",
        "City University of Hong Kong",
        "KU Leuven",
        "Sorbonne University",
        "The University of Auckland",
        "University of Texas at Austin",
        "Korea University",
        "National Taiwan University (NTU)",
        "The University of Warwick",
        "University of Illinois at Urbana-Champaign",
        "Universidad de Buenos Aires (UBA)",
        "University of California, San Diego (UCSD)",
        "Université Paris-Saclay",
        "KTH Royal Institute of Technology",
        "Lund University",
        "University of Washington",
        "The University of Western Australia",
        "University of Glasgow",
        "Brown University",
        "University of Birmingham",
        "University of Southampton",
        "The University of Adelaide",
        "University of Leeds",
        "Universität Heidelberg",
        "Tokyo Institute of Technology (Tokyo Tech)",
        "Osaka University",
        "Trinity College Dublin, The University of Dublin",
        "University of Technology Sydney",
        "Durham University",
        "Pennsylvania State University",
        "Purdue University",
        "Universidade de São Paulo",
        "Pontificia Universidad Católica de Chile (UC)",
        "Lomonosov Moscow State University",
        "Universidad Nacional Autónoma de México (UNAM)",
        "University of Alberta",
        "Freie Universitaet Berlin",
        "Pohang University of Science And Technology (POSTECH)",
        "RWTH Aachen University",
        "University of Copenhagen",
        "King Fahd University of Petroleum & Minerals (KFUPM)",
        "Karlsruhe Institute of Technology (KIT)",
        "Uppsala University",
        "University of St Andrews",
        "The University of Sheffield",
        "Utrecht University",
        "Tohoku University",
        "Boston University",
        "University of Nottingham",
        "Technical University of Denmark",
        "University of Zurich",
        "Politecnico di Milano",
        "Aalto University",
        "Georgia Institute of Technology",
        "University of Waterloo",
        "University of Wisconsin-Madison",
        "University of Helsinki",
        "Indian Institute of Technology Bombay (IITB)",
        "University of Oslo",
        "Queen Mary University of London",
        "Western University",
        "Qatar University",
        "RMIT University",
        "Sungkyunkwan University (SKKU)",
        "University of Southern California",
        "Humboldt-Universität zu Berlin",
        "University College Dublin",
        "Stockholm University",
        "Newcastle University",
        "University of California, Davis",
        "University of Basel",
        "Sapienza University of Rome",
        "Alma Mater Studiorum - Università di Bologna",
        "Macquarie University",
        "University of Science and Technology of China",
        "Eindhoven University of Technology",
        "University of Vienna",
        "Universiti Kebangsaan Malaysia (UKM)",
        "Chalmers University of Technology",
        "Universidad de Chile",
        "Lancaster University",
        "Leiden University",
        "Rice University",
        "University of Bern",
        "University of Groningen",
        "University of Pittsburgh",
        "University of Reading",
        "University of Twente",
        "University of York",
        "Vrije Universiteit Amsterdam",
        "Wageningen University & Research",
        "Aarhus University",
        "Arizona State University",
        "Autonomous University of Barcelona",
        "Birkbeck, University of London",
        "Brandeis University",
        "Case Western Reserve University",
        "Chiba University",
        "Colorado State University",
        "Curtin University",
        "Dalhousie University",
        "Deakin University",
        "Drexel University",
        "Ecole des Ponts ParisTech",
        "Ecole Normale Supérieure de Lyon",
        "Ecole Polytechnique",
        "Emory University",
        "Florida State University",
        "George Washington University",
        "Ghent University",
        "Griffith University",
        "Hanyang University",
        "Heriot-Watt University",
        "Hokkaido University",
        "Indian Institute of Science",
        "Indian Institute of Technology Delhi (IITD)",
        "Indian Institute of Technology Kanpur (IITK)",
        "Indian Institute of Technology Kharagpur (IITKGP)",
        "Indian Institute of Technology Madras (IITM)",
        "Indian Institute of Technology Roorkee (IITR)",
        "Indiana University Bloomington",
        "Iowa State University",
        "James Cook University",
        "Jilin University",
        "Kobe University",
        "Kyushu University",
        "La Trobe University",
        "Laval University",
        "Lomonosov Moscow State University",
        "Louisiana State University",
        "Lund University",
        "Mahidol University",
        "Michigan State University",
        "Nagoya University",
        "National Autonomous University of Mexico (UNAM)",
        "National Cheng Kung University",
        "National Chiao Tung University",
        "National Tsing Hua University",
        "Newcastle University",
        "North Carolina State University",
        "Norwegian University of Science and Technology",
        "Ohio State University",
        "Oregon State University",
        "Osaka University",
        "Peking University",
        "Pennsylvania State University",
        "Politecnico di Torino",
        "Pontificia Universidad Católica de Chile",
        "Purdue University",
        "Queen's University",
        "Rensselaer Polytechnic Institute",
        "RMIT University",
        "Rutgers University–New Brunswick",
        "San Diego State University",
        "Seoul National University",
        "Shanghai Jiao Tong University",
        "Sichuan University",
        "Simon Fraser University",
        "Sofia University",
        "Sogang University",
        "South China University of Technology",
        "Southern Methodist University",
        "Stellenbosch University",
        "Stony Brook University",
        "Sungkyunkwan University",
        "Sun Yat-sen University",
        "Syracuse University",
        "Technical University of Munich",
        "Technion - Israel Institute of Technology",
        "Technische Universität Berlin",
        "Technische Universität Dresden",
        "Texas A&M University",
        "The Chinese University of Hong Kong",
        "The Hong Kong Polytechnic University",
        "The Hong Kong University of Science and Technology",
        "The University of Adelaide",
        "The University of Auckland",
        "The University of Hong Kong",
        "The University of Melbourne",
        "The University of New South Wales",
        "The University of Queensland",
        "The University of Sydney",
        "The University of Tokyo",
        "The University of Western Australia",
        "Tianjin University",
        "Tohoku University",
        "Tokyo Institute of Technology",
        "Tongji University",
        "Tsinghua University",
        "Tufts University",
        "Tulane University",
        "UCL (University College London)",
        "Universidad Autónoma de Madrid",
        "Universidad de Buenos Aires",
        "Universidad de Chile",
        "Universidad Nacional Autónoma de México",
        "Universidade de São Paulo",
        "Université de Montréal",
        "Université Laval",
        "Université Paris-Saclay",
        "Université PSL",
        "University College Dublin",
        "University of Alberta",
        "University of Amsterdam",
        "University of Arizona",
        "University of Basel",
        "University of Bath",
        "University of Bergen",
        "University of Birmingham",
        "University of Bristol",
        "University of British Columbia",
        "University of Calgary",
        "University of California, Berkeley",
        "University of California, Davis",
        "University of California, Irvine",
        "University of California, Los Angeles",
        "University of California, San Diego",
        "University of California, Santa Barbara",
        "University of Cambridge",
        "University of Cape Town",
        "University of Colorado Boulder",
        "University of Copenhagen",
        "University of Edinburgh",
        "University of Florida",
        "University of Geneva",
        "University of Glasgow",
        "University of Göttingen",
        "University of Groningen",
        "University of Helsinki",
        "University of Hong Kong",
        ]
        
        lines = text.splitlines()
        best_match = process.extractOne(
            query=' '.join(lines),
            choices = top_290_universities,
            scorer = fuzz.partial_ratio,
            score_cutoff = 80  # Only return if match is strong enough
        )

        if best_match:
            return best_match[0]  # university name
        return None
    
    def capture_gpa_or_classification(self, text):
        # 1. Match GPA (e.g., GPA: 4.5)
        gpa_pattern = r"GPA\s*[:\-]?\s*(\d\.\d{1,2})"
        match = re.search(gpa_pattern, text, re.IGNORECASE)
        if match:
            return f"{match.group(1)}"

        # 2. Match Honours / Merit classifications
        honours_pattern = (
            r"(Honours with Highest Distinction|Honors with Highest Distinction|"
            r"Honours with Distinction|Honors with Distinction|"
            r"Honours with Merit|Honors with Merit|"
            r"Honours|Honors|"
            r"Graduated with Merit|with Merit)"
        )
        match = re.search(honours_pattern, text, re.IGNORECASE)
        if match:
            return f"{match.group(1)}"

        # 3. Match graduation without honours
        no_honours_pattern = r"(without Honours|without Honors|non-honours|non-honors)"
        match = re.search(no_honours_pattern, text, re.IGNORECASE)
        if match:
            return "Graduated without Honours"
        return None
    
    def extract_work_experience_dates(self, text):
        # Step 1: Get work experience section only
        work_section = ""
        lines = text.splitlines()
        in_work_section = False

        for line in lines:
            if "work experience" in line.lower() or "work" in line.lower() or "experience" in line.lower():
                in_work_section = True
                continue
            if in_work_section and (line.strip().lower().startswith("education") or "project" in line.lower()):
                break
            if in_work_section:
                work_section += line + "\n"

        # Step 2: Extract date ranges like "Sep 2024 – Jan 2025"
        date_pattern = re.findall(r'([A-Za-z]+\s\d{4})\s*[-–]\s*([A-Za-z]+\s\d{4})', work_section)

        total_months = 0
        for start_str, end_str in date_pattern:
            try:
                start_date = parser.parse(start_str)
                end_date = parser.parse(end_str)
                months = (end_date.year - start_date.year) * 12 + (end_date.month - start_date.month) + 1
                total_months += months
            except:
                continue

        # Convert months to years
        total_years = round(total_months / 12, 1)
        return total_years
    
    def capture_hard_skills(self, text):
        skill_extractor = self.skill_extractor
        annotations = skill_extractor.annotate(text)
        full_matches = annotations['results']['full_matches']
        hard_skill_phrases = set() # to avoid duplicates

        for item in full_matches:
            hard_skill_phrases.add(item['doc_node_value'])

        hard_skill_list = sorted(hard_skill_phrases)

        return hard_skill_list
    
    def load_soft_skills_list(self):
        return [
        "communication", "teamwork", "problem solving", "adaptability", "leadership",
        "creativity", "empathy", "work ethic", "critical thinking", "interpersonal skills",
        "time management", "attention to detail", "collaboration", "resilience", "flexibility",
        "self-motivation", "integrity", "decision-making", "emotional intelligence", "organization",
        "accountability", "conflict resolution", "stress management", "public speaking", "negotiation",
        "patience", "active listening", "persuasion", "cultural awareness", "constructive feedback",
        "initiative", "self-awareness", "positivity", "goal setting", "multitasking",
        "mentoring", "influence", "diplomacy", "respectfulness", "helpfulness",
        "accepting feedback", "tolerance", "relationship building", "customer focus", "self-regulation",
        "team building", "managing ambiguity", "perspective-taking", "coaching", "follow-through",
        "professionalism", "humility", "dependability", "result orientation", "receptiveness",
        "curiosity", "collaborative mindset", "resourcefulness", "enthusiasm", "dedication",
        "learning agility", "perseverance", "grit", "open-mindedness", "confidence",
        "clarity", "fairness", "courtesy", "approachability", "perspicacity",
        "mindfulness", "tact", "observational skills", "analytical thinking", "prioritization",
        "risk-taking", "strategic thinking", "proactiveness", "goal orientation", "concentration",
        "body language awareness", "presentation skills", "inclusiveness", "team spirit", "loyalty",
        "self-confidence", "service orientation", "engagement", "civic-mindedness", "discipline",
        "cooperation", "innovation", "pragmatism", "emotional regulation", "humor",
        "inspirational skills", "ethical judgment", "self-reflection", "being a good listener", "decisiveness",
        "cross-functional collaboration", "reliability", "respect for diversity", "multicultural competence", "confidentiality",
        "time awareness", "efficiency", "sociability", "brainstorming", "inquiry",
        "context awareness", "diligence", "goal alignment", "people management", "service mindset",
        "networking", "stress tolerance", "discipline", "optimism", "tenacity",
        "giving recognition", "peer support", "peer learning", "team synergy", "intrapersonal skills",
        "systems thinking", "respect for others’ time", "customer-centricity", "self-discipline", "project ownership",
        "volunteering spirit", "internal motivation", "problem sensitivity", "personal initiative", "conflict management",
        "clarity in writing", "initiative-taking", "constructive criticism", "mental flexibility", "learning from failure",
        "engaging others", "facilitation skills", "resolving ambiguity", "diversity sensitivity", "negotiation skills",
        "peer mentoring", "boundary-setting", "social awareness", "building rapport", "relationship management",
        "peer collaboration", "cohesiveness", "respectful disagreement", "team facilitation", "active contribution",
        "learning from feedback", "conversational skills", "resilience under pressure", "persistence", "evaluative thinking",
        "analytical listening", "recognizing bias", "maintaining composure", "ethical thinking", "managing disappointment",
        "seeking help", "managing expectations", "setting boundaries", "vision articulation", "persuasive communication",
        "providing encouragement", "giving constructive criticism", "receiving criticism well", "avoiding gossip", "trustworthiness",
        "attention management", "scheduling", "habit formation", "workflow optimization", "time blocking",
        "crisis management", "adjusting priorities", "self-reinforcement", "self-appraisal", "welcoming change",
        "mental clarity", "social perceptiveness", "awareness of nonverbal cues", "emotional resilience", "adaptive learning",
        "clarifying assumptions", "welcoming feedback", "growth mindset", "sensemaking", "giving appreciation",
        "bridging communication gaps", "authenticity", "initiative in learning", "consensus building", "delegation",
        "mobilizing teams", "boundary management", "value alignment", "feedback looping", "improvisation",
        "time estimation", "respect for protocols", "collaborative problem solving", "prioritizing well-being", "nurturing others",
        "goal visualization", "trust building", "role flexibility", "proximity management", "credibility",
        "task ownership", "developing others", "preventing burnout", "leading by example", "peer evaluation",
        "handling rejection", "contextual thinking", "personal branding", "emotional balance", "listening with empathy",
        "assertiveness", "goal tracking", "conflict prevention", "solution orientation", "boundary awareness",
        "change management", "attention to social cues", "understanding group dynamics", "decision confidence", "value-driven behavior",
        "habitual consistency", "peer encouragement", "reframing", "handling failure", "modeling integrity",
        "noticing others’ strengths", "leveraging diversity", "self-compassion", "critiquing ideas not people", "respect for hierarchy",
        "adaptation to feedback", "reducing misunderstandings", "thinking before speaking", "soft assertiveness", "non-defensive responses",
        "maintaining focus", "quiet leadership", "noticing patterns", "timing your input", "emotional containment",
        "asking meaningful questions", "strategic compromise", "positive reinforcement", "speaking up", "discretion",
        "peer empowerment", "energizing others", "knowing your audience", "connecting ideas", "avoiding distractions",
        "team accountability", "handling ethical dilemmas", "co-creation", "learning how to learn", "transparent communication"
        ]
    
    def capture_soft_skills(self, text, similarity_threshold = 0.7):
        nlp = self.nlp  # Make sure this model is installed!
        matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
        patterns = [self.nlp.make_doc(skill) for skill in self.soft_skills_list]
        matcher.add("SOFT_SKILLS", patterns)

        doc = nlp(text)

        # --- Phrase Matching ---
        matched_skills = set()
        matches = matcher(doc)
        for match_id, start, end in matches:
            span = doc[start:end]
            matched_skills.add(span.text.lower())

        # --- Vector Similarity Matching ---
        for skill in self.soft_skills_list:
            skill_doc = nlp(skill)
            # Check if both documents have vectors
            if doc.has_vector and skill_doc.has_vector:
                similarity = doc.similarity(skill_doc)
                if similarity >= similarity_threshold:
                    matched_skills.add(skill.lower())

        return sorted(matched_skills)
    
    def extract_all_info(self):
        try:
            return pd.DataFrame([{
                'Name': self.extract_name_from_top_line(self.text) or '',
                'Email': self.extract_emails(self.text) or [],
                'Contact Information': self.extract_contact_information(self.text) or '',
                'Education Level': self.capture_education_level(self.text) or '',
                'Degree Field': self.capture_degree_field(self.text) or '',
                'University': self.capture_university_name(self.text) or '',
                'GPA': self.capture_gpa_or_classification(self.text) or '',
                'Work Experience': self.extract_work_experience_dates(self.text) or 0,
                'Hard Skills': self.capture_hard_skills(self.text) or [],
                'Soft Skills': self.capture_soft_skills(self.text) or []
            }])
        except Exception as e:
            print(f"[ERROR] Failed to extract info: {e}")
            return pd.DataFrame()

extractor = StudentInfoExtractor(text)
nlp_df = extractor.extract_all_info()

nlp_df

loading full_matcher ...
loading abv_matcher ...
loading full_uni_matcher ...
loading low_form_matcher ...
loading token_matcher ...


Unnamed: 0,Name,Email,Contact Information,Education Level,Degree Field,University,GPA,Work Experience,Hard Skills,Soft Skills
0,DIONE TAY JIA WEN,[Dionetay00@gmail.com],,Bachelor,Marketing with Honours,Yale University,Honours,1.1,"[advertising campaign, brand awareness, brand ...","[attention management, change management, coll..."


## Using LLM to extract student's information from resume

In [88]:
prompt = f"""
You are an expert headhunting and recruitment specialist. Your task is to extract **structured candidate information** from the resume text provided below.

Extract the following details as accurately as possible:

- Name (full legal name of the candidate)
- Email (all email address)
- Contact Information (include phone number if available)
- Education Level (e.g., Bachelor, Master, PhD)
- Degree Field (e.g., Data Science, Computer Science, Economics)
- University Name (full name of the institution attended)
- GPA (only if explicitly stated)
- Work Experience (total number of years, based on roles mentioned)
- Hard Skills (list of domain-specific or technical competencies)
- Soft Skills (list of interpersonal or transferable attributes)

**Resume Text:**
{text}

Respond strictly in the following format with no additional comments or explanations:

Name:
Email:
Contact Information:
Education Level:
Degree Field:
University Name:
GPA:
Work Experience (years):
Hard Skills:
Soft Skills:
"""
client = OpenAI(
        api_key = "sk-06cb346d24fe4b36bd7b8b9fe91eaff0",
        base_url="https://api.deepseek.com")

response = client.chat.completions.create(
    model = "deepseek-chat",
    messages = [
        {"role": "system", "content": "You are an extremely experienced head hunting specialist, that is experienced with reading resumes."},
        {"role": "user", "content": prompt},
    ],
    stream = False
)

column_names = ['Name', 'Email', 'Contact Information','Education Level', 'Degree Field', 'University', 'GPA',
                'Work Experience', 'Hard Skills', 'Soft Skills']
student_df = pd.DataFrame(columns=column_names)

new_student = {}

for col in column_names:
    pattern = rf"{col}.*:\s*?(.*)"
    match = re.search(pattern, response.choices[0].message.content)
    if match:
        value = match.group(1).strip()
        new_student[col] = value
    else:
        new_student[col] = ""

new_student['Work Experience'] = float(new_student['Work Experience'])
new_student = pd.DataFrame([new_student])
student_df = pd.concat([student_df, new_student], ignore_index=True)
student_df

  student_df = pd.concat([student_df, new_student], ignore_index=True)


Unnamed: 0,Name,Email,Contact Information,Education Level,Degree Field,University,GPA,Work Experience,Hard Skills,Soft Skills
0,Tay Zhi Wen Jeremiah,"tayjeremiah2000@gmail.com, e0726076@u.nus.edu",+65 86125565,Bachelor,Data Science and Analytics (Hons),National University of Singapore,Honors with Distinction,1.5,"Python, LangChain, RAG, LLM, Power BI, Regex, ...","Collaboration, Knowledge Sharing, Problem Solv..."


In [73]:
column_names = ['Name', 'Education Level', 'Degree Field', 'University', 'GPA',
                'Work Experience', 'Hard Skills', 'Soft Skills']
student_df = pd.DataFrame(columns=column_names)

new_student = {}

for col in column_names:
    pattern = rf"{col}.*:\s*?(.*)"
    match = re.search(pattern, response.choices[0].message.content)
    if match:
        value = match.group(1).strip()
        new_student[col] = value
    else:
        new_student[col] = ""

new_student['Work Experience'] = float(new_student['Work Experience'])
new_student = pd.DataFrame([new_student])
student_df = pd.concat([student_df, new_student], ignore_index=True)

  student_df = pd.concat([student_df, new_student], ignore_index=True)


## Content-Based Recommendation using Cosine Similarity to Compute Relevance Scores

In [483]:
job_df = pd.read_csv('job_data.csv')
job_df["Hard Skills"] = job_df["Hard Skills"].apply(ast.literal_eval)
job_df["Soft Skills"] = job_df["Soft Skills"].apply(ast.literal_eval)
job_df

Unnamed: 0,Job ID,Job Title,Industry,Required Education,Required Degree Field,Years of Experience,Hard Skills,Soft Skills
0,1,Software Engineer,Technology,Bachelor,Computing,2,"[Python, Java, C++, Git, Version Control, Algo...","[Problem-Solving, Teamwork]"
1,2,Frontend Developer,Technology,Bachelor,Computing,1,"[HTML, CSS, JavaScript, React, TypeScript, Res...","[Creativity, Attention to Detail]"
2,3,Backend Developer,Technology,Bachelor,Computing,2,"[Java, Python, Node.js, Spring Boot, REST APIs...","[Logical Thinking, Collaboration]"
3,4,Full Stack Developer,Technology,Bachelor,Computing,3,"[JavaScript, React, Node.js, MongoDB, Express....","[Adaptability, Communication]"
4,5,DevOps Engineer,Technology,Bachelor,Computing,3,"[Docker, Kubernetes, CI/CD, AWS, Terraform, Je...","[Problem-Solving, Teamwork]"
...,...,...,...,...,...,...,...,...
195,196,Blockchain Product Manager,Technology,Master,Computing,4,"[Blockchain Protocols Ethereum, Solana, Smart ...","[Strategic Vision, Collaboration, Market Analy..."
196,197,Autonomous Vehicle Engineer,Technology,Master,Engineering,3,"[Machine Learning, Sensor Fusion LIDAR, RADAR,...","[Problem-Solving, Teamwork, Innovation]"
197,198,IoT (Internet of Things) Engineer,Technology,Bachelor,Engineering,2,"[Embedded Systems Programming, Wireless Protoc...","[Analytical Thinking, Collaboration, Creativity]"
198,199,Digital Twin Engineer,Technology,Master,Engineering,3,"[3D Modeling Software SolidWorks, Revit, Simul...","[Attention to Detail, Problem-Solving, Interdi..."


In [578]:
class RecommendationProcessor:
    def __init__(self, student_profile, job_data):
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.student = student_profile
        self.job_data = job_data

        # Preprocess skills
        self.job_data['Hard Skills'] = self.job_data['Hard Skills'].apply(self.preprocess_skills)
        self.job_data['Soft Skills'] = self.job_data['Soft Skills'].apply(self.preprocess_skills)
    
    def preprocess_skills(self, skills):
        return [skill.lower().strip() for skill in skills if isinstance(skill, str) and skill.strip()]
    
    def embed_skills(self, skills):
        if not skills or not any(skills):
            return np.zeros(self.model.get_sentence_embedding_dimension())

        embeddings = self.model.encode(skills, convert_to_numpy = True)
        return np.mean(embeddings, axis = 0)
    
    # Cosine similarity between two skill lists
    def skill_cosine_similarity(self, job_skills, student_skills):
        if not job_skills or not student_skills:
            return 0.0  # One empty - no similarity
        
        job_embed = self.embed_skills(job_skills)
        student_embed = self.embed_skills(student_skills)
        cosine_sim = cosine_similarity([job_embed], [student_embed])[0][0]
        return cosine_sim
    
    # Cosine Similarity between Student's Degree Field vs Required Degree Field
    def degree_field_semantic_similarity(self, job_field, student_field):
        embeddings = self.model.encode([job_field, student_field])
        return cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]

    def map_education_level(self, level):
        mapping = {
            "High School": 1,
            "Diploma": 2,
            "Polytechnic": 2,
            "Bachelor": 3,
            "Master": 4,
            "PhD": 5
        }
        return mapping.get(level, 0)

    def experience_score(self, student_exp, job_exp):
        diff = student_exp - job_exp
        return 1 / (1 + math.exp(-0.5 * diff))

    def compute_job_score(self, job):
        student = self.student

        education_score = 1 if self.map_education_level(student['Education Level']) >= self.map_education_level(job['Required Education']) else 0
        degree_score = self.degree_field_semantic_similarity(job['Required Degree Field'], student['Degree Field'])
        exp_score = self.experience_score(student['Work Experience'], job['Years of Experience'])
        hard_skill_score = self.skill_cosine_similarity(job['Hard Skills'], student['Hard Skills'])
        soft_skill_score = self.skill_cosine_similarity(job['Soft Skills'], student['Soft Skills'])

        total_score = (
            0.15 * education_score +
            0.20 * degree_score +
            0.20 * exp_score +
            0.30 * hard_skill_score +
            0.15 * soft_skill_score
        )

        return {
            "Job Title": job["Job Title"],
            "Total Score": round(total_score, 3),
            "Education Score": education_score,
            "Degree Score": round(degree_score, 3),
            "Experience Score": round(exp_score, 3),
            "Hard Skill Score": round(hard_skill_score, 3),
            "Soft Skill Score": round(soft_skill_score, 3)
        }
    
    def recommend_top_jobs(self, top_n = 5):
        job_scores = []
        for _, job in self.job_data.iterrows():
            score = self.compute_job_score(job)
            job_scores.append((job['Job Title'], score))

        sorted_jobs = sorted(job_scores, key=lambda x: x[1]['Total Score'], reverse=True)

        print(f"\nTop {top_n} Recommended Jobs for {self.student['Name']}:\n")
        for title, score in sorted_jobs[:top_n]:
            print(f"{title}: {score['Total Score']:.3f}")
            print(f"  Education Score:  {score['Education Score']}")
            print(f"  Degree Score:     {score['Degree Score']}")
            print(f"  Experience Score: {score['Experience Score']}")
            print(f"  Hard Skill Score: {score['Hard Skill Score']}")
            print(f"  Soft Skill Score: {score['Soft Skill Score']}")
            print()
        
        return sorted_jobs[:top_n]
    
    def generate_recommendation_row(self, student_name, top_jobs):
        return {
            "name": student_name,
            "first_recommendation": top_jobs[0][0] if len(top_jobs) > 0 else None,
            "second_recommendation": top_jobs[1][0] if len(top_jobs) > 1 else None,
            "third_recommendation": top_jobs[2][0] if len(top_jobs) > 2 else None,
            "fourth_recommendation": top_jobs[3][0] if len(top_jobs) > 3 else None,
            "fifth_recommendation": top_jobs[4][0] if len(top_jobs) > 4 else None
        }

In [579]:
student_profile = nlp_df.iloc[-1]
recommender = RecommendationProcessor(student_profile, job_df)
recommended_jobs = recommender.recommend_top_jobs()
new_student = recommender.generate_recommendation_row(recommender.student.Name, recommended_jobs)


Top 5 Recommended Jobs for DIONE TAY JIA WEN:

Brand Strategist: 0.665
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.279
  Hard Skill Score: 0.8460000157356262
  Soft Skill Score: 0.7570000290870667

Sales Executive: 0.660
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.512
  Hard Skill Score: 0.722000002861023
  Soft Skill Score: 0.652999997138977

Social Media Manager: 0.645
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.389
  Hard Skill Score: 0.7429999709129333
  Soft Skill Score: 0.6790000200271606

Medical Sales Representative: 0.636
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.389
  Hard Skill Score: 0.7210000157356262
  Soft Skill Score: 0.6660000085830688

Marketing Analyst: 0.635
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.512
  Hard Skill Score: 0.6179999709129333
  Soft Skill Score: 0.695999

NameError: name 'generate_recommendation_row' is not defined

In [585]:
new_student = recommender.generate_recommendation_row(recommender.student.Name, recommended_jobs)
new_student

{'name': 'DIONE TAY JIA WEN',
 'first_recommendation': 'Brand Strategist',
 'second_recommendation': 'Sales Executive',
 'third_recommendation': 'Social Media Manager',
 'fourth_recommendation': 'Medical Sales Representative',
 'fifth_recommendation': 'Marketing Analyst'}

Built an end-to-end NLP pipeline to extract and structure unstructured resume data (education, experience, skills) from PDFs using PDFMiner, regex, and spaCy, improving parsing accuracy by ~30% over rule-based baselines.



In [484]:
def preprocess_skills(skills):
    return [skill.lower().strip() for skill in skills if isinstance(skill, str) and skill.strip()]

# Apply preprocessing when loading data
job_df['Hard Skills'] = job_df['Hard Skills'].apply(preprocess_skills)
job_df['Soft Skills'] = job_df['Soft Skills'].apply(preprocess_skills)


model = SentenceTransformer("all-MiniLM-L6-v2")
def embed_skills(skills):
    if not skills or not any(skills):
        return np.zeros(model.get_sentence_embedding_dimension())

    embeddings = model.encode(skills, convert_to_numpy=True)
    return np.mean(embeddings, axis=0)

# Cosine similarity between two skill lists
def skill_cosine_similarity(job_skills, student_skills):
    if not job_skills or not student_skills:
        return 0.0  # One empty - no similarity
    
    job_embed = embed_skills(job_skills)
    student_embed = embed_skills(student_skills)
    cosine_sim = cosine_similarity([job_embed], [student_embed])[0][0]
    return cosine_sim


# Cosine Similarity between Student's Degree Field vs Required Degree Field
def degree_field_semantic_similarity(text1, text2):
    embeddings = model.encode([text1, text2])
    return cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]

def map_education_level(level):
    mapping = {
        "High School": 1,
        "Diploma": 2,
        "Polytechnic": 2,
        "Bachelor": 3,
        "Master": 4,
        "PhD": 5
    }
    return mapping.get(level, 0)

def experience_score(student_exp, job_exp):
    diff = student_exp - job_exp
    return 1 / (1 + math.exp(-0.5 * diff))


def compute_job_score(job, student):
    student_education = map_education_level(student['Education Level'])
    job_education = map_education_level(job['Required Education'])
    education_score = 1 if student_education >= job_education else 0

    degree_score = degree_field_semantic_similarity(student['Degree Field'], job['Required Degree Field'])

    student_exp = student['Work Experience']
    job_exp = job['Years of Experience']
    exp_score = experience_score(student_exp, job_exp)


    hard_skill_score = skill_cosine_similarity(job['Hard Skills'], student['Hard Skills'])
    soft_skill_score = skill_cosine_similarity(job['Soft Skills'], student['Soft Skills'])
    
    total_score = (
        0.15 * education_score +
        0.25 * degree_score +
        0.15 * exp_score +
        0.30 * hard_skill_score +
        0.15 * soft_skill_score
    )
    return {
        "Job Title": job["Job Title"],
        "Total Score": round(total_score, 3),
        "Education Score": education_score,
        "Degree Score": round(degree_score, 3),
        "Experience Score": round(exp_score, 3),
        "Hard Skill Score": round(hard_skill_score, 3),
        "Soft Skill Score": round(soft_skill_score, 3)
    }


# List to store job titles and scores
job_scores = []
current_student = nlp_df.iloc[0]

# Loop through each job in df and compute the score
for _, job in job_df.iterrows():
    score = compute_job_score(job, current_student)
    job_scores.append((job['Job Title'], score))

# Sort jobs by score in descending order
sorted_jobs = sorted(job_scores, key=lambda x: x[1]['Total Score'], reverse=True)

# Display top 5 jobs
print(f"\nTop 5 Recommended Jobs for {current_student['Name']}:\n")
for title, score in sorted_jobs[:5]:
    print(f"{title}: {score['Total Score']:.3f}")
    print(f"  Education Score:  {score['Education Score']}")
    print(f"  Degree Score:     {score['Degree Score']}")
    print(f"  Experience Score: {score['Experience Score']}")
    print(f"  Hard Skill Score: {score['Hard Skill Score']}")
    print(f"  Soft Skill Score: {score['Soft Skill Score']}")
    print()



Top 5 Recommended Jobs for DIONE TAY JIA WEN:

Brand Strategist: 0.674
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.279
  Hard Skill Score: 0.8460000157356262
  Soft Skill Score: 0.7570000290870667

Sales Executive: 0.657
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.512
  Hard Skill Score: 0.722000002861023
  Soft Skill Score: 0.652999997138977

Social Media Manager: 0.649
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.389
  Hard Skill Score: 0.7429999709129333
  Soft Skill Score: 0.6790000200271606

E-commerce Manager: 0.642
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.279
  Hard Skill Score: 0.7490000128746033
  Soft Skill Score: 0.7360000014305115

Medical Sales Representative: 0.640
  Education Score:  1
  Degree Score:     0.4620000123977661
  Experience Score: 0.389
  Hard Skill Score: 0.7210000157356262
  Soft Skill Score: 0.66600

In [558]:
from dotenv import load_dotenv
import os
import psycopg2

load_dotenv(override=True)  # Load environment variables

conn = psycopg2.connect(
    host=os.getenv("DB_HOST"),
    database=os.getenv("DB_NAME"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    port=os.getenv("DB_PORT", "5432")
)


✅ Connected to PostgreSQL and test query succeeded.


## Insert data into postgresql database

In [597]:
import os
import psycopg2
import pandas as pd
from dotenv import load_dotenv
from psycopg2.extras import Json

class databaseProcessor:
    def __init__(self, df):
        self.df = df
    
    def insert_student(self):
        # Load environment variables
        load_dotenv(override=True)

        conn = psycopg2.connect(
            host=os.getenv("DB_HOST"),
            database=os.getenv("DB_NAME"),
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            port=os.getenv("DB_PORT", "5432")
        )
        # Create cursor
        cur = conn.cursor()

        query = '''
            INSERT INTO student (
                name,
                email,
                contact_information,
                education_level,
                degree_field,
                university,
                gpa,
                work_experience_years,
                hard_skills,
                soft_skills
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON CONFLICT DO NOTHING;
        '''

        # Insert row-by-row
        for _, row in self.df.iterrows():
            try:
                cur.execute(query, (
                    row.get("Name"),
                    row.get("Email")[0] if isinstance(row.get("Email"), list) else row.get("Email"),
                    row.get("Contact Information"),
                    row.get("Education Level"),
                    row.get("Degree Field"),
                    row.get("University"),
                    row.get("GPA"),
                    float(row["Work Experience"]) if pd.notna(row["Work Experience"]) else None,
                    Json(row.get("Hard Skills", [])),
                    Json(row.get("Soft Skills", []))
                ))
            except Exception as e:
                print(f"Error inserting row: {row.to_dict()}")
                print("Exception:", e)

        # Commit and close
        conn.commit()
        cur.close()
        conn.close()
        print("✅ Insert student complete.")
    
    def insert_job(self, student):
        # Load environment variables
        load_dotenv(override = True)

        conn = psycopg2.connect(
            host=os.getenv("DB_HOST"),
            database=os.getenv("DB_NAME"),
            user=os.getenv("DB_USER"),
            password=os.getenv("DB_PASSWORD"),
            port=os.getenv("DB_PORT", "5432")
        )
        # Create cursor
        cur = conn.cursor()

        query = '''
            INSERT INTO recommendation (
                name,
                first_recommendation,
                second_recommendation,
                third_recommendation,
                fourth_recommendation,
                fifth_recommendation
            ) VALUES (%s, %s, %s, %s, %s, %s)
            ON CONFLICT DO NOTHING;
        '''


        cur.execute(query, (
            student["name"],
            student["first_recommendation"],
            student["second_recommendation"],
            student["third_recommendation"],
            student["fourth_recommendation"],
            student["fifth_recommendation"]
        ))
        conn.commit()
        cur.close()
        conn.close()
        print("✅ Insert job recommendation complete.")

In [602]:
db_processor = databaseProcessor(nlp_df)
db_processor.insert_student()
db_processor.insert_job(new_student)

✅ Insert student complete.
✅ Insert job recommendation complete.


In [594]:
import os
import psycopg2
import pandas as pd
from dotenv import load_dotenv
from psycopg2.extras import Json

def insert_student(df):
    # Load environment variables
    load_dotenv(override=True)

    conn = psycopg2.connect(
        host=os.getenv("DB_HOST"),
        database=os.getenv("DB_NAME"),
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
        port=os.getenv("DB_PORT", "5432")
    )
    # Create cursor
    cur = conn.cursor()

    query = '''
        INSERT INTO student (
            name,
            email,
            contact_information,
            education_level,
            degree_field,
            university,
            gpa,
            work_experience_years,
            hard_skills,
            soft_skills
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING;
    '''

    # Insert row-by-row
    for _, row in df.iterrows():
        try:
            cur.execute(query, (
                row.get("Name"),
                row.get("Email")[0] if isinstance(row.get("Email"), list) else row.get("Email"),
                row.get("Contact Information"),
                row.get("Education Level"),
                row.get("Degree Field"),
                row.get("University"),
                row.get("GPA"),
                float(row["Work Experience"]) if pd.notna(row["Work Experience"]) else None,
                Json(row.get("Hard Skills", [])),
                Json(row.get("Soft Skills", []))
            ))
        except Exception as e:
            print(f"Error inserting row: {row.to_dict()}")
            print("Exception:", e)

    # Commit and close
    conn.commit()
    cur.close()
    conn.close()
    print("✅ Insert complete.")

def insert_job(rec_row):
    # Load environment variables
    load_dotenv(override = True)

    conn = psycopg2.connect(
        host=os.getenv("DB_HOST"),
        database=os.getenv("DB_NAME"),
        user=os.getenv("DB_USER"),
        password=os.getenv("DB_PASSWORD"),
        port=os.getenv("DB_PORT", "5432")
    )
    # Create cursor
    cur = conn.cursor()

    query = '''
        INSERT INTO recommendation (
            name,
            first_recommendation,
            second_recommendation,
            third_recommendation,
            fourth_recommendation,
            fifth_recommendation
        ) VALUES (%s, %s, %s, %s, %s, %s)
        ON CONFLICT DO NOTHING;
    '''


    cur.execute(query, (
        rec_row["name"],
        rec_row["first_recommendation"],
        rec_row["second_recommendation"],
        rec_row["third_recommendation"],
        rec_row["fourth_recommendation"],
        rec_row["fifth_recommendation"]
    ))
    conn.commit()
    cur.close()
    conn.close()
    print("✅ Insert job complete.")


## Clustering Recommendation

In [642]:
import ast
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

class ClusterProcessor:
    def __init__(self, student_profile, job_data, n_clusters: int = 5):
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.student = student_profile
        self.job_data = job_data.copy()
        self.kmeans = KMeans(n_clusters = n_clusters, random_state = 42)

        # Preprocess skills
        # self.job_data["Hard Skills"] = self.job_data["Hard Skills"].apply(ast.literal_eval)
        # self.job_data["Soft Skills"] = self.job_data["Soft Skills"].apply(ast.literal_eval)
        self.job_data['Hard Skills'] = self.job_data['Hard Skills'].apply(self.preprocess_skills)
        self.job_data['Soft Skills'] = self.job_data['Soft Skills'].apply(self.preprocess_skills)
    
    def preprocess_skills(self, skills):
        return [skill.strip().lower() for skill in skills]
    
    def embed_job(self, job_row):
        """
        Embeds a job posting into a vector using SentenceTransformer.
        """
        text = (
            f"Job Title: {job_row['Job Title']}. "
            f"Industry: {job_row['Industry']}. "
            f"Hard Skills required: {', '.join(job_row['Hard Skills'])}. "
            f"Soft Skills required: {', '.join(job_row['Soft Skills'])}. "
            f"Required degree field: {job_row['Required Degree Field']}. "
            f"Required education level: {job_row['Required Education']}. "
            f"Years of experience required: {job_row['Years of Experience']}."
        )
        return self.model.encode(text, convert_to_numpy = True)

    
    def embed_student(self):
        """
        Embeds the student profile into a vector using SentenceTransformer.
        """
        text = (
            f"Student profile. "
            f"Hard Skills: {', '.join(self.student['Hard Skills'])}. "
            f"Soft Skills: {', '.join(self.student['Soft Skills'])}. "
            f"Degree field: {self.student['Degree Field']}. "
            f"Education level: {self.student['Education Level']}. "
            f"Work experience: {self.student['Work Experience']} years."
        )
        return self.model.encode(text, convert_to_numpy = True)
    
    def compute_job_score(self):
        """
        Embeds all jobs, clusters them, and returns the jobs in the same cluster
        as the student with a similarity score.
        """
        # Embed jobs
        self.job_data['vectors'] = self.job_data.apply(lambda row: self.embed_job(row), axis = 1)
        job_vectors = np.stack(self.job_data['vectors'].values).astype(np.float64)

        # Embed student
        self.student_vector = self.embed_student().astype(np.float64)
        
        # Fit KMeans
        self.job_data['cluster'] = self.kmeans.fit_predict(job_vectors)

        # Predict student's cluster
        student_cluster = self.kmeans.predict([self.student_vector])[0]

        # Filter jobs in the same cluster
        cluster_jobs = self.job_data[self.job_data['cluster'] == student_cluster].copy()

         # Compute cosine similarities
        cluster_matrix = np.stack(cluster_jobs['vectors'].values)
        similarities = cosine_similarity([self.student_vector], cluster_matrix)[0]
        cluster_jobs['similarity'] = similarities
        
        self.cluster_jobs = cluster_jobs
        return cluster_jobs
            
    def recommend_top_k(self, k = 5):
        """
        Prints the top k most similar jobs to the student.
        """
        if not hasattr(self, 'cluster_jobs'):
            self.compute_job_score()

        top_k_jobs = self.cluster_jobs.sort_values(by = 'similarity', ascending = False).head(k)

        # Display top 5 jobs
        print(f"\nTop {k} Recommended Jobs for {self.student['Name']}:\n\nScore:")
        for _, row in top_k_jobs.iterrows():
            print(f"{row['Job Title']}: {row['similarity']:.3f}")

processor = ClusterProcessor(student_profile, job_df)
processor.compute_job_score()
processor.recommend_top_k(k = 5)


# def embed_job(job_row, model):
#     text = (
#         f"Job Title: {job_row['Job Title']}. "
#         f"Industry: {job_row['Industry']}. "
#         f"Hard Skills required: {', '.join(job_row['Hard Skills'])}. "
#         f"Soft Skills required: {', '.join(job_row['Soft Skills'])}. "
#         f"Required degree field: {job_row['Required Degree Field']}. "
#         f"Required education level: {job_row['Required Education']}. "
#         f"Years of experience required: {job_row['Years of Experience']}."
#     )
#     return model.encode(text, convert_to_numpy=True)

# def embed_student(student_dict, model):
#     text = (
#         f"Student profile. "
#         f"Hard Skills: {', '.join(student_dict['Hard Skills'])}. "
#         f"Soft Skills: {', '.join(student_dict['Soft Skills'])}. "
#         f"Degree field: {student_dict['Degree Field']}. "
#         f"Education level: {student_dict['Education Level']}. "
#         f"Work experience: {student_dict['Work Experience']} years."
#     )
#     return model.encode(text, convert_to_numpy=True)


# df['vectors'] = df.apply(lambda row: embed_job(row, model), axis = 1)
# job_vectors = np.stack(df['vectors'].values).astype(np.float64)

# student_vector = embed_student(student_profile, model).astype(np.float64)

# # Train KMeans
# kmeans = KMeans(n_clusters = 5, random_state = 42)
# df['cluster'] = kmeans.fit_predict(job_vectors)

# # Predict cluster for student
# student_cluster = kmeans.predict([student_vector])[0]

# # Get jobs from same cluster
# cluster_jobs = df[df['cluster'] == student_cluster].copy()

# # Stack job vectors from the cluster into a matrix
# cluster_matrix = np.stack(cluster_jobs['vectors'].values)
# similarities = cosine_similarity([student_vector], cluster_matrix)[0]
# cluster_jobs['similarity'] = similarities

# # Step 4: Sort and return top 5 jobs
# top_5_jobs = cluster_jobs.sort_values(by = 'similarity', ascending = False)

# # Display top 5 jobs
# print(f"\nTop 5 Recommended Jobs for {student_profile['Name']}:\n\nScore:")
# for _, row in top_5_jobs.head(5).iterrows():
#     print(f"{row['Job Title']}: {row['similarity']:.3f}")


Top 5 Recommended Jobs for DIONE TAY JIA WEN:

Score:
Marketing Analyst: 0.695
Product Manager (Tech): 0.596
Management Consultant: 0.574
Fund Manager: 0.565
Entrepreneur / Startup Founder: 0.560


In [630]:
student_profile

Name                                                   DIONE TAY JIA WEN
Email                                             [Dionetay00@gmail.com]
Contact Information                                                     
Education Level                                                 Bachelor
Degree Field                                      Marketing with Honours
University                                               Yale University
GPA                                                              Honours
Work Experience                                                      1.1
Hard Skills            [advertising campaign, brand awareness, brand ...
Soft Skills            [attention management, change management, coll...
Name: 0, dtype: object