# Importing Libraries

In [2]:
import pandas as pd
import random
import difflib
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Generating Projects and Roles

In [4]:
# --- Define Employee Roles with Themes ---
# Role templates used as the seed for synthetic employees. Each entry carries a
# "Role Name", a free-text "Role Description" and a "Theme"; the theme is the key
# used to look up the per-theme product / skill / certification / expertise pools.
employee_roles = [
    {"Role Name": "Solution Architect", "Role Description": "Designs high-level technical solutions for enterprise customers.", "Theme": "Technical"},
    {"Role Name": "Sales Account Manager", "Role Description": "Manages customer accounts and drives sales processes.", "Theme": "Sales"},
    {"Role Name": "Digital Marketing Specialist", "Role Description": "Executes online campaigns, SEO, and branding strategies.", "Theme": "Marketing"},
    {"Role Name": "Senior HR Manager", "Role Description": "Manages HR operations and employee relations.", "Theme": "HR"},
    {"Role Name": "Legal Counsel", "Role Description": "Provides legal support for contracts and compliance.", "Theme": "Legal"},
    {"Role Name": "IT Systems Engineer", "Role Description": "Maintains and optimizes internal IT infrastructure.", "Theme": "Technical"},
    {"Role Name": "Workflow Consultant", "Role Description": "Analyzes business processes and recommends workflow improvements.", "Theme": "Consulting"},
    {"Role Name": "Project Manager", "Role Description": "Oversees project delivery and coordinates cross-functional teams.", "Theme": "Consulting"},
    {"Role Name": "Data Analyst", "Role Description": "Analyzes data and delivers business insights.", "Theme": "Technical"},
    {"Role Name": "Customer Success Manager", "Role Description": "Supports post-sales success and client satisfaction.", "Theme": "Sales"},
    {"Role Name": "Field Support Engineer", "Role Description": "Provides onsite technical support for Canon products.", "Theme": "Technical"},
    {"Role Name": "Pre-Sales Engineer", "Role Description": "Prepares technical demos and solution proposals for prospects.", "Theme": "Sales"},
    {"Role Name": "Compliance Manager", "Role Description": "Ensures adherence to regulations and company standards.", "Theme": "Legal"},
    {"Role Name": "HR Business Partner", "Role Description": "Collaborates with leadership to align HR strategy.", "Theme": "HR"},
    {"Role Name": "Corporate Trainer", "Role Description": "Designs and delivers employee training programs.", "Theme": "HR"},
    {"Role Name": "Technical Support Specialist", "Role Description": "Resolves technical issues reported by customers.", "Theme": "Technical"},
    {"Role Name": "Content Creator", "Role Description": "Develops written, video, and visual content for marketing.", "Theme": "Marketing"},
    {"Role Name": "Quality Assurance Specialist", "Role Description": "Tests systems and processes to ensure quality.", "Theme": "Technical"},
    {"Role Name": "Integration Developer", "Role Description": "Develops integrations between Canon products and third-party systems.", "Theme": "Technical"},
    {"Role Name": "Strategy Consultant", "Role Description": "Advises leadership on business growth and optimization strategies.", "Theme": "Consulting"},
]

# --- Define Project Summaries with Themes ---
# Project templates used as the seed for synthetic projects. Each entry carries a
# "Project Summary", a "Scope and Deliverables" text and a "Theme" drawn from the
# same theme vocabulary as the employee roles.
project_summaries = [
    {"Project Summary": "Implement scalable workflow automation system", "Scope and Deliverables": "Deploy Workflow2000, integrate with client systems", "Theme": "Technical"},
    {"Project Summary": "CRM integration for loyalty program", "Scope and Deliverables": "Customize CRM modules and train sales team", "Theme": "Sales"},
    {"Project Summary": "Launch digital marketing portal", "Scope and Deliverables": "Create website, SEO, lead funnels", "Theme": "Marketing"},
    {"Project Summary": "HR digital onboarding system", "Scope and Deliverables": "Implement HRIS system, self-service portals", "Theme": "HR"},
    {"Project Summary": "Contract management system deployment", "Scope and Deliverables": "Deploy document archiving and e-signature workflows", "Theme": "Legal"},
    {"Project Summary": "Upgrade internal IT infrastructure", "Scope and Deliverables": "Replace old servers, migrate systems to cloud", "Theme": "Technical"},
    {"Project Summary": "Business workflow audit", "Scope and Deliverables": "Map processes and suggest automation improvements", "Theme": "Consulting"},
    {"Project Summary": "Manage ERP migration project", "Scope and Deliverables": "Deliver milestones for new ERP roll-out", "Theme": "Consulting"},
    {"Project Summary": "Data warehouse design", "Scope and Deliverables": "Create new analytics-ready database", "Theme": "Technical"},
    {"Project Summary": "Post-sale onboarding program", "Scope and Deliverables": "Develop client onboarding workflow", "Theme": "Sales"},
    {"Project Summary": "Onsite print solutions setup", "Scope and Deliverables": "Install Print2.0 platform for retail client", "Theme": "Technical"},
    {"Project Summary": "Pre-sales technical proof-of-concept setup", "Scope and Deliverables": "Build demo environments for prospects", "Theme": "Sales"},
    {"Project Summary": "Regulatory compliance documentation project", "Scope and Deliverables": "Standardize processes, deliver compliance documentation", "Theme": "Legal"},
    {"Project Summary": "Organizational culture development initiative", "Scope and Deliverables": "Conduct workshops, employee surveys", "Theme": "HR"},
    {"Project Summary": "Employee training platform upgrade", "Scope and Deliverables": "Implement Learning Management System (LMS)", "Theme": "HR"},
    {"Project Summary": "Customer remote support setup", "Scope and Deliverables": "Setup online ticketing and remote assistance systems", "Theme": "Technical"},
    {"Project Summary": "Content library migration", "Scope and Deliverables": "Migrate marketing content to new CMS", "Theme": "Marketing"},
    {"Project Summary": "Quality assurance framework rollout", "Scope and Deliverables": "Implement QA policies across departments", "Theme": "Technical"},
    {"Project Summary": "API and system integration project", "Scope and Deliverables": "Develop middleware for integration of ERP/CRM", "Theme": "Technical"},
    {"Project Summary": "Business strategy development program", "Scope and Deliverables": "Assist C-suite with market expansion strategy", "Theme": "Consulting"},
]

# --- Tabular views of the role and project templates (one row per template) ---
roles_df = pd.DataFrame(data=employee_roles)
projects_df = pd.DataFrame(data=project_summaries)

In [5]:
# Preview the first five role templates.
roles_df.head()

Unnamed: 0,Role Name,Role Description,Theme
0,Solution Architect,Designs high-level technical solutions for ent...,Technical
1,Sales Account Manager,Manages customer accounts and drives sales pro...,Sales
2,Digital Marketing Specialist,"Executes online campaigns, SEO, and branding s...",Marketing
3,Senior HR Manager,Manages HR operations and employee relations.,HR
4,Legal Counsel,Provides legal support for contracts and compl...,Legal


In [6]:
# Controlled product pool per theme: the generators sample project products and
# employee product experience from the list matching the template's theme.
product_pools = {
    "Technical": ["Workflow2000", "Print2.0", "AIScan", "CloudSuite", "IntegrationHub"],
    "Sales": ["CRM Pro", "Sales Enablement Suite", "Loyalty CRM", "SalesForce Light"],
    "Marketing": ["Digital Campaign Manager", "SEO Toolkit", "Content CMS", "Social Media Manager"],
    "HR": ["HRIS Plus", "Onboarding Suite", "Employee Experience Platform"],
    "Legal": ["Compliance Suite", "Contract Manager Pro", "Regulatory Tracker"],
    "Consulting": ["ERP Migration Tool", "Business Analysis Framework", "Strategy Kit"]
}

# Controlled skill pool per theme: source of project required skills and
# employee core competencies.
skill_pools = {
    "Technical": ["Data Analysis", "Workflow Automation", "Cloud Services", "IT Infrastructure", "API Development"],
    "Sales": ["CRM Integration", "Negotiation", "Client Management", "Customer Relationship Management"],
    "Marketing": ["SEO Optimization", "Content Strategy", "Campaign Management", "Copywriting", "Branding"],
    "HR": ["Digital HR", "Organizational Development", "Talent Management", "Communication Skills"],
    "Legal": ["Contract Management", "Regulatory Knowledge", "Document Review", "Compliance Documentation"],
    "Consulting": ["Business Analysis", "Strategic Planning", "Workflow Optimization", "Project Management", "Change Management"]
}

# Controlled certification pool per theme.
# Note: "PMP" appears under both "HR" and "Consulting", so it can match across those two themes.
certification_pools = {
    "Technical": ["ITIL", "ISO 27001", "Microsoft Azure Certification"],
    "Sales": ["Certified Sales Professional (CSP)", "CRM Specialist Certification"],
    "Marketing": ["Digital Marketing Certification", "Google Ads Certification", "HubSpot Marketing Certification"],
    "HR": ["PMP", "SHRM-CP", "HR Analytics Certification"],
    "Legal": ["Certified Compliance Officer", "GDPR Certification", "Contract Law Certification"],
    "Consulting": ["PMP", "Six Sigma", "Agile Practitioner", "Business Analysis Certification"]
}

# Controlled expertise-area pool per theme.
expertise_pools = {
    "Technical": ["Scripting", "API Integration", "Cloud Infrastructure", "Networking", "Cybersecurity"],
    "Sales": ["CRM Integration", "Sales Pipeline Automation", "Client Relationship Systems"],
    "Marketing": ["SEO Optimization", "Content Management Systems", "Social Media Integration"],
    "HR": ["HRIS Systems", "Employee Experience Platforms", "Organizational Development Systems"],
    "Legal": ["Document Archiving", "Contract Management Systems", "Regulatory Compliance Tools"],
    "Consulting": ["Strategic Planning", "Business Workflow Optimization", "ERP Systems Integration"]
}


# Predefined vocabularies shared by all themes (locations, work modes,
# languages, CEFR fluency codes and customer industries).
locations_master = ["Berlin", "Vienna", "London"]
work_flexibility_options = ["onsite", "remote", "hybrid"]
languages_master = ["English", "French", "German", "Italian"]
fluency_levels = ["A1", "A2", "B1", "B2", "C1", "C2"]
industries_master = ["Healthcare", "Education", "Finance", "Manufacturing", "Retail"]

In [7]:
# --- Typo Function ---
def introduce_typo(text):
    """Return `text` with one randomly chosen pair of adjacent characters swapped.

    Strings shorter than four characters are returned unchanged. The swap
    position is drawn with `random.randint`, so the result depends on the
    state of Python's global random generator.
    """
    if len(text) < 4:
        return text
    swap_at = random.randint(0, len(text) - 2)
    head = text[:swap_at]
    first, second = text[swap_at], text[swap_at + 1]
    tail = text[swap_at + 2:]
    return head + second + first + tail

# --- Smart Project Generator (Full Version) ---
def generate_smart_projects_full(n, project_templates):
    """Generate `n` synthetic project records as a DataFrame.

    Each project starts from a randomly chosen entry of `project_templates`
    (dicts with "Project Summary", "Scope and Deliverables" and "Theme") and is
    enriched with values sampled from the module-level pools for that theme.
    Typos are injected on purpose via `introduce_typo` so that downstream
    matching has to be fuzzy: products, skill names and industry with
    probability 0.4, work location and language names with probability 0.3.
    Certifications and expertise areas are never altered.

    Parameters
    ----------
    n : number of projects to generate; IDs run from "P1" to "P<n>".
    project_templates : sequence of template dicts as described above.

    Returns
    -------
    pandas.DataFrame with one row per generated project.

    Uses the global `random` state, so results vary between runs unless the
    generator has been seeded beforehand.
    """
    projects = []
    for i in range(n):
        proj = random.choice(project_templates)
        theme = proj["Theme"]

        # 1-3 products from the theme pool, each possibly misspelled.
        products = [introduce_typo(p) if random.random() < 0.4 else p
                    for p in random.sample(product_pools[theme], k=min(len(product_pools[theme]), random.randint(1, 3)))]

        # 2-5 skills (capped at pool size) mapped to a required level between 5 and 10.
        required_skills = {
            (introduce_typo(skill) if random.random() < 0.4 else skill): random.randint(5, 10)
            for skill in random.sample(skill_pools[theme], k=min(len(skill_pools[theme]), random.randint(2, 5)))
        }

        # 1-2 certifications and 1-2 expertise areas, always spelled correctly.
        certifications = random.sample(certification_pools[theme], k=min(len(certification_pools[theme]), random.randint(1, 2)))
        expertise = random.sample(expertise_pools[theme], k=min(len(expertise_pools[theme]), random.randint(1, 2)))

        project_industry = introduce_typo(random.choice(industries_master)) if random.random() < 0.4 else random.choice(industries_master)

        project = {
            "ProjectID": f"P{i+1}",
            "Project Summary": proj["Project Summary"],
            "Scope and Deliverables": proj["Scope and Deliverables"],
            "Theme": theme,
            "Products Involved": products,
            "Required Skills and Expertise": required_skills,
            "Customer Preferences (Certifications)": certifications,
            "Integration Requirements (Expertise Areas)": expertise,
            "Customer Industry": project_industry,
            "Work Location": introduce_typo(random.choice(locations_master)) if random.random() < 0.3 else random.choice(locations_master),
            "Work Flexibility": random.choice(work_flexibility_options),
            # 1-3 languages, each (possibly misspelled) name mapped to a random CEFR level.
            "Languages Required": {introduce_typo(lang) if random.random() < 0.3 else lang: random.choice(fluency_levels)
                                   for lang in random.sample(languages_master, k=random.randint(1, 3))},
            "Complexity": random.randint(1, 10)
        }

        projects.append(project)
    return pd.DataFrame(projects)

# --- Smart Employee Generator (Full Version) ---
def generate_smart_employees_full(n, role_templates):
    """Generate `n` synthetic employee records as a DataFrame.

    Each employee starts from a randomly chosen entry of `role_templates`
    (dicts with "Role Name", "Role Description" and "Theme") and is enriched
    with values sampled from the module-level pools for that theme. Unlike the
    project generator, no typos are injected: employee data is kept clean.

    Parameters
    ----------
    n : number of employees to generate; IDs run from "E1" to "E<n>".
    role_templates : sequence of template dicts as described above.

    Returns
    -------
    pandas.DataFrame with one row per generated employee.

    Uses the global `random` state, so results vary between runs unless the
    generator has been seeded beforehand.
    """
    employees = []
    for i in range(n):
        emp = random.choice(role_templates)
        theme = emp["Theme"]

        # 1-3 products, 2-5 skills (level 4-10), 1-2 certifications and 1-2
        # expertise areas, each capped at the size of the theme's pool.
        product_experience = random.sample(product_pools[theme], k=min(len(product_pools[theme]), random.randint(1, 3)))
        core_competencies = {skill: random.randint(4, 10) for skill in random.sample(skill_pools[theme], k=min(len(skill_pools[theme]), random.randint(2, 5)))}
        certifications = random.sample(certification_pools[theme], k=min(len(certification_pools[theme]), random.randint(1, 2)))
        expertise = random.sample(expertise_pools[theme], k=min(len(expertise_pools[theme]), random.randint(1, 2)))

        # Industries are theme-independent: 1-3 drawn from the shared vocabulary.
        industry_experience = random.sample(industries_master, k=random.randint(1, 3))

        employee = {
            "EmployeeID": f"E{i+1}",
            "Role Name": emp["Role Name"],
            "Role Description": emp["Role Description"],
            "Theme": theme,
            "Products Experience": product_experience,
            "Core Competencies": core_competencies,
            "External/Internal Certifications": certifications,
            "Expertise Areas": expertise,
            "Industry Experience": industry_experience,
            "Work Location": random.choice(locations_master),
            "Work Flexibility": random.choice(work_flexibility_options),
            # 1-3 languages, each mapped to a random CEFR level.
            "Languages Known": {lang: random.choice(fluency_levels) 
                                for lang in random.sample(languages_master, k=random.randint(1, 3))}
        }

        employees.append(employee)
    return pd.DataFrame(employees)


In [8]:
# Seed Python's global RNG so the synthetic projects and employees are the same
# on every Restart & Run All. Change the seed to draw a different sample.
# Outputs saved before this seed was introduced will differ until the notebook is re-run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

smart_projects_df = generate_smart_projects_full(20, project_summaries)
smart_employees_df = generate_smart_employees_full(20, employee_roles)

In [9]:
# Preview the first five generated projects (typos in some fields are intentional).
smart_projects_df.head()

Unnamed: 0,ProjectID,Project Summary,Scope and Deliverables,Theme,Products Involved,Required Skills and Expertise,Customer Preferences (Certifications),Integration Requirements (Expertise Areas),Customer Industry,Work Location,Work Flexibility,Languages Required,Complexity
0,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3
1,P2,Content library migration,Migrate marketing content to new CMS,Marketing,"[oCntent CMS, Digital Campaign Manager]","{'Copwyriting': 9, 'Branding': 7, 'oCntent Str...",[Digital Marketing Certification],"[Social Media Integration, SEO Optimization]",iFnance,London,hybrid,"{'Frenhc': 'B1', 'German': 'B1'}",2
2,P3,Data warehouse design,Create new analytics-ready database,Technical,"[AIScan, Print2.0, IntegrtaionHub]","{'Data Analysis': 7, 'Clou dServices': 7}",[Microsoft Azure Certification],[Cybersecurity],Retail,Vienna,hybrid,"{'Iatlian': 'A2', 'Englsih': 'C1'}",5
3,P4,Data warehouse design,Create new analytics-ready database,Technical,[AIScan],"{'Cloud Services': 6, 'Workflow Automation': 5}","[ITIL, ISO 27001]","[Networking, API Integration]",Education,Berlin,onsite,"{'Italian': 'C1', 'French': 'B1'}",2
4,P5,API and system integration project,Develop middleware for integration of ERP/CRM,Technical,"[CloudSuite, IntegrationHub]","{'IT Infrastructure': 7, 'Cloud Services': 9}","[Microsoft Azure Certification, ISO 27001]","[Scripting, Networking]",eHalthcare,Berlin,hybrid,"{'French': 'C1', 'Italian': 'A2'}",6


In [10]:
# Preview the first five generated employees.
smart_employees_df.head()

Unnamed: 0,EmployeeID,Role Name,Role Description,Theme,Products Experience,Core Competencies,External/Internal Certifications,Expertise Areas,Industry Experience,Work Location,Work Flexibility,Languages Known
0,E1,HR Business Partner,Collaborates with leadership to align HR strat...,HR,"[HRIS Plus, Onboarding Suite, Employee Experie...","{'Digital HR': 10, 'Organizational Development...","[PMP, SHRM-CP]","[HRIS Systems, Organizational Development Syst...",[Healthcare],Vienna,onsite,{'Italian': 'A1'}
1,E2,Workflow Consultant,Analyzes business processes and recommends wor...,Consulting,[ERP Migration Tool],"{'Workflow Optimization': 10, 'Business Analys...",[Business Analysis Certification],[ERP Systems Integration],[Finance],Vienna,remote,"{'English': 'A2', 'German': 'C1', 'Italian': '..."
2,E3,Solution Architect,Designs high-level technical solutions for ent...,Technical,[CloudSuite],"{'Data Analysis': 8, 'Cloud Services': 7}",[ISO 27001],[Cybersecurity],[Education],Vienna,hybrid,{'German': 'C1'}
3,E4,HR Business Partner,Collaborates with leadership to align HR strat...,HR,[Employee Experience Platform],"{'Communication Skills': 6, 'Organizational De...",[HR Analytics Certification],"[Employee Experience Platforms, HRIS Systems]",[Manufacturing],London,remote,"{'German': 'A1', 'French': 'C1', 'English': 'C2'}"
4,E5,Pre-Sales Engineer,Prepares technical demos and solution proposal...,Sales,[Sales Enablement Suite],"{'Client Management': 10, 'CRM Integration': 6...","[CRM Specialist Certification, Certified Sales...",[Sales Pipeline Automation],[Education],London,onsite,"{'English': 'B1', 'Italian': 'A1'}"


# Creating Scoring Functions

In [12]:
# Minimum difflib SequenceMatcher ratio (0-1) for two strings to count as a match.
# The fuzzy-matching helpers use this value as their default `threshold` argument;
# defaults are bound when each function is defined, so re-run those cells after changing it.
fuzzy_match_threshold = 0.7

In [13]:
# Normalizing text and doing fuzzy match
def normalize(text):
    """Return `text` lower-cased with leading and trailing whitespace removed."""
    lowered = text.lower()
    return lowered.strip()

def fuzzy_match(val1, val2, threshold = fuzzy_match_threshold):
    """Tell whether two strings are similar enough to be treated as the same value.

    Both inputs are normalized (lower-cased, trimmed) and compared with
    `difflib.SequenceMatcher`; the result is True when the similarity ratio is
    at least `threshold`.
    """
    left = normalize(val1)
    right = normalize(val2)
    similarity = difflib.SequenceMatcher(None, left, right).ratio()
    return similarity >= threshold

### Product Matching

In [15]:
def product_score(project_products, employee_products):
    """Fraction of the project's products the employee has experience with.

    A project product counts as covered when it fuzzy-matches at least one of
    the employee's products. Returns 0 when the project lists no products.
    """
    if not project_products:
        return 0
    covered = sum(
        1
        for wanted in project_products
        if any(fuzzy_match(wanted, owned) for owned in employee_products)
    )
    return covered / len(project_products)

### Location Matching with Work Flexibility Logic

In [17]:
def location_score(project_location, project_flex, employee_location, employee_flex):
    """Score how well an employee's location and work mode fit a project.

    Remote projects always score 1.0. Otherwise the locations must fuzzy-match,
    and the score depends on the work modes:

    * onsite project: onsite employee 1.0, hybrid employee 0.5, otherwise 0.0
    * hybrid project: onsite or hybrid employee 1.0, remote employee 0.5, otherwise 0.0

    Any other project work mode scores 0.0.
    """
    if project_flex == "remote":
        return 1.0

    same_place = fuzzy_match(project_location, employee_location)
    if not same_place:
        # Every non-remote branch requires a location match.
        return 0.0

    if project_flex == "onsite":
        if employee_flex == "onsite":
            return 1.0
        return 0.5 if employee_flex == "hybrid" else 0.0

    if project_flex == "hybrid":
        if employee_flex in ("onsite", "hybrid"):
            return 1.0
        return 0.5 if employee_flex == "remote" else 0.0

    return 0.0

### Language Matching and Fluency Scoring

In [19]:
# CEFR fluency codes mapped to ordinal ranks (A1 = 1 ... C2 = 6) so required and
# actual language levels can be compared numerically.
cefr_scale = {"A1": 1, "A2": 2, "B1": 3, "B2": 4, "C1": 5, "C2": 6}

def best_fuzzy_match(input_lang, employee_langs, threshold= fuzzy_match_threshold):
    """Return the entry of `employee_langs` most similar to `input_lang`, or None.

    Similarity is the `difflib.SequenceMatcher` ratio of the normalized strings.
    The first candidate with the highest ratio wins; it is returned only when
    that ratio reaches `threshold`.
    """
    winner, top_ratio = None, 0
    for candidate in employee_langs:
        matcher = difflib.SequenceMatcher(None, normalize(input_lang), normalize(candidate))
        ratio = matcher.ratio()
        if ratio > top_ratio:
            winner, top_ratio = candidate, ratio
    if top_ratio >= threshold:
        return winner
    return None

def language_score(project_langs, employee_langs):
    """Score an employee's languages against a project's language requirements.

    Both arguments map a language name to a CEFR level code. Each required
    language is fuzzy-matched to the employee's languages; a matched language
    scores 1.0 when the employee's level meets the requirement, otherwise it
    loses 1/6 per missing CEFR step (floored at 0). The result is
    coverage (matched / required) times the mean fit, rounded to 2 decimals,
    or 0.0 when nothing matches.
    """
    fits = []
    for wanted_lang, wanted_level in project_langs.items():
        known_lang = best_fuzzy_match(wanted_lang, employee_langs)
        if not known_lang:
            continue
        need = cefr_scale.get(wanted_level, 0)
        have = cefr_scale.get(employee_langs[known_lang], 0)
        if have >= need:
            fits.append(1.0)
        else:
            fits.append(max(0, 1 - (need - have) / 6))

    if not fits:
        return 0.0

    coverage = len(fits) / len(project_langs)
    mean_fit = sum(fits) / len(fits)
    return round(coverage * mean_fit, 2)




### Industry Matching Score

In [21]:
def fuzzy_in_list(value, lst, threshold=fuzzy_match_threshold):
    """Return True when `value` is similar to at least one item of `lst`.

    Comparison is case-insensitive and uses the `difflib.SequenceMatcher`
    ratio, which must be at least `threshold`.
    """
    return any(
        difflib.SequenceMatcher(None, value.lower(), candidate.lower()).ratio() >= threshold
        for candidate in lst
    )

def industry_score(project_industry, employee_industries, threshold=fuzzy_match_threshold):
    """Return 1.0 if the project's industry fuzzy-matches any of the employee's industries, else 0.0."""
    has_experience = fuzzy_in_list(project_industry, employee_industries, threshold)
    if has_experience:
        return 1.0
    return 0.0


### Required Skills Matching

In [23]:
# Skill Match Score with fuzzy key match

def skill_match_score_with_fuzzy_keys(project_skills_dict, complexity, core_competency):
    """Score an employee's competencies against a project's required skills.

    `project_skills_dict` maps skill name -> required level and `core_competency`
    maps skill name -> employee level. Skill names are fuzzy-matched. A matched
    skill scores 1.0 when the employee meets the requirement, otherwise
    1 - (gap / 10). Capability is coverage (matched / required) times the mean
    skill fit; it is then compared with `complexity / 10`: meeting that target
    gives 1.0, otherwise the ratio capability / target rounded to 2 decimals.
    Returns 0.0 when no required skill can be matched.
    """
    fit_per_skill = []
    for wanted_skill, required in project_skills_dict.items():
        owned_skill = best_fuzzy_match(wanted_skill, core_competency)
        if not owned_skill:
            continue
        actual = core_competency[owned_skill]
        fit_per_skill.append(1.0 if actual >= required else 1 - (required - actual) / 10)

    if not fit_per_skill:
        return 0.0

    coverage = len(fit_per_skill) / len(project_skills_dict)
    expertise_fit = sum(fit_per_skill) / len(fit_per_skill)
    capability = coverage * expertise_fit
    complexity_target = complexity / 10
    if capability >= complexity_target:
        return 1.0
    return round(capability / complexity_target, 2)


### Customer Preferences (Certifications) Matching

In [25]:
# Certification match score (fuzzy coverage of the requested certifications)
def certification_score(project_certs, employee_certs, threshold = fuzzy_match_threshold):
    """Fraction of the project's requested certifications held by the employee.

    Parameters
    ----------
    project_certs : certifications the customer asks for.
    employee_certs : certifications the employee holds.
    threshold : minimum similarity ratio for two certification names to be
        treated as the same; forwarded to `fuzzy_match`.

    Returns
    -------
    1.0 when the project requests no certifications, otherwise the share of
    requested certifications that fuzzy-match at least one employee certification.
    """
    if not project_certs:
        return 1.0  # nothing requested, so the employee trivially qualifies
    matched = sum(
        1
        for p_cert in project_certs
        # Pass the caller's threshold through; it was previously ignored.
        if any(fuzzy_match(p_cert, e_cert, threshold) for e_cert in employee_certs)
    )
    return matched / len(project_certs)

### Expertise Requirement Matching

In [27]:
# Expertise areas match score (fuzzy coverage of the required expertise areas)
def expertise_score(project_expertise, employee_expertise, threshold = fuzzy_match_threshold):
    """Fraction of the project's required expertise areas covered by the employee.

    Parameters
    ----------
    project_expertise : expertise areas the project requires.
    employee_expertise : expertise areas the employee has.
    threshold : minimum similarity ratio for two area names to be treated as
        the same; forwarded to `fuzzy_match`.

    Returns
    -------
    1.0 when the project requires no expertise areas, otherwise the share of
    required areas that fuzzy-match at least one of the employee's areas.
    """
    if not project_expertise:
        return 1.0  # nothing required, so the employee trivially qualifies
    matched = sum(
        1
        for p_area in project_expertise
        # Pass the caller's threshold through; it was previously ignored.
        if any(fuzzy_match(p_area, e_area, threshold) for e_area in employee_expertise)
    )
    return matched / len(project_expertise)

### Project Summary and Scope & Deliverables Matching with Employee Job Role

In [29]:
# Shared TF-IDF vectorizer, created once and reused by text_similarity_tfidf.
# Note: that function calls fit_transform on each text pair, so whatever was
# fitted on this instance is replaced on every call.
tfidf_vectorizer = TfidfVectorizer()


In [30]:
def text_similarity_tfidf(text1, text2):
    """Cosine similarity of two texts in TF-IDF space, rounded to 3 decimals.

    The shared `tfidf_vectorizer` is fitted on just these two texts, so the
    vocabulary and weights are specific to the pair being compared.
    """
    pair_matrix = tfidf_vectorizer.fit_transform([text1, text2])
    first_row, second_row = pair_matrix[0:1], pair_matrix[1:2]
    similarity = cosine_similarity(first_row, second_row)[0][0]
    return round(similarity, 3)

# Creating Merged Table For Scoring All Employees by Project

In [32]:
# Cross join: pair every project with every employee via a constant helper key.
# The key is added with `assign` on temporary copies, so the source frames are
# not modified and this cell can be re-run safely.
merged_df = (
    smart_projects_df.assign(key=1)
    .merge(smart_employees_df.assign(key=1), on="key")
    .drop(columns="key")
)


In [33]:
# Preview the project x employee pairs; columns shared by both frames carry _x (project) / _y (employee) suffixes.
merged_df.head()

Unnamed: 0,ProjectID,Project Summary,Scope and Deliverables,Theme_x,Products Involved,Required Skills and Expertise,Customer Preferences (Certifications),Integration Requirements (Expertise Areas),Customer Industry,Work Location_x,...,Role Description,Theme_y,Products Experience,Core Competencies,External/Internal Certifications,Expertise Areas,Industry Experience,Work Location_y,Work Flexibility_y,Languages Known
0,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,...,Collaborates with leadership to align HR strat...,HR,"[HRIS Plus, Onboarding Suite, Employee Experie...","{'Digital HR': 10, 'Organizational Development...","[PMP, SHRM-CP]","[HRIS Systems, Organizational Development Syst...",[Healthcare],Vienna,onsite,{'Italian': 'A1'}
1,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,...,Analyzes business processes and recommends wor...,Consulting,[ERP Migration Tool],"{'Workflow Optimization': 10, 'Business Analys...",[Business Analysis Certification],[ERP Systems Integration],[Finance],Vienna,remote,"{'English': 'A2', 'German': 'C1', 'Italian': '..."
2,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,...,Designs high-level technical solutions for ent...,Technical,[CloudSuite],"{'Data Analysis': 8, 'Cloud Services': 7}",[ISO 27001],[Cybersecurity],[Education],Vienna,hybrid,{'German': 'C1'}
3,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,...,Collaborates with leadership to align HR strat...,HR,[Employee Experience Platform],"{'Communication Skills': 6, 'Organizational De...",[HR Analytics Certification],"[Employee Experience Platforms, HRIS Systems]",[Manufacturing],London,remote,"{'German': 'A1', 'French': 'C1', 'English': 'C2'}"
4,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,...,Prepares technical demos and solution proposal...,Sales,[Sales Enablement Suite],"{'Client Management': 10, 'CRM Integration': 6...","[CRM Specialist Certification, Certified Sales...",[Sales Pipeline Automation],[Education],London,onsite,"{'English': 'B1', 'Italian': 'A1'}"


# Scoring Each Employee per Project

In [35]:
scores = []

for _, row in merged_df.iterrows():
    # Role fit: mean TF-IDF similarity of the role description against the
    # project summary and against the scope text.
    role_text = row["Role Description"]
    summary_sim = text_similarity_tfidf(row["Project Summary"], role_text)
    scope_sim = text_similarity_tfidf(row["Scope and Deliverables"], role_text)
    role_fit_score = round((summary_sim + scope_sim) / 2, 3)

    # `_x` columns come from the project, `_y` columns from the employee.
    product_fit = product_score(row["Products Involved"], row["Products Experience"])
    location_fit = location_score(
        row["Work Location_x"], row["Work Flexibility_x"],
        row["Work Location_y"], row["Work Flexibility_y"],
    )

    record = {
        "ProjectID": row["ProjectID"],
        "EmployeeID": row["EmployeeID"],
        "Product Match Score": round(product_fit, 2),
        "Location Match Score": round(location_fit, 2),
        "Language Match Score": language_score(row["Languages Required"], row["Languages Known"]),
        "Industry Match Score": industry_score(row["Customer Industry"], row["Industry Experience"]),
        "Skill Match Score": skill_match_score_with_fuzzy_keys(
            row["Required Skills and Expertise"], row["Complexity"], row["Core Competencies"]
        ),
        "Certification Match Score": certification_score(
            row["Customer Preferences (Certifications)"], row["External/Internal Certifications"]
        ),
        "Expertise Match Score": expertise_score(
            row["Integration Requirements (Expertise Areas)"], row["Expertise Areas"]
        ),
        "Job Description Match Score": role_fit_score,
    }
    scores.append(record)

# One row of scores per (project, employee) pair
scored_df = pd.DataFrame(scores)
scored_df.head()


Unnamed: 0,ProjectID,EmployeeID,Product Match Score,Location Match Score,Language Match Score,Industry Match Score,Skill Match Score,Certification Match Score,Expertise Match Score,Job Description Match Score
0,P1,E1,1.0,1.0,0.06,0.0,1.0,1.0,1.0,0.053
1,P1,E2,0.0,1.0,0.56,0.0,0.83,0.0,0.0,0.0
2,P1,E3,0.0,1.0,0.33,1.0,0.0,0.0,0.0,0.0
3,P1,E4,1.0,1.0,0.61,0.0,1.0,0.0,0.0,0.053
4,P1,E5,0.0,1.0,0.06,1.0,0.83,0.0,0.0,0.0


In [36]:
# NOTE(review): repeats the preview already displayed at the end of the scoring cell.
scored_df.head()

Unnamed: 0,ProjectID,EmployeeID,Product Match Score,Location Match Score,Language Match Score,Industry Match Score,Skill Match Score,Certification Match Score,Expertise Match Score,Job Description Match Score
0,P1,E1,1.0,1.0,0.06,0.0,1.0,1.0,1.0,0.053
1,P1,E2,0.0,1.0,0.56,0.0,0.83,0.0,0.0,0.0
2,P1,E3,0.0,1.0,0.33,1.0,0.0,0.0,0.0,0.0
3,P1,E4,1.0,1.0,0.61,0.0,1.0,0.0,0.0,0.053
4,P1,E5,0.0,1.0,0.06,1.0,0.83,0.0,0.0,0.0


In [37]:
# Attach the per-pair scores to the project x employee table.
# Score columns left over from a previous run of this cell are dropped first, so
# re-running it does not create duplicated `_x` / `_y` score columns.
pair_keys = ["ProjectID", "EmployeeID"]
score_columns = [col for col in scored_df.columns if col not in pair_keys]
merged_df = (
    merged_df
    .drop(columns=score_columns, errors="ignore")
    .merge(scored_df, how="left", on=pair_keys)
)

In [66]:
# Show every column when rendering wide frames (this changes the display option for the whole session).
pd.set_option('display.max_columns', None)
# `head()` already returns a DataFrame, so no extra `pd.DataFrame(...)` wrapper is needed.
merged_df.head(100)

Unnamed: 0,ProjectID,Project Summary,Scope and Deliverables,Theme_x,Products Involved,Required Skills and Expertise,Customer Preferences (Certifications),Integration Requirements (Expertise Areas),Customer Industry,Work Location_x,Work Flexibility_x,Languages Required,Complexity,EmployeeID,Role Name,Role Description,Theme_y,Products Experience,Core Competencies,External/Internal Certifications,Expertise Areas,Industry Experience,Work Location_y,Work Flexibility_y,Languages Known,Product Match Score,Location Match Score,Language Match Score,Industry Match Score,Skill Match Score,Certification Match Score,Expertise Match Score,Job Description Match Score
0,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3,E1,HR Business Partner,Collaborates with leadership to align HR strat...,HR,"[HRIS Plus, Onboarding Suite, Employee Experie...","{'Digital HR': 10, 'Organizational Development...","[PMP, SHRM-CP]","[HRIS Systems, Organizational Development Syst...",[Healthcare],Vienna,onsite,{'Italian': 'A1'},1.0,1.0,0.06,0.0,1.00,1.0,1.0,0.053
1,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3,E2,Workflow Consultant,Analyzes business processes and recommends wor...,Consulting,[ERP Migration Tool],"{'Workflow Optimization': 10, 'Business Analys...",[Business Analysis Certification],[ERP Systems Integration],[Finance],Vienna,remote,"{'English': 'A2', 'German': 'C1', 'Italian': '...",0.0,1.0,0.56,0.0,0.83,0.0,0.0,0.000
2,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3,E3,Solution Architect,Designs high-level technical solutions for ent...,Technical,[CloudSuite],"{'Data Analysis': 8, 'Cloud Services': 7}",[ISO 27001],[Cybersecurity],[Education],Vienna,hybrid,{'German': 'C1'},0.0,1.0,0.33,1.0,0.00,0.0,0.0,0.000
3,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3,E4,HR Business Partner,Collaborates with leadership to align HR strat...,HR,[Employee Experience Platform],"{'Communication Skills': 6, 'Organizational De...",[HR Analytics Certification],"[Employee Experience Platforms, HRIS Systems]",[Manufacturing],London,remote,"{'German': 'A1', 'French': 'C1', 'English': 'C2'}",1.0,1.0,0.61,0.0,1.00,0.0,0.0,0.053
4,P1,HR digital onboarding system,"Implement HRIS system, self-service portals",HR,[Employee Experience Platform],"{'Digital HR': 10, 'Taletn Management': 6, 'Or...",[SHRM-CP],[Organizational Development Systems],dEucation,Belrin,remote,"{'French': 'A1', 'Greman': 'A2', 'Itlaian': 'C2'}",3,E5,Pre-Sales Engineer,Prepares technical demos and solution proposal...,Sales,[Sales Enablement Suite],"{'Client Management': 10, 'CRM Integration': 6...","[CRM Specialist Certification, Certified Sales...",[Sales Pipeline Automation],[Education],London,onsite,"{'English': 'B1', 'Italian': 'A1'}",0.0,1.0,0.06,1.0,0.83,0.0,0.0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,P5,API and system integration project,Develop middleware for integration of ERP/CRM,Technical,"[CloudSuite, IntegrationHub]","{'IT Infrastructure': 7, 'Cloud Services': 9}","[Microsoft Azure Certification, ISO 27001]","[Scripting, Networking]",eHalthcare,Berlin,hybrid,"{'French': 'C1', 'Italian': 'A2'}",6,E16,Legal Counsel,Provides legal support for contracts and compl...,Legal,"[Compliance Suite, Contract Manager Pro, Regul...","{'Regulatory Knowledge': 4, 'Contract Manageme...",[GDPR Certification],"[Regulatory Compliance Tools, Document Archiving]",[Education],Vienna,hybrid,{'French': 'B1'},0.0,0.0,0.33,0.0,0.00,0.0,0.0,0.085
96,P5,API and system integration project,Develop middleware for integration of ERP/CRM,Technical,"[CloudSuite, IntegrationHub]","{'IT Infrastructure': 7, 'Cloud Services': 9}","[Microsoft Azure Certification, ISO 27001]","[Scripting, Networking]",eHalthcare,Berlin,hybrid,"{'French': 'C1', 'Italian': 'A2'}",6,E17,Integration Developer,Develops integrations between Canon products a...,Technical,"[Print2.0, AIScan]","{'Workflow Automation': 7, 'Cloud Services': 5...",[Microsoft Azure Certification],"[Networking, Cloud Infrastructure]","[Manufacturing, Healthcare, Retail]",Berlin,remote,{'French': 'C2'},0.0,0.5,0.50,1.0,0.50,0.5,0.5,0.041
97,P5,API and system integration project,Develop middleware for integration of ERP/CRM,Technical,"[CloudSuite, IntegrationHub]","{'IT Infrastructure': 7, 'Cloud Services': 9}","[Microsoft Azure Certification, ISO 27001]","[Scripting, Networking]",eHalthcare,Berlin,hybrid,"{'French': 'C1', 'Italian': 'A2'}",6,E18,Content Creator,"Develops written, video, and visual content fo...",Marketing,[Content CMS],"{'Copywriting': 4, 'Content Strategy': 6, 'SEO...",[Digital Marketing Certification],"[Social Media Integration, SEO Optimization]",[Finance],Berlin,hybrid,{'Italian': 'B1'},0.0,1.0,0.50,0.0,0.00,0.0,0.0,0.079
98,P5,API and system integration project,Develop middleware for integration of ERP/CRM,Technical,"[CloudSuite, IntegrationHub]","{'IT Infrastructure': 7, 'Cloud Services': 9}","[Microsoft Azure Certification, ISO 27001]","[Scripting, Networking]",eHalthcare,Berlin,hybrid,"{'French': 'C1', 'Italian': 'A2'}",6,E19,Corporate Trainer,Designs and delivers employee training programs.,HR,"[HRIS Plus, Onboarding Suite]","{'Organizational Development': 9, 'Communicati...",[SHRM-CP],"[Organizational Development Systems, HRIS Syst...","[Education, Retail]",London,remote,"{'German': 'A1', 'English': 'B2', 'French': 'B2'}",0.0,0.0,0.42,0.0,0.00,0.0,0.0,0.051


In [39]:
# Preview the rows with the highest 'Job Description Match Score'.
# Sorting is descending; head() with no argument keeps the first five rows.
# NOTE(review): sort_values uses its default sort algorithm here, so the
# relative order of rows with equal scores is not guaranteed to be stable.
(
    merged_df
    .sort_values(by='Job Description Match Score', ascending=False)
    .head()
)

Unnamed: 0,ProjectID,Project Summary,Scope and Deliverables,Theme_x,Products Involved,Required Skills and Expertise,Customer Preferences (Certifications),Integration Requirements (Expertise Areas),Customer Industry,Work Location_x,...,Work Flexibility_y,Languages Known,Product Match Score,Location Match Score,Language Match Score,Industry Match Score,Skill Match Score,Certification Match Score,Expertise Match Score,Job Description Match Score
180,P10,Business strategy development program,Assist C-suite with market expansion strategy,Consulting,"[ERP Migration Tool, Strateg yKit, Business An...","{'Workflow Optimization': 10, 'Projec tManagem...","[Agile Practitioner, Six Sigma]",[ERP Systems Integration],Healtchare,Berlin,...,onsite,{'Italian': 'A1'},0.0,1.0,0.06,1.0,0.31,0.0,0.0,0.145
340,P18,Business strategy development program,Assist C-suite with market expansion strategy,Consulting,[ERP Migration Tool],"{'Business Anlaysis': 8, 'Strategic Planning':...","[Business Analysis Certification, Six Sigma]","[Business Workflow Optimization, ERP Systems I...",Education,London,...,onsite,{'Italian': 'A1'},0.0,1.0,0.22,0.0,0.42,0.0,0.0,0.145
140,P8,Business strategy development program,Assist C-suite with market expansion strategy,Consulting,"[ERP Migration Tool, Startegy Kit, Business An...","{'Change Management': 5, 'Project Management':...","[Six Sigma, PMP]","[Business Workflow Optimization, ERP Systems I...",Retail,Londno,...,onsite,{'Italian': 'A1'},0.0,1.0,0.28,0.0,1.0,0.5,0.0,0.145
143,P8,Business strategy development program,Assist C-suite with market expansion strategy,Consulting,"[ERP Migration Tool, Startegy Kit, Business An...","{'Change Management': 5, 'Project Management':...","[Six Sigma, PMP]","[Business Workflow Optimization, ERP Systems I...",Retail,Londno,...,remote,"{'German': 'A1', 'French': 'C1', 'English': 'C2'}",0.0,1.0,0.61,0.0,1.0,0.0,0.0,0.145
153,P8,Business strategy development program,Assist C-suite with market expansion strategy,Consulting,"[ERP Migration Tool, Startegy Kit, Business An...","{'Change Management': 5, 'Project Management':...","[Six Sigma, PMP]","[Business Workflow Optimization, ERP Systems I...",Retail,Londno,...,hybrid,"{'German': 'B2', 'French': 'C2'}",0.0,1.0,0.67,1.0,0.0,0.0,0.0,0.145


In [106]:
# Persist the scored frame to a CSV file in the current working directory.
# index=True is the pandas default, spelled out here so it is obvious that the
# row index is written as the first (unnamed) column of the file.
merged_df.to_csv(path_or_buf='MVPScoring2.csv', index=True)