In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv('employee_skills_ratings_V3.csv')
print(data.head)

# Define the departments and levels
departments = ['Data Science', 'Marketing', 'Finance', 'HR', 'IT']
levels = ['Lead', 'Senior Analyst', 'Analyst']
data['Current_Level'] = np.random.choice(levels, size=len(data))

# Assign current level based on the total years of experience in all departments
def assign_level(total_experience):
    if total_experience >= 10:
        return 'Lead'
    elif total_experience >= 5:
        return 'Senior Analyst'
    else:
        return 'Analyst'

# Assign realistic years of experience for each department
def assign_experience(level):
    if level == 'Lead':
        return np.random.randint(5, 10)  # 5 to 10 years
    elif level == 'Senior Analyst':
        return np.random.randint(3, 5)   # 3 to 5 years
    else:
        return np.random.randint(0, 3)    # 0 to 3 years

# Assign years of experience for each department
for dept in departments:
    data[f'{dept}_exp'] = data['Current_Level'].apply(assign_experience)

# Calculate total years of experience
data['Total_Experience'] = data[[f'{dept}_exp' for dept in departments]].sum(axis=1)

# Assign Current_Level based on Total_Experience
data['Current_Level'] = data['Total_Experience'].apply(assign_level)

# Define the relevant skills for each department
department_skills = {
    'Data Science': ['ML', 'DL', 'Python', 'Statistics', 'Data Wrangling', 'R'],
    'Marketing': ['SEO', 'Content Creation', 'Analytics', 'Social Media', 'Brand Management', 'Market Research'],
    'Finance': ['Accounting', 'Financial Analysis', 'Excel', 'Budgeting', 'Taxation', 'Investment Analysis'],
    'HR': ['Recruitment', 'Training', 'Communication', 'Conflict Resolution', 'Employee Engagement', 'HR Policies'],
    'IT': ['Java', 'C++', 'Algorithms', 'Data Structures', 'Git']
}

# Build the knowledge graph
def build_knowledge_graph(departments_skills, employee_data):
    G = nx.Graph()

    # Add department nodes and their skills with required ratings
    for dept, skills in departments_skills.items():
        G.add_node(dept, type='Department')
        for skill in skills:
            G.add_node(skill, type='Skill')

    # Add employee nodes and their skills/experience
    for i, row in employee_data.iterrows():
        employee_name = f"Employee_{row['Employee ID']}"
        G.add_node(employee_name, type='Employee')

        # Add edges for the employee's skills and their ratings
        for skill in department_skills.get(row['Department'], []):
            if skill in row:
                G.add_edge(employee_name, skill, rating=row[skill], type='Skill')

        # Add edges for the employee's experience in departments
        for dept in departments:
            dept_exp_key = f"{dept}_exp"
            if dept_exp_key in row:
                G.add_edge(employee_name, dept, experience=row[dept_exp_key], type='Experience')

    return G

# Build the knowledge graph
G = build_knowledge_graph(department_skills, data)

# Function to calculate average skill ratings from the knowledge graph
def calculate_average_skill_ratings(G, departments):
    average_skills = {dept: {} for dept in departments}

    for dept in departments:
        skill_sums = {skill: 0 for skill in department_skills[dept]}  # Initialize skill sums
        skill_counts = {skill: 0 for skill in department_skills[dept]}  # Initialize skill counts

        # Find employees who belong to the current department
        for employee in [n for n, d in G.nodes(data=True) if d['type'] == 'Employee']:
            if G.has_edge(employee, dept):  # Check if employee has experience in the department
                # Aggregate skill ratings
                for skill in department_skills[dept]:
                    if G.has_edge(employee, skill):
                        skill_sums[skill] += G.edges[employee, skill]['rating']
                        skill_counts[skill] += 1

        # Calculate average ratings
        avg_skills = {skill: (skill_sums[skill] / skill_counts[skill]) if skill_counts[skill] > 0 else 0
                      for skill in department_skills[dept]}
        average_skills[dept] = avg_skills

    return average_skills

# Calculate average skill ratings for each department
average_skill_ratings = calculate_average_skill_ratings(G, departments)

# Display average skill ratings
print("Average Skill Ratings by Department:")
print(average_skill_ratings)

# Function to evaluate fit with the knowledge graph
# Function to evaluate fit with the knowledge graph
def evaluate_fit_with_graph(G, departments_skills, candidate):
    best_department = None
    best_match_score = float('-inf')
    best_fit_details = {}

    # Calculate candidate's total experience
    total_experience = sum(candidate['experience'].values())

    for department, required_skills in departments_skills.items():
        matching_skills = {}
        missing_skills = {}
        skills_to_improve = {}

        delta = 0
        total_required_skills = len(required_skills)

        # Compare candidate's skills to the department's requirements
        for skill in required_skills:
            if G.has_edge(candidate['name'], skill):
                candidate_rating = G.edges[candidate['name'], skill]['rating']
                # Assuming a threshold rating of 3 for a skill to be considered adequate
                required_rating = 3
                delta += abs(required_rating - candidate_rating)
                if candidate_rating >= required_rating:
                    matching_skills[skill] = candidate_rating
                else:
                    skills_to_improve[skill] = (candidate_rating, required_rating)
            else:
                missing_skills[skill] = 3  # Assume a default requirement of 3 for missing skills

        if total_required_skills > 0:
            delta /= total_required_skills

        # Calculate experience score
        experience_score = 0
        if G.has_edge(candidate['name'], department):
            experience_years = G.edges[candidate['name'], department]['experience']
            avg_exp = np.mean([G.edges[emp, department]['experience'] for emp in G.nodes if G.has_edge(emp, department)])
            experience_score = min(experience_years / avg_exp, 1)

        # Combine skill and experience scores
        skill_weight = 0.7
        seniority_weight = 0.3
        total_experience_weight = 0.2  # Weight for total experience
        match_score = skill_weight * (1 - delta) + seniority_weight * experience_score + total_experience_weight * (total_experience / 10)

        if match_score > best_match_score:
            best_match_score = match_score
            best_department = department
            best_fit_details = {
                "matching_skills": matching_skills,
                "missing_skills": missing_skills,
                "skills_to_improve": skills_to_improve,
                "skill_score": 1 - delta,
                "experience_score": experience_score
            }

    return best_department, best_fit_details

# Example data for a new candidate
new_candidate = {
    'name': 'Candidate1',
    'skills': {'ML': 3, 'DL': 4, 'Python': 5, 'SEO': 2, 'Java': 4},
    'experience': {'Data Science_exp': 3, 'Marketing_exp': 1, 'Finance_exp': 0, 'HR_exp': 2, 'IT_exp': 4}
}

# Add candidate to the knowledge graph
G.add_node(new_candidate['name'], type='Candidate')
for skill, rating in new_candidate['skills'].items():
    if G.has_node(skill):
        G.add_edge(new_candidate['name'], skill, rating=rating, type='Skill')
for dept, exp in new_candidate['experience'].items():
    G.add_edge(new_candidate['name'], dept, experience=exp, type='Experience')

# Suggest the best department for the new candidate
best_department, fit_details = evaluate_fit_with_graph(G, department_skills, new_candidate)

# Output the results
print("\nBest Department for the Candidate:", best_department)
print("Fit Details:", fit_details)


<bound method NDFrame.head of      Employee ID       Department  ML  DL  Python  Statistics  Data Wrangling  \
0              1        Marketing   0   0       0           0               0   
1              2  Human Resources   0   0       0           0               0   
2              3          Finance   0   0       0           0               0   
3              4        Marketing   0   0       0           0               0   
4              5        Marketing   0   0       0           0               0   
..           ...              ...  ..  ..     ...         ...             ...   
145          146        Marketing   0   0       0           0               0   
146          147  Human Resources   0   0       0           0               0   
147          148        Marketing   0   0       0           0               0   
148          149     Data Science  10   6       2           7               2   
149          150  Human Resources   0   0       0           0               0  

In [10]:
# Visualization of the Knowledge Graph
plt.figure(figsize=(15, 10))
pos = nx.spring_layout(G, seed=42)

# Define colors for different node types
node_colors = {
    'Department': 'lightblue',
    'SeniorityLevel': 'lightgreen',
    'Skill': 'lightcoral',
    'Employee': 'lightgoldenrodyellow'
}
edge_colors = {
    'RequiresSkill': 'blue',
    'RequiredSkill': 'red',
    'Skill': 'green',
    'Experience': 'purple'
}

# Draw nodes with different colors
node_type_colors = [node_colors[G.nodes[n]['type']] for n in G.nodes]
nx.draw_networkx_nodes(G, pos, node_size=500, node_color=node_type_colors, alpha=0.8)

# Draw edges with different colors
edge_type_colors = [edge_colors.get(G.edges[e]['type'], 'black') for e in G.edges]
nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.5, edge_color=edge_type_colors)

# Draw labels
nx.draw_networkx_labels(G, pos, font_size=10, font_family='sans-serif')
edge_labels = nx.get_edge_attributes(G, 'required_rating')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='black')

plt.title("Knowledge Graph with Skill Requirements and Experience")
plt.show()


KeyError: 'Candidate'

<Figure size 1500x1000 with 0 Axes>

In [11]:
# Function to print nodes of a subgraph centered around a selected department
def print_subgraph_nodes(G, department):
    # Get all nodes connected to the department
    nodes = set([department])
    nodes.update(nx.node_connected_component(G, department))

    # Create the subgraph
    subgraph = G.subgraph(nodes)

    # Print nodes
    print(f"Nodes in the subgraph for Department: {department}")
    for node in subgraph.nodes:
        node_type = subgraph.nodes[node].get('type', 'Unknown')
        print(f"Node: {node}, Type: {node_type}")

# Print nodes for the best department
print_subgraph_nodes(G, best_department)

Nodes in the subgraph for Department: Finance
Node: Data Science, Type: Department
Node: ML, Type: Skill
Node: DL, Type: Skill
Node: Python, Type: Skill
Node: Statistics, Type: Skill
Node: Data Wrangling, Type: Skill
Node: R, Type: Skill
Node: Marketing, Type: Department
Node: SEO, Type: Skill
Node: Content Creation, Type: Skill
Node: Analytics, Type: Skill
Node: Social Media, Type: Skill
Node: Brand Management, Type: Skill
Node: Market Research, Type: Skill
Node: Finance, Type: Department
Node: Accounting, Type: Skill
Node: Financial Analysis, Type: Skill
Node: Excel, Type: Skill
Node: Budgeting, Type: Skill
Node: Taxation, Type: Skill
Node: Investment Analysis, Type: Skill
Node: HR, Type: Department
Node: IT, Type: Department
Node: Java, Type: Skill
Node: Employee_1, Type: Employee
Node: Employee_2, Type: Employee
Node: Employee_3, Type: Employee
Node: Employee_4, Type: Employee
Node: Employee_5, Type: Employee
Node: Employee_6, Type: Employee
Node: Employee_7, Type: Employee
Node: E