<a href="https://colab.research.google.com/github/adhyapratheesh/SkillSync/blob/main/SkillSync.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [3]:
# Load datasets
employee_df = pd.read_csv("/content/employee_dataset (1).csv", converters={"skills": eval, "proficiency_levels": eval})
project_df = pd.read_csv("/content/project_dataset (1).csv", converters={"skills_required": eval})

In [4]:
# Preprocess employee data
mlb = MultiLabelBinarizer()
skills_encoded = mlb.fit_transform(employee_df["skills"])

# Create a proficiency matrix
proficiency_matrix = []
for _, row in employee_df.iterrows():
    proficiency = [row["proficiency_levels"].get(skill, 0) for skill in mlb.classes_]
    proficiency_matrix.append(proficiency)
proficiency_matrix = np.array(proficiency_matrix)
# Weighted skill matrix (combining presence and proficiency)
weighted_skills_matrix = skills_encoded * proficiency_matrix

In [5]:
# Prepare training data
def prepare_training_data():
    X = []
    y = []

    for _, project in project_df.iterrows():
        project_skills = project["skills_required"]
        project_skills_vector = np.array([1 if skill in project_skills else 0 for skill in mlb.classes_])

        # Calculate similarity scores
        similarity_scores = cosine_similarity([project_skills_vector], weighted_skills_matrix)[0]
        employee_df["similarity_score"] = similarity_scores

        # Sort employees and select top team_size
        top_employees = employee_df.sort_values(by="similarity_score", ascending=False).head(project["team_size"])
        team_labels = [1 if i in top_employees.index else 0 for i in range(len(employee_df))]

        X.append(np.concatenate((project_skills_vector, [project["team_size"]])))
        y.append(team_labels)

    return np.array(X), np.array(y)

In [6]:
# Prepare data
X, y = prepare_training_data()
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train a multi-output classifier
model = MultiOutputClassifier(RandomForestClassifier(random_state=42))
model.fit(X_train, y_train)
# Evaluate the model
y_pred = model.predict(X_test)

In [7]:
# Predict team for a new project
def predict_team(project_skills, team_size):
    project_skills_vector = np.array([1 if skill in project_skills else 0 for skill in mlb.classes_])
    input_vector = np.concatenate((project_skills_vector, [team_size]))
    predictions = model.predict([input_vector])[0]

    # Select employees based on predictions
    employee_df["predicted"] = predictions
    recommended_employees = employee_df.sort_values(by="similarity_score", ascending=False)

    # Ensure at least 'team_size' members are returned
    recommended_team = recommended_employees.head(team_size)
    if len(recommended_team) < team_size:
        print(f"Warning: Only {len(recommended_team)} suitable team members found.")

    # Assign team leader as the top-ranked employee
    team_leader = recommended_team.iloc[0] if not recommended_team.empty else None

    return recommended_team, team_leader

In [11]:
# Take user input for project details
project_id = int(input("Enter Project ID: "))
project_name = input("Enter Project Name: ")
required_skills = input("Enter Required Skills (comma-separated): ").split(",")
required_skills = [skill.strip() for skill in required_skills]  # Clean whitespace
team_size = int(input("Enter Team Size: "))
# Recommend team based on input
team, leader = predict_team(required_skills, team_size)

# Display results
print("Project Name:", project_name)
print("Required Skills:", required_skills)
print("Recommended Team:")
print(team[["employee_id", "employee_name", "similarity_score"]].to_string(index=False))
print("\nTeam Leader:")
print(leader[["employee_id", "employee_name", "similarity_score"]].to_string(index=False))

Enter Project ID: 23
Enter Project Name: skillsync
Enter Required Skills (comma-separated): python,java
Enter Team Size: 4
Project Name: skillsync
Required Skills: ['python', 'java']
Recommended Team:
 employee_id employee_name  similarity_score
         664   Sam Johnson          1.000000
         274   Jamie Jones          0.816497
          69     Sam Davis          0.811503
         715   Casey Brown          0.792594

Team Leader:
        664
Sam Johnson
        1.0
