# In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Job roles paired with their comma-separated skill lists
_role_skill_pairs = [
    ("Data Scientist", "Python, Statistics, Machine Learning, Data Visualization"),
    ("ML Engineer", "Python, Machine Learning, Deployment, Algorithms"),
    ("Data Analyst", "SQL, Python, Data Visualization, Excel"),
    ("Data Engineer", "Python, SQL, ETL, Cloud Computing"),
    ("AI Researcher", "Python, Deep Learning, Machine Learning, Algorithms"),
    ("Business Analyst", "Excel, SQL, Data Visualization, Business Intelligence"),
    ("NLP Engineer", "Python, NLP, Machine Learning, Deep Learning"),
]

# Column-oriented view of the pairs: one list of roles, one list of skills
data = {
    "Role": [role for role, _ in _role_skill_pairs],
    "Skills": [skills for _, skills in _role_skill_pairs],
}

# Build the DataFrame (one row per role; columns "Role" and "Skills")
df = pd.DataFrame(data)

# Convert text skills into vectors using TF-IDF
def _split_skills(text):
    """Split a comma-separated skills string into individual skill tokens.

    Whitespace around each skill is stripped and empty entries are dropped,
    so "Python,SQL" and "Python ,  SQL" tokenize the same as "Python, SQL".
    """
    return [skill.strip() for skill in text.split(",") if skill.strip()]


# Each whole skill phrase (e.g. "Machine Learning") is one token, so a custom
# tokenizer replaces the default word-level regex. token_pattern=None tells
# scikit-learn the regex is intentionally unused, which avoids the
# "token_pattern will not be used" UserWarning. A named function (rather than
# a lambda) also keeps the fitted vectorizer picklable.
vectorizer = TfidfVectorizer(tokenizer=_split_skills, token_pattern=None)
skill_matrix = vectorizer.fit_transform(df["Skills"])

# Compute Cosine Similarity between every pair of rows of the TF-IDF matrix;
# row/column i corresponds to row i of df.
cosine_sim = cosine_similarity(skill_matrix)

# Function to get the top similar roles (3 by default)
def recommend_roles(input_role, top_n=3, *, roles=None, similarity=None):
    """Return the roles most similar to ``input_role``, best match first.

    Args:
        input_role: Role name to look up; must match a known role exactly.
        top_n: Maximum number of roles to return. Defaults to 3.
        roles: Optional sequence of role names. Defaults to the module-level
            ``df["Role"]`` column.
        similarity: Optional square similarity matrix in which row/column
            ``i`` corresponds to ``roles[i]``. Defaults to the module-level
            ``cosine_sim`` matrix.

    Returns:
        A list of at most ``top_n`` role names ordered by descending
        similarity. Roles with equal scores keep their original order.
        ``input_role`` itself is never included.

    Raises:
        IndexError: If ``input_role`` is not a known role.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    role_names = list(df["Role"] if roles is None else roles)
    scores = cosine_sim if similarity is None else similarity

    try:
        idx = role_names.index(input_role)
    except ValueError:
        # IndexError is kept (rather than a new exception type) so callers
        # that already catch the lookup failure keep working.
        raise IndexError(f"Unknown role: {input_role!r}") from None

    # Exclude the query role by position instead of assuming it sorts first:
    # another role with an identical skill vector ties at the top score and,
    # if it appears earlier, would otherwise push the query role into the
    # results.
    candidates = [(i, score) for i, score in enumerate(scores[idx]) if i != idx]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return [role_names[i] for i, _ in candidates[:top_n]]

# Demo: look up the closest matches for one role and print them
input_role = "Data Scientist"
similar_roles = recommend_roles(input_role)
print(f"Top 3 similar roles to {input_role}: {similar_roles}")



# Output: Top 3 similar roles to Data Scientist: ['Data Analyst', 'AI Researcher', 'ML Engineer']
