In [2]:
import nltk

# Download the Punkt tokenizer
nltk.download('punkt')
import nltk

# Download the stopwords corpus
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string

# Sample CSV file with employee details
csv_file_path = 'employee_data.csv'

# Load the CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Function to clean and tokenize text
def clean_and_tokenize(text):
    # Convert to lowercase
    text = text.lower()
    # Remove punctuation and numbers
    text = ''.join([char for char in text if char not in string.punctuation and not char.isdigit()])
    # Tokenize
    tokens = word_tokenize(text)
    # Remove stopwords
    tokens = [word for word in tokens if word not in stopwords.words('english')]
    return ' '.join(tokens)

# Clean and tokenize the 'current_tech_stack', 'current_job', and 'current_soft_skills' columns
df['clean_tech_stack'] = df['current_tech_stack'].apply(clean_and_tokenize)
df['clean_job'] = df['current_job'].apply(clean_and_tokenize)
df['clean_soft_skills'] = df['current_soft_skills'].apply(clean_and_tokenize)

# Create a TF-IDF vectorizer for tech stack, job roles, and soft skills
vectorizer = TfidfVectorizer()

# Combine tech stack, job roles, and soft skills into a single text
df['combined_text'] = df['clean_tech_stack'] + ' ' + df['clean_job'] + ' ' + df['clean_soft_skills']

# Create TF-IDF matrices
tfidf_matrix = vectorizer.fit_transform(df['combined_text'])

# Compute cosine similarity
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to recommend career options based on tech stack, job role, and soft skills
def recommend_career_options(employee_index, df):
    # Get the cosine similarity scores for the employee
    similarity_scores = list(enumerate(cosine_similarities[employee_index]))

    # Sort the career options based on similarity scores
    sorted_career_options = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # Extract top 3 career options
    top_career_options = sorted_career_options[1:4]  # Exclude the employee itself (similarity score with itself is 1)

    # Display the recommended career options
    print(f"Recommended Career Options for {df.at[employee_index, 'EMPLOYEE NAME']}:")
    for option_index, similarity_score in top_career_options:
        recommended_job = df.at[option_index, 'current_job']
        print(f"{recommended_job} (Similarity Score: {similarity_score:.2f})")

        # Identify missing skills for the recommended job
        missing_skills = set(df.at[option_index, 'clean_tech_stack'].split(',')) - set(df.at[employee_index, 'clean_tech_stack'].split(','))

        # Display missing skills
        print("Missing Skills:")
        print(', '.join(missing_skills))

        # Infer development track based on recommended job's skills
        development_track = f"Develop skills in {', '.join(missing_skills)} to progress toward {recommended_job}"
        print("Development Track:")
        print(development_track)
        print("\n")

# Example: Recommend career options for the employee at index 0
recommend_career_options(0, df)

Recommended Career Options for Arnav Kapoor:
Chief Marketing Officer (Similarity Score: 0.46)
Missing Skills:
marketing automation crm
Development Track:
Develop skills in marketing automation crm to progress toward Chief Marketing Officer


HR Business Partner (Similarity Score: 0.37)
Missing Skills:
hris performance management systems
Development Track:
Develop skills in hris performance management systems to progress toward HR Business Partner


Chief Financial Officer (CFO) (Similarity Score: 0.34)
Missing Skills:
sap hyperion financial reporting
Development Track:
Develop skills in sap hyperion financial reporting to progress toward Chief Financial Officer (CFO)


