In [1]:
import pandas as pd
import requests
import os
import smtplib
from email.message import EmailMessage
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:

# ------------------ Setup ------------------

# Set your working directory.
# Override with the IAPA_WORKING_DIR environment variable instead of editing the notebook.
# The default deliberately has no trailing separator: os.path.join adds one, and in a raw
# string a trailing `\\` is two literal backslashes that end up inside every joined path.
working_dir = os.environ.get('IAPA_WORKING_DIR', r'C:\Users\<YourName>\Downloads\IAPA')

# Email settings
# Credentials are read from the environment so real secrets never have to be saved in the
# notebook; the placeholders below are only fallbacks and will not authenticate.
SENDER_EMAIL = os.environ.get('SENDER_EMAIL', 'youremail@gmail.com')        # <<< your email
SENDER_PASSWORD = os.environ.get('SENDER_PASSWORD', 'yourapppassword')      # <<< your Gmail App Password

# Load BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load CSVs
employee_data = pd.read_csv(os.path.join(working_dir, 'employee_data.csv'))
role_requirements = pd.read_csv(os.path.join(working_dir, 'role_requirements.csv'))

In [3]:
# Preprocessing
def preprocess_skills(skill_str):
    """Split a comma-separated skills string into normalized skill names.

    Each entry is stripped of surrounding whitespace and lower-cased.
    Entries that are empty after stripping (e.g. from a trailing or doubled
    comma) are dropped.

    Args:
        skill_str: Comma-separated skills, e.g. ``"Python, SQL"``. Any
            non-string value (``None``, or the float NaN pandas produces for
            an empty CSV cell) is treated as "no skills".

    Returns:
        list[str]: Normalized skill names in their original order; an empty
        list when there is nothing to parse.
    """
    # pandas yields float('nan') for blank cells, which has no .split()
    if not isinstance(skill_str, str):
        return []

    cleaned = (part.strip().lower() for part in skill_str.split(','))
    return [skill for skill in cleaned if skill]

In [4]:
# Skill correction map
# Lookup from a short, normalized (lower-case) skill name to a longer, more
# descriptive phrase. Built from ordered (short name, expanded phrase) pairs.
skill_correction_map = dict([
    # data & analytics
    ("python", "python programming"),
    ("power bi", "microsoft power bi"),
    ("data visualization", "data visualization with python"),
    ("statistics", "introduction to statistics"),
    # software engineering
    ("rest api", "rest api development"),
    ("ci/cd", "devops ci cd pipelines"),
    ("oop", "object oriented programming"),
    ("algorithms", "algorithms and data structures"),
    # cloud & infrastructure
    ("aws/azure/gcp", "cloud computing fundamentals"),
    ("terraform", "terraform infrastructure as code"),
    ("kubernetes", "kubernetes for beginners"),
    ("networking", "computer networking basics"),
    # marketing
    ("content marketing", "content marketing strategy"),
    ("ppc", "digital advertising ppc"),
    ("social media metrics", "social media analytics"),
])

In [5]:
# Build role-to-required-skills map
# Keys are the values of the 'Role' column; each value is the parsed skill list
# from that row's 'Required Skills' cell. A role that appears on several rows
# keeps the skills from its last row.
role_to_skills = {
    role_name: preprocess_skills(required_csv)
    for role_name, required_csv in zip(
        role_requirements['Role'],
        role_requirements['Required Skills'],
    )
}

In [6]:
# Search Coursera API
def search_coursera_api(skill, limit=3, timeout=10):
    """Look up Coursera courses matching a skill.

    Args:
        skill: Free-text search phrase. It is passed via ``params`` so that
            characters such as ``&``, ``#`` or ``+`` are URL-encoded instead
            of corrupting the query string.
        limit: Maximum number of courses to request (default 3).
        timeout: Seconds to wait for the HTTP request before giving up
            (default 10), so one stalled connection cannot hang the run.

    Returns:
        list[tuple[str, str]]: ``(title, link)`` pairs. ``title`` falls back
        to ``'No title'`` and ``link`` to ``''`` when the response element
        lacks a name or slug. An empty list is returned when the request or
        response parsing fails.
    """
    try:
        response = requests.get(
            "https://api.coursera.org/api/courses.v1",
            params={"q": "search", "query": skill, "limit": limit},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()

        results = []
        for course in data.get('elements', []):
            title = course.get('name', 'No title')
            slug = course.get('slug', '')
            link = f"https://www.coursera.org/learn/{slug}" if slug else ''
            results.append((title, link))
        return results
    except Exception as e:
        # Deliberately best-effort: a failed lookup must not abort the whole
        # analysis, so the error is reported and treated as "no courses".
        print(f"❌ Coursera API search failed for {skill}: {e}")
        return []

In [7]:

# Send email (formatted nicely)
def send_email(name, recipient, role, missing_skills, recommended_trainings):
    """Email one employee their skill-gap report via Gmail SMTP over SSL.

    Args:
        name: Employee name used in the greeting and in log output.
        recipient: Destination email address.
        role: Role title shown in the subject and body.
        missing_skills: Comma-separated skill names; may be empty.
        recommended_trainings: Newline-separated lines of the form
            ``"<skill>: <course>; <course>"``; may be empty. Lines without a
            colon are rendered as a single bullet.

    Returns:
        bool: True when the message was handed to the SMTP server, False when
        sending failed (the error is printed rather than raised).
    """
    msg = EmailMessage()
    msg['Subject'] = f'Skill Gap Report for Your Role: {role}'
    msg['From'] = SENDER_EMAIL
    msg['To'] = recipient

    # Format Recommended Trainings properly
    formatted_courses = ""
    for block in recommended_trainings.split('\n'):
        block = block.strip()
        if not block:
            # ''.split('\n') yields [''], which used to render a stray "- " bullet
            continue
        if ':' in block:
            # Split on the first colon only: course links contain "https:" too
            skill, courses = block.split(':', 1)
            formatted_courses += f"- {skill.strip()}:\n"
            for course in courses.split(';'):
                if course.strip():
                    formatted_courses += f"    - {course.strip()}\n"
        else:
            formatted_courses += f"- {block}\n"
    if not formatted_courses:
        formatted_courses = "- None\n"

    # Format Missing Skills nicely
    skill_names = [skill.strip() for skill in missing_skills.split(',') if skill.strip()]
    if skill_names:
        formatted_missing_skills = '\n'.join(f"- {skill}" for skill in skill_names)
    else:
        formatted_missing_skills = "- None identified"

    # Full Email Body
    body = f"""\
Dear {name},

We’ve conducted a skills review based on your current role: {role}.

🧩 Missing Skills:
{formatted_missing_skills}

📚 Recommended Trainings:
{formatted_courses}

Please complete these trainings to enhance your professional skills.

Best regards,  
HR Skills Team
"""

    msg.set_content(body)

    try:
        # Timeout keeps one unresponsive connection from blocking the whole run
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, timeout=30) as smtp:
            smtp.login(SENDER_EMAIL, SENDER_PASSWORD)
            smtp.send_message(msg)
            print(f"✅ Email sent successfully to {name} ({recipient})")
        return True
    except Exception as e:
        print(f"❌ Failed to send email to {recipient}: {e}")
        return False


In [8]:

# ------------------ Main Logic ------------------

# A required skill counts as covered when its best cosine similarity against any of the
# employee's current skills reaches this value; below it the skill is reported as missing.
SIMILARITY_THRESHOLD = 0.7

# Cache search results
skill_course_cache = {}

# Cache required-skill embeddings per role so each role is encoded once, not once per employee
role_embedding_cache = {}

# Prepare recommendations
recommendations = []

for _, emp in employee_data.iterrows():
    emp_id = emp['Employee ID']
    name = emp['Name']
    role = emp['Role']
    current_skills_raw = emp['Current Skills']
    performance = emp['Performance Rating']
    email = emp['Email']

    # A blank CSV cell arrives as NaN (a float), which cannot be split; treat it as no skills.
    if isinstance(current_skills_raw, str):
        current_skills = [s for s in preprocess_skills(current_skills_raw) if s]
    else:
        current_skills = []

    if role not in role_to_skills:
        print(f"⚠️ No skill requirements found for role '{role}' (employee {emp_id}); no gaps will be reported.")
    # Drop empty entries and duplicates while keeping the original order
    required_skills = list(dict.fromkeys(s for s in role_to_skills.get(role, []) if s))

    # Identify missing skills
    if required_skills and current_skills:
        if role not in role_embedding_cache:
            role_embedding_cache[role] = model.encode(required_skills)
        required_embeddings = role_embedding_cache[role]
        current_embeddings = model.encode(current_skills)

        # One (required x current) similarity matrix; the row-wise max is the best match
        # for each required skill.
        best_similarity = cosine_similarity(required_embeddings, current_embeddings).max(axis=1)
        skill_matches = dict(zip(required_skills, best_similarity))
    else:
        # cosine_similarity rejects empty inputs, so handle them here: with no current
        # skills every required skill scores 0 (missing); with no required skills the
        # mapping is simply empty.
        skill_matches = {skill: 0 for skill in required_skills}

    missing_skills = [
        skill for skill, score in skill_matches.items() if score < SIMILARITY_THRESHOLD
    ]

    # Recommend courses
    recommended_courses = []
    for skill in missing_skills:
        search_skill = skill_correction_map.get(skill.lower(), skill)

        if search_skill in skill_course_cache:
            courses = skill_course_cache[search_skill]
        else:
            courses = search_coursera_api(search_skill)
            skill_course_cache[search_skill] = courses

        if courses:
            course_list = [f"{title} ({link})" for title, link in courses]
            recommended_courses.append(f"{skill.title()}: " + "; ".join(course_list))
        else:
            recommended_courses.append(f"{skill.title()}: No relevant Coursera courses found.")

    # Save recommendation
    recommendations.append({
        'Employee ID': emp_id,
        'Name': name,
        'Role': role,
        'Performance Rating': performance,
        'Email': email,
        'Missing Skills': ', '.join(missing_skills),
        'Recommended Trainings': '\n'.join(recommended_courses)
    })

# Save output CSV
output_path = os.path.join(working_dir, 'skill_gap_recommendations_coursera_api.csv')
output_df = pd.DataFrame(recommendations)
output_df.to_csv(output_path, index=False)

print(f"✅ Skill Gap Analysis + Coursera API Recommendation Complete! Saved to '{output_path}'.")

# ------------------ Send Emails ------------------

emails_attempted = 0
emails_skipped = 0

for rec in recommendations:
    recipient = rec['Email']

    # Nothing to report: do not email an empty gap list with a request to complete trainings.
    if not rec['Missing Skills']:
        emails_skipped += 1
        continue

    # A blank Email cell arrives as NaN, which cannot be used as a message header.
    if not isinstance(recipient, str) or not recipient.strip():
        print(f"⚠️ Skipping {rec['Name']}: no email address on record.")
        emails_skipped += 1
        continue

    send_email(
        name=rec['Name'],
        recipient=recipient,
        role=rec['Role'],
        missing_skills=rec['Missing Skills'],
        recommended_trainings=rec['Recommended Trainings']
    )
    emails_attempted += 1

# send_email reports each failure itself, so this summary must not claim that every
# message was delivered.
print(
    f"📬 Email run finished: {emails_attempted} attempted, {emails_skipped} skipped. "
    "See the per-recipient messages above for any failures."
)


✅ Skill Gap Analysis + Coursera API Recommendation Complete! Saved to 'C:\Users\Haameem Shimar\Downloads\IAPA\\skill_gap_recommendations_coursera_api.csv'.
✅ Email sent successfully to Alice Smith (haameemshimar7@gmail.com)
✅ Email sent successfully to Bob Johnson (bobppp0@gmail.com)
✅ Email sent successfully to Carol Lee (fedrty56@gmail.com)
✅ Email sent successfully to David Wright (x23330155@student.ncirl.ie)
✅ Email sent successfully to Emma Davis (x23330175@student.ncirl.ie)
✅ Email sent successfully to Michael Brown (michael.brown@techcorp.com)
✅ Email sent successfully to Sophia Martinez (sophiamartinez.design@mail.com)
✅ Email sent successfully to James Wilson (jwilson.pm@biznet.org)
✅ Email sent successfully to Olivia Taylor (olivia.taylor@datalabs.io)
✅ Email sent successfully to Ethan Anderson (ethan.dev@webmakers.dev)
✅ Email sent successfully to Ava Thomas (ava.thomas@securenet.com)
✅ Email sent successfully to Noah White (noah.white@products.co)
✅ Email sent successfully 