<a href="https://colab.research.google.com/github/bgsw404notfound/SkiSphe/blob/main/Keerthana's.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# 1. Read the four input tables from CSV files in the working directory.
print("Loading files...")
associate_profiles, skill_inventory, learning_goals, projects = [
    pd.read_csv(csv_name)
    for csv_name in (
        'associate_profiles.csv',
        'skill_inventory.csv',
        'learning_goals.csv',
        'projects.csv',
    )
]

# 2. Attach skills and learning goals to each associate profile.
#    Left joins keep every profile row; validate='one_to_one' makes pandas
#    raise if 'associate_id' is duplicated on either side of a join.
print("\nMerging associate files...")
merged_associates = associate_profiles
for extra_table in (skill_inventory, learning_goals):
    merged_associates = merged_associates.merge(
        extra_table,
        how='left',
        on='associate_id',
        validate='one_to_one',
    )

# --- Group skills by proficiency level ---
SKILL_LEVELS = ('L1', 'L2', 'L3', 'L4')


def _group_skills_by_level(frame, skill_columns, levels=SKILL_LEVELS):
    """Add one column per level to *frame* listing the skills held at it.

    For every row, each column in *skill_columns* whose non-null value
    (converted to a stripped string) equals one of *levels* contributes its
    column name to that level's entry. Names are joined with ", " in column
    order; a row with no skill at a level gets an empty string.

    Args:
        frame: DataFrame to modify in place; gains one column per level.
        skill_columns: Labels of the columns that hold a level per skill.
        levels: Recognised level labels, also used as output column names.

    Returns:
        set: Stripped, non-null cell values that are not one of *levels*.
    """
    # Reserve the output columns first (same order and default as before).
    # They are filled with a single assignment after the loop so the frame
    # is never written to while `iterrows` is iterating over it.
    for level in levels:
        frame[level] = ""

    grouped = {level: [] for level in levels}
    unexpected = set()

    for _, row in frame.iterrows():
        row_skills = {level: [] for level in levels}
        for skill in skill_columns:
            value = row[skill]
            if pd.isna(value):
                continue
            value = str(value).strip()
            if value in row_skills:
                row_skills[value].append(str(skill))
            else:
                unexpected.add(value)
        # One join per cell instead of repeated string concatenation.
        for level in levels:
            grouped[level].append(", ".join(row_skills[level]))

    # Lists are in row order, so positional assignment lines up with the frame.
    for level in levels:
        frame[level] = grouped[level]
    return unexpected


# --- Process Associate Skills ---
print("\nGrouping associate skills by level...")

# Associate skill columns: the contiguous run from the first to the last
# named skill column (inclusive) in the merged table.
assoc_first_skill_col = 'System Design'
assoc_last_skill_col = 'Tekton'
assoc_skill_columns = merged_associates.columns[
    merged_associates.columns.get_loc(assoc_first_skill_col):
    merged_associates.columns.get_loc(assoc_last_skill_col) + 1
]

# Values outside L1-L4 are collected here across both tables.
unexpected_levels = _group_skills_by_level(merged_associates, assoc_skill_columns)

# --- Process Project Skills ---
print("\nGrouping project skills by level...")

# Project skill columns: everything from the first named skill column onward.
proj_first_skill_col = 'Microservices Architecture'
proj_skill_columns = projects.columns[projects.columns.get_loc(proj_first_skill_col):]

unexpected_levels |= _group_skills_by_level(projects, proj_skill_columns)

# Warn about any cell values that were collected as unexpected levels.
if unexpected_levels:
    print(f"\nWarning: Unexpected skill levels found: {unexpected_levels}")

# Write both tables to CSV in the working directory, omitting the index.
for frame, csv_path in (
    (merged_associates, 'associates_with_grouped_skills.csv'),
    (projects, 'projects_with_grouped_skills.csv'),
):
    frame.to_csv(csv_path, index=False)

# Summarise what was written.
for message in (
    "\nProcessing complete!",
    "Saved outputs:",
    "- 'associates_with_grouped_skills.csv' (merged associate data with L1-L4 skill groups)",
    "- 'projects_with_grouped_skills.csv' (project data with L1-L4 skill groups)",
):
    print(message)