# Importing the Libraries

In [32]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import mean_squared_error

In [33]:
# Load the reduced employee/course performance table and preview the first rows.
df_final = pd.read_csv('../data/output/df_final_reduced.csv')
df_final.head()

Unnamed: 0,employee_id,course_id_x,course_title,Normalized_Performance_Score,Min_Normalized_Performance_Score,Assigned_Courses
0,EMP001,C15,C15 - Ethical Hacking Techniques,0.259614,0.007043,['C11 - iOS App Development with Swift']
1,EMP001,C07,C07 - Deep Learning with TensorFlow,0.423662,0.007043,['C11 - iOS App Development with Swift']
2,EMP001,C09,C09 - Cybersecurity Fundamentals,0.008539,0.007043,['C11 - iOS App Development with Swift']
3,EMP001,C01,C01 - User Research and Testing Methods,0.264857,0.007043,['C11 - iOS App Development with Swift']
4,EMP001,C12,C12 - Prototyping with Figma,0.206106,0.007043,['C11 - iOS App Development with Swift']


In [34]:
# List the column names of the loaded table; the modelling cell below relies on
# 'employee_id', 'course_title' and 'Normalized_Performance_Score'.
df_final.columns

Index(['employee_id', 'course_id_x', 'course_title',
       'Normalized_Performance_Score', 'Min_Normalized_Performance_Score',
       'Assigned_Courses'],
      dtype='object')

# Model Development and Recommendation

In [37]:


# Course recommendation via k-nearest neighbours.
#
# Each employee is represented by a vector of normalised performance scores
# (one entry per course). Employees whose vectors are close in cosine distance
# are treated as "similar", and the courses they have taken that the query
# employee has not are recommended.
#
# Assumes df_final is already defined by the data-loading cell.

N_NEIGHBORS = 5    # similar employees to consult (the query employee is excluded)
RANDOM_SEED = 42   # fixes the query employee so Restart & Run All reproduces the
                   # same output; set to None to draw a different employee each run

# Create a pivot table: employee_id as rows, course_title as columns, and
# performance score as values. Courses an employee has no row for become 0.
# DataFrame.pivot raises ValueError if an (employee_id, course_title) pair is
# duplicated, which surfaces bad input instead of silently aggregating it.
df_pivot = df_final.pivot(
    index='employee_id',
    columns='course_title',
    values='Normalized_Performance_Score',
).fillna(0)

# Convert the pivot table to a sparse matrix
df_matrix = csr_matrix(df_pivot.values)

# Fit the NearestNeighbors model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(df_matrix)

# Choose an employee for recommendations with a local, seeded generator so the
# result does not depend on (or disturb) NumPy's global random state.
rng = np.random.default_rng(RANDOM_SEED)
query_index = int(rng.integers(df_pivot.shape[0]))
query_employee_id = df_pivot.index[query_index]
print("Query employee ID:", query_employee_id)

# Request one extra neighbour because the query row is itself part of the
# fitted data, but never more neighbours than there are employees (kneighbors
# raises if n_neighbors exceeds the number of fitted samples).
n_requested = min(N_NEIGHBORS + 1, df_pivot.shape[0])
distances, indices = model_knn.kneighbors(
    df_pivot.iloc[[query_index]].to_numpy(),
    n_neighbors=n_requested,
)

# Remove the query employee by row position rather than assuming it is the
# first hit: with tied distances (identical score vectors) or an all-zero
# vector the self-match is not guaranteed to be ranked first.
neighbours = [
    (df_pivot.index[idx], dist)
    for idx, dist in zip(indices.ravel(), distances.ravel())
    if idx != query_index
][:N_NEIGHBORS]

# Display nearest employees
print(f'\nNearest Employees for Employee ID {query_employee_id}:\n')
print('Self Match (Distance = 0):')
recommended_ids = []
for recommended_id, distance in neighbours:
    recommended_ids.append(recommended_id)
    print(f'Employee ID {recommended_id}, with distance of {distance:.4f}')

# Courses already taken by the query employee, and the union of courses taken
# by the nearest employees (one vectorised filter instead of a per-ID loop).
# Missing titles are dropped so the sets hold only comparable strings.
employee_courses = set(
    df_final.loc[df_final['employee_id'] == query_employee_id, 'course_title'].dropna()
)
all_courses = set(
    df_final.loc[df_final['employee_id'].isin(recommended_ids), 'course_title'].dropna()
)

# Determine courses to recommend
unique_courses = all_courses.difference(employee_courses)

# Sets have no stable iteration order across kernel sessions, so sort before
# printing to keep the notebook output reproducible.
if unique_courses:
    print(f'\nRecommended Courses for Employee ID {query_employee_id} (not previously taken):\n')
    for course in sorted(unique_courses):
        print(course)
else:
    # If no unique courses, suggest any course from the nearest employees
    print(f'\nAll courses have been taken by Employee ID {query_employee_id}. Suggesting courses from nearest employees:\n')
    suggested_courses = sorted(all_courses)
    for course in suggested_courses:
        print(course)

# Optionally calculate RMSE (if needed)
def calculate_rmse(recommended_ids, actual_scores):
    """Return the RMSE of the selected employees' scores around their own mean.

    The "prediction" is the mean of the selected scores, so the result is the
    root-mean-square deviation of those scores from their mean (their
    population standard deviation). It measures how spread out the neighbours'
    scores are; it is not an error against held-out data.

    Parameters
    ----------
    recommended_ids : iterable
        Employee IDs whose rows should be included.
    actual_scores : pandas.DataFrame
        Table with 'employee_id' and 'Normalized_Performance_Score' columns.
        The rows are read from this argument, not from a notebook global.

    Returns
    -------
    float
        The RMSE, or NaN when no row matches ``recommended_ids``. Missing
        scores are not filtered out; a NaN score yields a NaN result.
    """
    relevant_scores = actual_scores[actual_scores['employee_id'].isin(recommended_ids)]

    if relevant_scores.empty:
        return float('nan')  # Return NaN if no relevant scores are found

    y_true = relevant_scores['Normalized_Performance_Score'].to_numpy(dtype=float)
    y_pred = y_true.mean()  # Using the mean as a simple prediction

    # Broadcasting the scalar prediction avoids materialising a constant list.
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    return rmse

# Calculate RMSE (if desired)
# NOTE: calculate_rmse compares the nearest employees' scores with their own
# mean, so the number printed here describes how spread out those scores are;
# it is not an accuracy measure for the course recommendations above.
rmse_value = calculate_rmse(recommended_ids, df_final)
print(f'\nRMSE: {rmse_value:.4f}')
    

Query employee ID: EMP154

Nearest Employees for Employee ID EMP154:

Self Match (Distance = 0):
Employee ID EMP043, with distance of 0.1232
Employee ID EMP063, with distance of 0.2026
Employee ID EMP122, with distance of 0.2134
Employee ID EMP171, with distance of 0.2260
Employee ID EMP144, with distance of 0.2354

Recommended Courses for Employee ID EMP154 (not previously taken):

C08 - Automation Testing with Selenium
C06 - Continuous Integration and Continuous Deployment (CI/CD)
C13 - Data Pipeline Development

RMSE: 0.1776
