# 3.2: Recommender System

In [1]:
#importing necessary libraries.
import pandas as pd
import numpy as np

# import TfidfVector from sklearn.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

## Data Loading and Processing

In [2]:
employees = pd.read_csv(r"fau_onboarding.csv")
employees.columns

Index(['id', 'teams', 'previous_experience', 'hobbies', 'sports'], dtype='object')

In [3]:
def create_soup(x):
    return ''.join(x['teams']) + ''.join(x['hobbies']) + '' + ''.join(x['sports'])
employees['soup'] = employees.apply(create_soup, axis=1)

In [4]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(employees['soup'])
tfidf_matrix.shape

(33, 25)

In [5]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [6]:
# construct a reverse map of indices and employee IDs
indices = pd.Series(employees.index, index=employees['id']).drop_duplicates()

In [7]:
def get_recommendations(ID, cosine_sim=cosine_sim):
    
    # get the index of the employee that matches the employee ID
    IDx = indices[ID]
    
    # get the pairwise similarity scores of all employees with the specified employee ID
    sim_scores = list(enumerate(cosine_sim[IDx]))
    
    # sort employees based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # get the scores of the three most similar employees
    sim_scores = sim_scores[1:6]
    
    # get employee indices
    employees_indices = [i[0] for i in sim_scores]
    
    # return the top five most similar employees
    return employees['id'].iloc[employees_indices]

In [8]:
get_recommendations('emp_033', cosine_sim)

1     emp_002
28    emp_029
24    emp_025
26    emp_027
17    emp_018
Name: id, dtype: object