In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity, cosine_distances
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
import squarify
import seaborn as sns
from surprise import accuracy
from surprise.model_selection.validation import cross_validate
from surprise.dataset import Dataset
from surprise.reader import Reader
from surprise import SVD, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline
from surprise.model_selection import train_test_split
import random

## Calling the Saved Dataset

In [2]:
course_dataset = pd.read_csv("data/courseData.csv", encoding= 'unicode_escape')
course_dataset.head()

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
0,0,1001,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
1,1,1002,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
2,2,1003,B.E.,Mechanical,MITAOE,['CATIA'],CATIA
3,3,1004,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
4,4,1005,B.E.,Mechanical,MITCOE,['CATIA'],CATIA


In [3]:
user_dataset = pd.read_csv("data/userData.csv", encoding= 'unicode_escape')
user_dataset.head()

Unnamed: 0.1,Unnamed: 0,userid,degree_1,degree_1_specializations,known_languages,key_skills,career_objective,key_skills_str
0,0,1001,B.E.,Computer Science & Engineering,"['English', ' Marathi', ' Hindi']","['C', ' Java', ' Keras', ' Flask', ' Deep Lear...",Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel..."
1,1,1002,B.E.,Computer Science & Engineering,['Hindi English'],"['Java', ' Neural Networks', ' AI', ' Python',...",Interested in working under company offering A...,"Java, Neural Networks, AI, Python, Html5, ..."
2,2,1003,B.E.,Computer Science & Engineering,['Missing'],['Missing'],Missing,Missing
3,3,1004,B.E.,Computer Science & Engineering,"['English', ' Hindi', ' Marathi', ' Marwari']","['XML', ' C', ' Java', ' Data Structures', ' P...",Currently a final year student of Computer Eng...,"XML, C, Java, Data Structures, Python, Mo..."
4,4,1005,B.E.,Computer Science & Engineering,"['English', 'Hindi ', 'Kashmiri ', ' Urdu']","['XML', ' Word', ' Data Structures', ' Communi...",To have a growth oriented and challenging care...,"XML, Word, Data Structures, Communication, ..."


In [4]:
ratings_df = pd.read_csv("data/ratingData.csv", encoding= 'unicode_escape')
ratings_df.head()

Unnamed: 0,course_id,user_id,rating
0,2001,1001,5
1,2001,1002,3
2,2001,1003,1
3,2001,1004,0
4,2001,1005,2


## Collaborative Filtering

In [5]:
#Collaborative Filtering to make recommendations to Movie Watchers. 
#Collaborative Filtering is based on the idea that users similar to a me can be used to predict how much I will 
#like a particular product or service those users have used/experienced but I have not.
#I will not be implementing Collaborative Filtering from scratch. Instead, I will use the Surprise library 
#that used extremely powerful algorithms like Singular Value Decomposition (SVD) to minimise RMSE (Root Mean Square Error)
#and give great recommendations.
#Implementation of SVD for surprise library is given on this link

Lower values of RMSE indicate better fit. RMSE is a good measure of how accurately the model predicts the response. It's the most important criterion for fit if the main purpose of the model is prediction.
Based on a rule of thumb, it can be said that RMSE values between **0.2 and 0.5** shows that the model can relatively predict the data accurately.

There is no correct value for MSE. Simply put, the lower the value the better and 0 means the model is perfect.

MAE: A metric that tells us the **mean absolute difference** between the predicted values and the actual values in a dataset. The lower the MAE, the better a model fits a dataset.

### Using Surprise and testing with different algorithms (SVD, KnnBasic, KnnBaseline, KnnWithMeans, KnnWithZScore) to find the best one to use based on MAE and RMAE scores. 

In [6]:
#using surprise
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings_df[['user_id','course_id','rating']], reader)

#split into training and test sets using 80:20 ratio
raw_ratings = data.raw_ratings
random.shuffle(raw_ratings)                

ratio = int(len(raw_ratings)*0.8)
train_raw = raw_ratings[:ratio] 
test_raw = raw_ratings[ratio:] 

data.raw_ratings = train_raw       
trainset = data.build_full_trainset() 
testset = data.construct_testset(test_raw)

#create dict for different models 
models=[SVD(), KNNBasic(), KNNBaseline(), KNNWithMeans(), KNNWithZScore()] 
results = {} #to store the scores

#perform cross validation of MAE and RMSE for all models
for model in models:
    #kfold set to 5
    crossval_scores = cross_validate(model, data, measures=["MAE","RMSE"], cv=5, n_jobs=-1)  
    
    #saving and renaming appropraitely
    result = pd.DataFrame.from_dict(crossval_scores).mean(axis=0).\
             rename({'test_mae':'MAE', 'test_rmse': 'RMSE', 'fit_time': 'Fit Time', 'test_time': 'Test Time'})
    results[str(model).split("algorithms.")[1].split("object ")[0]] = result
    
#printing all models results
all_models = pd.DataFrame.from_dict(results)
all_models.T.sort_values(by='RMSE') #models sorted by RMSE

Unnamed: 0,MAE,RMSE,Fit Time,Test Time
matrix_factorization.SVD,1.120438,1.479856,0.051221,0.052366
knns.KNNWithMeans,1.292236,1.655888,0.003556,0.026408
knns.KNNBaseline,1.29911,1.660817,0.006161,0.059605
knns.KNNWithZScore,1.304127,1.679629,0.005802,0.030892
knns.KNNBasic,1.333314,1.692393,0.002383,0.062152


####  Using the SVD model to generate collaborative filtering recommendations since it has the least MAE and RMSE results

In [7]:
def svd_cf_recommendations(user):
    
    ''' This function generates "get_recommend" number of book recommendations 
        using Singular value decomposition. The function needs as input two 
        different parameters:
        (1) user i.e., user for which recommendations need to be generated 
        (2) get_recommend i.e., number of recommendations to generate for the user
        Default values are: user=13552, get_recommend=10
    '''
    
    #usisng the "best" parameters found using the gridsearchcv method from experiments notebook 
    model = SVD(n_factors = 90, n_epochs = 20, lr_all = 0.005, reg_all = 0.2)
    model.fit(trainset)
    
    #building test set and predict the ratings
    testset = trainset.build_anti_testset()
    predictions = model.test(testset)
    
    #save into dataframe
    rating_predictions = pd.DataFrame(predictions)
    
    #find the top 30 predictions for given user
    user_predictions = rating_predictions[rating_predictions['uid'] == user].\
                         sort_values(by="est", ascending = False).head(30)
    
    #create recommendations array and append user predictions as recommendations
    recommendations = []
    recommendations.append(list(user_predictions['iid']))
    recommendations = recommendations[0]
    
    return(recommendations)

In [8]:
ratings_df.head()

Unnamed: 0,course_id,user_id,rating
0,2001,1001,5
1,2001,1002,3
2,2001,1003,1
3,2001,1004,0
4,2001,1005,2


In [9]:
ratings_df[ratings_df['user_id']== 2150]

Unnamed: 0,course_id,user_id,rating


In [10]:
ratings_df[ratings_df['course_id']== 2150]

Unnamed: 0,course_id,user_id,rating
2980,2150,1001,4
2981,2150,1002,5
2982,2150,1003,3
2983,2150,1004,2
2984,2150,1005,4
2985,2150,1006,5
2986,2150,1007,3
2987,2150,1008,4
2988,2150,1009,4
2989,2150,1010,5


In [11]:
svd_cf_recommendations(1001) #input is a user and output is a course!

#1001 is a user (shown above no course is 1001)
#2082 is course since no user rating is found (check above)

[2179,
 2391,
 2154,
 2113,
 2097,
 2093,
 2218,
 2301,
 2042,
 2369,
 2290,
 2423,
 2326,
 2103,
 2349,
 2126,
 2419,
 2386,
 2327,
 2356,
 2374,
 2163,
 2288,
 2031,
 2241,
 2229,
 2311,
 2388,
 2377,
 2052]

In [12]:
svd_cf_recommendations(2150) #svd doeesnt work with input of courses

[]

In [13]:
svd_cf_recommendations("2150") #svd doeesnt work with input of courses

[]

## Content Based Filtering

In [14]:
#https://github.com/ry05/couReco/blob/master/recommender.py
#https://github.com/jalajthanaki/Movie_recommendation_engine/blob/master/Movie_recommendation_engine.ipynb

### User Dataset - Based on Career Objective and Key Skills

In [15]:
user_dataset.head(2)

Unnamed: 0.1,Unnamed: 0,userid,degree_1,degree_1_specializations,known_languages,key_skills,career_objective,key_skills_str
0,0,1001,B.E.,Computer Science & Engineering,"['English', ' Marathi', ' Hindi']","['C', ' Java', ' Keras', ' Flask', ' Deep Lear...",Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel..."
1,1,1002,B.E.,Computer Science & Engineering,['Hindi English'],"['Java', ' Neural Networks', ' AI', ' Python',...",Interested in working under company offering A...,"Java, Neural Networks, AI, Python, Html5, ..."


In [16]:
#creating a new dataset dataset using the career objetcive, key skills and their user id
cf_user_dataset = pd.DataFrame()
cf_user_dataset['userid'] = user_dataset['userid']
cf_user_dataset['career_objective'] = user_dataset['career_objective']
cf_user_dataset['key_skills_str'] = user_dataset['key_skills_str']
cf_user_dataset.head(5)

#store the career objective and key skills in description
cf_user_dataset['description'] = cf_user_dataset['career_objective'] + cf_user_dataset['key_skills_str']
cf_user_dataset.head(5)

Unnamed: 0,userid,career_objective,key_skills_str,description
0,1001,Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel...",Computer Engineering student with good technic...
1,1002,Interested in working under company offering A...,"Java, Neural Networks, AI, Python, Html5, ...",Interested in working under company offering A...
2,1003,Missing,Missing,MissingMissing
3,1004,Currently a final year student of Computer Eng...,"XML, C, Java, Data Structures, Python, Mo...",Currently a final year student of Computer Eng...
4,1005,To have a growth oriented and challenging care...,"XML, Word, Data Structures, Communication, ...",To have a growth oriented and challenging care...


In [17]:
#vectorize using countvectorize that converts into a matrix of token counts
user_count = CountVectorizer(stop_words='english', analyzer='word', ngram_range=(1, 2), min_df=0)
user_count_matrix = user_count.fit_transform(cf_user_dataset['description'].values.astype('U'))

#cosine similarity between the the matrix itself (normalized dot product of X and Y)
user_cosine_sim = cosine_similarity(user_count_matrix, user_count_matrix)

#create indices for the user is using series
cf_user_dataset = cf_user_dataset.reset_index()
user_ids = cf_user_dataset
indices = pd.Series(cf_user_dataset.index, index=cf_user_dataset['userid'])


#function to get content-filtered recommendations
def get_user_cf_recommendations(user):
    
    #get index of user
    user_id = indices[user]
    
    #find the most similar 30 users using cosine_sim and sorting with highest similarity
    sim_scores = list(enumerate(user_cosine_sim[user_id]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    user_indices = [i[0] for i in sim_scores]
    
    return user_ids.iloc[user_indices]

In [18]:
#getting the similar users recs for user 1001
get_user_cf_recommendations("1001")

Unnamed: 0,index,userid,career_objective,key_skills_str,description
894,894,1847,Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel...",Computer Engineering student with good technic...
996,996,1946,Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel...",Computer Engineering student with good technic...
93,93,1087,Dedicated and passionate computer engineering ...,"Java, Python, Machine Learning, CPP, Andro...",Dedicated and passionate computer engineering ...
256,256,1231,To enhance my expertise in the field of softwa...,"NLP, Java, Neural Networks, Keras, Python,...",To enhance my expertise in the field of softwa...
201,201,1182,Missing,"C, Data Analysis, Java, Neural Networks, D...","MissingC, Data Analysis, Java, Neural Netwo..."
180,180,1161,I am a student programmer currently seeking ex...,"Python, Data Science, Artificial Intelligenc...",I am a student programmer currently seeking ex...
91,91,1085,Team oriented individual with strong communica...,"C, Statistics, Java, MS Office, Python, D...",Team oriented individual with strong communica...
464,464,1416,Missing,"Database, Embedded C, Machine Learning","MissingDatabase, Embedded C, Machine Learning"
981,981,1931,Missing,"Deep Learning, Selenium, Cpp","MissingDeep Learning, Selenium, Cpp"
165,165,1146,Missing,"Data Structures, Android, Algorithms, Probl...","MissingData Structures, Android, Algorithms,..."


In [19]:
get_user_cf_recommendations("1847").head(10)

Unnamed: 0,index,userid,career_objective,key_skills_str,description
996,996,1946,Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel...",Computer Engineering student with good technic...
0,0,1001,Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel...",Computer Engineering student with good technic...
93,93,1087,Dedicated and passionate computer engineering ...,"Java, Python, Machine Learning, CPP, Andro...",Dedicated and passionate computer engineering ...
981,981,1931,Missing,"Deep Learning, Selenium, Cpp","MissingDeep Learning, Selenium, Cpp"
110,110,1102,Dedicated and Passionate computer engineering ...,"C, Java, Javascript, cpp, Object-Oriented ...",Dedicated and Passionate computer engineering ...
256,256,1231,To enhance my expertise in the field of softwa...,"NLP, Java, Neural Networks, Keras, Python,...",To enhance my expertise in the field of softwa...
180,180,1161,I am a student programmer currently seeking ex...,"Python, Data Science, Artificial Intelligenc...",I am a student programmer currently seeking ex...
91,91,1085,Team oriented individual with strong communica...,"C, Statistics, Java, MS Office, Python, D...",Team oriented individual with strong communica...
165,165,1146,Missing,"Data Structures, Android, Algorithms, Probl...","MissingData Structures, Android, Algorithms,..."
201,201,1182,Missing,"C, Data Analysis, Java, Neural Networks, D...","MissingC, Data Analysis, Java, Neural Netwo..."


### Course Dataset - Based on Degree 1, Degree 1 Specialization and Key Skills

In [20]:
course_dataset.head(2)

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
0,0,1001,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
1,1,1002,B.E.,Mechanical,MITCOE,['CATIA'],CATIA


In [21]:
#creating a new dataset dataset using the degree1, degree1specializations and key skills and the course id
cf_course_dataset = pd.DataFrame()
cf_course_dataset['sr_'] = course_dataset['sr_']
cf_course_dataset['degree_1'] = course_dataset['degree_1']
cf_course_dataset['degree_1_specializations'] = course_dataset['degree_1_specializations']
cf_course_dataset['key_skills_str'] = course_dataset['key_skills_str']
cf_course_dataset.head(5)

#store the career objective and key skills in description
cf_course_dataset['description'] = cf_course_dataset['degree_1'] + cf_course_dataset['degree_1_specializations'] + cf_course_dataset['key_skills_str']
cf_course_dataset.head(5)

Unnamed: 0,sr_,degree_1,degree_1_specializations,key_skills_str,description
0,1001,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
1,1002,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
2,1003,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
3,1004,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
4,1005,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA


In [22]:
#vectorize using countvectorize that converts into a matrix of token counts
course_count = CountVectorizer(stop_words='english', analyzer='word', ngram_range=(1, 2), min_df=0)
course_count_matrix = course_count.fit_transform(cf_course_dataset['description'].values.astype('U'))

#cosine similarity between the the matrix itself (normalized dot product of X and Y)
course_cosine_sim = cosine_similarity(course_count_matrix, course_count_matrix)

#create indices for the courses is using series
cf_course_dataset = cf_course_dataset.reset_index()
courses = cf_course_dataset
indices = pd.Series(cf_course_dataset.index, index=cf_course_dataset['sr_'])

#function to get content-filtered recommendations
def get_course_cf_recommendations(course): 
    
    #get index of course
    course_id = indices[course]
    
    #find the most similar 30 courses using cosine_sim and sorting with highest similarity
    sim_scores = list(enumerate(course_cosine_sim[course_id]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    course_indices = [i[0] for i in sim_scores]
    
    return courses.iloc[course_indices]

In [23]:
#getting the similar course recs for course 1001
get_course_cf_recommendations(1001).head(10)

Unnamed: 0,index,sr_,degree_1,degree_1_specializations,key_skills_str,description
1,1,1002,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
2,2,1003,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
3,3,1004,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
4,4,1005,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
5,5,1006,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
6,6,1007,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
7,7,1008,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
8,8,1009,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
9,9,1010,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
10,10,1011,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA


In [24]:
get_course_cf_recommendations(1003).head(10)

Unnamed: 0,index,sr_,degree_1,degree_1_specializations,key_skills_str,description
1,1,1002,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
2,2,1003,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
3,3,1004,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
4,4,1005,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
5,5,1006,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
6,6,1007,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
7,7,1008,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
8,8,1009,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
9,9,1010,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA
10,10,1011,B.E.,Mechanical,CATIA,B.E.MechanicalCATIA


## The Hybrid Recommender 

This recommender will take as input the user and output a number of course recommendations for the user using a hybrid approach. The Collaborative Filtering model will be SVD and the Content Filtering is done using a CountVectorizer and similarity matrix.

To get the recommendations, courses that have been previously taken and considered completed by the user will be removed from the recommendations list.

Furthermore, the predictor will be developed after this is complete. 

Finally, the cluster technique might not be used since collaborative filtering was implemented using surprise

In [25]:
#https://github.com/jalajthanaki/Movie_recommendation_engine/blob/master/Movie_recommendation_engine.ipynb

In [26]:
svd_cf_recommendations(user) #in: user, out:course
get_user_cf_recommendations(user) #in user, out: user
get_course_cf_recommendations(course) #in course, out: course

NameError: name 'user' is not defined

In [27]:
def hybrid_recommender(user):
    
    svd_recs = [] #pd.DataFrame()
    user_recs = pd.DataFrame()
    course_recs = pd.DataFrame()
    
    #check with svd first 
    svd_recs = svd_cf_recommendations(user)
    
    
    #when svd dataset has no data on a user, recommendations will be made solely on the cf recommenders
    #gets similar users
    user_recs = get_user_cf_recommendations(user)
    
    #cross check courses with cf courser recommender
    
    #CURRENTLY GETTING USER NOT COURSE
    #get a random course?
    course = user#random.randint(1001, 11000)
    course_recs = get_course_cf_recommendations(course) 
    
    #get similarity between the course dataset and user using cosine thing
    #user
    user_count = CountVectorizer(stop_words='english', analyzer='word', ngram_range=(1, 2), min_df=0)
    user_count_matrix = user_count.fit_transform(user_recs['key_skills_str'].values.astype('U'))
    
    #course
    course_count = CountVectorizer(stop_words='english', analyzer='word', ngram_range=(1, 2), min_df=0)
    course_count_matrix = course_count.fit_transform(course_recs['key_skills_str'].values.astype('U'), y = user_count_matrix.shape[1])
    
    print(course_count_matrix.shape)
    print(user_count_matrix.shape)
    
    #to fix matrix shapes so they are equal    
    if(course_count_matrix.shape != user_count_matrix.shape):
        if(course_count_matrix.shape[1] < user_count_matrix.shape[1]):
            
            temp_user_m = user_count_matrix
            new_user_temp = temp_user_m[:, course_count_matrix.shape[1]] 

            new_course_temp = course_count_matrix
            print(new_user_temp.shape)
            print(course_count_matrix.shape)
            print(user_count_matrix.shape)
            
        else:
            
            temp_user_m = course_count_matrix
            new_course_temp = temp_user_m[:, user_count_matrix.shape[1]] 

            new_user_temp = user_count_matrix
            print(new_course_temp.shape)
            print(course_count_matrix.shape)
            print(user_count_matrix.shape)
        
    else:
        new_course_temp = course_count_matrix
        new_user_temp = user_count_matrix
    
    #cosine similarity between the the matrix itself (normalized dot product of X and Y)
    course_cosine_sim = cosine_distances(new_course_temp, new_user_temp)

    #create indices for the courses is using series
    course_recs = course_recs.reset_index()
    courses = course_recs['sr_']
    indices = pd.Series(course_recs.index, index=course_recs['sr_'])
    
    new_course_rec = get_course_cf_recommendations(course)
    
    
    return svd_recs, user_recs[["userid", "career_objective", "key_skills_str", "description"]], course_recs[["sr_", "key_skills_str", "description"]]

In [28]:
svd, cf, ccf = hybrid_recommender(1001)

(30, 1)
(30, 237)
(30, 1)
(30, 1)
(30, 237)


In [29]:
svd

[2179,
 2391,
 2154,
 2113,
 2093,
 2218,
 2097,
 2423,
 2103,
 2301,
 2419,
 2349,
 2369,
 2326,
 2327,
 2126,
 2377,
 2042,
 2386,
 2288,
 2374,
 2290,
 2031,
 2396,
 2388,
 2052,
 2241,
 2229,
 2251,
 2356]

In [30]:
cf["userid"].iloc[0]

'1847'

In [31]:
ccf

Unnamed: 0,sr_,key_skills_str,description
0,1002,CATIA,B.E.MechanicalCATIA
1,1003,CATIA,B.E.MechanicalCATIA
2,1004,CATIA,B.E.MechanicalCATIA
3,1005,CATIA,B.E.MechanicalCATIA
4,1006,CATIA,B.E.MechanicalCATIA
5,1007,CATIA,B.E.MechanicalCATIA
6,1008,CATIA,B.E.MechanicalCATIA
7,1009,CATIA,B.E.MechanicalCATIA
8,1010,CATIA,B.E.MechanicalCATIA
9,1011,CATIA,B.E.MechanicalCATIA


In [32]:
get_course_cf_recommendations(2411)

Unnamed: 0,index,sr_,degree_1,degree_1_specializations,key_skills_str,description
58,58,1059,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
59,59,1060,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
76,76,1077,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
77,77,1078,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
78,78,1079,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
80,80,1081,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
108,108,1109,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
109,109,1110,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
110,110,1111,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
111,111,1112,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"


In [33]:
#hybrid_recommender(1847)

In [45]:
a, b, c = hybrid_recommender(1847) #user 1847 has no svd recs 
c

(30, 5)
(30, 1)
(30, 1)
(30, 5)
(30, 1)


Unnamed: 0,sr_,key_skills_str,description
0,1059,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
1,1060,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
2,1077,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
3,1078,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
4,1079,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
5,1081,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
6,1109,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
7,1110,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
8,1111,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
9,1112,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"


In [48]:
c["key_skills_str"][c['sr_']== "1059"]

Series([], Name: key_skills_str, dtype: object)

In [46]:
user_dataset["key_skills"][user_dataset['userid']== "1847"]

894    ['C', ' Java', ' Keras', ' Flask', ' Deep Lear...
Name: key_skills, dtype: object

In [44]:
get_user_cf_recommendations(1847).head(10) #its different because we changes the values of before the recommender in the hybrid recommender

Unnamed: 0,index,userid,career_objective,key_skills_str,description
5,5,1006,Missing,Missing,MissingMissing
6,6,1007,Missing,Missing,MissingMissing
10,10,1011,Missing,Missing,MissingMissing
13,13,1014,Missing,Missing,MissingMissing
17,17,1018,Missing,Missing,MissingMissing
27,27,1025,Missing,Missing,MissingMissing
30,30,1028,Missing,Missing,MissingMissing
32,32,1030,Missing,Missing,MissingMissing
34,34,1032,Missing,Missing,MissingMissing
36,36,1034,Missing,Missing,MissingMissing


In [40]:
get_course_cf_recommendations(1847)

Unnamed: 0,index,sr_,degree_1,degree_1_specializations,key_skills_str,description
58,58,1059,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
59,59,1060,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
76,76,1077,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
77,77,1078,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
78,78,1079,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
80,80,1081,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
108,108,1109,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
109,109,1110,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
110,110,1111,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"
111,111,1112,B.E.,Mechanical,"SOLIDWORKS, AUTOCAD, CREO","B.E.MechanicalSOLIDWORKS, AUTOCAD, CREO"


In [41]:
hybrid_recommender(1003)

#ratings user 1003 - 
#course 2279: 4, (1006 also rated it 4, 1007 rated it 3)
#course 2332: 2, (1006 also rated it 5, 1007 rated it 1)
#2403: 3, (1006 also rated it 4) (1006 is the first user rec given by cf)
#2418: 4 (1006 also rated it 4) (1006 is the first user rec given by cf) (1007 rated it 5 so close)(1007 is second rec user)

(30, 1)
(30, 1)


([2279,
  2099,
  2179,
  2081,
  2332,
  2202,
  2252,
  2153,
  2335,
  2242,
  2228,
  2026,
  2399,
  2417,
  2377,
  2050,
  2159,
  2103,
  2105,
  2094,
  2373,
  2065,
  2265,
  2193,
  2312,
  2033,
  2223,
  2206,
  2054,
  2400],
     userid career_objective key_skills_str     description
 5     1006          Missing        Missing  MissingMissing
 6     1007          Missing        Missing  MissingMissing
 10    1011          Missing        Missing  MissingMissing
 13    1014          Missing        Missing  MissingMissing
 17    1018          Missing        Missing  MissingMissing
 27    1025          Missing        Missing  MissingMissing
 30    1028          Missing        Missing  MissingMissing
 32    1030          Missing        Missing  MissingMissing
 34    1032          Missing        Missing  MissingMissing
 36    1034          Missing        Missing  MissingMissing
 43    1041          Missing        Missing  MissingMissing
 44    1042          Missing        Mis

In [42]:
ratings_df[ratings_df['user_id']== 1006]

Unnamed: 0,course_id,user_id,rating
5,2001,1006,1
25,2002,1006,0
45,2003,1006,3
65,2004,1006,2
85,2005,1006,2
...,...,...,...
8385,2420,1006,3
8405,2421,1006,4
8425,2422,1006,4
8445,2423,1006,3


In [43]:
ratings_df[ratings_df['course_id']== 2099]

Unnamed: 0,course_id,user_id,rating
1960,2099,1001,4
1961,2099,1002,5
1962,2099,1003,4
1963,2099,1004,5
1964,2099,1005,5
1965,2099,1006,5
1966,2099,1007,3
1967,2099,1008,4
1968,2099,1009,5
1969,2099,1010,3


## Recommender

In [None]:
#https://github.com/rochitasundar/Collaborative-Filtering-Book-Recommendation-System/blob/master/Book%20Recommendation%20Project.ipynb
#where i got the collab-filter

#https://github.com/SebastianRokholt/Hybrid-Recommender-System/blob/main/analysis-and-modelling.ipynb
#hybrid recommender

