In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import numpy as np
from surprise import accuracy
from surprise.model_selection.validation import cross_validate
from surprise.dataset import Dataset
from surprise.reader import Reader
from surprise import SVD, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline
from surprise.model_selection import train_test_split, GridSearchCV
import random

## Calling the Saved Dataset

In [2]:
course_dataset = pd.read_csv("data/courseData.csv", encoding= 'unicode_escape')
course_dataset.head()

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
0,0,1001,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
1,1,1002,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
2,2,1003,B.E.,Mechanical,MITAOE,['CATIA'],CATIA
3,3,1004,B.E.,Mechanical,MITCOE,['CATIA'],CATIA
4,4,1005,B.E.,Mechanical,MITCOE,['CATIA'],CATIA


In [3]:
user_dataset = pd.read_csv("data/userData.csv", encoding= 'unicode_escape')
user_dataset.head()

Unnamed: 0.1,Unnamed: 0,userid,degree_1,degree_1_specializations,known_languages,key_skills,career_objective,key_skills_str
0,0,1001,B.E.,Computer Science & Engineering,"['English', ' Marathi', ' Hindi']","['C', ' Java', ' Keras', ' Flask', ' Deep Lear...",Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel..."
1,1,1002,B.E.,Computer Science & Engineering,['Hindi English'],"['Java', ' Neural Networks', ' AI', ' Python',...",Interested in working under company offering A...,"Java, Neural Networks, AI, Python, Html5, ..."
2,2,1003,B.E.,Computer Science & Engineering,['Missing'],['Missing'],Missing,Missing
3,3,1004,B.E.,Computer Science & Engineering,"['English', ' Hindi', ' Marathi', ' Marwari']","['XML', ' C', ' Java', ' Data Structures', ' P...",Currently a final year student of Computer Eng...,"XML, C, Java, Data Structures, Python, Mo..."
4,4,1005,B.E.,Computer Science & Engineering,"['English', 'Hindi ', 'Kashmiri ', ' Urdu']","['XML', ' Word', ' Data Structures', ' Communi...",To have a growth oriented and challenging care...,"XML, Word, Data Structures, Communication, ..."


In [4]:
ratings_df = pd.read_csv("data/ratingData.csv", encoding= 'unicode_escape')
ratings_df.head()

Unnamed: 0,course_id,user_id,rating
0,2001,1001,5
1,2001,1002,3
2,2001,1003,1
3,2001,1004,0
4,2001,1005,2


## Collaborative Filtering

### Using Surprise and testing with different algorithms (SVD, KnnBasic, KnnBaseline, KnnWithMeans, KnnWithZScore) to find the best one to use based on MAE and RMAE scores. 

In [5]:
#using surprise
reader = Reader(rating_scale=(0, 5))
data = Dataset.load_from_df(ratings_df[['user_id','course_id','rating']], reader)

#split into training and test sets using 80:20 ratio
raw_ratings = data.raw_ratings
random.shuffle(raw_ratings)                

ratio = int(len(raw_ratings)*0.8)
train_raw = raw_ratings[:ratio] 
test_raw = raw_ratings[ratio:] 

data.raw_ratings = train_raw       
trainset = data.build_full_trainset() 
testset = data.construct_testset(test_raw)

####  Using the SVD model to generate collaborative filtering recommendations since it has the least MAE and RMSE results

In [6]:
#Since the svd recommender is making recommmendations from the ratings dataset, 
#its recommemnding courses the users have already done
def svd_cf_recommendations(user):
    
    #using the "best" parameters found using the gridsearchcv method from experiments notebook 
    model = SVD(n_factors = 90, n_epochs = 20, lr_all = 0.005, reg_all = 0.2)
    model.fit(trainset)
    
    #building test set and predict the ratings
    testset = trainset.build_anti_testset()
    predictions = model.test(testset)
    
    #save into dataframe
    rating_predictions = pd.DataFrame(predictions)
    #print(rating_predictions)
    
    #find the predictions for the given user
    user_predictions = rating_predictions[rating_predictions['uid'] == user].\
                         sort_values(by="est", ascending = False)
        
    #removing courses already done by user (assuming they are rated 1 to 5)  
    finalcourserecs = [] #to store the final course recs
    
    #uid is the user and iid is the course id
    for i, row in user_predictions.iterrows():
        #get rating details
        rating_dets = ratings_df[(ratings_df['course_id'] == row[1]) & (ratings_df['user_id'] == user)]
        
        if rating_dets["rating"].values[0] == 0:
            #print("Not rated!")
            finalcourserecs.append(row[1])
    
    return(finalcourserecs)

In [7]:
svd_cf_recommendations(1001) #input is a user and output is a course!
#1001 is a user (shown above no course is 1001)
#2082 is course since no user rating is found (check above)

[2246,
 2076,
 2027,
 2241,
 2396,
 2184,
 2393,
 2320,
 2091,
 2336,
 2136,
 2130,
 2407,
 2253,
 2281,
 2310,
 2259,
 2302,
 2178,
 2124,
 2129]

In [8]:
user_dataset[user_dataset["userid"] == "1001"]

Unnamed: 0.1,Unnamed: 0,userid,degree_1,degree_1_specializations,known_languages,key_skills,career_objective,key_skills_str
0,0,1001,B.E.,Computer Science & Engineering,"['English', ' Marathi', ' Hindi']","['C', ' Java', ' Keras', ' Flask', ' Deep Lear...",Computer Engineering student with good technic...,"C, Java, Keras, Flask, Deep Learning, Sel..."


In [10]:
course_dataset[course_dataset["sr_"] == 2246]

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
1245,1245,2246,M TeCh,Electronics Telecommunication Engineering,MIT WPU,"['EAGLE, MiCrosoftoffiCe, ProgrammingLanguageC...","EAGLE, MiCrosoftoffiCe, ProgrammingLanguageCCP..."


In [11]:
course_dataset[course_dataset["sr_"] == 2076]

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
1075,1075,2076,B.E.,Mechanical,MITAOE,"['SOLIDWORKS, AUTOCAD, CREO']","SOLIDWORKS, AUTOCAD, CREO"


In [12]:
course_dataset[course_dataset["sr_"] == 2027]

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
1026,1026,2027,B.E.,Mechanical,MITCOE,['CATIA'],CATIA


In [13]:
course_dataset[course_dataset["sr_"] == 2241]

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
1240,1240,2241,B.E.,Electronics Telecommunication Engineering,MITAOE,"['EAGLE, MiCrosoftoffiCe, ProgrammingLanguageC...","EAGLE, MiCrosoftoffiCe, ProgrammingLanguageCCP..."


In [14]:
course_dataset[course_dataset["sr_"] == 2396]

Unnamed: 0.1,Unnamed: 0,sr_,degree_1,degree_1_specializations,campus,key_skills,key_skills_str
1395,1395,2396,B.E.,Electronics Telecommunication Engineering,"MIT,Pune","['C,PCB design']","C,PCB design"
