In [12]:
from surprise import Dataset,accuracy
from surprise import Reader
from surprise import KNNBaseline
from surprise import KNNBasic
from surprise import SVD
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.accuracy import rmse
import pandas as pd
from sklearn.metrics import r2_score, accuracy_score, mean_squared_error

# Surprise needs to know the rating bounds; ratings here run from 1 to 5.
reader = Reader(rating_scale=(1, 5))

# Ratings table; later code reads its student_id, course_id, course_name
# and course_rating columns.
df = pd.read_csv("processed_data.csv")

def read_item_names(frame=None):
    """Build lookup tables between course ids and course names.

    Args:
        frame: Optional DataFrame holding 'course_id' and 'course_name'
            columns. When omitted, the module-level ``df`` is used, which
            keeps existing zero-argument calls working.

    Returns:
        A ``(rid_to_name, name_to_rid)`` tuple of dicts: the first maps a
        course id to its name, the second maps a course name to its id.
        If an id or a name occurs on several rows, the last row wins.
    """
    if frame is None:
        frame = df

    course_ids = frame['course_id']
    course_names = frame['course_name']

    # zip over the two columns avoids building a Series per row as
    # iterrows() does; dict() keeps the last value for duplicate keys,
    # matching a row-by-row assignment loop.
    rid_to_name = dict(zip(course_ids, course_names))
    name_to_rid = dict(zip(course_names, course_ids))
    return rid_to_name, name_to_rid

# Build a Surprise dataset from the (student, course, rating) triples and
# train on the full set of ratings.
data = Dataset.load_from_df(df[['student_id', 'course_id', 'course_rating']], reader)
trainSet = data.build_full_trainset()

# Single source of truth for the neighborhood size, used by both the model
# and the neighbor lookup so the two cannot drift apart.
N_NEIGHBORS = 10

# Item-based KNN (user_based=False) with the pearson_baseline similarity.
sim_options = {"name": "pearson_baseline", "user_based": False}
algo = KNNBasic(k=N_NEIGHBORS, sim_options=sim_options)
algo.fit(trainSet)

rid_to_name, name_to_rid = read_item_names()

idx = "Robotics"
# Translate the course name to its raw id, then to the trainset's inner id.
course_raw_id = name_to_rid[idx]
course_inner_id = algo.trainset.to_inner_iid(course_raw_id)

# Inner ids of the nearest neighbors of the query course.
neighbor_inner_ids = algo.get_neighbors(course_inner_id, k=N_NEIGHBORS)

# Convert inner ids -> raw ids -> course names. Kept as a list (not a
# generator) so course_neighbors is still usable after it has been printed.
course_neighbors = [
    rid_to_name[algo.trainset.to_raw_iid(inner_id)]
    for inner_id in neighbor_inner_ids
]

print("-------------------------------------")
# Report the number of neighbors actually returned rather than a fixed 10.
print("The", len(course_neighbors), "Recommendations for", idx, ":")
print("-------------------------------------")
for course_name in course_neighbors:
    print(course_name)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
-------------------------------------
The 10 Recommendations for Robotics :
-------------------------------------
Young Designer
Game and App Development
Graphic Designing
Scratch Coding
3D Designing and Printing
Young Coder
VEX Robotics Competition
Game Development with Unity
Game Combat Championship
VEX Robotics


In [17]:
def precision_recall_at_k(predictions, k=10, threshold=4):
    """Compute precision@k and recall@k separately for every user.

    Args:
        predictions: Iterable of 5-tuples ``(uid, iid, true_r, est, details)``;
            only the user id, true rating and estimated rating are used.
        k: How many of each user's highest-estimated items count as the
            recommendation list.
        threshold: Ratings at or above this value count as relevant (true
            rating) or recommended (estimated rating).

    Returns:
        ``(precisions, recalls)``: two dicts keyed by user id. A metric whose
        denominator is zero is reported as 0.
    """
    # Bucket (estimated, true) rating pairs under the user they belong to.
    ratings_by_user = defaultdict(list)
    for prediction in predictions:
        uid, _, true_r, est, _ = prediction
        ratings_by_user[uid].append((est, true_r))

    precisions = {}
    recalls = {}
    for uid, pairs in ratings_by_user.items():
        # Highest estimated rating first; the first k entries are the
        # recommendation candidates.
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        # Relevant items across everything the user rated.
        n_rel = sum((actual >= threshold) for (_, actual) in ranked)

        # Items within the top k that are actually recommended.
        n_rec_k = sum((guess >= threshold) for (guess, _) in top_k)

        # Items within the top k that are both relevant and recommended.
        n_rel_and_rec_k = sum(
            ((actual >= threshold) and (guess >= threshold))
            for (guess, actual) in top_k
        )

        # Precision@k is undefined when nothing is recommended; use 0.
        if n_rec_k != 0:
            precisions[uid] = n_rel_and_rec_k / n_rec_k
        else:
            precisions[uid] = 0

        # Recall@k is undefined when nothing is relevant; use 0.
        if n_rel != 0:
            recalls[uid] = n_rel_and_rec_k / n_rel
        else:
            recalls[uid] = 0

    return precisions, recalls

from collections import defaultdict
from surprise.model_selection import KFold

# Fixed seed shared by the fold splitter and the SVD initialisation so the
# printed metrics are repeatable from one run to the next.
RANDOM_STATE = 42

data = Dataset.load_from_df(df[['student_id', 'course_id', 'course_rating']], reader)
kf = KFold(n_splits=5, random_state=RANDOM_STATE)
algo = SVD(random_state=RANDOM_STATE)

# For each of the 5 folds: fit on the training part, predict the held-out
# part, and report precision@5 / recall@5 with a relevance threshold of 4.
for trainset, testset in kf.split(data):
    print("Loop ")
    algo.fit(trainset)
    predictions = algo.test(testset)
    precisions, recalls = precision_recall_at_k(predictions, k=5, threshold=4)

    # Precision and recall are per-user; average them over all users.
    print("Precision")
    print(sum(precisions.values()) / len(precisions))
    print("Recall")
    print(sum(recalls.values()) / len(recalls))

Loop 
Precision
0.04727272727272727
Recall
0.043515151515151514
Loop 
Precision
0.06465833945628215
Recall
0.05963752143032083
Loop 
Precision
0.051224944320712694
Recall
0.046523137837169014
Loop 
Precision
0.038063562453806356
Recall
0.03461443705346145
Loop 
Precision
0.06325301204819277
Recall
0.05760542168674699
