# **Content-based Course Recommender System using Course Similarities**

### Importing Libraries and Loading the Datasets

In [72]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [74]:
# Read the stored course similarity matrix from disk
sim_df = pd.read_csv(
    "similarity_matrix.csv",
    index_col=0,  # the first CSV column supplies the row labels
)
sim_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,297,298,299,300,301,302,303,304,305,306
0,1.000000,0.000000,0.000000,0.457013,0.019795,0.000000,0.234371,0.253012,0.141643,0.262177,...,0.542503,0.040291,0.538850,0.149241,0.055670,0.000000,0.000000,0.000000,0.064391,0.071191
1,0.000000,1.000000,0.345033,0.000000,0.062776,0.033150,0.092908,0.000000,0.000000,0.024454,...,0.028674,0.000000,0.009600,0.000000,0.000000,0.000000,0.000000,0.000000,0.011345,0.028221
2,0.000000,0.345033,1.000000,0.000000,0.000000,0.000000,0.067318,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.457013,0.000000,0.000000,1.000000,0.060311,0.000000,0.267777,0.201611,0.134860,0.213010,...,0.424238,0.024551,0.405824,0.129914,0.033923,0.000000,0.000000,0.000000,0.069754,0.040669
4,0.019795,0.062776,0.000000,0.060311,1.000000,0.036418,0.030620,0.000000,0.046263,0.098505,...,0.078752,0.070186,0.073828,0.074278,0.032325,0.000000,0.000000,0.000000,0.062315,0.077508
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,0.000000,0.000000,0.000000,0.000000,0.000000,0.346688,0.038866,0.000000,0.000000,0.025575,...,0.029988,0.000000,0.000000,0.035355,0.000000,1.000000,0.000000,0.131306,0.011864,0.007379
303,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.083045,0.000000,0.037333
304,0.000000,0.000000,0.000000,0.000000,0.000000,0.218507,0.040827,0.000000,0.000000,0.008955,...,0.031501,0.000000,0.000000,0.037139,0.000000,0.131306,0.083045,1.000000,0.012463,0.007751
305,0.064391,0.011345,0.000000,0.069754,0.062315,0.026325,0.110670,0.012254,0.161636,0.106808,...,0.085390,0.076102,0.152476,0.067116,0.058417,0.011864,0.000000,0.012463,1.000000,0.221307


In [76]:
# Read the course catalogue; the first CSV column supplies the row labels
course_df = pd.read_csv(
    "courses.csv",
    index_col=0,
)
course_df

Unnamed: 0,COURSE_ID,TITLE,DESCRIPTION
0,ML0201EN,robots are coming build iot apps with watson ...,have fun with iot and learn along the way if ...
1,ML0122EN,accelerating deep learning with gpu,training complex deep learning models with lar...
2,GPXX0ZG0EN,consuming restful services using the reactive ...,learn how to use a reactive jax rs client to a...
3,RP0105EN,analyzing big data in r using apache spark,apache spark is a popular cluster computing fr...
4,GPXX0Z2PEN,containerizing packaging and running a sprin...,learn how to containerize package and run a ...
...,...,...,...
302,excourse89,javascript jquery and json,in this course we ll look at the javascript l...
303,excourse90,programming foundations with javascript html ...,learn foundational programming concepts e g ...
304,excourse91,front end web development with react,this course explores javascript based front en...
305,excourse92,introduction to web development,this course is designed to start you on a path...


In [78]:
# Build a lookup from each COURSE_ID value to its row label in course_df
Course_dict = dict(zip(course_df["COURSE_ID"], course_df.index))
# Preview the first ten entries of the lookup
print({course_id: Course_dict[course_id] for course_id in list(Course_dict)[:10]})

{'ML0201EN': 0, 'ML0122EN': 1, 'GPXX0ZG0EN': 2, 'RP0105EN': 3, 'GPXX0Z2PEN': 4, 'CNSC02EN': 5, 'DX0106EN': 6, 'GPXX0FTCEN': 7, 'RAVSCTEST1': 8, 'GPXX06RFEN': 9}


In [80]:
# Read the user/item/rating records; the first CSV column supplies the row labels
users_df = pd.read_csv(
    "users.csv",
    index_col=0,
)
users_df

Unnamed: 0,user,item,rating
0,1889878,CC0101EN,5
1,1342067,CL0101EN,3
2,1990814,ML0120ENv3,5
3,380098,BD0211EN,5
4,779563,DS0101EN,3
...,...,...,...
233301,1540125,DS0101EN,5
233302,1250651,PY0101EN,5
233303,1003832,CB0105ENv1,3
233304,922065,BD0141EN,4


### Generating course recommendations based on course similarities for one user


In [83]:
def generate_recommendations_for_one_user(enrolled_courses, unselected_courses, Course_dict, sim_matrix, threshold=0.6):
    """Recommend unselected courses that are similar to a user's enrolled courses.

    Every (enrolled, unselected) pair is looked up in ``sim_matrix``. An
    unselected course is kept when at least one pair scores strictly above
    ``threshold``; the score reported for it is the highest similarity found
    across all enrolled courses.

    Parameters
    ----------
    enrolled_courses : iterable
        Course IDs the user is already enrolled in.
    unselected_courses : iterable
        Candidate course IDs to score. It is traversed once per enrolled
        course, so it must be re-iterable (list, set, ...).
    Course_dict : mapping
        Maps a course ID to its row/column position in ``sim_matrix``.
    sim_matrix : 2-D indexable
        Similarity scores, read as ``sim_matrix[i][j]``.
    threshold : float, default 0.6
        Minimum similarity (exclusive) a candidate must exceed to be kept.

    Returns
    -------
    dict
        ``{course_id: best_similarity}`` ordered by descending similarity.
        Candidates with equal scores keep the order in which they were first
        accepted.

    Raises
    ------
    KeyError
        If a course ID that gets looked up is missing from ``Course_dict``.
    """
    best_scores = {}
    for enrolled_course in enrolled_courses:
        # Fetch the enrolled course's row once rather than once per candidate.
        similarity_row = sim_matrix[Course_dict[enrolled_course]]
        for unselected_course in unselected_courses:
            sim = similarity_row[Course_dict[unselected_course]]
            if sim > threshold:
                # Keep the strongest match seen so far for this candidate.
                best_scores[unselected_course] = max(sim, best_scores.get(unselected_course, sim))
    # sorted() is stable, so ties retain their first-accepted order.
    return dict(sorted(best_scores.items(), key=lambda item: item[1], reverse=True))

### Generating course recommendations based on course similarities for all users

In [86]:
def generate_recommendations_for_all():
    """Generate similarity-based course recommendations for every user in ``users_df``.

    Reads the notebook-level ``course_df``, ``sim_df``, ``users_df`` and
    ``Course_dict`` objects and delegates the per-user scoring to
    ``generate_recommendations_for_one_user``.

    Returns
    -------
    tuple of (list, list, list)
        Three parallel lists: the user IDs, the list of recommended course IDs
        for each user, and the matching list of similarity scores for each
        user. Users without any recommendation get two empty lists.
    """
    users = []
    courses = []
    sim_scores = []

    # De-duplicate the catalogue while keeping its row order. Iterating a set
    # of strings instead can yield a different order in every kernel session
    # (string hashing is randomized), which made the order of equally scored
    # recommendations irreproducible.
    all_courses = list(dict.fromkeys(course_df['COURSE_ID']))
    sim_matrix = sim_df.to_numpy()

    # Collect each user's items in a single pass instead of re-filtering the
    # whole ratings frame once per user; row order inside a group is preserved.
    enrolled_by_user = users_df.groupby('user')['item'].agg(list).to_dict()

    # Unique user IDs found in users_df
    user_ids = set(users_df['user'].to_list())

    for user_id in user_ids:
        users.append(user_id)

        # Courses the current user is already enrolled in
        enrolled_courses = enrolled_by_user.get(user_id, [])

        # Every catalogue course the current user has not taken
        already_taken = set(enrolled_courses)
        unselected_courses = [course for course in all_courses if course not in already_taken]

        # Score the candidates against the user's enrolled courses
        recommendations = generate_recommendations_for_one_user(enrolled_courses, unselected_courses, Course_dict, sim_matrix)

        # Split the ordered {course: score} mapping into two parallel lists
        courses.append(list(recommendations.keys()))
        sim_scores.append(list(recommendations.values()))

    return users, courses, sim_scores

In [88]:
# Run the recommender for all users and collect the three parallel result lists
users, courses, sim_scores = generate_recommendations_for_all()
# One row per user: the user ID, its recommended course IDs and their scores
res_dict = {
    'USER': users,
    'COURSE_ID': courses,
    'SCORE': sim_scores,
}
res_df = pd.DataFrame(res_dict, columns=['USER', 'COURSE_ID', 'SCORE'])
res_df

Unnamed: 0,USER,COURSE_ID,SCORE
0,2,"[excourse19, GPXX0XENEN, ML0101ENv3, CC0121EN,...","[0.9211323729436766, 0.8894991799933214, 0.841..."
1,4,"[CO0101EN, LB0103ENv1, excourse19, CC0103EN, C...","[1.0, 1.0, 0.9211323729436766, 0.8826374336422..."
2,5,"[CC0201EN, GPXX04MXEN, excourse06, GPXX0XENEN,...","[0.9797958971132712, 0.9, 0.9, 0.8894991799933..."
3,1048581,"[CC0103EN, excourse84, COM001EN, GPXX0QR3EN, e...","[0.8826374336422902, 0.7397041774816828, 0.739..."
4,7,[],[]
...,...,...,...
33896,393197,[],[]
33897,1048561,"[SC0103EN, GPXX0RL8EN, excourse81, excourse58,...","[1.0, 0.6542158930724487, 0.6542158930724487, ..."
33898,1703928,"[SC0103EN, DAI101EN, CC0103EN, DV0151EN, PA010...","[1.0, 0.9797958971132712, 0.8826374336422902, ..."
33899,1441786,"[CC0103EN, DV0151EN, GPXX0HZ2EN]","[0.8826374336422902, 0.8418729120241367, 0.666..."


### Recommended Courses for a given User 

In [93]:
# Example lookup: show the catalogue rows of the courses recommended to one user
user_id = 2
# Flatten the recommended course-ID lists stored on this user's row(s) of res_df
recommended_ids = [
    course_id
    for id_list in res_df.loc[res_df["USER"] == user_id, "COURSE_ID"]
    for course_id in id_list
]
# Keep only the catalogue rows whose COURSE_ID was recommended
recommended_courses = course_df[course_df['COURSE_ID'].isin(recommended_ids)]
print(f"Recommended Courses of User \033[1m{user_id}\033[0m:")
recommended_courses

Recommended Courses of User [1m2[0m:


Unnamed: 0,COURSE_ID,TITLE,DESCRIPTION
9,GPXX06RFEN,create your first mongodb database,in this guided project you will get started w...
17,GPXX0QR3EN,enabling distributed tracing in microservices ...,explore how to enable and customize tracing of...
56,CO0401EN,beyond the basics istio and ibm cloud kuberne...,start managing your microservices with istio o...
62,GPXX048OEN,action classification task based on internet f...,learn how to build a mathematical model of an ...
88,GPXX0QS6EN,monitoring the metrics of java microservices u...,you will explore how to provide system and app...
89,GPXX07YGEN,configuring microservices running in kubernetes,explore how to externalize configuration using...
103,GPXX0HZ2EN,deploying microservices to kubernetes,deploy microservices in open liberty docker co...
111,GPXX0E3QEN,building fault tolerant microservices with the...,explore how to manage the impact of failures b...
113,CB0201EN,build chatbots with watson assistant,in this course you ll explore the watson conv...
118,GPXX0XENEN,playing tictactoe with reinforcement learning ...,learn how to create and teach an agent that ne...
