E.g. "Users who liked this course also liked these other courses."

In [2]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [3]:
# Load datasets
courses_df = pd.read_csv('../data/Coursera_courses.csv').drop_duplicates()
reviews_df = pd.read_csv('../data/Coursera_reviews.csv').drop_duplicates()

In [4]:
reviews_df['date_reviews'] = pd.to_datetime(reviews_df['date_reviews'])

# Sort reviews by date
reviews_df = reviews_df.sort_values(by='date_reviews')

print(courses_df.isnull().sum())
print(reviews_df.isnull().sum())
courses_df = courses_df.drop_duplicates(subset='course_id')
reviews_df = reviews_df.drop_duplicates(subset=['reviewers', 'course_id'])

# Define a cutoff date for splitting the data (e.g., the last 20% of dates)
cutoff_date = reviews_df['date_reviews'].quantile(0.8)

# Split the data into training and testing sets
train_data = reviews_df[reviews_df['date_reviews'] < cutoff_date]
test_data = reviews_df[reviews_df['date_reviews'] >= cutoff_date]


name           0
institution    0
course_url     0
course_id      0
dtype: int64
reviews         56
reviewers        0
date_reviews     0
rating           0
course_id        0
dtype: int64


In [11]:
test_data

Unnamed: 0,reviews,reviewers,date_reviews,rating,course_id
1324402,Concise and easy to understand,By Kwon S H,2020-07-03,4,introduction-genomics
1433646,"Explanation is really good, I am a science gro...",By Ajay k P,2020-07-03,5,technical-support-fundamentals
615969,Good for beginners to build concept,By Rajendra P K,2020-07-03,4,marketing-digital
611766,Professor Bushee is a brilliant teacher. The i...,By Gergana D,2020-07-03,5,wharton-accounting
614783,Very informative. I loved the way the course w...,By Isha L,2020-07-03,4,marketing-digital
...,...,...,...,...,...
1453861,I had no prior knowledge about programming and...,By Amrita S,2020-10-10,5,python
1391282,"A good learning experience on ML concepts, wel...",By Rajagopal S,2020-10-10,5,machine-learning
1451699,Really liked the course and also thoroughly en...,By Shanthi T,2020-10-10,5,financial-markets-global
1371002,The short video format allows to make progress...,By Olivier C,2020-10-10,5,machine-learning


In [5]:
# user-item interaction matrix using TRAINING data
train_interaction_matrix = train_data.pivot(index='course_id', columns='reviewers', values='rating').fillna(0)
test_interaction_matrix = test_data.pivot(index='course_id', columns='reviewers', values='rating').fillna(0)


In [6]:
interaction_matrix_sparse = csr_matrix(train_interaction_matrix.values)

In [7]:
# collaborative filtering algorithm using k-nearest neighbors
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
model_knn.fit(interaction_matrix_sparse)

In [8]:
def recommend_course(course_id, interaction_matrix, model_knn, courses_df, k=5):
    # takes in a course_id and uses a nearest neighbors model to recommend similar courses.
    course_index = interaction_matrix.index.tolist().index(course_id)
    
    distances, indices = model_knn.kneighbors(interaction_matrix.iloc[course_index,:].values.reshape(1, -1), n_neighbors=k+1)
    
    courses_indices = [interaction_matrix.index[i] for i in indices.flatten()][1:]
    
    # Map the indices to course names
    recommended_courses = courses_df[courses_df['course_id'].isin(courses_indices)]
    return recommended_courses

In [9]:
course_id = 'machine-learning'
recommended_courses = recommend_course(course_id, train_interaction_matrix, model_knn, courses_df)
print(recommended_courses)

                                                  name  \
7    Programming for Everybody (Getting Started wit...   
12                   Neural Networks and Deep Learning   
33   Improving Deep Neural Networks: Hyperparameter...   
71               Structuring Machine Learning Projects   
153                                    Sequence Models   

                institution  \
7    University of Michigan   
12          DeepLearning.AI   
33          DeepLearning.AI   
71          DeepLearning.AI   
153         DeepLearning.AI   

                                            course_url  \
7                https://www.coursera.org/learn/python   
12   https://www.coursera.org/learn/neural-networks...   
33   https://www.coursera.org/learn/deep-neural-net...   
71   https://www.coursera.org/learn/machine-learnin...   
153  https://www.coursera.org/learn/nlp-sequence-mo...   

                         course_id  
7                           python  
12   neural-networks-deep-learning  
33  

In [16]:
test_data[test_data['course_id'] == 'machine-learning']

Unnamed: 0,reviews,reviewers,date_reviews,rating,course_id
1362059,This course could have been more better if it ...,By Siddhesh S,2020-07-03,4,machine-learning
1363461,Объяснения очень понятны),By Яна Ч,2020-07-03,4,machine-learning
1358097,This is a very good course to get an overview ...,By Anna T,2020-07-03,4,machine-learning
1368166,As I am currently a student and I cannot waste...,By Praveen J,2020-07-03,5,machine-learning
1383432,This course offered me a deep understanding of...,By Syed A A S,2020-07-03,5,machine-learning
...,...,...,...,...,...
1387667,"Great, awesome, outstanging course. Professor ...",By Isaac E V,2020-10-09,5,machine-learning
1379424,Very good course. I've been thinking for a lon...,By Indra A,2020-10-10,5,machine-learning
1386696,Andrew ng is best teacher i ever seen. His exp...,By Dinesh S P,2020-10-10,5,machine-learning
1391282,"A good learning experience on ML concepts, wel...",By Rajagopal S,2020-10-10,5,machine-learning


In [21]:
recommended_courses

Unnamed: 0,name,institution,course_url,course_id
7,Programming for Everybody (Getting Started wit...,University of Michigan,https://www.coursera.org/learn/python,python
12,Neural Networks and Deep Learning,DeepLearning.AI,https://www.coursera.org/learn/neural-networks...,neural-networks-deep-learning
33,Improving Deep Neural Networks: Hyperparameter...,DeepLearning.AI,https://www.coursera.org/learn/deep-neural-net...,deep-neural-network
71,Structuring Machine Learning Projects,DeepLearning.AI,https://www.coursera.org/learn/machine-learnin...,machine-learning-projects
153,Sequence Models,DeepLearning.AI,https://www.coursera.org/learn/nlp-sequence-mo...,nlp-sequence-models


In [20]:
def hit_rate(recommended_courses, test_data):
    hits = 0
    total = 0

    for course_id in recommended_courses['course_id']:
        reviewers = test_data[test_data['course_id'] == course_id]['reviewers']
        
        # Get the reviewers in recommended_courses
        recommended_reviewers = 
        
        hit = recommended_courses['reviewers'].isin(reviewers).any()
        
        if hit:
            hits += 1
            
        total += 1
        
    return hits / total


# hit rate for the recommended courses
print(hit_rate(recommended_courses, test_data))


46689       By Bryan J F A
52265        By sree v r i
58433      By Fahmida K C 
28440               By 劉晏伶
27251          By SANJAY S
                ...       
1454106      By PRAJNA M U
1453876        By Asante C
1453873      By VETUKURI S
1453861        By Amrita S
1454124        By Syed M K
Name: reviewers, Length: 2658, dtype: object


KeyError: 'reviewers'