In [27]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import math

class UniversityRecommender:

    def __init__(self):
        self.user_inputs = []
        num_rec = None

    def add_user_inputs(self, inputs):
        self.user_inputs.extend(inputs)

    def set_num_rec(self, num_rec):
        self.num_rec = num_rec

    # Recommend universities based on user inputs and return top k recommendations
    def recommend(self, df):
        recommended_all = []
        actual_all = []
        
        for input in self.user_inputs:
            course, max_fee, min_rank = input

            # Filter dataset
            filtered_df = df[(df['course'] == course) & 
                             (df['average_fee'] <= max_fee) &
                             (df['uk_rank'] <= min_rank)]
            
            # Calculate similarity
            features = ['uk_rank', 'average_fee']
            X = filtered_df[features].values  
            similarity = cosine_similarity(X)
            
            # Get top k indices of filtered dataset
            indices = filtered_df.index.values
            sim_scores = list(enumerate(similarity))
            top_indices_filtered = sorted(sim_scores, key=lambda x: x[1][indices[0]], reverse=True)[:self.num_rec]
            
            # Recommend
            recommended_names = filtered_df['university'].iloc[[index[0] for index in top_indices_filtered]]
            recommended_all.append(list(recommended_names))
            
            # Get top k indices of original dataset
            top_indices_original = sorted(sim_scores, key=lambda x: x[1][indices[0]], reverse=True)[:self.num_rec]
            
            # Actual
            actual_all.append(filtered_df['university'].head(self.num_rec).tolist())
            
        return recommended_all, actual_all

    # Mean Average Precision
    def map(self, actual, predicted):
        map_scores = []
        for i in range(len(actual)):
            map_scores.append(self.avg_precision(actual[i], predicted[i], self.num_rec))
        return np.mean(map_scores)
    
    # Average Precision 
    def avg_precision(self, actual, predicted, n):

        actual_set = set(actual)
        predicted_set = set(predicted[:n])
    
        common = actual_set.intersection(predicted_set)

        num_relevant = len(common)
        if len(predicted_set) > n:
            precision = num_relevant / n
        else:
            precision = num_relevant / len(predicted_set)

        return precision
    
    # Normalized Discounted Cumulative Gain
    def ndcg(self,actual, predicted):
        # Calculate DCG
        dcg = 0
        idcg = 0
        for i, p in enumerate(predicted[:self.num_rec]):
            if p in actual:
                rel = 1
            else:
                rel = 0    
            dcg += (2**rel - 1) / math.log2(i+2)    
        
        # Calculate IDCG
        ideal_predicted = actual[:self.num_rec]
        for i, p in enumerate(ideal_predicted):
            rel = 1
            idcg += (2**rel - 1) / math.log2(i+2)
        if idcg == 0:
            return 0
        ndcg = dcg / idcg
        return ndcg
    
    # Mean Reciprocal Rank
    def mrr(self, actual, predicted):
        rr_scores = []
        for i in range(len(actual)):
            act_list = actual[i] 
            pred_list = predicted[i]
        
            reciprocal_rank = 0
            if pred_list[0] == act_list[0]:
                reciprocal_rank = 1
            rr_scores.append(reciprocal_rank)
        return np.mean(rr_scores)

In [77]:
df = pd.read_csv("dataset.csv") 
recommender = UniversityRecommender()

# Set number of recommendations
recommender.set_num_rec(5)  

# Add user inputs
#recommender.add_user_inputs([['Computing', 30000, 25],['Social Sciences', 28000, 30],['Engineering and Technology', 35000, 35]]) 
recommender.add_user_inputs([['Business & Management Studies', 24000, 21]])
# Get recommendations
predicted, actual = recommender.recommend(df)

# Print the results
print("Actual:", actual)
print("Predicted:", predicted)

# Evaluate and Print MAP
map_score = recommender.map(actual, predicted)
print("MAP Score: ", map_score)

# Evaluate and Print NDCG
for i in range(len(actual)):
    ndcg = recommender.ndcg(actual[i], predicted[i])
    print(f"NDCG {i+1}: {ndcg}")

# Evaluate and Print MRR
mrr_score = recommender.mrr(actual, predicted)
print("MRR Score:", mrr_score)

Actual: [['University of St Andrews', 'Lancaster University', 'University of Bath', 'University of Exeter', 'University of Leeds']]
Predicted: [['University of Nottingham', 'University of Southampton', 'Royal Holloway, University of London', 'University of Glasgow', 'University of Leeds']]
MAP Score:  0.2
NDCG 1: 0.13120507751234178
MRR Score: 0.0
