In [83]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import math

In [92]:
class UniversityRecommender:
    
    # Constructor
    def __init__(self):
        self.k_value =0
        self.user_inputs = []
    # Add user inputs    
    def add_user_inputs(self, new_inputs):
        for input_array in new_inputs:
            self.user_inputs.append(input_array)
    # Set k value    
    def set_k_value(self, k_value):
        self.k_value = k_value

    # Recommend universities  
    def recommend(self, df):
        recommendations = []
        actual = []
        
        for user_input in self.user_inputs:
            # Filter dataframe 
            df_filtered = self.filter_df(df, user_input)  
            
            if len(df_filtered) == 0:
                print("No matches found")
                continue
            
            # Get actual recommendations
            actual.append(df_filtered['university'].head(self.k_value).tolist())

            # Get KNN recommendations
            knn_recommended = self.get_knn_recommendations(df_filtered)
            recommendations.append(knn_recommended)

        print("Actual: ", actual)
        print("Predicted: ", recommendations)    
        return recommendations, actual
    
    # Filter dataframe based on user input
    def filter_df(self, df, user_input):
        preferred_course=user_input[0] 
        max_fee=user_input[1]
        min_rank = user_input[2]
        
        # Encode course 
        le = LabelEncoder()
        le.fit(df['course'])
        df['course'] = le.transform(df['course'])
        
        # Encode preferred course
        courses = ['Computing', 'Social Sciences', 'Business & Management Studies', 'Engineering and Technology']
        le.fit(courses)
        preferred_course_encoded = le.transform([preferred_course])
        
        # Filter dataframe
        df_filtered = df[(df['uk_rank'] <= min_rank) & 
                        (df['average_fee'] <= max_fee) &
                        (df['course'] == preferred_course_encoded[0])]
        return df_filtered
    
    # Get KNN recommendations
    def get_knn_recommendations(self, df_filtered):
        knn_recommended = []
        
        # Create NearestNeighbors model
        model = NearestNeighbors(n_neighbors=self.k_value)
        
        # Fit model 
        model.fit(df_filtered[['uk_rank', 'average_fee','course']]) 

        # Get nearest neighbors
        distances, indices = model.kneighbors(df_filtered[['uk_rank', 'average_fee','course']])
        
        # Append university names based on k value
        for i in indices[0][:self.k_value]:  
            knn_recommended.append(df_filtered['university'].iloc[i]) 
        return knn_recommended
    
    # Mean Average Precision
    def map(self, actual, predicted):
        map_scores = []
        for i in range(len(actual)):
            map_scores.append(self.avg_precision_at_k(actual[i], predicted[i], self.k_value))
        return np.mean(map_scores)
    
    # Average Precision at K
    def avg_precision_at_k(self, actual, predicted, k):

        actual_set = set(actual)
        predicted_set = set(predicted[:k])
    
        common = actual_set.intersection(predicted_set)

        num_relevant = len(common)
        if len(predicted_set) > k:
            precision = num_relevant / k 
        else:
            precision = num_relevant / len(predicted_set)

        return precision
    
    # Normalized Discounted Cumulative Gain
    def ndcg(self,actual, predicted):
        # Calculate DCG
        dcg = 0
        idcg = 0
        for i, p in enumerate(predicted[:self.k_value]):
            if p in actual:
                rel = 1
            else:
                rel = 0    
            dcg += (2**rel - 1) / math.log2(i+2)    
        
        # Calculate IDCG
        ideal_predicted = actual[:self.k_value]
        for i, p in enumerate(ideal_predicted):
            rel = 1
            idcg += (2**rel - 1) / math.log2(i+2)
        if idcg == 0:
            return 0
        ndcg = dcg / idcg
        return ndcg
    
    # Mean Reciprocal Rank
    def mrr(self, actual, predicted):
        rr_scores = []
        for i in range(len(actual)):
            act_list = actual[i] 
            pred_list = predicted[i]
        
            reciprocal_rank = 0
            if pred_list[0] == act_list[0]:
                reciprocal_rank = 1
            rr_scores.append(reciprocal_rank)
        return np.mean(rr_scores)

In [119]:

df = pd.read_csv("dataset.csv") 
recommender = UniversityRecommender()

# Add user inputs
#recommender.add_user_inputs([['Computing', 30000, 25],['Social Sciences', 28000, 30],['Engineering and Technology', 35000, 35]]) puts([['Computing', 30000, 25],['Social Sciences', 28000, 30],['Engineering and Technology', 35000, 35]]) 
recommender.add_user_inputs([['Business & Management Studies', 24000, 21]])

# Set k value
recommender.set_k_value(5)  

# Get recommendations
predicted, actual = recommender.recommend(df)

# Evaluate and Print MAP
map_score = recommender.map(actual, predicted)
print("MAP Score: ", map_score)

# Evaluate and Print NDCG
for i in range(len(actual)):
    ndcg = recommender.ndcg(actual[i], predicted[i])
    print(f"NDCG {i+1}: {ndcg}")

# Evaluate and Print MRR
mrr_score = recommender.mrr(actual, predicted)
print("MRR Score:", mrr_score)

Actual:  [['University of St Andrews', 'Lancaster University', 'University of Bath', 'University of Exeter', 'University of Leeds']]
Predicted:  [['University of St Andrews', 'University of Exeter', 'University of Nottingham', 'University of Bath', 'University of Leeds']]
MAP Score:  0.8
NDCG 1: 0.830419897363192
MRR Score: 1.0
