In [5]:
import math
import random
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

class UniversityRecommender:

    # Constructor
    def __init__(self):
        self.k_value =0
        self.user_inputs = []
    # Add user inputs    
    def add_user_inputs(self, new_inputs):
        for input_array in new_inputs:
            self.user_inputs.append(input_array)
    
    # Set k value    
    def set_k_value(self, k_value):
        self.k_value = k_value

     # Filter universities based on user inputs
    def filter_universities(self, df, user_input):
        df_filtered = df[(df['uk_rank'] <= user_input[2]) & 
                        (df['average_fee'] <= user_input[1]) &
                        (df['course'] == user_input[0])]
        return df_filtered

    # Create pairs of universities
    def create_pairs(self,filtered_df):
        pairs = []
        for i in range(len(filtered_df)):
            for j in range(len(filtered_df)):
                u1 = filtered_df.iloc[i]
                u2 = filtered_df.iloc[j]

                # Assign preference based on rank
                if u1['uk_rank'] > u2['uk_rank']:
                    pairs.append((u1['university'], u2['university'], 1))
                    pairs.append((u2['university'], u1['university'], 0))
                elif u1['uk_rank'] < u2['uk_rank']:
                    pairs.append((u1['university'], u2['university'], 0))
                    pairs.append((u2['university'], u1['university'], 1))
                else:
                    pairs.append((u1['university'], u2['university'], 0)) 
                    pairs.append((u2['university'], u1['university'], 1))
        return pairs

    def train_rank_svm(self, train_pairs):
        # Create a linear SVM model
        model = LinearSVC(dual=False, max_iter=10000, random_state=1)

        # Encode university names to numerical values
        label_encoder = LabelEncoder()
        train_pairs['univ_1'] = label_encoder.fit_transform(train_pairs['univ_1'])
        train_pairs['univ_2'] = label_encoder.transform(train_pairs['univ_2'])

        # Features are univ_1 and univ_2, target is pref
        X_train = train_pairs[['univ_1', 'univ_2']]
        y_train = train_pairs['pref']

        # Fit the model
        model.fit(X_train, y_train)

        return model, label_encoder

    def recommend(self, df):
        recommendations = []
        actual=[]
        
        for user_input in self.user_inputs:
            # Filter universities based on user inputs
            filtered_df = self.filter_universities(df, user_input).copy()

            # Check if we have enough data
            if len(filtered_df) <= 2:
                # Only 2 universities match
                if len(filtered_df) == 2:
                    return '\n'.join(filtered_df['university'].values)
                # Only 1 university match
                if len(filtered_df) == 1:
                    return '\n'.join(filtered_df['university'].values)
                # No university matches
                if len(filtered_df) == 0:
                    return "No university found for your criteria"
            else:
                # Create pairs
                pairs = self.create_pairs(filtered_df)
                # Split pairs
                train_pairs, _ = train_test_split(pairs, test_size=0.2, random_state=1)
                # Train RankSVM
                model, label_encoder = self.train_rank_svm(pd.DataFrame(train_pairs, columns=['univ_1', 'univ_2', 'pref']))
                # Encode university names in filtered_df to numerical values
                filtered_df.loc[:,'univ_1'] = label_encoder.transform(filtered_df['university'])
                # Predict preferences for all universities in filtered_df
                filtered_df.loc[:,'univ_2'] = filtered_df['univ_1']
                filtered_df.loc[:,'prediction'] = model.predict(filtered_df[['univ_1', 'univ_2']])
                # Sort universities based on predicted preferences
                ranked_df = filtered_df.sort_values('prediction', ascending=False)[:self.k_value]
                # Decode numerical values back to university names
                ranked_df['university'] = label_encoder.inverse_transform(ranked_df['univ_1'])
                # Add recommendations to list
                universities = ranked_df['university'].tolist()
                recommendations.append(universities)
                # Add actual universities to list
                actual.append(filtered_df['university'].head(self.k_value).tolist())
        return (recommendations,actual)

    # Mean Average Precision
    def map(self, actual, predicted):
        map_scores = []
        for i in range(len(actual)):
            map_scores.append(self.avg_precision_at_k(actual[i], predicted[i], self.k_value))
        return np.mean(map_scores)
    
    # Mean Average Precision at K
    def avg_precision_at_k(self, actual, predicted, k):

        actual_set = set(actual)
        predicted_set = set(predicted[:k])
    
        common = actual_set.intersection(predicted_set)

        num_relevant = len(common)
        if len(predicted_set) > k:
            precision = num_relevant / k 
        else:
            precision = num_relevant / len(predicted_set)

        return precision
    
    # Normalized Discounted Cumulative Gain
    def ndcg(self,actual, predicted):
        # Calculate DCG
        dcg = 0
        idcg = 0
        for i, p in enumerate(predicted[:self.k_value]):
            if p in actual:
                rel = 1
            else:
                rel = 0    
            dcg += (2**rel - 1) / math.log2(i+2)    
        
        # Calculate IDCG
        ideal_predicted = actual[:self.k_value]
        for i, p in enumerate(ideal_predicted):
            rel = 1
            idcg += (2**rel - 1) / math.log2(i+2)
        if idcg == 0:
            return 0
        ndcg = dcg / idcg
        return ndcg
    
    # Mean Reciprocal Rank
    def mrr(self, actual, predicted):
        rr_scores = []
        for i in range(len(actual)):
            act_list = actual[i] 
            pred_list = predicted[i]
        
            reciprocal_rank = 0
            if pred_list[0] == act_list[0]:
                reciprocal_rank = 1
            rr_scores.append(reciprocal_rank)
        return np.mean(rr_scores)

In [30]:
#  load dataset
df = pd.read_csv("dataset.csv") 

# Create an instance of UniversityRecommender
recommender = UniversityRecommender()

#  set k value
recommender.set_k_value(5)  

# Add user inputs
#recommender.add_user_inputs([['Computing', 30000, 25],['Social Sciences', 28000, 30],['Engineering and Technology', 35000, 35]])
recommender.add_user_inputs([['Business & Management Studies', 24000, 21]])

# Get recommendations
predicted, actual  = recommender.recommend(df)

# Print the results
print("Actual:", actual)
print("Predicted:", predicted)

# Evaluate and Print MAP
map_score = recommender.map(actual, predicted)
print("MAP Score: ", map_score)

# Evaluate and Print NDCG
for i in range(len(actual)):
    ndcg = recommender.ndcg(actual[i], predicted[i])
    print(f"NDCG {i+1}: {ndcg}")

# Evaluate and Print MRR
mrr_score = recommender.mrr(actual, predicted)
print("MRR Score:", mrr_score)

Actual: [['University of St Andrews', 'Lancaster University', 'University of Bath', 'University of Exeter', 'University of Leeds']]
Predicted: [['University of St Andrews', 'University of Southampton', 'University of Nottingham', 'Lancaster University', 'University of Bath']]
MAP Score:  0.6
NDCG 1: 0.6164336326286644
MRR Score: 1.0
