# Metrics evaluation

In [1]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils import shuffle

from random import choices
import pandas as pd
import numpy as np
import random
import re

import warnings
warnings.filterwarnings("ignore")

from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.snowball import DutchStemmer



In [2]:
import pymongo

import os

# Connection string for the MongoDB cluster. It is read from the MONGODB_URI
# environment variable so that real credentials never have to live in the
# notebook; the literal below is only the fallback used when the variable is
# not set.
# NOTE(review): the fallback still embeds a username/password -- rotate it and
# drop the default once every environment exports MONGODB_URI.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb+srv://test:test@rs.qug52es.mongodb.net/?retryWrites=true&w=majority",
)

client = pymongo.MongoClient(MONGODB_URI, connectTimeoutMS=30000, socketTimeoutMS=None, connect=False, maxPoolsize=1)
# Database that holds the `interactions` collection queried below.
db = client.get_database('RS')

def find_all_interactions_history(json_data):
    """Look up every stored interaction of a single user.

    Args:
        json_data: Mapping that carries the user's identifier under "user_id".

    Returns:
        Whatever `db.interactions.find` returns for a filter on that user id
        (callers in this notebook wrap it in `list(...)`).
    """
    user_filter = {'user_id': json_data["user_id"]}
    return db.interactions.find(user_filter)

In [3]:
# Additional Dutch stop words (all lower-case, alphabetical). The list is
# appended to NLTK's Dutch stop-word list further down
# (`dutch_stopwords.extend(STOPWORDS)`) and the combined set is used to drop
# words from item descriptions before TF-IDF vectorisation.
STOPWORDS = [
    "aan",
    "aangaande",
    "aangezien",
    "achte",
    "achter",
    "achterna",
    "af",
    "afgelopen",
    "al",
    "aldaar",
    "aldus",
    "alhoewel",
    "alias",
    "alle",
    "allebei",
    "alleen",
    "alles",
    "als",
    "alsnog",
    "altijd",
    "altoos",
    "ander",
    "andere",
    "anders",
    "anderszins",
    "beetje",
    "behalve",
    "behoudens",
    "beide",
    "beiden",
    "ben",
    "beneden",
    "bent",
    "bepaald",
    "betreffende",
    "bij",
    "bijna",
    "bijv",
    "binnen",
    "binnenin",
    "blijkbaar",
    "blijken",
    "boven",
    "bovenal",
    "bovendien",
    "bovengenoemd",
    "bovenstaand",
    "bovenvermeld",
    "buiten",
    "bv",
    "daar",
    "daardoor",
    "daarheen",
    "daarin",
    "daarna",
    "daarnet",
    "daarom",
    "daarop",
    "daaruit",
    "daarvanlangs",
    "dan",
    "dat",
    "de",
    "deden",
    "deed",
    "der",
    "derde",
    "derhalve",
    "dertig",
    "deze",
    "dhr",
    "die",
    "dikwijls",
    "dit",
    "doch",
    "doe",
    "doen",
    "doet",
    "door",
    "doorgaand",
    "drie",
    "duizend",
    "dus",
    "echter",
    "een",
    "eens",
    "eer",
    "eerdat",
    "eerder",
    "eerlang",
    "eerst",
    "eerste",
    "eigen",
    "eigenlijk",
    "elk",
    "elke",
    "en",
    "enig",
    "enige",
    "enigszins",
    "enkel",
    "er",
    "erdoor",
    "erg",
    "ergens",
    "etc",
    "etcetera",
    "even",
    "eveneens",
    "evenwel",
    "gauw",
    "ge",
    "gedurende",
    "geen",
    "gehad",
    "gekund",
    "geleden",
    "gelijk",
    "gemoeten",
    "gemogen",
    "genoeg",
    "geweest",
    "gewoon",
    "gewoonweg",
    "haar",
    "haarzelf",
    "had",
    "hadden",
    "hare",
    "heb",
    "hebben",
    "hebt",
    "hedden",
    "heeft",
    "heel",
    "hem",
    "hemzelf",
    "hen",
    "het",
    "hetzelfde",
    "hier",
    "hierbeneden",
    "hierboven",
    "hierin",
    "hierna",
    "hierom",
    "hij",
    "hijzelf",
    "hoe",
    "hoewel",
    "honderd",
    "hun",
    "hunne",
    "ieder",
    "iedere",
    "iedereen",
    "iemand",
    "iets",
    "ik",
    "ikzelf",
    "in",
    "inderdaad",
    "inmiddels",
    "intussen",
    "inzake",
    "is",
    "ja",
    "je",
    "jezelf",
    "jij",
    "jijzelf",
    "jou",
    "jouw",
    "jouwe",
    "juist",
    "jullie",
    "kan",
    "klaar",
    "kon",
    "konden",
    "krachtens",
    "kun",
    "kunnen",
    "kunt",
    "laatst",
    "later",
    "liever",
    "lijken",
    "lijkt",
    "maak",
    "maakt",
    "maakte",
    "maakten",
    "maar",
    "mag",
    "maken",
    "me",
    "meer",
    "meest",
    "meestal",
    "men",
    "met",
    "mevr",
    "mezelf",
    "mij",
    "mijn",
    "mijnent",
    "mijner",
    "mijzelf",
    "minder",
    "miss",
    "misschien",
    "missen",
    "mits",
    "mocht",
    "mochten",
    "moest",
    "moesten",
    "moet",
    "moeten",
    "mogen",
    "mr",
    "mrs",
    "mw",
    "na",
    "naar",
    "nadat",
    "nam",
    "namelijk",
    "nee",
    "neem",
    "negen",
    "nemen",
    "nergens",
    "net",
    "niemand",
    "niet",
    "niets",
    "niks",
    "noch",
    "nochtans",
    "nog",
    "nogal",
    "nooit",
    "nu",
    "nv",
    "of",
    "ofschoon",
    "om",
    "omdat",
    "omhoog",
    "omlaag",
    "omstreeks",
    "omtrent",
    "omver",
    "ondanks",
    "onder",
    "ondertussen",
    "ongeveer",
    "ons",
    "onszelf",
    "onze",
    "onzeker",
    "ooit",
    "ook",
    "op",
    "opnieuw",
    "opzij",
    "over",
    "overal",
    "overeind",
    "overige",
    "overigens",
    "paar",
    "pas",
    "per",
    "precies",
    "recent",
    "redelijk",
    "reeds",
    "rond",
    "rondom",
    "samen",
    "sedert",
    "sinds",
    "sindsdien",
    "slechts",
    "sommige",
    "spoedig",
    "steeds",
    "tamelijk",
    "te",
    "tegen",
    "tegenover",
    "tenzij",
    "terwijl",
    "thans",
    "tien",
    "tiende",
    "tijdens",
    "tja",
    "toch",
    "toe",
    "toen",
    "toenmaals",
    "toenmalig",
    "tot",
    "totdat",
    "tussen",
    "twee",
    "tweede",
    "u",
    "uit",
    "uitgezonderd",
    "uw",
    "vaak",
    "vaakwat",
    "van",
    "vanaf",
    "vandaan",
    "vanuit",
    "vanwege",
    "veel",
    "veeleer",
    "veertig",
    "verder",
    "verscheidene",
    "verschillende",
    "vervolgens",
    "via",
    "vier",
    "vierde",
    "vijf",
    "vijfde",
    "vijftig",
    "vol",
    "volgend",
    "volgens",
    "voor",
    "vooraf",
    "vooral",
    "vooralsnog",
    "voorbij",
    "voordat",
    "voordezen",
    "voordien",
    "voorheen",
    "voorop",
    "voorts",
    "vooruit",
    "vrij",
    "vroeg",
    "waar",
    "waarom",
    "waarschijnlijk",
    "wanneer",
    "want",
    "waren",
    "was",
    "wat",
    "we",
    "wederom",
    "weer",
    "weg",
    "wegens",
    "weinig",
    "wel",
    "weldra",
    "welk",
    "welke",
    "werd",
    "werden",
    "werder",
    "wezen",
    "whatever",
    "wie",
    "wiens",
    "wier",
    "wij",
    "wijzelf",
    "wil",
    "wilden",
    "willen",
    "word",
    "worden",
    "wordt",
    "zal",
    "ze",
    "zei",
    "zeker",
    "zelf",
    "zelfde",
    "zelfs",
    "zes",
    "zeven",
    "zich",
    "zichzelf",
    "zij",
    "zijn",
    "zijne",
    "zijzelf",
    "zo",
    "zoals",
    "zodat",
    "zodra",
    "zonder",
    "zou",
    "zouden",
    "zowat",
    "zulk",
    "zulke",
    "zullen",
    "zult"]

In [4]:
from nltk.corpus import stopwords
from nltk.stem.snowball import DutchStemmer
import nltk

import string

# Interaction data of all users
full_data = pd.read_csv("../full_data.csv")

# Content data of all items
content = pd.read_csv("../content_clean_new.csv")

# The item descriptions are written in Dutch, so NLTK's Dutch stop words are
# combined with the notebook's own STOPWORDS list into a single lookup set
nltk.download("stopwords")
dutch_stopwords = set(stopwords.words('dutch') + STOPWORDS)

# Number of neighbouring users consulted per diversity level
# (looked up by collaborative_filtering; unknown levels fall back to 3)
K_VALUES = {0: 3, 0.5: 5, 1: 7}

def clean_text(text):
  """Return `text` lower-cased with every `string.punctuation` character removed."""
  punctuation_remover = str.maketrans('', '', string.punctuation)
  return text.lower().translate(punctuation_remover)


def get_cosine_similarity(df, last_seen):
  """Score every description in `df` against the description of `last_seen`.

  Descriptions are cleaned with `clean_text`, stripped of Dutch stop words,
  stemmed, and vectorised with TF-IDF. The TF-IDF row of the reference item
  is then compared with every row.

  Args:
    df: DataFrame with at least the columns "Description" and "item_id".
    last_seen: item_id of the reference item.

  Returns:
    A copy of `df` with two extra columns: "processed_description" and
    "cosine_similarity" (similarity of each row to the reference item).

  Raises:
    IndexError: if no row of `df` has `item_id == last_seen`.
  """
  df = df.copy()

  stemmer = DutchStemmer()
  df['processed_description'] = df['Description'].apply(lambda x: ' '.join([stemmer.stem(word) for word in clean_text(x).split() if word not in dutch_stopwords]))

  # Calculate TF-IDF vectors for all descriptions
  vectorizer = TfidfVectorizer(max_df=0.4, min_df=20)
  tfidf_matrix = vectorizer.fit_transform(df['processed_description'])

  # The TF-IDF matrix is addressed by row *position*, whereas `df.index` holds
  # *labels*. The two only coincide for a default RangeIndex, so the position
  # is resolved explicitly to stay correct for filtered or re-sorted frames.
  matching_positions = np.flatnonzero((df['item_id'] == last_seen).to_numpy())
  if len(matching_positions) == 0:
    raise IndexError(f"item_id {last_seen!r} not found in the content frame")
  selected_row_position = matching_positions[0]

  df['cosine_similarity'] = cosine_similarity(tfidf_matrix[selected_row_position], tfidf_matrix).flatten()

  return df

# Get set from series tags
def get_tag_sets(series):
  """Turn a Series of stringified tag lists into a Series of tag sets.

  Every element is expected to look like "['a', 'b']". Both square brackets,
  the single quotes and all spaces are removed before splitting on commas.
  (The previous pattern left the opening "[" in place, so the first tag of
  every row became "[a" and could never match the same tag elsewhere.)

  Args:
    series: pandas Series of strings.

  Returns:
    Series of `set` objects; elements that do not split into a list (for
    example missing values) are passed through unchanged.
  """
  stripped = series.str.replace(r"[\[\]']", "", regex=True).str.replace(" ", "", regex=False)
  return stripped.str.split(",").apply(lambda x: set(x) if isinstance(x, list) else x)

# Get jaccard similarity
def get_jaccard_similarity(df, last_seen):
  """Add the Jaccard similarity between each row's tags and those of `last_seen`.

  Args:
    df: DataFrame with the columns "Tags" and "item_id".
    last_seen: item_id of the reference item (must be present in `df`).

  Returns:
    A copy of `df` extended with "sets" (tag sets from `get_tag_sets`) and
    "jaccard_similarity" (|intersection| / |union| with the reference tags).
  """
  scored = df.copy()
  scored["sets"] = get_tag_sets(scored["Tags"])

  # Tag set of the reference item
  is_reference = scored["item_id"] == last_seen
  reference_tags = scored.loc[is_reference, "sets"].iloc[0]

  def _jaccard(candidate_tags):
    shared = candidate_tags.intersection(reference_tags)
    combined = candidate_tags.union(reference_tags)
    return len(shared) / len(combined)

  scored["jaccard_similarity"] = scored["sets"].apply(_jaccard)

  return scored

# Normalize a dataframe by subtracting the mean value
def norm(dfx):
  """Centre every row of `dfx` on its own mean.

  The row mean is computed once and subtracted from each column. The input
  frame is left untouched: the previous version stored the mean in a
  temporary "mean" column on the caller's frame, so a second call on the
  same object included that column in the average.

  Args:
    dfx: numeric DataFrame (one row per user in this notebook).

  Returns:
    New DataFrame with the same index and columns as `dfx`.
  """
  row_means = dfx.mean(axis = 1)
  return dfx.sub(row_means, axis = 0)

def track_format(df):
  """Convert content rows into the list-of-dicts recommendation format.

  Args:
    df: DataFrame with the columns "Small_Image", "Large_Image", "item_id",
      "Name", "Description", "Tags" and "Type".

  Returns:
    One dict per row, in row order, with the keys "image", "large_image",
    "item_id", "title", "description", "tags" and "category".
  """
  # (output key, source column) pairs, in the order the keys are emitted
  field_sources = (
      ("image", "Small_Image"),
      ("large_image", "Large_Image"),
      ("item_id", "item_id"),
      ("title", "Name"),
      ("description", "Description"),
      ("tags", "Tags"),
      ("category", "Type"),
  )

  recs = []
  for _, row in df.iterrows():
    recs.append({field: row[column] for field, column in field_sources})
  return recs

# Content type counter function
def type_counter(df_series, colab_recomend_df):
  """Order recommendations by how often the user interacted with each content type.

  The types in `df_series` are counted; the most frequent type sorts first.
  The "Type" column of the result is an ordered categorical built from that
  ranking, so types that never occur in `df_series` become missing values
  and sort last.

  Args:
    df_series: Series of content types the user interacted with positively.
    colab_recomend_df: DataFrame of candidate items with a "Type" column.

  Returns:
    A sorted copy of `colab_recomend_df`. The caller's frame is not modified
    (the previous version overwrote its "Type" column in place, unlike
    `tag_counter`, which already worked on a copy).
  """
  colab_recomend_df = colab_recomend_df.copy()
  # Most frequent type first
  top_types = df_series.value_counts().sort_values(ascending=False)
  cat_order = pd.CategoricalDtype(categories=top_types.index.tolist(), ordered=True)
  # Set "Type" column as categorical with the defined order
  colab_recomend_df['Type'] = colab_recomend_df['Type'].astype(cat_order)
  # Sort dataframe by "Type" column
  return colab_recomend_df.sort_values('Type')


# Tag list counter function
def tag_counter(df_series, colab_recomend_df):
  """Rank recommendations by weighted overlap with the user's favourite tags.

  Tags from `df_series` are counted; the most frequent tag gets the highest
  weight (number of distinct tags) and each following tag one less. Each
  candidate's score is the sum of the weights of its tags divided by its
  number of tags.

  Args:
    df_series: Series of stringified tag lists (e.g. "['a', 'b']") of the
      items the user interacted with positively.
    colab_recomend_df: DataFrame of candidates whose "Tags" column holds
      stringified tag lists.

  Returns:
    A copy of `colab_recomend_df` with "Tags" parsed into lists and a new
    "common_tags_weighted" column, sorted by that column in descending order.
    Candidates without tags score 0.0.
  """
  colab_recomend_df = colab_recomend_df.copy()

  # Create a list of all tags in the series.
  # NOTE(review): eval() executes arbitrary expressions; if the tag strings can
  # ever come from an untrusted source, switch to ast.literal_eval.
  # Iterating the Series directly replaces Series.iteritems(), which was
  # removed in pandas 2.0.
  tags = []
  for raw_tags in df_series:
    tags.extend(eval(raw_tags))

  # Count the frequency of each tag (dtype=object keeps an empty list valid)
  tag_counts = pd.Series(tags, dtype=object).value_counts()
  tag_list = tag_counts.index.to_list()

  # Weight decreases as you move down the tag list
  tag_weight = {tag: len(tag_list) - rank for rank, tag in enumerate(tag_list)}

  colab_recomend_df['Tags'] = colab_recomend_df['Tags'].apply(lambda x: eval(x))

  def _weighted_overlap(item_tags):
    # An item without tags has nothing in common; also avoids dividing by zero
    if not item_tags:
      return 0.0
    return sum(tag_weight.get(tag, 0) for tag in item_tags) / len(item_tags)

  colab_recomend_df['common_tags_weighted'] = colab_recomend_df['Tags'].apply(_weighted_overlap)

  # Sort by the weighted number of common tags in descending order
  return colab_recomend_df.sort_values('common_tags_weighted', ascending=False)

def collaborative_filtering(user_data, user_data_not_norm, user_id, diversity_level):
  """Recommend unseen items based on the k most similar users.

  Args:
    user_data: user x feature matrix used to compare users (views plus
      normalised rating/share/preview blocks in this notebook).
    user_data_not_norm: pivoted interaction frame with the first-level column
      groups "view", "rating", "shared" and "prev".
    user_id: *positional* row of the target user in both frames.
    diversity_level: key into K_VALUES selecting the neighbourhood size
      (falls back to 3 for unknown levels).

  Returns:
    DataFrame with one column "item_id": items the target user has not viewed
    and at least one neighbour scored above 0, best mean score first.
  """
  k = K_VALUES.get(diversity_level, 3)

  target_user_info = user_data.iloc[user_id].values
  # cosine_similarity returns a *similarity*: larger means more alike. The
  # neighbours are therefore the highest scores; sorting ascending (as before)
  # selected the least similar users instead.
  similarities = cosine_similarity(user_data, [target_user_info]).flatten()
  ranked_users = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
  # Drop the target user explicitly rather than assuming it is ranked first
  top_k_indices = [position for position, _ in ranked_users if position != user_id][:k]

  # Get list of items that the target user has not viewed
  target_user_row = user_data_not_norm["view"].iloc[user_id]
  items_to_rate = target_user_row[target_user_row == 0].index

  # Fetch each neighbour's row once instead of once per item
  neighbour_rows = [user_data_not_norm.iloc[position] for position in top_k_indices]

  # Mean positive interaction score of the neighbours for each unseen item
  item_ratings = []
  for item in items_to_rate:
    ratings = []
    for neighbour_row in neighbour_rows:
      ratings.extend([neighbour_row["rating"][item], neighbour_row["shared"][item], neighbour_row["prev"][item]])

    ratings = [r for r in ratings if r > 0]
    if ratings:
      item_ratings.append((item, sum(ratings) / len(ratings)))

  # Sort items by mean rating, best first
  item_ratings.sort(key=lambda x: x[1], reverse=True)
  collab_recommendations = [item for item, _ in item_ratings]

  return pd.DataFrame(collab_recommendations, columns=["item_id"])


# Content based recommendations
def content_based_filter(colab_recomend_df, user_data_not_norm, user_id):
  """Re-rank collaborative candidates using the user's preferred types and tags.

  Args:
    colab_recomend_df: DataFrame with an "item_id" column (candidates).
    user_data_not_norm: pivoted interaction frame; any cell equal to 1 in the
      target user's row marks a positively interacted item.
    user_id: *positional* row of the target user in `user_data_not_norm`.

  Returns:
    Candidates enriched with `content` metadata, sorted by "Type" (user's
    most frequent type first) and then by "common_tags_weighted" descending.
  """
  # Get list of positively interacted content
  target_user_row = user_data_not_norm.iloc[user_id]
  cols_with_1 = target_user_row[target_user_row == 1].index
  # Column labels look like ('view', 12); the first run of digits is the item id
  tags_rated_pos = [int(re.findall(r'\d+', str(col))[0]) for col in cols_with_1]
  # Sorted for a deterministic row order, and typed like content's key so the
  # merge below also works when the user has no positive interactions
  positive_df = pd.DataFrame({"item_id": pd.Series(sorted(set(tags_rated_pos)), dtype=content["item_id"].dtype)})

  # Data content enrichment
  colab_recomend_df = colab_recomend_df.merge(content, how = "inner", on = "item_id")
  positive_df = positive_df.merge(content, how = "inner", on = "item_id")

  colab_recomend_df = type_counter(positive_df["Type"], colab_recomend_df)

  colab_recomend_df = tag_counter(positive_df["Tags"], colab_recomend_df)

  return colab_recomend_df.sort_values(["Type", "common_tags_weighted"], ascending=[True, False])

[nltk_data] Downloading package stopwords to /Users/abril/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
#Function to make all tags a set
def make_set(recom_list):
  """Convert the "tags" entry of every recommendation into a set, in place.

  Tags may arrive as a list (already parsed) or as the stringified list read
  from the content CSV, e.g. "['a', 'b']". Strings are parsed first; calling
  `set()` on the raw string would yield a set of single characters and make
  every tag comparison meaningless.

  Args:
    recom_list: list of recommendation dicts, each with a "tags" key.

  Returns:
    The same list object, with each "tags" value replaced by a set.
  """
  import ast

  for element in recom_list:
    tags = element["tags"]
    if isinstance(tags, str):
      try:
        parsed = ast.literal_eval(tags)
      except (ValueError, SyntaxError):
        parsed = tags
      # Anything that is not a collection of tags is treated as a single tag
      tags = parsed if isinstance(parsed, (list, tuple, set, frozenset)) else [parsed]
    element["tags"] = set(tags)
  return recom_list


#Function to calculate similarity between tags
def jaccard_sim(ele_1, ele_2):
  """Jaccard similarity between the tag sets of two recommendations.

  Args:
    ele_1, ele_2: dicts whose "tags" value is a set.

  Returns:
    |intersection| / |union| as a float in [0, 1]. Two empty tag sets are
    treated as identical (1.0) instead of dividing by zero.
  """
  combined = ele_1["tags"].union(ele_2["tags"])
  if not combined:
    return 1.0
  return len(ele_1["tags"].intersection(ele_2["tags"])) / len(combined)


#Function to calculate similarity between types
def type_sim(ele_1, ele_2):
  """Tell whether two recommendations share the same "category" value."""
  first_category = ele_1["category"]
  second_category = ele_2["category"]
  return first_category == second_category


#Function to calculate similarity between descriptions
def cosine_description(ele_1,ele_2):
  """Scaled TF-IDF cosine similarity between two item descriptions.

  Both descriptions are cleaned, stripped of Dutch stop words and stemmed,
  then vectorised with a TF-IDF model fitted on just these two texts. The
  raw similarity is divided by 0.1 and capped at 1.

  Args:
    ele_1, ele_2: recommendation dicts with a "description" string.

  Returns:
    The scaled similarity, or 1 when the scaled value exceeds 1.
  """
  stemmer = DutchStemmer()

  def _preprocess(description):
    kept = [stemmer.stem(word) for word in clean_text(description).split() if word not in dutch_stopwords]
    return ' '.join(kept)

  pair = pd.DataFrame({'Description': [ele_1["description"], ele_2["description"]]})
  pair['processed_description'] = pair['Description'].apply(_preprocess)

  # TF-IDF vectors of the two descriptions
  tfidf_matrix = TfidfVectorizer().fit_transform(pair['processed_description'])

  # Similarity of the first description against both rows; entry 1 is the pair score
  similarities = cosine_similarity(tfidf_matrix[0], tfidf_matrix).flatten()

  scaled = similarities[1] / 0.1
  if scaled > 1:
    return 1
  return scaled


def collaborative_filtering_relevance(user_data, user_data_not_norm, user_id):
  """Rank *all* items for a user by the mean interaction score of similar users.

  Unlike `collaborative_filtering`, every item is scored (viewed or not) and
  zero scores are kept in the mean, so the result is a full ranking.

  Args:
    user_data: user x feature matrix used to compare users.
    user_data_not_norm: pivoted interaction frame with the first-level column
      groups "view", "rating", "shared" and "prev".
    user_id: *positional* row of the target user in both frames.

  Returns:
    DataFrame with one column "item_id", best mean score first.
  """
  k = 5 # top 5 similar users to get ideas of possible relevant items

  target_user_info = user_data.iloc[user_id].values
  # cosine_similarity returns a *similarity*: larger means more alike. The
  # neighbours are therefore the highest scores; sorting ascending (as before)
  # selected the least similar users instead.
  similarities = cosine_similarity(user_data, [target_user_info]).flatten()
  ranked_users = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
  # Drop the target user explicitly rather than assuming it is ranked first
  top_k_indices = [position for position, _ in ranked_users if position != user_id][:k]

  # Every item is scored here, whether or not the target user viewed it
  target_user_row = user_data_not_norm["view"].iloc[user_id]
  items_to_rate = target_user_row.index

  # Fetch each neighbour's row once instead of once per item
  neighbour_rows = [user_data_not_norm.iloc[position] for position in top_k_indices]

  # Mean interaction score of the neighbours for each item
  item_ratings = []
  for item in items_to_rate:
    ratings = []
    for neighbour_row in neighbour_rows:
      ratings.extend([neighbour_row["rating"][item], neighbour_row["shared"][item], neighbour_row["prev"][item]])

    if ratings:
      item_ratings.append((item, sum(ratings) / len(ratings)))

  # Sort items by mean rating, best first
  item_ratings.sort(key=lambda x: x[1], reverse=True)
  collab_recommendations = [item for item, _ in item_ratings]

  return pd.DataFrame(collab_recommendations, columns=["item_id"])


#Function to get relevance of an element for the user
def get_relevance_list(id):
  """Build the full relevance ranking of items for one user.

  The stored interactions of the user are written into the pivoted
  interaction matrix, all items are ranked with
  `collaborative_filtering_relevance`, and the ranking is refined with
  `content_based_filter`.

  Args:
    id: user id; used as an index label when writing interactions and as a
      row position by the filtering helpers.

  Returns:
    DataFrame of ranked items with a fresh 0..n-1 index, so that a row's
    index is its rank.
  """
  # Finding interactions by user id
  interactions = list(find_all_interactions_history({"user_id": id}))

  # Preparing DataFrame
  df = pd.pivot_table(full_data, index = "user_id", columns = "item_id").fillna(0)

  # Transform result to DataFrame
  if interactions:
    interactions = pd.DataFrame(interactions).drop("_id", axis = 1)
    interactions = interactions.merge(content, on = "item_id")

    # Interaction type -> first-level column group of the pivot table
    column_for_type = {"play": "view", "review": "rating", "share": "shared", "preview": "prev"}
    for _, row in interactions.iterrows():
      column = column_for_type.get(row["type"])
      cell = (column, row["item_id"])
      # Write with a single .loc call: the chained form df["view"].loc[...] = v
      # may assign into a temporary copy and silently lose the value. Unknown
      # users/items are skipped so the matrix is never enlarged with a lone
      # column that the other interaction groups lack.
      if column is not None and id in df.index and cell in df.columns:
        df.loc[id, cell] = row["value"]

  # Normalizing ratings, shares and previews
  rating_norm = norm(df["rating"])
  shared_norm = norm(df["shared"])
  prev_norm = norm(df["prev"])

  # Union of all datasets
  user_data = pd.concat([df["view"], rating_norm, shared_norm, prev_norm], axis = 1)

  # Collaborative filtering part
  colab_recomend_df = collaborative_filtering_relevance(user_data, df, id)

  # Content based filtering part
  content_recomend_df = content_based_filter(colab_recomend_df, df, id)

  # Renumber rows so that the index is the position of the item in the ranking
  return content_recomend_df.reset_index(drop = True)
        
     
def get_relevance(recoms, rel_list):
  """Estimate the share of relevant items in a recommendation list.

  Each recommended item is looked up in `rel_list`; the higher it ranks
  there, the more likely it is drawn as relevant (a Bernoulli draw per item).

  Args:
    recoms: list of recommendation dicts with an "item_id" key.
    rel_list: ranked DataFrame (index 0..n-1, best first) with "item_id".

  Returns:
    Fraction of `recoms` drawn as relevant, as a float in [0, 1]. An empty
    `recoms` yields 0.0 and items missing from `rel_list` count as not
    relevant.
  """
  if len(recoms) == 0:
    return 0.0

  total = len(rel_list)
  relevant_count = 0
  for rec in recoms:
    positions = rel_list[rel_list['item_id'] == rec['item_id']].index.values.astype(int)
    if len(positions) == 0:
      # Item is absent from the relevance ranking: treat as not relevant
      continue
    pos = positions[0]

    # The raw ratio exceeds 1 for the top-ranked item, which used to produce a
    # negative weight; clamp it so the weights are valid probabilities
    prob = (total - pos) / (total - 1) if total > 1 else 1.0
    prob = min(max(prob, 0.0), 1.0)

    #Relevant or not relevant item with prob
    rel = choices([0, 1], weights=(1-prob, prob),k=1)[0]
    relevant_count = relevant_count + rel

  # Calculate list avg rel
  return relevant_count/len(recoms)


#Defining primitive predictor function
def get_primitive():
  """Return the item ids of the 25 best items for a non-personalised baseline.

  Items are scored with a weighted rating that blends each item's mean
  rating R with the global mean C, trusting R more the more views v the item
  has relative to the 70th-percentile view count m:

      weight = (v * R + m * C) / (v + m)

  NOTE(review): the previous expression was (R + C) / (v + m), which ranks
  items *lower* the more they are viewed; the variable names match the
  weighted-rating formula above, so that is assumed to be the intent.

  Returns:
    List of up to 25 item ids, best weight first.
  """
  # Named aggregation keeps the columns flat, so the merge with `content`
  # does not mix MultiIndex and single-level columns
  grouped_df = (
      full_data.groupby('item_id')
      .agg(view_sum=('view', 'sum'), rating_mean=('rating', 'mean'))
      .reset_index()
  )

  C = grouped_df['rating_mean'].mean()
  m = grouped_df['view_sum'].quantile(0.70)
  R = grouped_df['rating_mean']
  v = grouped_df['view_sum']
  grouped_df['weight'] = (v * R + m * C) / (v + m)

  grouped_df = grouped_df.sort_values("weight", ascending=False)

  # Get content information to return recommendations to front
  grouped_df = grouped_df.merge(content, how = "inner", on = "item_id")
  normal_recommendations = track_format(grouped_df.head(25))

  return [recommendation["item_id"] for recommendation in normal_recommendations]


def get_expected_items(id):
  """List the items a user would be expected to see.

  That is every positively interacted item plus, for each of them, the (up
  to) five most similar catalogue items. Similarity is ordered by same
  content type, then tag Jaccard similarity, then description cosine
  similarity.

  Args:
    id: user id; used as an index label when writing interactions and as a
      row position when reading the user's row.

  Returns:
    List of item ids (may contain duplicates).
  """
  # Finding interactions by user id
  interactions = list(find_all_interactions_history({"user_id": id}))

  # Preparing DataFrame
  df = pd.pivot_table(full_data, index = "user_id", columns = "item_id").fillna(0)

  # Transform result to DataFrame
  if interactions:
    interactions = pd.DataFrame(interactions).drop("_id", axis = 1)
    interactions = interactions.merge(content, on = "item_id")

    # Interaction type -> first-level column group of the pivot table
    column_for_type = {"play": "view", "review": "rating", "share": "shared", "preview": "prev"}
    for _, row in interactions.iterrows():
      column = column_for_type.get(row["type"])
      cell = (column, row["item_id"])
      # Write with a single .loc call: the chained form df["view"].loc[...] = v
      # may assign into a temporary copy and silently lose the value. Unknown
      # users/items are skipped so the matrix is never enlarged.
      if column is not None and id in df.index and cell in df.columns:
        df.loc[id, cell] = row["value"]

  # Get list of positively interacted content (expected items)
  target_user_row = df.iloc[id]
  cols_with_1 = target_user_row[target_user_row == 1].index
  # Column labels look like ('view', 12); the first run of digits is the item id
  tags_rated_pos = [int(re.findall(r'\d+', str(col))[0]) for col in cols_with_1]
  positive_items = set(tags_rated_pos)

  # List to append all the known and expected content
  list_expected = []
  catalogue_ids = set(content["item_id"])

  # Get the closest content to user interactions
  for item in positive_items:
    list_expected.append(item)
    if item not in catalogue_ids:
      # No metadata for this item, so no neighbours can be derived
      continue

    # Jaccard similarity between tags
    content_dis = get_jaccard_similarity(content,item)

    # Content type check
    item_type = content.loc[content["item_id"] == item, "Type"].iloc[0]
    content_dis["Type_sim"] = content_dis["Type"] == item_type

    # Cosine similarity between descriptions, then sort by all three criteria
    content_lev = get_cosine_similarity(content_dis, item).sort_values(by=["Type_sim","jaccard_similarity", "cosine_similarity"], ascending = [False, False, False])

    # Removing same item
    content_lev = content_lev.loc[content_lev["item_id"] != item]

    # Appending the (up to) 5 most similar items; head() tolerates short frames
    list_expected.extend(content_lev["item_id"].head(5).tolist())

  #Returning list with expected items
  return list_expected
    

#Function to calculate metrics score of each list
def get_scores(recoms,id):
  """Compute intra-list diversity, relevance and serendipity of a list.

  * Relevance comes from `get_relevance` against the user's relevance ranking.
  * Intra-list diversity is the mean pairwise dissimilarity
    3 - (tag Jaccard + same-type + description similarity) over all
    *unordered* pairs of distinct items, so it lies in [0, 3]. The previous
    double loop visited every ordered pair while dividing by the number of
    unordered pairs, which doubled the value.
  * Serendipity is the share of recommended items that are neither in the
    primitive (popularity) list nor among the user's expected items,
    multiplied by the relevance.

  Args:
    recoms: list of recommendation dicts; their "tags" are converted to sets
      in place by `make_set`.
    id: user id.

  Returns:
    Tuple (intra_diversity, relevance, serendipity).
  """
  list_recommendations = make_set(recoms)

  # Get relevance list
  list_rel = get_relevance_list(id)

  #Get relevance of the recommendation list
  relevance = get_relevance(recoms,list_rel)

  primitive = set(get_primitive())
  # Set of expected items
  expected = set(get_expected_items(id))

  # Recommended ids that are neither popular nor expected
  rs_set = {rec["item_id"] for rec in recoms}
  unexpected = rs_set.difference(primitive.union(expected))

  n = len(list_recommendations)
  sum_div = 0
  for i in range(n):
    # j starts after i: each unordered pair once, no self-comparison
    for j in range(i + 1, n):
      ele_1, ele_2 = list_recommendations[i], list_recommendations[j]

      #Calculate our diversity measure
      div = (3 - (jaccard_sim(ele_1, ele_2)+(type_sim(ele_1,ele_2))+(cosine_description(ele_1,ele_2))))
      sum_div = sum_div + div

  # Lists with fewer than two items have no pairs, hence no diversity
  pair_count = (n/2) * (n-1)
  intra_diversity = sum_div / pair_count if pair_count else 0.0
  serendipity = (len(unexpected)/n) * relevance if n else 0.0
  return intra_diversity,relevance,serendipity


def get_random_rs():
  """Return 15 randomly chosen catalogue items in recommendation format.

  The catalogue is shuffled and only the first 15 rows are formatted; the
  previous version formatted every catalogue row before discarding all but 15.
  """
  random_list = shuffle(content).reset_index()
  return track_format(random_list.head(15))


def get_personalised_recommendations(id, diversity_level):
  """Produce up to 15 personalised recommendations at a given diversity level.

  Candidates come from `collaborative_filtering`, are re-ranked by
  `content_based_filter`, and are then post-processed:

  * 0   -> the top 15 of the ranked list, in order;
  * 0.5 -> the top 7 plus 8 items sampled from the rest, shuffled;
  * 1   -> the top 3 plus 12 items sampled from the rest, shuffled;
  * any other value -> the full ranked list, unchanged.

  Lists shorter than 15 items yield fewer recommendations instead of failing.

  Args:
    id: user id; used as an index label when writing interactions and as a
      row position by the filtering helpers.
    diversity_level: 0, 0.5 or 1.

  Returns:
    List of recommendation dicts as produced by `track_format`.
  """
  # Finding interactions by user id
  interactions = list(find_all_interactions_history({"user_id": id}))

  # Preparing DataFrame
  df = pd.pivot_table(full_data, index = "user_id", columns = "item_id").fillna(0)

  # Transform result to DataFrame
  if interactions:
    interactions = pd.DataFrame(interactions).drop("_id", axis = 1)
    interactions = interactions.merge(content, on = "item_id")

    # Interaction type -> first-level column group of the pivot table
    column_for_type = {"play": "view", "review": "rating", "share": "shared", "preview": "prev"}
    for _, row in interactions.iterrows():
      column = column_for_type.get(row["type"])
      cell = (column, row["item_id"])
      # Write with a single .loc call: the chained form df["view"].loc[...] = v
      # may assign into a temporary copy and silently lose the value. Unknown
      # users/items are skipped so the matrix is never enlarged.
      if column is not None and id in df.index and cell in df.columns:
        df.loc[id, cell] = row["value"]

  # Normalizing ratings, shares and previews
  rating_norm = norm(df["rating"])
  shared_norm = norm(df["shared"])
  prev_norm = norm(df["prev"])

  # Union of all datasets
  user_data = pd.concat([df["view"], rating_norm, shared_norm, prev_norm], axis = 1)

  # Collaborative filtering part
  colab_recomend_df = collaborative_filtering(user_data, df, id, diversity_level)

  # Content based filtering part
  content_recomend_df = content_based_filter(colab_recomend_df, df, id)

  # Giving recommendations the correct format
  normal_recomendations = track_format(content_recomend_df)

  list_size = 15
  # Number of top-ranked items always kept, per randomised diversity level
  fixed_head = {0.5: 7, 1: 3}

  # Low diversity: list is not modified
  if diversity_level == 0:
    normal_recomendations = normal_recomendations[:list_size]

  # Medium / high diversity: keep the head, fill up with a random sample of
  # the remainder, then shuffle the selection
  elif diversity_level in fixed_head:
    total = len(normal_recomendations)
    fixed_count = min(fixed_head[diversity_level], total)
    not_random = list(range(fixed_count))
    # The tail starts right after the fixed head (the medium level used to
    # start at index 8, so the item ranked 8th could never be selected)
    tail = range(fixed_count, total)
    yes_random = random.sample(tail, min(list_size - fixed_count, len(tail)))

    list_ran = not_random + yes_random
    random.shuffle(list_ran)
    normal_recomendations = [normal_recomendations[position] for position in list_ran]

  return normal_recomendations
    

In [6]:
# Evaluate 100 user ids sampled without replacement from 0..999
list_users = random.sample(range(1000), 100)

# Score accumulators, one group per strategy:
# l = low, m = medium, h = high diversity, r = random baseline
user = []
l_intra_diversity, l_relevance, l_diversity_rel,l_serendipity  = [],[],[],[]
m_intra_diversity, m_relevance, m_diversity_rel,m_serendipity  = [],[],[],[]
h_intra_diversity, h_relevance, h_diversity_rel,h_serendipity  = [],[],[],[]
r_intra_diversity, r_relevance, r_diversity_rel,r_serendipity  = [],[],[],[]

# (diversity level, accumulators) in the order the strategies are evaluated
personalised_runs = (
    (0, (l_intra_diversity, l_relevance, l_diversity_rel, l_serendipity)),
    (0.5, (m_intra_diversity, m_relevance, m_diversity_rel, m_serendipity)),
    (1, (h_intra_diversity, h_relevance, h_diversity_rel, h_serendipity)),
)

for id in list_users:
    user.append(id)

    # Personalised recommendations at each diversity level
    for level, (intra_log, relevance_log, product_log, serendipity_log) in personalised_runs:
        recoms = get_personalised_recommendations(id, level)
        scores = get_scores(recoms,id)
        intra_log.append(scores[0])
        relevance_log.append(scores[1])
        product_log.append(scores[0]*scores[1])
        serendipity_log.append(scores[2])

    # Random baseline
    recoms = get_random_rs()
    scores = get_scores(recoms,id)
    r_intra_diversity.append(scores[0])
    r_relevance.append(scores[1])
    r_diversity_rel.append(scores[0]*scores[1])
    r_serendipity.append(scores[2])

In [None]:
# One row per evaluated user, one column per (strategy, metric) pair
results = pd.DataFrame({
    "user_id": user,
    "low_intralist_diversity": l_intra_diversity,
    "med_intralist_diversity": m_intra_diversity,
    "high_intralist_diversity": h_intra_diversity,
    "random_intralist_diversity": r_intra_diversity,
    "low_relevance": l_relevance,
    "med_relevance": m_relevance,
    "high_relevance": h_relevance,
    "random_relevance": r_relevance,
    "low_diversity*relevance": l_diversity_rel,
    "med_diversity*relevance": m_diversity_rel,
    "high_diversity*relevance": h_diversity_rel,
    "random_diversity*relevance": r_diversity_rel,
    "low_serendipity": l_serendipity,
    "med_serendipity": m_serendipity,
    "high_serendipity": h_serendipity,
    "random_serendipity": r_serendipity,
})
# Persist the scores without the DataFrame index
results.to_csv("results.csv", index=0)