# Notebook Contents

1. ### Testing `BubbleBurster` instantiation
2. ### Testing evalmetrics - `NoveltyMetric`
3. ### Testing evalmetrics - `SerendipityMetric`
4. ### Testing evalmetrics - `DiversityMetric`
5. ### Testing **all** evalmetrics

# Setup

In [5]:
# some_file.py
import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../../t-recs/')
from trecs.models import ContentFiltering
from trecs.metrics import *
from trecs.random import Generator
from trecs.components import Users

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx

random_state = np.random.seed(42)

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

ratings_df = pd.read_csv('../../ml-100k/u.data', 
    sep="\t", 
    names=['UserID', 'MovieID', 'Rating', 'Timestamp']
)

movie_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies_df = pd.read_csv('../../ml-100k/u.item', sep="|", names=movie_cols, encoding='latin')

# display(movies_df.head(2))
# print(movies_df.shape)

FileNotFoundError: [Errno 2] File ../ml-100k/u.data does not exist: '../ml-100k/u.data'

In [6]:
from sklearn.cluster import KMeans

def get_topic_clusters(binary_ratings_matrix, n_attrs:int=100, nmf_solver:str="mu"):
    """
    Creates clusters of movies based on their genre.
    Inputs:
        binary_ratings_matrix: a binary matrix of users and movies
        n_attrs: number of attributes to use in NMF
        nmf_solver: solver to use in NMF
    Outputs:
        clusters: a list of cluster assignments
    """
    # Create topic clusters
    #create co-occurence matrix from binary_interaction_matrix
    co_occurence_matrix = binary_ratings_matrix.T @ binary_ratings_matrix
    co_occurence_matrix

    # Matrix factorize co_occurence_matrix to get embeddings
    nmf_cooc = NMF(n_components=n_attrs, solver=nmf_solver)
    W_topics = nmf_cooc.fit_transform(co_occurence_matrix)

    # cluster W_topics
    kmeans = KMeans(n_clusters=100, random_state=random_state).fit(W_topics)

    # assign nearest cluster to observation
    cluster_ids = kmeans.predict(W_topics)

    return cluster_ids

In [9]:
from sklearn.decomposition import NMF

binary_ratings_df = ratings_df.drop(columns=['Timestamp'])
binary_ratings_df.loc[binary_ratings_df['Rating'] > 0, 'Rating'] = 1

# turn dataframe into matrix where each movie is a column and each user is a row
binary_ratings_matrix = binary_ratings_df.pivot(index='UserID', columns='MovieID', values='Rating').fillna(0).to_numpy()

from lightfm.cross_validation import random_train_test_split
from scipy import sparse

# split data into train and test sets
train_interactions, test_interactions = random_train_test_split(sparse.csr_matrix(binary_ratings_matrix), test_percentage=0.2, random_state=random_state)
train_interactions = train_interactions.toarray()
test_interactions = test_interactions.toarray()

n_attrs=100
nmf = NMF(n_components=n_attrs, solver="mu")
user_representation = nmf.fit_transform(binary_ratings_matrix)
item_representation = nmf.components_
print(user_representation.shape, item_representation.shape)

num_topics = None
item_topics = get_topic_clusters(binary_ratings_matrix, n_attrs=n_attrs, nmf_solver="mu")
user_topic_history = None
item_count = None

from models.bubble import BubbleBurster

users = Users(size=(943,100), repeat_interactions=False)

# Testing `BubbleBurster` model instantiation

In [10]:
recsys = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [11]:
print(recsys.num_topics)
print(recsys.item_topics.shape)
print(recsys.user_topic_history.shape)
print(np.unique(recsys.user_topic_history))
print(recsys.item_count.shape) 

100
(1682,)
(943, 100)
[0.]
(1682,)


In [12]:
recsys.add_metrics(MSEMeasurement(), InteractionSpread(), AverageFeatureScoreRange())
print("These are the current metrics:")
print(recsys.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x166dd6970>, <trecs.metrics.measurement.InteractionSpread object at 0x165135640>, <trecs.metrics.measurement.AverageFeatureScoreRange object at 0x1670f5c70>]


In [13]:
# now we run the model
recsys.run(timesteps=1)
measurements = recsys.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.50s/it]


**-> Model successfully runs for 1 timestep**

In [14]:
# keys = recsys.__dict__.keys()
# vals = recsys.__dict__.values()

# for k, v in zip(keys,vals):
#     print(k)

In [15]:
def state_update(recommender, item_count, user_topic_history, item_topics):
    interacted_items = recommender.interactions
    # Updating `item_count`
    interacted_item_val, interacted_item_count = np.unique(interacted_items, return_counts=True)
    
    item_count[interacted_item_val] += interacted_item_count

    # Updating `user_topic_history`
    interacted_topics = np.expand_dims(recommender.item_topics[interacted_items], axis=1)
    old_topic_count = np.take_along_axis(user_topic_history, interacted_topics, axis=1)
    np.put_along_axis(user_topic_history, interacted_topics, old_topic_count+1, axis=1)
    return item_count, user_topic_history

In [16]:
test_item_count = np.zeros(recsys.num_items)
test_user_topic_history = np.zeros((recsys.num_users, recsys.num_topics))

test_item_count, test_user_topic_history = state_update(recsys, test_item_count, test_user_topic_history, item_topics)

In [17]:
print(recsys.item_count[:5])

[15  0  0  1  0]


In [18]:
print(np.array_equal(recsys.item_count, test_item_count))
print(np.array_equal(recsys.user_topic_history, test_user_topic_history))

True
True


**-> Two values are equal after 1 iteration**

In [19]:
# now we run the model
recsys.run(timesteps=1)
measurements = recsys.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.56s/it]


In [20]:
test_item_count, test_user_topic_history = state_update(recsys, test_item_count, test_user_topic_history, item_topics)
print(np.array_equal(recsys.item_count, test_item_count))
print(np.array_equal(recsys.user_topic_history, test_user_topic_history))

True
True


**-> The two values are equal after a second iteration**

In [21]:
for key in measurements.keys():
    print(key, measurements[key])

mse [0.21272475127742968, 0.16140467867966388, 0.1557236005498112]
interaction_spread [None, -899.0, -18.0]
afsr [None, 9.116729286899385, 7.859449826696249]
timesteps [0 1 2]


In [22]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [23]:
bubble.add_metrics(MSEMeasurement(), InteractionSpread(), AverageFeatureScoreRange())
print("These are the current metrics:")
print(recsys.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x166dd6970>, <trecs.metrics.measurement.InteractionSpread object at 0x165135640>, <trecs.metrics.measurement.AverageFeatureScoreRange object at 0x1670f5c70>]


In [24]:
bubble.startup_and_train(timesteps=100)

100%|██████████| 100/100 [00:03<00:00, 28.23it/s]


In [25]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key][-1:-5:-1])

mse [0.1557236005498112, 0.1557236005498112, 0.1557236005498112, 0.1557236005498112]
interaction_spread [0.5, 0.0, 0.0, 0.0]
afsr [3.4217124712473077, 3.344723354792558, 3.2209218023625223, 3.332879284484277]
timesteps [100  99  98  97]


# Testing evalmetrics - `NoveltyMetric`

In [26]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [28]:
from metrics import NoveltyMetric

bubble.add_metrics(MSEMeasurement(), NoveltyMetric())
print("These are the current metrics:")
print(bubble.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x1670f5f40>, <metrics.evaluation_metrics.NoveltyMetric object at 0x167153130>]


In [29]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

  slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, recommender.num_users))
100%|██████████| 1/1 [00:03<00:00,  3.68s/it]


In [30]:
slate_items_self_info = bubble.item_count[bubble.items_shown]
mask_slate_items_self_info = np.ma.masked_where(slate_items_self_info <= 0, slate_items_self_info)

slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))

slate_items_pred_score = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1)#.shape

item_novelty = np.multiply(slate_items_self_info, slate_items_pred_score)

slate_novelty = np.divide(np.sum(item_novelty, axis=1), bubble.num_items_per_iter)
print(slate_novelty.shape)

novelty = np.mean(slate_novelty)
print(novelty)

(943,)
3.126147852561864


  slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))


In [31]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key])

mse [0.14034090977625432, 0.13633168043415414]
novelty_metric [None, 3.126147852561864]
timesteps [0 1]


In [32]:
print("Is novelty equal? ", measurements['novelty_metric'][-1] == novelty)

Is novelty equal?  True


In [33]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.48s/it]


In [34]:
slate_items_self_info = bubble.item_count[bubble.items_shown]
mask_slate_items_self_info = np.ma.masked_where(slate_items_self_info <= 0, slate_items_self_info)

slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))

slate_items_pred_score = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1)#.shape

item_novelty = np.multiply(slate_items_self_info, slate_items_pred_score)

slate_novelty = np.divide(np.sum(item_novelty, axis=1), bubble.num_items_per_iter)
print(slate_novelty.shape)

novelty = np.mean(slate_novelty)
print(novelty)

(943,)
2.499094886370202


  slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))


In [35]:
print("Is novelty equal? ", measurements['novelty_metric'][-1] == novelty)

Is novelty equal?  True


In [36]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.67s/it]


In [37]:
slate_items_self_info = bubble.item_count[bubble.items_shown]
mask_slate_items_self_info = np.ma.masked_where(slate_items_self_info <= 0, slate_items_self_info)

slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))

slate_items_pred_score = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1)#.shape

item_novelty = np.multiply(slate_items_self_info, slate_items_pred_score)

slate_novelty = np.divide(np.sum(item_novelty, axis=1), bubble.num_items_per_iter)
print(slate_novelty.shape)

novelty = np.mean(slate_novelty)
print(novelty)

(943,)
2.3310050157443016


  slate_items_self_info = (-1) * np.log(np.divide(mask_slate_items_self_info, bubble.num_users))


In [38]:
print("Is novelty equal? ", measurements['novelty_metric'][-1] == novelty)

Is novelty equal?  True


In [39]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key])

mse [0.14034090977625432, 0.13633168043415414, 0.13601623720950623, 0.13628298776653813]
novelty_metric [None, 3.126147852561864, 2.499094886370202, 2.3310050157443016]
timesteps [0 1 2 3]


# Testing evalmetrics - `SerendipityMetric`

In [40]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [41]:
from metrics import SerendipityMetric

bubble.add_metrics(MSEMeasurement(), SerendipityMetric())
print("These are the current metrics:")
print(bubble.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x167188580>, <metrics.evaluation_metrics.SerendipityMetric object at 0x165135730>]


In [42]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:05<00:00,  5.06s/it]


In [43]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key])

mse [0.13628298776653813, 0.1434635384740876]
serendipity_metric [None, 0.9003181336161188]
timesteps [0 1]


In [44]:
# Indices for the items shown
items_shown = bubble.items_shown
# Scores for the items shown
user_scores = bubble.users.actual_user_scores.value
# Boolean matrix where value=1 for the shown items that have a score greater than 0
user_scores_items_shown = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1) > 0
# Topics that correspond to each item shown
topics_shown = bubble.item_topics[bubble.items_shown]
# Boolean matrix where value=1 if the topic shown is not in the user history, otherwise value=0
new_topics = np.apply_along_axis(np.isin, 1, topics_shown, bubble.user_topic_history, invert=True)
# calculate serendipity for all items presented to each user
items_shown_serendipity = np.multiply(new_topics, user_scores_items_shown)
# Calculate average serendipity - average serendipity by slate AND users
serendipity = np.mean(items_shown_serendipity)#np.sum(np.multiply(new_topics, user_scores_items_shown)) / bubble.num_users
print("serendipity = ", serendipity)
# # to complete the measurement, call `self.observe(metric_value)`

serendipity =  0.9003181336161188


In [45]:
print("Is serendipity equal? ", measurements['serendipity_metric'][-1] == serendipity)

Is serendipity equal?  True


In [46]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:05<00:00,  5.01s/it]


In [47]:
# Indices for the items shown
items_shown = bubble.items_shown
print(items_shown.shape)
# Scores for the items shown
user_scores = bubble.users.actual_user_scores.value
print(user_scores.shape)
# Scores for just the shown items that have a score greater than 0
user_scores_items_shown = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1) > 0
print(user_scores_items_shown.shape)
# Topics that correspond to each item shown
topics_shown = bubble.item_topics[bubble.items_shown]
print(topics_shown.shape)
"""
Need to update the below 2 lines depending on how user_topic_history is implemented in the wrapper class
"""
# Boolean matrix where value=1 if the topic shown is not in the user history, otherwise value=0
new_topics = np.apply_along_axis(np.isin, 1, topics_shown, bubble.user_topic_history, invert=True)
print(new_topics.shape)

# # calculate serendipity for all items presented to each user
items_shown_serendipity = np.multiply(new_topics, user_scores_items_shown)
print(items_shown_serendipity.shape)
# Calculate average serendipity - average serendipity by slate AND users
serendipity = np.mean(items_shown_serendipity)#np.sum(np.multiply(new_topics, user_scores_items_shown)) / bubble.num_users
print(serendipity)
# # to complete the measurement, call `self.observe(metric_value)`

(943, 10)
(943, 1682)
(943, 10)
(943, 10)
(943, 10)
(943, 10)
0.8336161187698834


In [48]:
print("Is serendipity equal? ", measurements['serendipity_metric'][-1] == serendipity)

Is serendipity equal?  True


In [49]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:05<00:00,  5.16s/it]


In [50]:
# Indices for the items shown
items_shown = bubble.items_shown
print(items_shown.shape)
# Scores for the items shown
user_scores = bubble.users.actual_user_scores.value
print(user_scores.shape)
# Scores for just the shown items that have a score greater than 0
user_scores_items_shown = np.take_along_axis(bubble.users.actual_user_scores.value, bubble.items_shown, axis=1) > 0
print(user_scores_items_shown.shape)
# Topics that correspond to each item shown
topics_shown = bubble.item_topics[bubble.items_shown]
print(topics_shown.shape)
"""
Need to update the below 2 lines depending on how user_topic_history is implemented in the wrapper class
"""
# Boolean matrix where value=1 if the topic shown is not in the user history, otherwise value=0
new_topics = np.apply_along_axis(np.isin, 1, topics_shown, bubble.user_topic_history, invert=True)
print(new_topics.shape)

# # calculate serendipity for all items presented to each user
items_shown_serendipity = np.multiply(new_topics, user_scores_items_shown)
print(items_shown_serendipity.shape)
# Calculate average serendipity - average serendipity by slate AND users
serendipity = np.mean(items_shown_serendipity)#np.sum(np.multiply(new_topics, user_scores_items_shown)) / bubble.num_users
print(serendipity)
# # to complete the measurement, call `self.observe(metric_value)`

(943, 10)
(943, 1682)
(943, 10)
(943, 10)
(943, 10)
(943, 10)
0.8148462354188759


In [51]:
print("Is serendipity equal? ", measurements['serendipity_metric'][-1] == serendipity)

Is serendipity equal?  True


In [52]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key])

mse [0.13628298776653813, 0.1434635384740876, 0.1446242331501705, 0.14611490283693482]
serendipity_metric [None, 0.9003181336161188, 0.8336161187698834, 0.8148462354188759]
timesteps [0 1 2 3]


In [53]:
new_topics.size == np.count_nonzero((new_topics==0) | (new_topics==1))

True

# Testing evalmetrics - `DiversityMetric`

In [54]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    # actual_user_representation=users,
    record_base_state=True,
)

In [55]:
from metrics import DiversityMetric

bubble.add_metrics(MSEMeasurement(), DiversityMetric())
print("These are the current metrics:")
print(bubble.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x167188e80>, <metrics.evaluation_metrics.DiversityMetric object at 0x167188580>]


In [56]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:04<00:00,  4.41s/it]


In [57]:
for key in measurements.keys():
    print(key, measurements[key])

mse [0.17605366183227583, 0.16486793352934279]
diversity_metric [None, 0.9087427830800049]
timesteps [0 1]


In [58]:
from itertools import combinations

# Getting all possible 2-item combinations (the indices) for the items in a slate
combos = combinations(np.arange(bubble.num_items_per_iter), 2)
items_shown = bubble.items_shown

topic_similarity = np.zeros(bubble.num_users)

stop = 0
for i in combos:
    # topic_similarity is equal to the number of 2-item combinations in which the items' topics are the same
    item_pair = items_shown[:, i]
    topic_pair = bubble.item_topics[item_pair]
    topic_similarity += (topic_pair[:,0] == topic_pair[:,1])

slate_diversity = 1 - ((1 / (bubble.num_items_per_iter * (bubble.num_items_per_iter-1))) * topic_similarity)
diversity = np.mean(slate_diversity)
print(diversity)

0.9087427830800049


In [59]:
print("Is diversity equal? ", measurements['diversity_metric'][-1] == diversity)

In [60]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.61s/it]


In [61]:
# Getting all possible 2-item combinations (the indices) for the items in a slate
combos = combinations(np.arange(bubble.num_items_per_iter), 2)
items_shown = bubble.items_shown

topic_similarity = np.zeros(bubble.num_users)

stop = 0
for i in combos:
    # topic_similarity is equal to the number of 2-item combinations in which the items' topics are the same
    item_pair = items_shown[:, i]
    topic_pair = bubble.item_topics[item_pair]
    topic_similarity += (topic_pair[:,0] == topic_pair[:,1])

slate_diversity = 1 - ((1 / (bubble.num_items_per_iter * (bubble.num_items_per_iter-1))) * topic_similarity)
diversity = np.mean(slate_diversity)
print(diversity)

0.9158477671733241


In [62]:
print("Is diversity equal? ", measurements['diversity_metric'][-1] == diversity)

In [63]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

100%|██████████| 1/1 [00:03<00:00,  3.70s/it]


In [64]:
# Getting all possible 2-item combinations (the indices) for the items in a slate
combos = combinations(np.arange(bubble.num_items_per_iter), 2)
items_shown = bubble.items_shown

topic_similarity = np.zeros(bubble.num_users)

stop = 0
for i in combos:
    # topic_similarity is equal to the number of 2-item combinations in which the items' topics are the same
    item_pair = items_shown[:, i]
    topic_pair = bubble.item_topics[item_pair]
    topic_similarity += (topic_pair[:,0] == topic_pair[:,1])

slate_diversity = 1 - ((1 / (bubble.num_items_per_iter * (bubble.num_items_per_iter-1))) * topic_similarity)
diversity = np.mean(slate_diversity)
print(diversity)

0.9201248969011431


In [65]:
print("Is diversity equal? ", measurements['diversity_metric'][-1] == diversity)

In [66]:
for key in bubble.get_measurements().keys():
    print(key, bubble.get_measurements()[key])

mse [0.17605366183227583, 0.16486793352934279, 0.15729951602857786, 0.1548931360190154]
diversity_metric [None, 0.9087427830800049, 0.9158477671733241, 0.9201248969011431]
timesteps [0 1 2 3]


# Testing **all** evalmetrics

### Repeates **not** allowed

In [67]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [69]:
from metrics import NoveltyMetric, SerendipityMetric, DiversityMetric

bubble.add_metrics(MSEMeasurement(), NoveltyMetric(), SerendipityMetric(), DiversityMetric())
print("These are the current metrics:")
print(bubble.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x167153940>, <metrics.evaluation_metrics.NoveltyMetric object at 0x167107550>, <metrics.evaluation_metrics.SerendipityMetric object at 0x167153220>, <metrics.evaluation_metrics.DiversityMetric object at 0x167179820>]


In [70]:
bubble.run(timesteps=1)
measurements = bubble.get_measurements()

for key in measurements:
    print(key, "-", measurements[key])

100%|██████████| 1/1 [00:05<00:00,  5.19s/it]

mse - [0.15442783273493083, 0.15070911505579573]
novelty_metric - [None, 1.3716426840801228]
serendipity_metric - [None, 0.6973488865323436]
diversity_metric - [None, 0.9220690467774245]
timesteps - [0 1]





In [71]:
bubble.run(timesteps=50)
measurements = bubble.get_measurements()

# print("Measurements for iters: ", str(np.arange(11) * 10))
for key in measurements:
    print(key, "\n\t", measurements[key][1::10])

 20%|██        | 10/50 [00:48<03:13,  4.84s/it]

### Repeates **allowed**

In [None]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    # actual_user_representation=users,
    record_base_state=True,
)

In [None]:
from wrapper.metrics import NoveltyMetric, SerendipityMetric, DiversityMetric

bubble.add_metrics(MSEMeasurement(), NoveltyMetric(), SerendipityMetric(), DiversityMetric())
print("These are the current metrics:")
print(bubble.metrics)

These are the current metrics:
[<trecs.metrics.measurement.MSEMeasurement object at 0x177735550>, <wrapper.metrics.evaluation_metrics.NoveltyMetric object at 0x164828310>, <wrapper.metrics.evaluation_metrics.SerendipityMetric object at 0x177735850>, <wrapper.metrics.evaluation_metrics.DiversityMetric object at 0x177735b50>]


In [None]:
bubble.run(timesteps=50)
measurements = bubble.get_measurements()

# print("Measurements for iters: ", str(np.arange(11) * 10))
for key in measurements:
    print(key, "\n\t", measurements[key][1::10])

100%|██████████| 50/50 [04:02<00:00,  4.85s/it]

mse 
	 [0.172459834576788, 0.16406575288851866, 0.16351561623348596, 0.1633777681263983, 0.16338956100420463]
novelty_metric 
	 [-0.07925695495306814, 0.8741216679981739, 0.6939023645642217, 0.5772883705582177, 0.49509272766159906]
serendipity_metric 
	 [0.4943796394485684, 0.6029692470837752, 0.524390243902439, 0.44231177094379637, 0.4264050901378579]
diversity_metric 
	 [0.9195357605749972, 0.9187463178979618, 0.9188405797101451, 0.9186756215388241, 0.9183574879227053]
timesteps 
	 [ 1 11 21 31 41]





# Trying out different implementations of `BubbleBurster`
... so as to avoid `for` loops

In [None]:
content = ContentFiltering(
    user_representation=user_representation,
    item_representation=item_representation,
    record_base_state=True,
)

In [None]:
bubble = BubbleBurster(
    # num_users=number_of_users,
    # num_items=num_items,
    # num_attributes=number_of_attributes,
    item_topics=item_topics,
    user_representation=user_representation,
    item_representation=item_representation,
    actual_user_representation=users,
    record_base_state=True,
)

In [None]:
from wrapper.metrics import NoveltyMetric, SerendipityMetric, DiversityMetric

bubble.add_metrics(MSEMeasurement(), NoveltyMetric(), SerendipityMetric(), DiversityMetric())
print("These are the current metrics:")
print(bubble.metrics)

In [None]:
# now we run the model
bubble.run(timesteps=100)
measurements = bubble.get_measurements()

In [None]:
item_topics = item_topics
num_topics = np.unique(item_topics).size
user_topic_history = np.zeros((bubble.num_users, num_topics)).astype(int)
item_count = np.zeros((bubble.num_items)).astype(int)

In [None]:
interacted_items = bubble.interactions
# print(interacted_items.shape)

# Updating `item_count`
interacted_item_val, interacted_item_count = np.unique(interacted_items, return_counts=True)
# print(interacted_item_val.shape)
# print(interacted_item_val[:10])
# print(interacted_item_count.shape)
# print(interacted_item_count[:10])

# print(item_count.shape)
# print(item_count[:10])
item_count[interacted_item_val] += interacted_item_count
# print(item_count[:10])

# Updating `user_topic_history`
interacted_topics = np.expand_dims(item_topics[interacted_items], axis=1)
# interacted_topic_val, interacted_topic_count = np.unique(interacted_topics, return_counts=True)
# user_topic_history[interacted_topic_val] += interacted_topic_count
old_topic_count = np.take_along_axis(user_topic_history, interacted_topics, axis=1)
np.put_along_axis(user_topic_history, interacted_topics, old_topic_count+1, axis=1)

print(np.sum(user_topic_history))


In [None]:
print(np.array_equal(bubble.user_topic_history, user_topic_history))
print(np.array_equal(bubble.item_count, item_count))

In [None]:
print(item_topics[interacted_items][:5])
for i in range(5):
    if item_topics[interacted_items][i] <5 or item_topics[interacted_items][i] > 920:
        continue
    interacted_topics = item_topics[interacted_items]
    print("Topic: ", interacted_topics[i])
    print("\t", user_topic_history[i, interacted_topics[i-1]], user_topic_history[i, interacted_topics[i]], user_topic_history[i, interacted_topics[i+1]])