# Debugging and visualizing data

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from rec import ContentFiltering

~~### A few details on the available debugging options~~
~~- `MEASUREMENTS` is used to debug the `measurements` module. Right now, if enabled, it only supports plotting the distribution of interactions at a given timestep. It is possible to define a `measurement_visualization_rule` to plot the distribution -- see example below.~~
~~- `USER_SCORES` is used to debug the `user_scores` module. If enabled, it plots the distribution of the most liked items at the beginning of the execution.~~
~~- `RECOMMENDER` is used to debug the behavior of the recommender system.~~

In [2]:
# NOTE: Restart kernel before re-running this

# The debugger is currently disabled: every statement below is commented out.
# `debug_opt` and `Debug` are not imported by the cells shown in this notebook,
# so they would have to be brought into scope before re-enabling these lines.
# Set up debug options (see debug_opt in main.py)
#debug_opt['MEASUREMENTS'] = False # debug measurement of equilibrium
#debug_opt['USER_SCORES'] = True  # debug user preferences
#debug_opt['RECOMMENDER'] = True # debug recommender system
# Configure and initialize debugger
#debugger = Debug(list(debug_opt.keys()), list(debug_opt.values()))

### Set up recommender system
This cell contains everything needed for setup.

In [3]:
# Simulation size parameters -- edit these to change the scale of the experiment.
SEED = 42  # fixed seed so the random draws below repeat on every Restart & Run All
NUM_USERS = 1000
NUM_ITEMS_PER_ITER = 10  # num items presented to each user at each iteration
RUN_TIMESTEPS = 100  # num run iterations

# Seed NumPy's global generator before the first random draw; without this,
# NUM_ITEMS (and everything derived from it) changes on every run.
# NOTE(review): `rec` presumably also draws from NumPy's global generator, in
# which case this makes the simulation itself repeatable -- confirm.
np.random.seed(SEED)

# Total num items in simulation: a random multiple (between 1x and 5x) of the
# number of items presented to a single user over the whole run.
NUM_ITEMS = round(RUN_TIMESTEPS * NUM_ITEMS_PER_ITER * np.random.uniform(1, 5))

# For content filtering, generate array of attributes
#for i, row in enumerate(rec_args['content']['item_representation']):
#    A = rec_args['content']['num_attributes']
#    n_indices = np.random.randint(1, A)
#    indices = np.random.randint(A, size=(n_indices))
#    row[indices] = 1
#    rec_args['content']['item_representation'][i,:] = row
# Add it to args
#rec_args['content']['item_representation'] = rec_args['content']['item_representation'].T

### Instantiate and run

In [4]:
# Build a ContentFiltering recommender from the size constants only; no user
# profiles or item attributes are passed in.
default_filtering = ContentFiltering(
    num_users=NUM_USERS,
    num_items=NUM_ITEMS,
    num_items_per_iter=NUM_ITEMS_PER_ITER,
)
# Simulate RUN_TIMESTEPS iterations with train_between_steps enabled
default_filtering.run(
    timesteps=RUN_TIMESTEPS,
    train_between_steps=True,
)

### Sample results

For simplicity, `get_measurements()` returns the results as a pandas DataFrame; the first rows are previewed below.

In [23]:
# Store the measurements of the default run as the first (and only) entry of a
# list, then preview the first rows of that entry.
delta_t = [default_filtering.get_measurements()]
delta_t[0].head()

Unnamed: 0_level_0,Homogeneity,MSE
Timestep,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-953.5,1.11177e-07
1,-35.0,0.09135395
2,32.0,0.09087284
3,-13.0,0.09072048
4,21.0,0.09064104


### Set up recommender system with predefined values

#### You can define user profiles and item attributes:
- `user_representation` must be a `|U|x|A|` matrix, where `|U|` is the number of users and `|A|` is the number of attributes in the system.
- `item_representation` must be a `|A|x|I|` matrix, where `|A|` is the number of attributes and `|I|` is the number of items in the system.
- Note that the following constraint must hold: `item_representation.shape[0] == user_representation.shape[1]`

In [19]:
from rec import Distribution

# Number of attributes: NUM_ITEMS minus a random fraction of it.
# The divisor is drawn from [2, 15] (randint's upper bound is exclusive). A
# divisor of 1 must be excluded because NUM_ITEMS - NUM_ITEMS // 1 == 0 would
# leave the system with no attributes and empty representation matrices.
NUM_ATTRIBUTES = NUM_ITEMS - NUM_ITEMS // np.random.randint(2, 16)

# Initialize power law distribution
d = Distribution(distr_type='powerlaw')

# Use the distribution to generate user_representation (|U| x |A|)
user_representation = d.compute(a=1.16, size=(NUM_USERS, NUM_ATTRIBUTES))

# Use binomial distribution to generate item_representation (|A| x |I|)
item_representation = Distribution(distr_type='binom', n=5, p=0.4, size=(NUM_ATTRIBUTES, NUM_ITEMS)).compute()

# Instantiate and run recsys with the predefined representations
filtering = ContentFiltering(user_representation=user_representation, item_representation=item_representation)
# Run for 10 timesteps
filtering.run(timesteps=10)

# Collect results (overwrites any previous value of delta_t)
delta_t = filtering.get_measurements()

### You can run the system beyond the predefined time steps:

In [20]:
# Continue the existing simulation for 10 additional time steps, with
# train_between_steps enabled
filtering.run(
    timesteps=10,
    train_between_steps=True,
)

In [24]:
# Preview the first rows of the measurements after the additional time steps
(
    filtering
    .get_measurements()
    .head()
)

Unnamed: 0_level_0,Homogeneity,MSE
Timestep,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-998.0,0.0
1,0.0,0.0
2,0.0,0.0
3,-0.5,0.0
4,32.5,0.0


## Further analysis