In [1]:
import numpy as np
from scipy.stats import invgamma
import random

In [39]:
class Environment(object):
    """
    Simulated recommendation environment for the MAB process.

    Hands out a random user at each time step and keeps per-user logs of
    the rewards received and the items recommended.

    Parameters
    ----------
    filePath : str, default=None
        Path to dataset. Currently unused: loading from file is not
        implemented yet.
    k : int, default=3 
        number of latent features
    var_star : float, default=0.5
        model variance hyper-parameter (variance of the observation noise)
    genData : bool, default=False
        create synthetic dataset. Must be True for now; any other value
        raises NotImplementedError.

    """
    def __init__(self, filePath=None, k=3, var_star=0.5, genData=False):
        self.K = k
        self.var_star = var_star
        # remembered so the user log can later be rebuilt from the same source
        self.genData = genData
        # dictionary tracks which users/items have been seen
        self.history = {}

        if genData:
            # generate synthetic dataset
            self.num_users = 20
            self.num_items = 50
            self.mu_u = 0 
            self.mu_v = 0
            self.var_u = 1
            self.var_v = 1
            self.mu_star = 0
            self.data = self.gen_syn_data()
        else:
            # load and process data from file (TODO):
            # 1. read in data
            # 2. preprocess data to get user/items/ratings dicts
            # 3. set num_users/num_movies
            # 4. set default var_u, var_v (to be used in bayesian PTS)
            raise NotImplementedError(
                "Loading a dataset from file is not implemented; use genData=True")

        self.userItemsCount = self.items_per_user(genData)
        self.userRewards = {user: [] for user in range(self.num_users)}
        self.userRecItems = {user: [] for user in range(self.num_users)}

    def gen_syn_data(self):
        """ 
        Generates synthetic dataset according to sec. 5.1, pg.6 [1]

        Returns
        -------
        data : dict, 
            dictionary of data containing U,V, R_true, R_obs (noisy R_true)
        """
        # NOTE(review): the latent factors are all-zero placeholders, so
        # R_true is all zeros. The Gaussian draws below were left commented
        # out by the author (np.random.normal expects a standard deviation
        # as its second argument, not a variance).
        #U = np.random.normal(self.mu_u, self.var_u, (self.num_users, self.K)) 
        #V = np.random.normal(self.mu_v, self.var_v, (self.num_items, self.K))
        U = np.zeros((self.num_users, self.K))
        V = np.zeros((self.num_items, self.K))
        R_true = np.dot(U, V.T)

        # Independent Gaussian noise for every (user, item) entry. `scale`
        # is a standard deviation, hence the square root of the variance.
        noise = np.random.normal(self.mu_star,
                                 np.sqrt(self.var_star),
                                 size=R_true.shape)
        R_obs = R_true + noise
        return { 'U': U, 'V': V, 'R_true': R_true, 'R_obs': R_obs }

    def _available_users(self):
        """ Returns the list of user ids whose remaining item count is > 0 """
        return [user for user, left in self.userItemsCount.items() if left > 0]

    def get_new_user(self):
        """ 
        Randomly samples a user that still has a positive remaining item
        count and decrements that count. When every count has reached zero
        the counts are rebuilt (a message is printed) and sampling restarts.

        Returns
        -------
        user_id : int, 
            new user_id generated at random

        Raises
        ------
        RuntimeError
            if no user has a positive count even after the counts were rebuilt
        """
        available_users = self._available_users()
        if not available_users:
            print("ERROR: All user/item pairs have been seen!")
            print("Restarting user log...exit to cancel!")
            # rebuild from the same data source the environment was created with
            self.userItemsCount = self.items_per_user(self.genData)
            available_users = self._available_users()
            if not available_users:
                # guards against endlessly restarting an empty log
                raise RuntimeError("No user has any items left to recommend")

        user_id = random.choice(available_users)
        # decrease num of remaining movies
        self.userItemsCount[user_id] -= 1
        return user_id

    def items_per_user(self, genData=False):
        """ 
        maps user ids to count of num of items they've rated in data 

        Parameters
        ----------
        genData : bool, default=False
            whether the counts come from the synthetic dataset; the
            file-based variant is not implemented

        Returns
        -------
        userItemsCount : dict, userId : item count
            dictionary of userIds to count of rated items

        Raises
        ------
        NotImplementedError
            if genData is False
        """
        if not genData:
            raise NotImplementedError

        ratings = self.data['R_true']
        userItemsCount = {}
        for user in range(len(ratings)):
            # NOTE(review): np.where returns a tuple holding one index array
            # per dimension, so this len() is always 1 for a single row
            # rather than the number of positive ratings. It is kept as-is
            # on purpose: with the all-zero placeholder factors a true count
            # would be 0 for every user and no user could ever be sampled.
            # Switch to np.count_nonzero(ratings[user, :] > 0) once real
            # latent factors are generated.
            movie_idxs = np.where(ratings[user, :] > .0)
            userItemsCount[user] = len(movie_idxs)
        return userItemsCount

    def itemsRatedByUser(self, user_id):
        """ 
        useful for synthetic data but probably need to reimplement 
        with real data 

        Looks up the latent vectors of the items logged in
        userRecItems[user_id]; a single all-zero row is returned when
        nothing has been logged for the user yet.

        Returns
        -------
        V_j : Numpy matrix, shape ( O(M) x K )
            matrix of items rated by user. M upper bounded total items
        """
        item_ids = self.userRecItems[user_id]
        V_j = self.data['V'][item_ids]
        if len(V_j) == 0:
            return np.zeros((1,self.K))
        return V_j

    def usersWhoRatedItem(self, item_id):
        """ Not implemented yet """
        raise NotImplementedError
    
    def get_reward_vector(self, user_id):
        """ 
        Rewards logged for the user from 1:t-1

        Returns
        -------
        rPred_History : Numpy array, shape ( 1 x number of logged rewards )
            the scalar 1.0 is returned instead when no reward has been
            logged for the user yet
        """
        _r = self.userRewards[user_id]
        if len(_r) == 0:
            return 1.0
        # reward for user i from 1:t-1
        rPred_History = np.asarray(_r).reshape(1,-1) 
        return rPred_History






In [21]:
class Particles(object):
    """
    Container for a set of particles, each holding a draw of the user and
    item latent matrices together with the variances used to draw them.

    Parameters
    ----------
    num_particles : int
        number of particles to create
    num_users : int
        number of rows of every particle's U matrix
    num_items : int
        number of rows of every particle's V matrix
    k : int, default=3
        number of latent features (columns of U and V)
    """
    
    def __init__(self, num_particles, num_users, num_items, k=3):
        
        self.num_particles = num_particles
        self.K = k
        # forward k so the particles match the requested latent dimension
        self.particles = self.init_particles(num_users, num_items, k=k)
        # uniform initial weights
        self.weights = [1./self.num_particles] * self.num_particles
        
    def init_particles(self, num_users, 
                               num_items, 
                               mu_u=0, 
                               var_u=1, 
                               mu_v=0, 
                               var_v=1, 
                               k=3):
        """
        Draws self.num_particles independent particles.

        Params
        ------
        num_users, num_items : type int
            number of rows of U and V respectively
        mu_u, mu_v : type float
            means of the Gaussian draws for U and V
        var_u, var_v : type float
            variances of the Gaussian draws for U and V
        k : type int
            number of latent features

        Returns
        -------
        particles : dict, particle index : dict
            each particle has the keys 'U', 'V', 'var_u' and 'var_v'
        """
        # np.random.normal takes a standard deviation as its scale argument
        std_u = np.sqrt(var_u)
        std_v = np.sqrt(var_v)

        particles = {}
        for i in range(self.num_particles):
            particles[i] = {
                        'U': np.random.normal(mu_u, std_u, (num_users, k)),
                        'V': np.random.normal(mu_v, std_v, (num_items, k)),
                        'var_u': var_u,
                        'var_v': var_v }
        return particles
    
    def sample(self, t=1):
        """
        Picks a particle index. Only t == 1 is supported, in which case the
        index is chosen uniformly at random; the weights are not used yet.

        Raises
        ------
        NotImplementedError
            for any t other than 1
        """
        if t == 1:
            return random.choice( list(self.particles.keys()) )
        raise NotImplementedError
    
    def reweighting(self):
        """ Not implemented yet """
        raise NotImplementedError
    
    def update_weights(self):
        """ Not implemented yet """
        raise NotImplementedError



In [67]:
class PTS(object):
    """
    Sampling steps of Particle Thompson Sampling for Probabilistic Matrix
    Factorization: draws variances and user latent vectors from their
    conditional posteriors and picks the item to recommend.

    Parameters
    ----------
    alpha : float, default=2.
        shape hyper-parameter of the inverse-gamma prior on the variances
    beta : float, default=0.5
        scale hyper-parameter of the inverse-gamma prior on the variances
    var_star : float, default=1.0
        model variance hyper-parameter
    k : int, default=3 
        number of latent features
    """
    def __init__(self, alpha=2, beta=0.5, var_star=1.0, k=3):
        self.alpha = alpha
        self.beta = beta
        self.var_star = var_star
        self.K = k 
        self.history_log = {}
        
    def sample_var(self, matrix, N):
        """ 
        Samples a variance from its closed-form inverse-gamma posterior:
        Pr(var | matrix, alpha, beta) 
            = IG( N*K/2 + alpha, 0.5 * ||matrix||_F^2 + beta )

        Params
        ------
        matrix : type Numpy matrix
            U or V matrix depending on if var_u or var_v
        N : type int 
            ** N (M) = number of users (or M=items)

        Returns
        -------
        var : type float, 
            var_u or var_v
        """
        shape = ( (N * self.K) / 2.0 ) + self.alpha
        # the conjugate update uses the SQUARED Frobenius norm,
        # i.e. the sum of squared entries
        scale = 0.5 * np.sum(np.square(matrix)) + self.beta
        # Generate random value from inverse gamma(shape,scale)
        var = invgamma.rvs(shape, scale=scale)
        return var
    
    def sample_Ui(self, V_j, r_ij, var_u):
        """ 
        Sample Ui from the posterior:
        Pr(Ui | V, R, var*, var_u) ~ N( Ui | mu_ui, (prec_ui)^-1 )
        with
            prec_ui = (1/var*) * sum_j V_j V_j^T + (1/var_u) * I
            mu_ui   = (1/var*) * prec_ui^-1 * sum_j r_ij V_j

        Params
        ------
        V_j : type Numpy matrix, shape = O(M) x K 
            matrix of items rated by user from 1:t-1
        r_ij : type array, shape = 1 x O(M)
            array of observed ratings for user i, items j
            from 1:t-1. A scalar is accepted when V_j has a single row.
        var_u : type float,
            sampled var_u for time step t

        Returns
        -------
        U_i : type numpy vector, shape = (K,)
            sampled latent feature vector for user i
        """
        V_j = np.atleast_2d(np.asarray(V_j, dtype=float))
        ratings = np.asarray(r_ij, dtype=float).reshape(-1)

        # eta_ui = sum_j r_ij * V_j, a length-K vector
        eta_ui = np.dot(ratings, V_j)
        
        # V_j.T @ V_j is the K x K sum of outer products of the item rows;
        # the prior contributes the precision 1/var_u on the diagonal
        precision_ui = np.dot(V_j.T, V_j) / self.var_star + np.eye(self.K) / var_u
        invPrecision = np.linalg.inv(precision_ui)
        # Off-diagonal covariance entries may legitimately be negative, so
        # no element-wise sign check is made. Symmetrize to remove rounding
        # asymmetry introduced by the inversion.
        invPrecision = (invPrecision + invPrecision.T) / 2.0
        
        # calculate mu_ui 
        mu_ui = np.dot(invPrecision, eta_ui) / self.var_star
        
        # sample U_i = Normal(mu_ui, invPrecision_ui)
        U_i = np.random.multivariate_normal(mu_ui, invPrecision)
        return U_i

    def sample_Vj(self, U_j, r_ji, var_v):
        """ Not implemented yet """
        raise NotImplementedError

    
    def recommend_item(self, U_i, V):
        """ 
        Choose an item to recommend and 
        predicts rating

        Params
        ------
        U_i : type Numpy vector, shape = (K,) or 1 x K 
            latent feature vector of the user
        V : type Numpy matrix, shape = M x K
            inventory of items

        Returns
        -------
        j_hat : type int,
            index of item with highest predicted rating
        pred_rt : type float,
            predicted rating for item
        """
        # flatten so a 1 x K user vector is indexed the same way as a (K,) one
        ratings = np.ravel(np.dot(U_i, V.T))
        j_hat = np.argmax(ratings)
        pred_rt =  ratings[j_hat]
        return j_hat, pred_rt
    
    def calculate_reward(self, r_hat, r_true):
        """
        Returns 1 when the predicted rating equals the true rating exactly
        and -1 otherwise.
        """
        return 1 if r_hat == r_true else -1
    
    def update_posterior(self):
        """ Not implemented yet """
        raise NotImplementedError
    


In [70]:
#import ParticleThompsonSamplingMAB.src.environment # Can just import and clear away code above
#import ParticleThompsonSamplingMAB.src.particles
#import ParticleThompsonSamplingMAB.src.PTS
# np.random.seed(0)
# random.seed(0)


""" Main Driver Function - Part 1 """ 

def run_PTSMF(steps, num_particles=30, filePath=None, alpha=2, beta=0.5, k=3, var_star=0.5):
    """
    Part 1 of the PTS driver loop on the synthetic environment.

    For every time step a user is drawn, a latent vector is sampled for
    that user, an item is recommended, and the resulting reward is logged
    and printed. The posterior/particle update (part 2) is not implemented.

    Params
    ------
    steps : type int
        number of time steps to simulate
    num_particles : type int, default=30
        number of particles
    filePath : type str, default=None
        forwarded to Environment
    alpha, beta : type float
        inverse-gamma hyper-parameters forwarded to PTS
    k : type int, default=3
        number of latent features
    var_star : type float, default=0.5
        model variance hyper-parameter

    Returns
    -------
    None; each step is printed.
    """
    
    env = Environment(filePath, k, var_star, genData=True)
    particles = Particles(num_particles, env.num_users, env.num_items, k)
    pts = PTS(alpha, beta, var_star, k)
    
    full_bayesian = True
    for t in range(1, steps + 1):
        
        user_id = env.get_new_user() # Environment randomly generates user
        d = particles.sample(1)      # not fully implemented/set = 1
        V_j = env.itemsRatedByUser(user_id)
        r_it = env.get_reward_vector(user_id)

        if full_bayesian and t > 1:
            U = particles.particles[d]['U']
            var_u = pts.sample_var(U, env.num_users)
        else:
            var_u = 1.0
            
        U_i = pts.sample_Ui(V_j, r_it, var_u) 

        # Score the whole item inventory so that j_hat is a real item id.
        # Scoring only V_j would return a row position inside the user's
        # history, which is then wrongly logged and reused as an item id.
        # NOTE(review): the environment's item matrix is used, matching
        # itemsRatedByUser; the sampled particle's V may be the intended
        # source once the particle update is implemented.
        j_hat, r_pred = pts.recommend_item(U_i, env.data['V'])

        # true rating of (user, item) comes from the rating matrix, not
        # from the item-factor matrix
        r_true = env.data['R_true'][user_id, j_hat]
        reward = pts.calculate_reward(r_pred, r_true)

        # Update logs 
        env.userRewards[user_id].append(reward)  
        env.userRecItems[user_id].append(j_hat) 
        # setdefault stores the list; .get(t, []) appended to a throw-away one
        pts.history_log.setdefault(t, []).append((user_id, j_hat, reward))
        
        print("user_id: {}\t j_hat: {}\t r_pred: {}\t r_true: {}\t reward: {}".format(user_id,
                                                                                    j_hat,
                                                                                    r_pred,
                                                                                    r_true,
                                                                                    reward))
        
        # Part 2: NOT IMPLEMENTED
        # PTS.update_posterior(pass in everything)

In [73]:
# Seed both random number generators used by the simulation (numpy and the
# stdlib `random` module) so the run below is reproducible after a kernel
# restart.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

# Simulation settings
num_particles = 30   # number of particles
filePath = None      # no dataset file; synthetic data is generated
alpha = 2            # inverse-gamma hyper-parameter
beta = 0.5           # inverse-gamma hyper-parameter
k = 3                # number of latent features
var_star = 0.5       # model variance hyper-parameter
steps = 20           # number of time steps to simulate

run_PTSMF(steps, num_particles, filePath, alpha, beta, k, var_star)

user_id: 9	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 4	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 16	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 5	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 2	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 1	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 7	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 3	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 12	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 15	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 17	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 0	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 19	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 11	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 14	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 13	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_id: 10	 j_hat: 0	 r_pred: 0.0	 r_true: 0.0	 reward: 1
user_