Bug fixes and improvements to the config file
bjw4ph committed Apr 7, 2018
1 parent 10a3aff commit f61f000
Showing 17 changed files with 200 additions and 172 deletions.
2 changes: 1 addition & 1 deletion DiffList/DiffListClasses.py
@@ -15,7 +15,7 @@ def __init__(self, alg_name):
self.name = 'Theta'

def update_class_parameters(self, alg_name, reward_manager, user, alg, pickedArticle, reward, noise):
-        self.diff[alg_name] += reward_manager.getL2Diff(user.theta, alg.getTheta(user.id))
+        self.diff[alg_name] += reward_manager.getL2Diff(user.theta[:reward_manager.context_dimension], alg.getTheta(user.id))

class WDiffList(BaseDiffList):
def __init__(self, alg_name):
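Note on the fix above: the learned theta covers only the observable context features, so the L2 comparison now truncates the true user vector to its first context_dimension entries; otherwise the hidden (latent) tail inflates the error or breaks on a shape mismatch. A minimal sketch of the idea, assuming getL2Diff is a plain Euclidean distance (the dimensions below are illustrative, not the repo's defaults):

    import numpy as np

    def getL2Diff(x, y):
        # Euclidean distance between two parameter vectors
        return np.linalg.norm(np.asarray(x) - np.asarray(y))

    context_dimension, latent_dimension = 16, 4
    true_theta = np.random.rand(context_dimension + latent_dimension)  # full user vector
    learned_theta = np.random.rand(context_dimension)                  # estimate lives in context space only

    # Comparing only the observable prefix keeps both vectors the same length
    print(getL2Diff(true_theta[:context_dimension], learned_theta))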
11 changes: 6 additions & 5 deletions RewardManager.py
@@ -95,12 +95,13 @@ def runAlgorithms(self, algorithms, diffLists):

for alg_name, alg in algorithms.items():
recommendation = alg.createRecommendation(self.articlePool, u.id, self.k)

-            pickedArticle = recommendation.articles[0]
-            reward, rewardList = self.reward.getRecommendationReward(u, recommendation, noise)

+            # Assuming that the user will always be selecting one item for each iteration
+            #pickedArticle = recommendation.articles[0]
+            reward, pickedArticle = self.reward.getRecommendationReward(u, recommendation, noise)
if (self.testing_method=="online"):
-                #alg.updateParameters(pickedArticle, reward, u.id)
-                alg.updateRecommendationParameters(recommendation, rewardList, u.id)
+                alg.updateParameters(pickedArticle, reward, u.id)
+                #alg.updateRecommendationParameters(recommendation, rewardList, u.id)
if alg_name =='CLUB':
n_components= alg.updateGraphClusters(u.id,'False')

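The manager now assumes the user selects exactly one item per iteration: getRecommendationReward returns a single (reward, pickedArticle) pair, and the online branch feeds that one pick back through updateParameters. A self-contained sketch of the new contract (the stub classes below are illustrative stand-ins, not the repo's real Recommendation and reward classes):

    import random

    class Article(object):
        def __init__(self, aid, score):
            self.id, self.score = aid, score

    class Recommendation(object):
        def __init__(self, k, articles):
            self.k, self.articles = k, articles

    class StubReward(object):
        # Stand-in for Rewards/Reward.py: return the best (noisy) article and its reward
        def getRecommendationReward(self, user, recommendation, noise):
            best = max(recommendation.articles, key=lambda a: a.score)
            return best.score + noise, best

    rec = Recommendation(3, [Article(i, random.random()) for i in range(3)])
    reward, pickedArticle = StubReward().getRecommendationReward(None, rec, 0.01)
    # the online branch then updates with exactly this one pick:
    #   alg.updateParameters(pickedArticle, reward, u.id)
    print(reward, pickedArticle.id)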
9 changes: 0 additions & 9 deletions Rewards/LinearReward.py
@@ -18,14 +18,5 @@ def getReward(self, user, pickedArticle):
return np.dot(user.theta, pickedArticle.featureVector)
#return eval(self.reward_function)

-    def getRecommendationReward(self, user, recommendation, noise):
-        total = 0
-        rewardList = []
-        for i in recommendation.articles:
-            articleReward = self.getReward(user, i) + noise
-            total += articleReward
-            rewardList.append(articleReward)
-        return (total/self.k), rewardList

def get_user_features(self, user):
return user.theta
21 changes: 10 additions & 11 deletions Rewards/Reward.py
@@ -23,14 +23,13 @@ def getOptimalReward(self, user, articlePool, exclude = []):
pool_position = np.argmax(reward_matrix)
return reward_matrix[pool_position], articlePool[pool_position]

-    # ### Broadcasting Here #######
-    # def getOptimalReward(self, user, articlePool, exclude = []):
-    #     maxReward = float('-inf')
-    #     maxx = None
-    #     for x in articlePool:
-    #         reward = self.getReward(user, x)
-    #         if reward > maxReward and x not in exclude:
-    #         #if reward > maxReward:
-    #             maxReward = reward
-    #             maxx = x
-    #     return maxReward, x
+    def getRecommendationReward(self, user, recommendation, noise):
+        max_reward = float('-inf')
+        max_article = None
+        for i in recommendation.articles:
+            articleReward = self.getReward(user, i) + noise
+            if articleReward > max_reward:
+                max_reward = articleReward
+                max_article = i
+        return max_reward, max_article
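One subtlety in the new method above: a single noise draw is added to every candidate, so it shifts the reward that is returned but can never change which article attains the maximum; per-article noise would require sampling inside the loop. A toy illustration:

    # The shared noise term moves all scores together, so the argmax is unchanged
    rewards = [0.2, 0.7, 0.4]
    noise = 0.05
    noisy = [r + noise for r in rewards]
    assert noisy.index(max(noisy)) == rewards.index(max(rewards))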

16 changes: 8 additions & 8 deletions Rewards/SocialLinearReward.py
@@ -11,14 +11,14 @@ def getReward(self, user, pickedArticle):
# How to conditionally change
return np.dot(user.CoTheta, pickedArticle.featureVector)

-    def getRecommendationReward(self, user, recommendation, noise, cotheta = False):
-        total = 0
-        rewardList = []
-        for i in recommendation.articles:
-            articleReward = np.dot(user.CoTheta, i.featureVector) + noise
-            total += articleReward
-            rewardList.append(articleReward)
-        return (total/self.k), rewardList
+    # def getRecommendationReward(self, user, recommendation, noise, cotheta = False):
+    #     total = 0
+    #     rewardList = []
+    #     for i in recommendation.articles:
+    #         articleReward = np.dot(user.CoTheta, i.featureVector) + noise
+    #         total += articleReward
+    #         rewardList.append(articleReward)
+    #     return (total/self.k), rewardList

def get_user_features(self, user):
return user.CoTheta
18 changes: 16 additions & 2 deletions Simulation.py
@@ -152,7 +152,21 @@ def generate_algorithms(alg_dict, W, system_params):
rewardManagerDict['Gepsilon'] = 1

user['default_file'] = os.path.join(sim_files_folder, "users_"+str(n_users)+"context_"+str(context_dimension)+"latent_"+str(latent_dimension)+ "Ugroups" + str(UserGroups)+".json")
-    if user.has_key('collaborative') and user['collaborative']:
+    # Override User type
+    if gen.has_key('collaborative'):
+        if gen['collaborative']:
+            use_coUsers = True
+            reward_type = 'social_linear'
+        else:
+            use_coUsers = False
+            reward_type = 'linear'
+    else:
+        use_coUsers = user.has_key('collaborative') and user['collaborative']
+        reward_type = reco['type'] if reco.has_key('type') else 'linear'
+
+
+    #if user.has_key('collaborative') and user['collaborative']:
+    if use_coUsers:
UM = CoUserManager(context_dimension+latent_dimension, user, argv={'l2_limit':1, 'sparseLevel': n_users, 'matrixNoise': rewardManagerDict['matrixNoise']})
else:
UM = UserManager(context_dimension+latent_dimension, user, argv={'l2_limit':1})
@@ -171,7 +185,7 @@ def generate_algorithms(alg_dict, W, system_params):
if article.has_key('save') and article['save']:
AM.saveArticles(articles, articlesFilename, force=False)
rewardManagerDict['k'] = reco['k'] if reco.has_key('k') else 1
-    reward_type = reco['type'] if reco.has_key('type') else 'linear'
+    #reward_type = reco['type'] if reco.has_key('type') else 'linear'

#PCA
pca_articles(articles, 'random')
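The new block gives general.collaborative priority over the per-section settings: when present it forces the user-manager type and the reward type to agree, and when absent the old user/reward keys apply. An isolated sketch of the resolution logic (plain dicts stand in for the parsed YAML sections, and .get replaces the Python 2 has_key calls):

    def resolve(gen, user, reco):
        # general.collaborative, if set, overrides both downstream choices
        if 'collaborative' in gen:
            use_coUsers = bool(gen['collaborative'])
            reward_type = 'social_linear' if use_coUsers else 'linear'
        else:
            use_coUsers = bool(user.get('collaborative', False))
            reward_type = reco.get('type', 'linear')
        return use_coUsers, reward_type

    print(resolve({'collaborative': True}, {'collaborative': False}, {'type': 'linear'}))
    # -> (True, 'social_linear'): the general flag wins over the per-section keys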
88 changes: 45 additions & 43 deletions config.yaml
@@ -8,14 +8,15 @@

# System level settings to be used in all algorithms
general:
-  testing_iterations: 100 # (int) Number of iterations to testing and refine model upon
+  testing_iterations: 300 # (int) Number of iterations to testing and refine model upon
training_iterations: 0 # (int) Number of iterations to train a model
context_dimension: 16 # (int) The number of features which the algorithm knows about each article
hidden_dimension: 0 # (int) The number of features hidden from the algorithms
pool_article_size: 10 # (int) Number of articles to select from total pool from which the algorithm can choose
batch_size: 1 # (int)
testing_method: 'online' # (string) online: update the model after each iterations
plot: True # (True/False) Should plots be created
+  collaborative: True # (True/False) connection between collaborative user and social_linear rewards

# Different settings for user objects
user:
@@ -24,7 +25,7 @@ user:
theta_func: featureUniform # (string)
load: no # (yes/no) Load the theta for each user from file specified by file_name
save: no # (yes/no Save the thetas for each user to be loaded for future simulations
-  collaborative: yes # (yes/no) Is the user able to access information about other users
+  collaborative: no # (yes/no) Is the user able to access information about other users, overridden by general collaborative setting
file_name: test.json # (string) File from which to load user thetas

# Different settings for article arms
@@ -38,7 +39,7 @@ article:
# Reward to determine the accuracy of choices made by the algorithm
reward:
k: 1 # (int) number of articles to recommend for each user each iteration
-  type: social_linear # (string) linear/ social_linear: determines which reward function is used
+  type: linear # (string) linear/ social_linear: determines which reward function is used, overridden by general collaborative setting

# Parameters for the different algorithms
alg:
@@ -66,6 +67,7 @@ alg:
CoLin:
alpha: 0.3
lambda_: 0.1
+    use_alpha_t: False # Use a theoretical alpha_t value
parameters:
Theta: False
CoTheta: True
@@ -80,50 +82,50 @@ alg:
W: False
V: False
HLinUCB:
-    alpha: 0.1
+    alpha: 0.3
alpha2: 0.1
lambda_: 0.1
parameters:
Theta: False
CoTheta: True
W: False
V: True
-  UCBPMF:
-    sigma : 0.5
-    sigmaU : 1
-    sigmaV : 1
-    alpha : 0.1
-    parameters:
-      Theta: False
-      CoTheta: False
-      W: False
-      V: False
-  FactorUCB:
-    alpha: 0.05
-    alpha2: 0.025
-    lambda_: 0.1
-    parameters:
-      Theta: False
-      CoTheta: True
-      W: False
-      V: True
-  CLUB:
-    alpha: 0.1
-    alpha2: 0.5
-    lambda_: 0.1
-    cluster_init : 'Erdos-Renyi'
-    parameters:
-      Theta: False
-      CoTheta: False
-      W: False
-      V: False
-  PTS:
-    particle_num : 10
-    sigma : 0.5
-    sigmaU : 1
-    sigmaV : 1
-    parameters:
-      Theta: False
-      CoTheta: False
-      W: False
-      V: False
+  # UCBPMF:
+  #   sigma : 0.5
+  #   sigmaU : 1
+  #   sigmaV : 1
+  #   alpha : 0.1
+  #   parameters:
+  #     Theta: False
+  #     CoTheta: False
+  #     W: False
+  #     V: False
+  # FactorUCB:
+  #   alpha: 0.05
+  #   alpha2: 0.025
+  #   lambda_: 0.1
+  #   parameters:
+  #     Theta: False
+  #     CoTheta: True
+  #     W: False
+  #     V: True
+  # CLUB:
+  #   alpha: 0.1
+  #   alpha2: 0.5
+  #   lambda_: 0.1
+  #   cluster_init : 'Erdos-Renyi'
+  #   parameters:
+  #     Theta: False
+  #     CoTheta: False
+  #     W: False
+  #     V: False
+  # PTS:
+  #   particle_num : 10
+  #   sigma : 0.5
+  #   sigmaU : 1
+  #   sigmaV : 1
+  #   parameters:
+  #     Theta: False
+  #     CoTheta: False
+  #     W: False
+  #     V: False
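With the override in place, one flag in the general section is enough to keep the user model and reward function consistent; a minimal illustrative fragment (values are examples, not recommendations):

    general:
      testing_iterations: 300
      context_dimension: 16
      hidden_dimension: 0
      pool_article_size: 10
      batch_size: 1
      testing_method: 'online'
      plot: True
      collaborative: True   # forces CoUserManager + the social_linear reward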
5 changes: 1 addition & 4 deletions lib/BaseAlg.py
@@ -21,10 +21,7 @@ def decide(self, pool_articles, userID, exclude = []):
return pool_articles[len(exclude)]

def createRecommendation(self, pool_articles, userID, k):
-        articles = []
-        for x in range(k):
-            articlePicked = self.decide(pool_articles, userID, articles)
-            articles.append(articlePicked)
+        articles = self.decide(pool_articles, userID, k)
recommendation = Recommendation(k, articles)
return recommendation

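createRecommendation now delegates the whole top-k choice to decide, so each algorithm's decide is expected to accept k and return a list of k articles (the CLUB, CoLin, and GOBLin changes below do exactly that); note that the base decide shown in the hunk header still carries the old exclude signature, so subclasses must override it. A minimal conforming decide, as a sketch only:

    class TrivialAlg(object):
        # Minimal example of the new contract: accept k, return a list of k articles.
        # Real algorithms rank the pool by an upper-confidence score instead.
        def decide(self, pool_articles, userID, k=1):
            return pool_articles[:k]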
27 changes: 15 additions & 12 deletions lib/CLUB.py
@@ -67,22 +67,25 @@ def __init__(self, arg_dict):
N_components, components = connected_components(g)


-    def decide(self,pool_articles,userID, exclude = []):
+    def decide(self,pool_articles,userID, k = 1):
self.users[userID].updateParametersofClusters(self.clusters,userID,self.Graph, self.users)
-        maxPTA = float('-inf')
-        articlePicked = None
+        articles = []
+        for i in range(k):
+            maxPTA = float('-inf')
+            articlePicked = None

-        for x in pool_articles:
-            x_pta = self.users[userID].getProb(self.alpha, x.contextFeatureVector[:self.dimension],self.time)
-            # pick article with highest Prob
-            if maxPTA < x_pta:
-                articlePicked = x.id
-                featureVectorPicked = x.contextFeatureVector[:self.dimension]
-                picked = x
-                maxPTA = x_pta
+            for x in pool_articles:
+                x_pta = self.users[userID].getProb(self.alpha, x.contextFeatureVector[:self.dimension],self.time)
+                # pick article with highest Prob
+                if maxPTA < x_pta and x not in articles:
+                    articlePicked = x.id
+                    featureVectorPicked = x.contextFeatureVector[:self.dimension]
+                    picked = x
+                    maxPTA = x_pta
+            articles.append(picked)
self.time +=1

-        return picked
+        return articles
def updateParameters(self, articlePicked, click,userID):
self.users[userID].updateParameters(articlePicked.contextFeatureVector[:self.dimension], click, self.alpha_2)
def updateGraphClusters(self,userID, binaryRatio):
23 changes: 16 additions & 7 deletions lib/CoLin.py
@@ -89,13 +89,13 @@ def decide_old(self, pool_articles, userID, exclude = []):
for x in pool_articles:
x_pta = self.USERS.getProb(self.alpha, x, userID)
# pick article with highest Prob
-            if maxPTA < x_pta and x not in exclude:
+            if maxPTA < x_pta:
articlePicked = x
maxPTA = x_pta

-        return articlePicked
+        return [articlePicked]

-    def decide(self, pool_articles, userID, exclude = []):
+    def decide(self, pool_articles, userID, k = 1):
# MEAN
art_features = np.empty([len(pool_articles), len(pool_articles[0].contextFeatureVector)])
for i in range(len(pool_articles)):
@@ -110,11 +110,20 @@ def decide(self, pool_articles, userID, exclude = []):
TempFeatureM.T[userID] = pool_articles[i].contextFeatureVector
art_temp_features[i, :] = vectorize(TempFeatureM)
var_matrix = np.sqrt(np.dot(np.dot(art_temp_features, self.USERS.CCA), art_temp_features.T))
-        self.USERS.calculateAlphaT()
-        pta_matrix = mean_matrix + self.USERS.alpha_t*np.diag(var_matrix)
-        pool_position = np.argmax(pta_matrix)
-        return pool_articles[pool_position]
+        #self.USERS.calculateAlphaT()
+        if self.use_alpha_t:
+            self.USERS.calculateAlphaT()
+            pta_matrix = mean_matrix + self.USERS.alpha_t*np.diag(var_matrix)
+        else:
+            pta_matrix = mean_matrix + self.alpha*np.diag(var_matrix)
+
+        pool_positions = np.argsort(pta_matrix)[(k*-1):]
+        articles = []
+        for i in range(k):
+            articles.append(pool_articles[pool_positions[i]])
+        return articles
+        #return pool_articles[pool_position]

def updateParameters(self, articlePicked, click, userID, update='Inv'):
self.USERS.updateParameters(articlePicked, click, userID, update)
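The argsort pattern used here selects the k highest-scoring articles, with one quirk worth noting: np.argsort sorts ascending, so the [-k:] slice returns the picks ordered worst-to-best (the best article is last in the list). The new base-class getRecommendationReward re-scans the list for its maximum, so the ordering is harmless there. A small check:

    import numpy as np

    pta = np.array([0.3, 0.9, 0.1, 0.7])
    k = 2
    top = np.argsort(pta)[-k:]   # indices of the k largest scores, ascending
    print(top)                   # [3 1]
    print(pta[top])              # [0.7 0.9] -> best pick is last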
15 changes: 11 additions & 4 deletions lib/GOBLin.py
@@ -20,6 +20,7 @@ def __init__(self, featureDimension, lambda_, userNum, W):
self.AInv = np.linalg.inv(self.A)

self.theta = np.dot(self.AInv , self.b)
+        print np.kron(W, np.identity(n=featureDimension))
self.STBigWInv = sqrtm( np.linalg.inv(np.kron(W, np.identity(n=featureDimension))) )
self.STBigW = sqrtm(np.kron(W, np.identity(n=featureDimension)))
def updateParameters(self, articlePicked, click, userID, update):
@@ -55,12 +56,12 @@ class GOBLinAlgorithm(CoLinUCBAlgorithm):
def __init__(self, arg_dict):
CoLinUCBAlgorithm.__init__(self, arg_dict)
self.USERS = GOBLinSharedStruct(self.dimension, self.lambda_, self.n_users, self.W)
-        self.estimates['CanEstimateCoUserPreference'] = False
+        #self.estimates['CanEstimateCoUserPreference'] = False
def getLearntParameters(self, userID):
thetaMatrix = matrixize(self.USERS.theta, self.dimension)
return thetaMatrix.T[userID]

-    def decide(self, pool_articles, userID, exclude = []):
+    def decide(self, pool_articles, userID, k = 1):
# MEAN
art_features = np.empty([len(pool_articles), len(pool_articles[0].contextFeatureVector)*self.n_users])
for i in range(len(pool_articles)):
@@ -72,8 +73,14 @@ def decide(self, pool_articles, userID, exclude = []):
var_matrix = np.sqrt(np.dot(np.dot(CoFeaV, self.USERS.AInv), CoFeaV.T).clip(0))
pta_matrix = mean_matrix + self.alpha*np.diag(var_matrix)

-        pool_position = np.argmax(pta_matrix)
-        return pool_articles[pool_position]
+
+        pool_positions = np.argsort(pta_matrix)[(k*-1):]
+        articles = []
+        for i in range(k):
+            articles.append(pool_articles[pool_positions[i]])
+        return articles
+        # pool_position = np.argmax(pta_matrix)
+        # return pool_articles[pool_position]

#inherite from CoLinUCB_SelectUserAlgorithm
# class GOBLin_SelectUserAlgorithm(CoLinUCB_SelectUserAlgorithm):
