better initialization
david-cortes committed Jul 13, 2021
1 parent e807f61 commit 65e8463
Showing 4 changed files with 36 additions and 52 deletions.
2 changes: 0 additions & 2 deletions README.md
@@ -139,8 +139,6 @@ If passing `reindex=True`, all user and item IDs that you pass to `.fit` will be

For a more detailed example, see the IPython notebook [recommending songs with EchoNest MillionSong dataset](http://nbviewer.jupyter.org/github/david-cortes/hpfrec/blob/master/example/hpfrec_echonest.ipynb) illustrating its usage with the EchoNest TasteProfile dataset.

- This package contains only functionality related to fitting this model. For general evaluation metrics for recommendations on implicit data see other packages such as [LensKit](https://github.com/lenskit/lkpy).
-
## Documentation

Documentation is available at readthedocs: [http://hpfrec.readthedocs.io](http://hpfrec.readthedocs.io/en/latest/)
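As an aside to this README hunk: a minimal usage sketch of the `reindex=True` / `.fit` behavior referenced above. The DataFrame layout and the `topN` call follow the package's documented interface, but the specific IDs, counts, and parameter values are illustrative assumptions, not part of the commit.

```python
import pandas as pd
from hpfrec import HPF

# Toy implicit-feedback counts; with reindex=True the IDs can be arbitrary objects,
# since .fit maps them to internal integer indices.
counts_df = pd.DataFrame({
    'UserId': ['u1', 'u1', 'u2', 'u3', 'u3'],
    'ItemId': ['i1', 'i2', 'i2', 'i1', 'i3'],
    'Count':  [3, 1, 2, 5, 1]
})

model = HPF(k=10, reindex=True, random_seed=1)
model.fit(counts_df)

# Recommendations are then requested with the original (non-reindexed) IDs.
print(model.topN('u1', n=2))
```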
38 changes: 18 additions & 20 deletions hpfrec/__init__.py
@@ -916,29 +916,35 @@ def partial_fit(self, counts_df, batch_type='users', step_size=None,

def _initialize_extra_users(self, n, seed):
cython_loops = cython_loops_float if self.use_float else cython_loops_double
+ c_real_t = ctypes.c_float if self.use_float else ctypes.c_double
rng = np.random.default_rng(seed = seed if seed > 0 else None)

- new_Theta = rng.gamma(self.a, 1./self.b_prime, size=(n, self.k)).astype(cython_loops.c_real_t)
+ new_Gamma_shp = self.a_prime + 0.01 * rng.random(size=(n, self.k), dtype=c_real_t)
+ new_Gamma_rte = self.a_prime + 0.01 * rng.random(size=(n, self.k), dtype=c_real_t)
+ new_Theta = new_Gamma_shp / new_Gamma_rte
+ new_k_rte = np.empty((n,1), dtype=c_real_t)
+ new_k_rte[:,:] = self.b_prime
+
+ self.k_rte = np.r_[self.k_rte, new_k_rte]
self.Theta = np.r_[self.Theta, new_Theta]
- self.k_rte = np.r_[self.k_rte, b_prime + new_Theta.sum(axis=1, keepdims=True)]
- new_Gamma_rte = rng.gamma(self.a_prime, self.b_prime/self.a_prime, size=(n, 1)).astype(cython_loops.c_real_t) \
-     + self.Beta.sum(axis=0, keepdims=True)
self.Gamma_rte = np.r_[self.Gamma_rte, new_Gamma_rte]
- self.Gamma_shp = np.r_[self.Gamma_shp, new_Gamma_rte * new_Theta * \
-     rng.uniform(low=.85, high=1.15, size=(n, self.k)).astype(cython_loops.c_real_t)]
+ self.Gamma_shp = np.r_[self.Gamma_shp, new_Gamma_shp]
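To make the change easier to see in isolation, here is a plain-NumPy sketch of the revised extra-user initialization (a standalone rewrite with illustrative hyperparameter values, not code from the commit): both variational parameters start at roughly `a_prime` plus a small jitter, so the implied `Theta = Gamma_shp / Gamma_rte` begins near 1 instead of being drawn from the `Gamma(a, b_prime)` prior as before.

```python
import numpy as np

def init_extra_users_sketch(n, k, a_prime, b_prime, seed=1, dtype=np.float64):
    """Hypothetical standalone version of the new extra-user initialization."""
    rng = np.random.default_rng(seed)
    # Shape and rate both start at a_prime plus a jitter in [0, 0.01).
    Gamma_shp = a_prime + 0.01 * rng.random(size=(n, k), dtype=dtype)
    Gamma_rte = a_prime + 0.01 * rng.random(size=(n, k), dtype=dtype)
    # The implied mean-field estimate of Theta is the ratio, so it starts near 1.
    Theta = Gamma_shp / Gamma_rte
    # The per-user activity rate is simply set to b_prime.
    k_rte = np.full((n, 1), b_prime, dtype=dtype)
    return Gamma_shp, Gamma_rte, Theta, k_rte

Gamma_shp, Gamma_rte, Theta, k_rte = init_extra_users_sketch(n=5, k=10, a_prime=0.3, b_prime=0.3)
print(Theta.mean(), Theta.std())  # mean close to 1, very small spread
```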

def _initialize_extra_items(self, n, seed):
cython_loops = cython_loops_float if self.use_float else cython_loops_double
+ c_real_t = ctypes.c_float if self.use_float else ctypes.c_double
rng = np.random.default_rng(seed = seed if seed > 0 else None)

- new_Beta = rng.gamma(self.c, 1./self.d_prime, size=(n, self.k)).astype(cython_loops.c_real_t)
+ new_Lambda_shp = self.c_prime + 0.01 * rng.random(size=(n, self.k), dtype=c_real_t)
+ new_Lambda_rte = self.c_prime + 0.01 * rng.random(size=(n, self.k), dtype=c_real_t)
+ new_Beta = new_Lambda_shp / new_Lambda_rte
+ new_t_rte = np.empty((n,1), dtype=c_real_t)
+ new_t_rte[:,:] = self.d_prime
+
+ self.t_rte = np.r_[self.t_rte, new_t_rte]
self.Beta = np.r_[self.Beta, new_Beta]
- self.t_rte = np.r_[self.t_rte, self.d_prime + new_Beta.sum(axis=1, keepdims=True)]
- new_Lambda_rte = rng.gamma(self.c_prime, self.d_prime/self.c_prime, size=(n, 1)).astype(cython_loops.c_real_t) \
-     + self.Theta.sum(axis=0, keepdims=True)
self.Lambda_rte = np.r_[self.Lambda_rte, new_Lambda_rte]
- self.Lambda_shp = np.r_[self.Lambda_shp, new_Lambda_rte * new_Beta * \
-     rng.uniform(low=.85, high=1.15, size=(n, self.k)).astype(cython_loops.c_real_t)]
+ self.Lambda_shp = np.r_[self.Lambda_shp, new_Lambda_shp]

def _check_input_predict_factors(self, ncores, random_seed, stop_thr, maxiter):

@@ -978,10 +984,6 @@ def predict_factors(self, counts_df, maxiter=10, ncores=1, random_seed=1, stop_t
----
This function only works with one user at a time.
- Note
- ----
- This function is prone to producing all NaNs values.
Parameters
----------
counts_df : DataFrame or array (nsamples, 2)
@@ -1057,10 +1059,6 @@ def add_user(self, user_id, counts_df, update_existing=False, maxiter=10, ncores
----
For better results, refit the model again from scratch.
- Note
- ----
- This function is prone to producing all NaNs values.
Parameters
----------
user_id : obj
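The two docstring hunks above belong to `predict_factors` and `add_user`. As a rough illustration of the cold-start path they document (continuing the hypothetical `model` from the earlier README sketch; the column names and IDs here are assumptions):

```python
import pandas as pd

# Interactions for a user that was not present in the training data.
# Two columns, as the docstring describes; the names are assumed to follow
# the ItemId/Count convention used for fitting.
new_user_counts = pd.DataFrame({
    'ItemId': ['i1', 'i3'],
    'Count':  [2, 4]
})

# add_user runs a few update iterations for this user only, holding the
# item factors fixed, then appends the new user factors to the model.
model.add_user(user_id='u_new', counts_df=new_user_counts,
               update_existing=False, maxiter=10)

print(model.topN('u_new', n=3))
```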
45 changes: 17 additions & 28 deletions hpfrec/cython_loops.pxi
@@ -134,40 +134,29 @@ def eval_after_term(stop_crit, int verbose, int nthreads, int full_llk, ind_type
#####################################
def initialize_parameters(Theta, Beta, random_seed,
a, a_prime, b_prime, c, c_prime, d_prime):
- ### Comment: I'm not entirely sure how to initialize the variables according to the prior, and the
- ### initialization here differs from the implementation of the paper's author.
+ ### Comment: this is the initialization that was used in the original HPF code.
+ ### It doesn't exactly follow the paper's instructions about 'initializing according to prior',
+ ### but it gives better results than other initializations.

nU = Theta.shape[0]
nI = Beta.shape[0]
k = Theta.shape[1]

rng = np.random.Generator(np.random.MT19937(seed = random_seed if random_seed > 0 else None))
- Theta[:,:] = rng.gamma(a, 1./b_prime, size=(nU, k)).astype(c_real_t)
- Beta[:, :] = rng.gamma(c, 1./d_prime, size=(nI, k)).astype(c_real_t)
-
- ### Comment: the code above seems to give worse likelihood in the first iterations, but better
- ### local optima in the end, compared to initializing them like this:
- # cdef np.ndarray[double, ndim=2] ksi = rng.gamma(a_prime, b_prime/a_prime, size=(nU,1))
- # Theta[:,:] = rng.gamma(a, 1/ksi, size=(nU, k)).astype(c_real_t)
- # cdef np.ndarray[double, ndim=2] eta = rng.gamma(c_prime, d_prime/c_prime, size=(nI,1))
- # Beta[:,:] = rng.gamma(c, 1/eta, size=(nI, k)).astype(c_real_t)
-
- ### Comment: the extra randomization here tends to lead to better end results,
- ### but has numeric precision issues when using float type
- k_rte = a_prime/b_prime + Theta.sum(axis=1, keepdims=True) * rng.uniform(low=.85, high=1.15, size=(nU, 1)).astype(c_real_t)
- t_rte = c_prime/d_prime + Beta.sum(axis=1, keepdims=True) * rng.uniform(low=.85, high=1.15, size=(nI, 1)).astype(c_real_t)
-
- Gamma_rte = rng.gamma(a_prime, b_prime/a_prime, size=(nU, 1)).astype(c_real_t) \
-     + Beta.sum(axis=0, keepdims=True) * rng.uniform(low=.85, high=1.15, size=(nU, 1)).astype(c_real_t)
- Lambda_rte = rng.gamma(c_prime, d_prime/c_prime, size=(nI, 1)).astype(c_real_t) \
-     + Theta.sum(axis=0, keepdims=True) * rng.uniform(low=.85, high=1.15, size=(nI, 1)).astype(c_real_t)
-
- Gamma_shp = Gamma_rte * Theta * rng.uniform(low=.85, high=1.15, size=(nU, k)).astype(c_real_t)
- Lambda_shp = Lambda_rte * Beta * rng.uniform(low=.85, high=1.15, size=(nI, k)).astype(c_real_t)
- np.nan_to_num(Gamma_shp, copy=False)
- np.nan_to_num(Lambda_shp, copy=False)
- np.nan_to_num(Gamma_rte, copy=False)
- np.nan_to_num(Lambda_rte, copy=False)
+
+ k_rte = np.empty((nU,1), dtype=c_real_t)
+ t_rte = np.empty((nI,1), dtype=c_real_t)
+ k_rte[:,:] = b_prime
+ t_rte[:,:] = d_prime
+
+ Gamma_rte = a_prime + 0.01 * rng.random(size=(nU, k), dtype=c_real_t)
+ Lambda_rte = c_prime + 0.01 * rng.random(size=(nI, k), dtype=c_real_t)
+
+ Gamma_shp = a_prime + 0.01 * rng.random(size=(nU, k), dtype=c_real_t)
+ Lambda_shp = c_prime + 0.01 * rng.random(size=(nI, k), dtype=c_real_t)
+
+ Theta[:,:] = Gamma_shp / Gamma_rte
+ Beta[:,:] = Lambda_shp / Lambda_rte

return Gamma_shp, Gamma_rte, Lambda_shp, Lambda_rte, k_rte, t_rte
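Taken together, the new scheme fixes `k_rte`/`t_rte` at `b_prime`/`d_prime` and starts all four variational matrices at their prior shape plus a small jitter, so `Theta` and `Beta` begin tightly clustered around 1 rather than being sampled from heavy-tailed Gamma priors (which is also why the `np.nan_to_num` guards could be dropped). A quick NumPy comparison of the two starting distributions, with hyperparameter values assumed here for illustration:

```python
import numpy as np

# Illustrative hyperparameters (assumed, not taken from the diff).
a, a_prime, b_prime = 0.3, 0.3, 0.3
nU, k = 1000, 30
rng = np.random.default_rng(123)

# Old scheme: Theta drawn from the Gamma prior -> mean a/b_prime but heavily dispersed.
theta_old = rng.gamma(a, 1. / b_prime, size=(nU, k))

# New scheme: shape and rate start at a_prime plus jitter -> Theta tightly clustered near 1.
gamma_shp = a_prime + 0.01 * rng.random(size=(nU, k))
gamma_rte = a_prime + 0.01 * rng.random(size=(nU, k))
theta_new = gamma_shp / gamma_rte

print(theta_old.mean(), theta_old.std())  # roughly 1.0, std well above 1
print(theta_new.mean(), theta_new.std())  # roughly 1.0, std on the order of 0.01
```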

3 changes: 1 addition & 2 deletions setup.py
@@ -5,7 +5,6 @@
except:
from distutils.core import setup
from distutils.extension import Extension
- from Cython.Build import cythonize
from Cython.Distutils import build_ext
import numpy
import sys, os
@@ -58,7 +57,7 @@ def build_extensions(self):
'scipy',
'cython'
],
- version = '0.2.3.4',
+ version = '0.2.4',
description = 'Hierarchical Poisson matrix factorization for recommender systems',
author = 'David Cortes',
author_email = 'david.cortes.rivera@gmail.com',
