Skip to content

Commit

Permalink
fixed incorrect data checks
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes committed Jan 26, 2019
1 parent b1e2e37 commit f251401
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 13 deletions.
24 changes: 13 additions & 11 deletions hpfrec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ def __init__(self, k=30, a=0.3, a_prime=0.3, b_prime=1.0,
assert maxiter>0
assert isinstance(maxiter, int)
else:
maxiter = 10**10
if stop_crit!='maxiter':
if stop_crit == 'maxiter':
raise ValueError("If 'stop_crit' is set to 'maxiter', must provide a maximum number of iterations.")
maxiter = 10**10

if check_every is not None:
assert isinstance(check_every, int)
Expand Down Expand Up @@ -421,15 +421,16 @@ def _process_data(self, input_df):
if isinstance(input_df, np.ndarray):
assert len(input_df.shape) > 1
assert input_df.shape[1] >= 3
input_df = input_df.values[:,:3]
input_df = pd.DataFrame(input_df[:, :3])
input_df.columns = ['UserId', 'ItemId', "Count"]

if input_df.__class__.__name__ == 'DataFrame':
elif input_df.__class__.__name__ == 'DataFrame':
assert input_df.shape[0] > 0
assert 'UserId' in input_df.columns.values
assert 'ItemId' in input_df.columns.values
assert 'Count' in input_df.columns.values
self.input_df = input_df[['UserId', 'ItemId', 'Count']]

elif input_df.__class__.__name__ == 'coo_matrix':
self.nusers = input_df.shape[0]
self.nitems = input_df.shape[1]
Expand Down Expand Up @@ -488,7 +489,7 @@ def _process_data(self, input_df):
pf.write("random seed: None\n")

if self.input_df['Count'].dtype != ctypes.c_float:
self.input_df['Count'] = self.input_df.Count.astype('float32')
self.input_df['Count'] = self.input_df.Count.astype(ctypes.c_float)
if self.input_df['UserId'].dtype != cython_loops.obj_ind_type:
self.input_df['UserId'] = self.input_df.UserId.astype(cython_loops.obj_ind_type)
if self.input_df['ItemId'].dtype != cython_loops.obj_ind_type:
Expand All @@ -507,15 +508,16 @@ def _process_valset(self, val_set, valset=True):
if isinstance(val_set, np.ndarray):
assert len(val_set.shape) > 1
assert val_set.shape[1] >= 3
val_set = val_set.values[:,:3]
val_set = pd.DataFrame(val_set[:, :3])
val_set.columns = ['UserId', 'ItemId', "Count"]

if val_set.__class__.__name__ == 'DataFrame':
elif val_set.__class__.__name__ == 'DataFrame':
assert val_set.shape[0] > 0
assert 'UserId' in val_set.columns.values
assert 'ItemId' in val_set.columns.values
assert 'Count' in val_set.columns.values
self.val_set = val_set[['UserId', 'ItemId', 'Count']]

elif val_set.__class__.__name__ == 'coo_matrix':
assert val_set.shape[0] <= self.nusers
assert val_set.shape[1] <= self.nitems
Expand Down Expand Up @@ -556,7 +558,7 @@ def _process_valset(self, val_set, valset=True):
self.val_set.reset_index(drop=True, inplace=True)

if self.val_set['Count'].dtype != ctypes.c_float:
self.val_set['Count'] = self.val_set.Count.astype('float32')
self.val_set['Count'] = self.val_set.Count.astype(ctypes.c_float)
if self.val_set['UserId'].dtype != cython_loops.obj_ind_type:
self.val_set['UserId'] = self.val_set.UserId.astype(cython_loops.obj_ind_type)
if self.val_set['ItemId'].dtype != cython_loops.obj_ind_type:
Expand All @@ -579,8 +581,8 @@ def _store_metadata(self, for_partial_fit=False):

def _cast_before_fit(self):
## setting all parameters and data to the right type
self.Theta = np.empty((self.nusers, self.k), dtype='float32')
self.Beta = np.empty((self.nitems, self.k), dtype='float32')
self.Theta = np.empty((self.nusers, self.k), dtype=ctypes.c_float)
self.Beta = np.empty((self.nitems, self.k), dtype=ctypes.c_float)
self.k = cython_loops.cast_ind_type(self.k)
self.nusers = cython_loops.cast_ind_type(self.nusers)
self.nitems = cython_loops.cast_ind_type(self.nitems)
Expand Down Expand Up @@ -610,7 +612,7 @@ def _fit(self):
self.val_set = pd.DataFrame(np.empty((0,3)), columns=['UserId','ItemId','Count'])
self.val_set['UserId'] = self.val_set.UserId.astype(cython_loops.obj_ind_type)
self.val_set['ItemId'] = self.val_set.ItemId.astype(cython_loops.obj_ind_type)
self.val_set['Count'] = self.val_set.Count.values.astype('float32')
self.val_set['Count'] = self.val_set.Count.values.astype(ctypes.c_float)
else:
use_valset = cython_loops.cast_int(1)

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def build_extensions(self):
e.extra_compile_args = ['/openmp', '/O2']
else: # gcc and clang
for e in self.extensions:
e.extra_compile_args = ['-fopenmp', '-O3', '-march=native']
e.extra_compile_args = ['-fopenmp', '-O2', '-march=native']
e.extra_link_args = ['-fopenmp']
### Comment: -Ofast gives worse speed than -O2 or -O3
build_ext.build_extensions(self)
Expand All @@ -29,7 +29,7 @@ def build_extensions(self):
'scipy',
'cython'
],
version = '0.2.2.11',
version = '0.2.2.12',
description = 'Hierarchical Poisson matrix factorization for recommender systems',
author = 'David Cortes',
author_email = 'david.cortes.rivera@gmail.com',
Expand Down

0 comments on commit f251401

Please sign in to comment.