Skip to content

Commit

Permalink
Don't use dot (attribute) access for pandas columns; use bracket indexing instead
Browse files: browse the repository at this point in the history
  • Loading branch information
david-cortes committed Feb 24, 2024
1 parent a7b33cc commit 4412301
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions hpfrec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,8 @@ def _process_data(self, input_df):
self.input_df = self.input_df.loc[~obs_zero]

if self.reindex:
self.input_df["UserId"], self.user_mapping_ = pd.factorize(self.input_df.UserId)
self.input_df["ItemId"], self.item_mapping_ = pd.factorize(self.input_df.ItemId)
self.input_df["UserId"], self.user_mapping_ = pd.factorize(self.input_df["UserId"])
self.input_df["ItemId"], self.item_mapping_ = pd.factorize(self.input_df["ItemId"])
self.user_mapping_ = np.require(self.user_mapping_, requirements=["ENSUREARRAY"]).reshape(-1)
self.item_mapping_ = np.require(self.item_mapping_, requirements=["ENSUREARRAY"]).reshape(-1)
self.nusers = self.user_mapping_.shape[0]
Expand All @@ -488,8 +488,8 @@ def _process_data(self, input_df):
pd.Series(self.item_mapping_).to_csv(os.path.join(self.save_folder, 'items.csv'), index=False)
else:
if calc_n:
self.nusers = self.input_df.UserId.max() + 1
self.nitems = self.input_df.ItemId.max() + 1
self.nusers = self.input_df["UserId"].max() + 1
self.nitems = self.input_df["ItemId"].max() + 1

if self.save_folder is not None:
with open(os.path.join(self.save_folder, "hyperparameters.txt"), "w") as pf:
Expand All @@ -507,11 +507,11 @@ def _process_data(self, input_df):

cython_loops = cython_loops_float if self.use_float else cython_loops_double
if self.input_df['Count'].dtype != cython_loops.c_real_t:
self.input_df['Count'] = self.input_df.Count.astype(cython_loops.c_real_t)
self.input_df['Count'] = self.input_df["Count"].astype(cython_loops.c_real_t)
if self.input_df['UserId'].dtype != cython_loops.obj_ind_type:
self.input_df['UserId'] = self.input_df.UserId.astype(cython_loops.obj_ind_type)
self.input_df['UserId'] = self.input_df["UserId"].astype(cython_loops.obj_ind_type)
if self.input_df['ItemId'].dtype != cython_loops.obj_ind_type:
self.input_df['ItemId'] = self.input_df.ItemId.astype(cython_loops.obj_ind_type)
self.input_df['ItemId'] = self.input_df["ItemId"].astype(cython_loops.obj_ind_type)

if self.users_per_batch != 0:
if self.nusers < self.users_per_batch:
Expand Down Expand Up @@ -558,9 +558,9 @@ def _process_valset(self, val_set, valset=True):
self.val_set = self.val_set.loc[~obs_zero]

if self.reindex:
self.val_set['UserId'] = pd.Categorical(self.val_set.UserId, self.user_mapping_).codes
self.val_set['ItemId'] = pd.Categorical(self.val_set.ItemId, self.item_mapping_).codes
self.val_set = self.val_set.loc[(self.val_set.UserId != (-1)) & (self.val_set.ItemId != (-1))]
self.val_set['UserId'] = pd.Categorical(self.val_set["UserId"], self.user_mapping_).codes
self.val_set['ItemId'] = pd.Categorical(self.val_set["ItemId"], self.item_mapping_).codes
self.val_set = self.val_set.loc[(self.val_set["UserId"] != (-1)) & (self.val_set["ItemId"] != (-1))]
if self.val_set.shape[0] == 0:
if valset:
warnings.warn("Validation set has no combinations of users and items"+
Expand All @@ -577,11 +577,11 @@ def _process_valset(self, val_set, valset=True):

cython_loops = cython_loops_float if self.use_float else cython_loops_double
if self.val_set['Count'].dtype != cython_loops.c_real_t:
self.val_set['Count'] = self.val_set.Count.astype(cython_loops.c_real_t)
self.val_set['Count'] = self.val_set["Count"].astype(cython_loops.c_real_t)
if self.val_set['UserId'].dtype != cython_loops.obj_ind_type:
self.val_set['UserId'] = self.val_set.UserId.astype(cython_loops.obj_ind_type)
self.val_set['UserId'] = self.val_set["UserId"].astype(cython_loops.obj_ind_type)
if self.val_set['ItemId'].dtype != cython_loops.obj_ind_type:
self.val_set['ItemId'] = self.val_set.ItemId.astype(cython_loops.obj_ind_type)
self.val_set['ItemId'] = self.val_set["ItemId"].astype(cython_loops.obj_ind_type)
return None

def _store_metadata(self, for_partial_fit=False):
Expand Down Expand Up @@ -703,7 +703,7 @@ def _process_data_single(self, counts_df):
raise ValueError("Can only make calculations for items that were in the training set.")
else:
counts_df["ItemId"] = pd.Categorical(counts_df["ItemId"].to_numpy(copy=False), self.item_mapping_).codes
if (counts_df.ItemId == -1).sum() > 0:
if (counts_df["ItemId"] == -1).sum() > 0:
raise ValueError("Can only make calculations for items that were in the training set.")

cython_loops = cython_loops_float if self.use_float else cython_loops_double
Expand Down

0 comments on commit 4412301

Please sign in to comment.