In [1]:
import pandas as pd, numpy as np
from hpfrec import HPF

In [2]:
## Simulating a small table of (user, item, count) interactions
nusers, nitems = 100, 100
nobs = 10000

## Fixed seed so the simulated table is the same on every run.
## The three columns are drawn in this order: UserId, ItemId, Count.
np.random.seed(1)
counts_df = (
    pd.DataFrame({
        'UserId': np.random.randint(nusers, size=nobs),
        'ItemId': np.random.randint(nitems, size=nobs),
        'Count': np.random.gamma(1, 1, size=nobs).astype('int32'),
    })
    # truncating the gamma draws to integers yields many zeros; keep only positive counts
    .loc[lambda frame: frame['Count'] > 0]
    .reset_index(drop=True)
)

## Training on full data

In [3]:
## Full function call: every constructor argument is spelled out explicitly
recommender = HPF(
    # number of latent factors
    k=30,
    # model hyperparameters (presumably the Gamma priors of HPF -- confirm against the hpfrec docs)
    a=0.3, a_prime=0.3, b_prime=1.0,
    c=0.3, c_prime=0.3, d_prime=1.0,
    # stopping rule: criterion, how often it is evaluated, threshold, iteration cap
    stop_crit='train-llk',
    check_every=10,
    stop_thr=1e-3,
    maxiter=100,
    # batching options, left unset here, and the step-size schedule
    users_per_batch=None,
    items_per_batch=None,
    step_size=lambda x: 1/np.sqrt(x+2),
    # execution and logging
    ncores=-1,
    verbose=True,
    random_seed=None,
    # IDs in the data are used as given, without re-enumeration
    reindex=False,
    # numerical and memory switches
    allow_inconsistent_math=False,
    full_llk=False,
    alloc_full_phi=False,
    sum_exp_trick=False,
    # what is retained on the fitted object / written to disk
    keep_data=True,
    keep_all_objs=True,
    produce_dicts=True,
    save_folder=None,
)
recommender.fit(counts_df)

**********************************
Hierarchical Poisson Factorization
**********************************

Number of users: 100
Number of items: 100
Latent factors to use: 30

Initializing parameters...
Allocating Phi matrix...
Initializing optimization procedure...
Iteration 10 | train llk: -6579 | train rmse: 1.1295
Iteration 20 | train llk: -6134 | train rmse: 1.0969
Iteration 30 | train llk: -6011 | train rmse: 1.0904
Iteration 40 | train llk: -5952 | train rmse: 1.0901
Iteration 50 | train llk: -5913 | train rmse: 1.0872
Iteration 60 | train llk: -5890 | train rmse: 1.0863
Iteration 70 | train llk: -5869 | train rmse: 1.0861
Iteration 80 | train llk: -5848 | train rmse: 1.0859
Iteration 90 | train llk: -5830 | train rmse: 1.0861
Iteration 100 | train llk: -5813 | train rmse: 1.0848


Optimization finished
Final log-likelihood: -5813
Final RMSE: 1.0848
Minutes taken (optimization part): 0.1



<hpfrec.HPF at 0x7ff361dbdda0>

## Getting factors and recommendations for a new user 

In [4]:
## Simulated interactions of a single user that the model has not seen
nobs_new = 20
np.random.seed(2)
counts_df_new = (
    pd.DataFrame({
        # sampling without replacement, so each item appears at most once for this user
        'ItemId': np.random.choice(np.arange(nitems), size=nobs_new, replace=False),
        'Count': np.random.gamma(1, 1, size=nobs_new).astype('int32'),
    })
    # drop the rows whose truncated count is zero
    .loc[lambda frame: frame['Count'] > 0]
    .reset_index(drop=True)
)

## Factors for this user, computed from the new data
recommender.predict_factors(counts_df_new)

array([8.041132  , 0.01608876, 0.5535319 , 0.02366297, 2.0545833 ,
       0.05637857, 0.02476999, 0.01712107, 0.02405872, 0.01954543,
       9.974459  , 0.02056715, 0.02559502, 0.01939566, 0.01619562,
       0.01949963, 0.02690121, 3.9166534 , 0.9496915 , 0.02275413,
       0.02686422, 0.02180668, 0.02004912, 0.01471848, 0.07501479,
       0.01603007, 1.9092509 , 0.02013228, 0.01533022, 0.02449144],
      dtype=float32)

In [6]:
## Register the new user, then ask for their ten best unseen items.
## NOTE(review): the user is added with id `nusers+1` but queried as `nusers`;
## presumably with reindex=False the new user simply takes the next row index -- confirm against the hpfrec docs.
recommender.add_user(
    counts_df=counts_df_new,
    user_id=nusers + 1,
)
recommender.topN(
    n=10,
    user=nusers,
    exclude_seen=True,
)

array([50, 24, 63, 78, 22,  9, 43, 68, 12, 87])

### Pickling the object

In [8]:
import pickle

## The step-size schedule was passed as a lambda, which `pickle` cannot serialize.
## Detach it only while dumping and put it back afterwards, so that saving the
## model does not permanently strip the attribute from the live `recommender`.
## The object stored in the file still has `step_size = None`.
saved_step_size = getattr(recommender, 'step_size', None)
recommender.step_size = None
try:
    with open('recommender.pkl', 'wb') as fp:
        pickle.dump(recommender, fp)
finally:
    # restore even if the dump fails part-way
    recommender.step_size = saved_step_size

In [9]:
## Read the model back from disk into a separate variable.
## Only unpickle files you trust: loading a pickle can execute arbitrary code.
rec = None  # stays None if the load below fails
with open('recommender.pkl', 'rb') as pkl_file:
    rec = pickle.load(pkl_file)

In [19]:
## The reloaded model accepts new users as well.
## NOTE(review): added as `nusers+2` but queried as `nusers+1`; presumably the
## new user takes the next free row index when reindex=False -- confirm against the hpfrec docs.
rec.add_user(
    counts_df=counts_df_new,
    user_id=nusers + 2,
)
rec.topN(
    n=10,
    user=nusers + 1,
    exclude_seen=True,
)

array([50, 24, 63, 78, 22,  9, 43, 68, 12, 87])

## Working with partial data

In [4]:
## Fitting a fresh model incrementally, one batch of users at a time
recommender = HPF(reindex=False, keep_data=False)

## Seed before sampling so the batches do not depend on whatever RNG state
## earlier cells happened to leave behind (the other random cells seed too).
np.random.seed(3)

## Three sets of user IDs; they may overlap, and duplicates within a draw
## are removed by np.unique, so each batch has at most 20 users.
users_batch1 = np.unique(np.random.randint(nusers, size=20))
users_batch2 = np.unique(np.random.randint(nusers, size=20))
users_batch3 = np.unique(np.random.randint(nusers, size=20))

## The overall dimensions are passed on the first call only; later calls omit them.
## `nusers` / `nitems` from the data-generation cell replace the repeated literal 10**2.
recommender.partial_fit(counts_df.loc[counts_df.UserId.isin(users_batch1)], nusers=nusers, nitems=nitems)
recommender.partial_fit(counts_df.loc[counts_df.UserId.isin(users_batch2)])
recommender.partial_fit(counts_df.loc[counts_df.UserId.isin(users_batch3)])

<hpfrec.HPF at 0x7fe8fcf05748>