Skip to content

Commit

Permalink
v0.0.14
Browse files Browse the repository at this point in the history
  • Loading branch information
bogliosimone committed Apr 14, 2019
1 parent f639888 commit 67a3548
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 15 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -66,8 +66,8 @@ urm = sim.normalization.bm25(urm)
# train the model with 50 knn per item
model = sim.cosine(urm.T, k=50)

# recommend 100 items to users 1, 14 and 8
user_recommendations = sim.dot_product(urm, model.T, target_rows=[1,14,8], k=100)
# recommend 100 items to users 1, 14 and 8, filtering the items already seen by each user
user_recommendations = sim.dot_product(urm, model.T, k=100, target_rows=[1,14,8], filter_cols=urm)

```

Expand Down
6 changes: 6 additions & 0 deletions guide/temp_guide.md
Expand Up @@ -16,6 +16,12 @@ This is a first guide, full documentation coming soon.

**_target_rows_** : if set to *None* it computes the whole matrix, otherwise it computes only the target rows (default *[target_rows=None]*)

**_target_cols_** : if set to *None* it computes the whole matrix, otherwise it computes only the target cols before applying the KNN; _target_cols_ can be either an array/list with the indexes of the columns you want to keep (in which case it is applied equally to all the rows), or a sparse matrix where the ones identify the columns you want to keep, so that each row of the similarity is matched with the corresponding row of the _target_cols_ matrix (default *[target_cols=None]*)
_Note_: this filter could slow down the computation.

**_filter_cols_** : if set to *None* it computes the whole matrix, otherwise it filters out the given cols before applying the KNN; _filter_cols_ can be either an array/list with the indexes of the columns you want to filter out (in which case it is applied equally to all the rows), or a sparse matrix where the ones identify the columns you want to filter out, so that each row of the similarity is matched with the corresponding row of the _filter_cols_ matrix (default *[filter_cols=None]*)
_Note_: this filter could slow down the computation.

**_verbose_** : *True* show progress bar, *False* hide progress bar (default *[verbose=True]*)

**_format_output_** : output format for the model matrix, support values are *coo* and *csr* (default *[format_output='coo']*)
Expand Down
2 changes: 1 addition & 1 deletion similaripy/__init__.py
@@ -1,5 +1,5 @@
from .similarity import *

__version__ = '0.0.13'
__version__ = '0.0.14'

__all__ = []
58 changes: 56 additions & 2 deletions similaripy/cython_code/s_plus.h
Expand Up @@ -53,7 +53,12 @@ class SparseMatrixMultiplier {
Value l1, Value l2, Value l3, // weights tversky and cosine and depop
Value t1, Value t2, // tversky coefficients
Value c1, Value c2, // cosine exponents
Value shrink, Value threshold)
Value shrink, Value threshold,
Index filter_mode,
Index * filter_m_indptr, Index * filter_m_indices,
Index target_col_mode,
Index * target_col_m_indptr, Index * target_col_m_indices
)
:
sums(column_count, 0),
nonzeros(column_count, -1),
Expand All @@ -65,6 +70,12 @@ class SparseMatrixMultiplier {
t1(t1), t2(t2),
c1(c1), c2(c2),
shrink(shrink), threshold(threshold),
filter_mode(filter_mode),
filter_m_indptr(filter_m_indptr),
filter_m_indices(filter_m_indices),
target_col_mode(target_col_mode),
target_col_m_indptr(target_col_m_indptr),
target_col_m_indices(target_col_m_indices),
head(-2), length(0) {
}

Expand All @@ -91,6 +102,8 @@ class SparseMatrixMultiplier {
Index col = head;
Value xy = sums[col];
Value valTversky=0, valCosine=0, valDepop=0, val=xy;
bool filter = false;
bool target_col = true;

if(l1!=0) // tversky
valTversky = l1 * (t1 * (Xtversky[row] - xy) + t2 * (Ytversky[col] - xy) + xy);
Expand All @@ -103,7 +116,43 @@ class SparseMatrixMultiplier {
if(l1!=0 || l2!=0 || l3!=0 ||shrink!=0)
val = xy/(valTversky + valCosine + valDepop + shrink);

if (val >= threshold)
// filter cols, filter_mode = 0:none, 1:array, 2:matrix
if (filter_mode !=0 ){
if(filter_mode == 1){
Index start = filter_m_indptr[0];
Index end = filter_m_indptr[1];
if(std::binary_search(&filter_m_indices[start], &filter_m_indices[end], col))
filter = true;
}
else{
if(filter_mode == 2){
Index start = filter_m_indptr[row];
Index end = filter_m_indptr[row+1];
if(std::binary_search(&filter_m_indices[start], &filter_m_indices[end], col))
filter = true;
}
}
}

// keep only the target cols, target_mode = 0:all, 1:array, 2:matrix
if (target_col_mode !=0 ){
if(target_col_mode == 1){
Index start = target_col_m_indptr[0];
Index end = target_col_m_indptr[1];
if(!std::binary_search(&target_col_m_indices[start], &target_col_m_indices[end], col))
target_col = false;
}
else{
if(target_col_mode == 2){
Index start = target_col_m_indptr[row];
Index end = target_col_m_indptr[row+1];
if(!std::binary_search(&target_col_m_indices[start], &target_col_m_indices[end], col))
target_col = false;
}
}
}

if (val >= threshold && !filter && target_col)
f(col, val);
// clear up memory and advance linked list
head = nonzeros[head];
Expand All @@ -128,7 +177,12 @@ class SparseMatrixMultiplier {
Value c1, c2;
Value shrink, threshold;
Index row;
Index filter_mode;
Index * filter_m_indptr, * filter_m_indices;
Index target_col_mode;
Index * target_col_m_indptr, * target_col_m_indices;
Index head, length;

};
} // namespace similarity
#endif // SPLUS
86 changes: 78 additions & 8 deletions similaripy/cython_code/s_plus.pyx
Expand Up @@ -32,7 +32,14 @@ cdef extern from "s_plus.h" namespace "s_plus" nogil:
Value l1, Value l2, Value l3,
Value t1, Value t2,
Value c1, Value c2,
Value shrink, Value threshold)
Value shrink, Value threshold,
Index filter_mode,
Index * filter_m_indptr,
Index * filter_m_indices,
Index target_col_mode,
Index * target_col_m_indptr,
Index * target_col_m_indices
)
void add(Index index, Value value)
void setIndexRow(Index index)
void foreach[Function](Function & f)
Expand All @@ -55,21 +62,28 @@ def s_plus(
unsigned int k=100, float shrink=0, float threshold=0,
binary=False,
target_rows=None,
filter_cols=None,
target_cols=None,
verbose=True,
format_output='csr',
int num_threads=0):

assert sp.issparse(matrix1), 'matrix m1 must be a sparse matrix'
# if receive only matrix1 in input
if matrix2 is None:
matrix2=matrix1.T
assert sp.issparse(matrix2), 'matrix m2 must be a sparse matrix'

# check that all parameters are consistent
assert(matrix1.shape[1]==matrix2.shape[0])
assert(len(weight_depop_matrix1)==matrix1.shape[0] or weight_depop_matrix1 in ('none','sum'))
assert(len(weight_depop_matrix2)==matrix2.shape[1] or weight_depop_matrix2 in ('none','sum'))
assert(target_rows is None or len(target_rows)<=matrix1.shape[0])
assert(verbose==True or verbose==False)
assert(format_output=='coo' or format_output=='csr')
assert matrix1.shape[1]==matrix2.shape[0], 'error shape matrixs'
assert k >= 1 and k <= matrix2.shape[1], 'k must be >=1 and <= m2.shape[1]'
assert len(weight_depop_matrix1)==matrix1.shape[0] or weight_depop_matrix1 in ('none','sum'), 'error format weighs_depop matrix1'
assert len(weight_depop_matrix2)==matrix2.shape[1] or weight_depop_matrix2 in ('none','sum'), 'error format weighs_depop matrix2'
assert target_rows is None or len(target_rows)<=matrix1.shape[0], 'error target rows'
assert filter_cols is None or sp.issparse(filter_cols) or isinstance(filter_cols,(list,np.ndarray)), 'error format filter_cols'
assert target_cols is None or sp.issparse(target_cols) or isinstance(target_cols,(list,np.ndarray)), 'error format target_cols'
assert verbose==True or verbose==False, 'verbose must be boolean'
assert format_output=='coo' or format_output=='csr', 'output format must be \'coo\' or \'csr\''

# build target rows (only the row that must be computed)
if target_rows is None:
Expand Down Expand Up @@ -168,6 +182,57 @@ def s_plus(

### END OF PREPROCESSING ###

# filter col matrix
# mode: 0 no filter, 1 filter array, 2 filter matrix
cdef int filter_col_mode
cdef int[:] filter_m_indptr
cdef int[:] filter_m_indices

if sp.issparse(filter_cols) and filter_cols.data.shape[0] != 0:
assert filter_cols.shape == (item_count, user_count), 'shape filter_cols matrix not correct'
filter_col_mode = 2
# build indices and indptrs and sort indices since we will use binary search
filter_cols = filter_cols.tocsr()
filter_cols.eliminate_zeros()
filter_cols.sort_indices()
filter_m_indptr = np.array(filter_cols.indptr, dtype=np.int32)
filter_m_indices = np.array(filter_cols.indices, dtype=np.int32)
elif isinstance(filter_cols, (list, np.ndarray)) and len(filter_cols) != 0:
filter_col_mode = 1
# sort array since we will use binary search
filter_m_indptr = np.array([0,len(filter_cols)], dtype=np.int32)
filter_m_indices = np.array(filter_cols, dtype=np.int32)
else:
# filter cols is empty or None
filter_col_mode = 0
filter_m_indptr = np.array([],dtype=np.int32)
filter_m_indices = np.array([],dtype=np.int32)

# target col matrix
# mode: 0 target all, 1 target array, 2 target matrix
cdef int target_col_mode = 0
cdef int[:] target_m_indptr
cdef int[:] target_m_indices

if sp.issparse(target_cols):
assert target_cols.shape == (item_count, user_count), 'shape target_cols matrix not correct'
target_col_mode = 2
# build indices and indptrs and sort indices since we will use binary search
target_cols = target_cols.tocsr()
target_cols.eliminate_zeros()
target_cols.sort_indices()
target_m_indptr = np.array(target_cols.indptr, dtype=np.int32)
target_m_indices = np.array(target_cols.indices, dtype=np.int32)
elif isinstance(target_cols, (list, np.ndarray)):
target_col_mode = 1
target_m_indptr = np.array([0,len(target_cols)], dtype=np.int32)
target_m_indices = np.array(target_cols, dtype=np.int32)
else:
# target cols is None
target_col_mode = 0
target_m_indptr = np.array([],dtype=np.int32)
target_m_indices = np.array([],dtype=np.int32)

# set progress bar
cdef int counter = 0
cdef int * counter_add = address(counter)
Expand Down Expand Up @@ -198,7 +263,12 @@ def s_plus(
l1, l2, l3,
t1, t2,
c1, c2,
shrink, threshold)
shrink, threshold,
filter_col_mode,
&filter_m_indptr[0], &filter_m_indices[0],
target_col_mode,
&target_m_indptr[0], &target_m_indices[0],
)
topk = new TopK[int, float](k)
try:
for i in prange(n_targets, schedule='dynamic'):
Expand Down

0 comments on commit 67a3548

Please sign in to comment.