-
Notifications
You must be signed in to change notification settings - Fork 31
/
cmfrec.h
2196 lines (2162 loc) · 77.7 KB
/
cmfrec.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*******************************************************************************
Collective Matrix Factorization
-------------------------------
This is a module for multi-way factorization of sparse and dense matrices
intended to be used for recommender system with explicit feedback data plus
side information about users and/or items.
The reference papers are:
(a) Cortes, David.
"Cold-start recommendations in Collective Matrix Factorization."
arXiv preprint arXiv:1809.00366 (2018).
(b) Singh, Ajit P., and Geoffrey J. Gordon.
"Relational learning via collective matrix factorization."
Proceedings of the 14th ACM SIGKDD international conference on
Knowledge discovery and data mining. 2008.
(c) Hu, Yifan, Yehuda Koren, and Chris Volinsky.
"Collaborative filtering for implicit feedback datasets."
2008 Eighth IEEE International Conference on Data Mining.
Ieee, 2008.
(d) Takacs, Gabor, Istvan Pilaszy, and Domonkos Tikk.
"Applications of the conjugate gradient method for
implicit feedback collaborative filtering."
Proceedings of the fifth ACM conference on
Recommender systems. 2011.
(e) Rendle, Steffen, Li Zhang, and Yehuda Koren.
"On the difficulty of evaluating baselines:
A study on recommender systems."
arXiv preprint arXiv:1905.01395 (2019).
For information about the models offered here and how they are fit to
the data, see the files 'collective.c' and 'offsets.c'.
Written for C99 standard and OpenMP version 2.0 or higher, and aimed to be
used either as a stand-alone program, or wrapped into scripting languages
such as Python and R.
<https://www.github.com/david-cortes/cmfrec>
MIT License:
Copyright (c) 2020 David Cortes
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*******************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <math.h>
#include <signal.h>
#ifndef _FOR_R
#include <stdio.h>
#endif
/* Thread-index query: when the compiler is not running in OpenMP mode,
   substitute a macro that always yields 0; otherwise use the real
   OpenMP runtime header. */
#ifndef _OPENMP
    #define omp_get_thread_num() (0)
#else
    #include <omp.h>
#endif
/* Wrapper-specific headers and output redirection.
     _FOR_PYTHON : optionally pulls in "findblas.h" (when USE_FINDBLAS is set).
     _FOR_R      : pulls in R's headers (including its BLAS/LAPACK ones),
                   defines USE_DOUBLE and FORCE_NO_NAN_PROPAGATION, and
                   reroutes printf/fprintf to R's own printing functions.
     MKL_ILP64   : pulls in "mkl.h". */
#ifdef _FOR_PYTHON
    /* This contains the standard cblas.h header */
    #ifdef USE_FINDBLAS
        #include "findblas.h" /* https://www.github.com/david-cortes/findblas */
    #endif
#elif defined(_FOR_R)
    #include <R.h>
    #include <Rinternals.h>
    #include <R_ext/Print.h>
    #include <R_ext/BLAS.h>
    #include <R_ext/Lapack.h>
    #define USE_DOUBLE
    #define FORCE_NO_NAN_PROPAGATION
    #define printf Rprintf
    /* The stream argument is discarded and everything after it (format
       string plus any format arguments) is forwarded to REprintf. The
       macro is variadic so that calls carrying format arguments compile
       too, not only the two-argument 'fprintf(stream, message)' form. */
    #define fprintf(f, ...) REprintf(__VA_ARGS__)
#elif defined(MKL_ILP64)
    #include "mkl.h"
#endif
/* Here one may also include the standard headers "cblas.h" and "lapack.h",
if one wants to use a non-standard version such as ILP64 (-DMKL_ILP64). */
/* 'restrict' portability shim (aliasing hint for compiler optimizations).
   - C++ : mapped to the '__restrict' extension on the listed compilers,
           erased on any other compiler.
   - C   : MSVC gets '__restrict'; compilers that do not announce C99 or
           later get it erased; otherwise the native keyword is kept. */
#if defined(__cplusplus)
    #if defined(_MSC_VER) || defined(__INTEL_COMPILER) || defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)
        #define restrict __restrict
    #else
        #define restrict
    #endif
#elif defined(_MSC_VER)
    #define restrict __restrict
#elif !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))
    #define restrict
#endif
/* Loop-iterator type for parallel for's. OpenMP older than 3.0 (e.g. MSVC
   as of 2020) accepts neither unsigned iterators nor an iterator declared
   inside the loop header, so in that situation 'size_t_for' expands to
   nothing; in every other build it expands to 'size_t'. */
#if defined(_OPENMP) && ((_OPENMP < 200801) || defined(_WIN32) || defined(_WIN64))
    #define size_t_for /* OpenMP < 3.0 */
#else
    #define size_t_for size_t
#endif
/* Fallback for 'isnan' when <math.h> did not supply it as a macro: reuse
   '_isnan' if that is available as a macro, otherwise rely on NaN being
   the only value that compares unequal to itself. */
#if !defined(isnan) && defined(_isnan)
    #define isnan _isnan
#elif !defined(isnan)
    #define isnan(x) ( (x) != (x) )
#endif
/* Missing-value marker: the C 'NAN' constant in regular builds, R's
   'NA_REAL' when compiling for R. */
#ifndef _FOR_R
    #define NAN_ NAN
#else
    #define NAN_ NA_REAL
#endif
/* Floating-point precision selection.
   Defining USE_FLOAT switches the library to single precision; otherwise
   double precision is used. Each branch fixes the scalar type ('real_t'),
   the precision reported to the L-BFGS code ('LBFGS_FLOAT'), the exponential
   function, and maps the precision-neutral 't' BLAS/LAPACK names onto the
   matching 's' (single) or 'd' (double) routines. */
#ifdef USE_FLOAT
    #define LBFGS_FLOAT 32
    #define real_t float
    #define exp_t expf
    /* CBLAS, single precision */
    #define cblas_tdot cblas_sdot
    #define cblas_tcopy cblas_scopy
    #define cblas_taxpy cblas_saxpy
    #define cblas_tscal cblas_sscal
    #define cblas_tsyr cblas_ssyr
    #define cblas_tsyrk cblas_ssyrk
    #define cblas_tnrm2 cblas_snrm2
    #define cblas_tgemm cblas_sgemm
    #define cblas_tgemv cblas_sgemv
    #define cblas_tsymv cblas_ssymv
    /* LAPACK, single precision */
    #define tlacpy_ slacpy_
    #define tposv_ sposv_
    #define tlarnv_ slarnv_
    #define tpotrf_ spotrf_
    #define tpotrs_ spotrs_
    #define tgelsd_ sgelsd_
#else
    #define LBFGS_FLOAT 64
    #define real_t double
    #define exp_t exp
    /* CBLAS, double precision */
    #define cblas_tdot cblas_ddot
    #define cblas_tcopy cblas_dcopy
    #define cblas_taxpy cblas_daxpy
    #define cblas_tscal cblas_dscal
    #define cblas_tsyr cblas_dsyr
    #define cblas_tsyrk cblas_dsyrk
    #define cblas_tnrm2 cblas_dnrm2
    #define cblas_tgemm cblas_dgemm
    #define cblas_tgemv cblas_dgemv
    #define cblas_tsymv cblas_dsymv
    /* LAPACK, double precision */
    #define tlacpy_ dlacpy_
    #define tposv_ dposv_
    #define tlarnv_ dlarnv_
    #define tpotrf_ dpotrf_
    #define tpotrs_ dpotrs_
    #define tgelsd_ dgelsd_
#endif
/* Integer width used for dimensions and indices: 64-bit when either
   USE_INT64 or MKL_ILP64 is defined (which also defines ILP64), plain
   'int' otherwise. */
#if defined(USE_INT64) || defined(MKL_ILP64)
    #define ILP64
    #include <inttypes.h>
    #define int_t int64_t
#else
    #define int_t int
#endif
/* Hand-written prototypes for the Fortran LAPACK routines used by the
   library. They are skipped when a LAPACK header has already been included
   (LAPACK_H) or when building for R (_FOR_R, which includes R's own LAPACK
   header). The 't'-prefixed names are macros defined earlier in this file
   that resolve to the 'd' or 's' routine depending on USE_FLOAT, and
   'int_t' is the integer type selected earlier in this file. All arguments
   are pointers, as required by the Fortran calling convention.
   NOTE(review): every pointer is const-qualified here, while the reference
   LAPACK routines write results through some of them (e.g. the factorized
   matrix, the solution and 'info') - confirm this is intentional. */
#if !defined(LAPACK_H) && !defined(_FOR_R)
/* ?posv: solve a symmetric positive-definite system through Cholesky */
void tposv_(const char*, const int_t*, const int_t*, const real_t*, const int_t*, const real_t*, const int_t*, const int_t*);
/* ?lacpy: copy all or a triangle of a matrix */
void tlacpy_(const char*, const int_t*, const int_t*, const real_t*, const int_t*, const real_t*, const int_t*);
/* ?larnv: fill a vector with random numbers */
void tlarnv_(const int_t*, const int_t*, const int_t*, const real_t*);
/* ?potrf: Cholesky factorization */
void tpotrf_(const char*, const int_t*, const real_t*, const int_t*, const int_t*);
/* ?potrs: solve using a Cholesky factorization computed by ?potrf */
void tpotrs_(const char*, const int_t*, const int_t*, const real_t*, const int_t*, const real_t*, const int_t*, const int_t*);
/* ?gelsd: minimum-norm least-squares solution through SVD */
void tgelsd_(const int_t*, const int_t*, const int_t*,
const real_t*, const int_t*,
const real_t*, const int_t*,
const real_t*, const real_t*, const int_t*, const real_t*,
const int_t*, const int_t*, const int_t*);
#endif
/* Minimal CBLAS declarations, used only when no CBLAS header has been
   included (CBLAS_H undefined). The 'cblas_t*' names are macros defined
   earlier in this file that resolve to the double- or single-precision
   CBLAS routine depending on USE_FLOAT. */
#ifndef CBLAS_H
/* Enumerations with the numeric values of the reference CBLAS interface */
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
/* Alias for headers that name the layout enum CBLAS_LAYOUT */
typedef CBLAS_ORDER CBLAS_LAYOUT;
/* Level 1: dot product, copy, axpy, scaling */
real_t cblas_tdot(const int_t n, const real_t *x, const int_t incx, const real_t *y, const int_t incy);
void cblas_tcopy(const int_t n, const real_t *x, const int_t incx, real_t *y, const int_t incy);
void cblas_taxpy(const int_t n, const real_t alpha, const real_t *x, const int_t incx, real_t *y, const int_t incy);
void cblas_tscal(const int_t N, const real_t alpha, real_t *X, const int_t incX);
/* Symmetric rank-1 and rank-k updates */
void cblas_tsyr(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const int_t N, const real_t alpha, const real_t *X, const int_t incX, real_t *A, const int_t lda);
void cblas_tsyrk(const CBLAS_ORDER Order, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE Trans,
const int_t N, const int_t K, const real_t alpha, const real_t *A, const int_t lda, const real_t beta, real_t *C, const int_t ldc);
/* Euclidean norm */
real_t cblas_tnrm2 (const int_t N, const real_t *X, const int_t incX);
/* General matrix-matrix, general matrix-vector and symmetric matrix-vector products */
void cblas_tgemm(const CBLAS_ORDER Order, const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB, const int_t M, const int_t N, const int_t K,
const real_t alpha, const real_t *A, const int_t lda, const real_t *B, const int_t ldb, const real_t beta, real_t *C, const int_t ldc);
void cblas_tgemv(const CBLAS_ORDER order, const CBLAS_TRANSPOSE trans, const int_t m, const int_t n,
const real_t alpha, const real_t *a, const int_t lda, const real_t *x, const int_t incx, const real_t beta, real_t *y, const int_t incy);
void cblas_tsymv(const CBLAS_ORDER order, const CBLAS_UPLO Uplo, const int_t N, const real_t alpha, const real_t *A,
const int_t lda, const real_t *X, const int_t incX, const real_t beta, real_t *Y, const int_t incY);
#endif
#include "lbfgs.h"
/* Small arithmetic helpers. These are macros, so an argument may be
   evaluated more than once: do not pass expressions with side effects. */
#define square(x)              ((x) * (x))
#define cap_to_4(x)            (((x) > 4) ? 4 : (x))
#define max2(a, b)             (((a) >= (b)) ? (a) : (b))
#define min2(a, b)             (((a) <= (b)) ? (a) : (b))
/* Zero-fill / copy 'n' elements of type real_t ('n' is cast to size_t). */
#define set_to_zero(arr, n)    memset((arr), 0, sizeof(real_t) * (size_t)(n))
#define copy_arr(from, to, n)  memcpy((to), (from), sizeof(real_t) * (size_t)(n))
/* helpers.c */
/* NOTE(review): unlike the memset/memcpy-based 'set_to_zero' / 'copy_arr'
   macros defined earlier in this header, the underscore-suffixed functions
   take an 'nthreads' argument - presumably multi-threaded variants; confirm
   in helpers.c. */
void set_to_zero_(real_t *arr, const size_t n, int_t nthreads);
void copy_arr_(real_t *restrict src, real_t *restrict dest, size_t n, int_t nthreads);
/* NOTE(review): the count is returned as int_t while the length 'n' is a
   size_t - verify that it cannot overflow for very large arrays. */
int_t count_NAs(real_t arr[], size_t n, int_t nthreads);
/* Per-row / per-column counterparts writing into 'cnt_NA' and reporting two
   flags through 'full_dense' / 'near_dense'. Only the by-row version takes
   'nthreads'. NOTE(review): presumably 'arr' is an m-by-n matrix and the
   flags describe how few missing entries it has - confirm in helpers.c. */
void count_NAs_by_row
(
real_t *restrict arr, int_t m, int_t n,
int_t *restrict cnt_NA, int_t nthreads,
bool *restrict full_dense, bool *restrict near_dense
);
void count_NAs_by_col
(
real_t *restrict arr, int_t m, int_t n,
int_t *restrict cnt_NA,
bool *restrict full_dense, bool *restrict near_dense
);
/* Row/column reductions and matrix-plus-vector updates over dense matrices.
   Only the '_cols' / '_colvec' variants take a leading dimension 'lda'.
   NOTE(review): presumably 'A' is m-by-n and 'outp' / 'b' hold one entry
   per row or per column as the names suggest - confirm in helpers.c. */
void sum_by_rows(real_t *restrict A, real_t *restrict outp, int_t m, int_t n, int_t nthreads);
void sum_by_cols(real_t *restrict A, real_t *restrict outp, int_t m, int_t n, size_t lda, int_t nthreads);
void mat_plus_rowvec(real_t *restrict A, real_t *restrict b, int_t m, int_t n, int_t nthreads);
void mat_plus_colvec(real_t *restrict A, real_t *restrict b, real_t alpha, int_t m, int_t n, size_t lda, int_t nthreads);
/* The two functions below accept the data both as a dense array ('Xfull')
   and as index/value triplets ('ixA', 'ixB', 'X' with 'nnz' entries).
   NOTE(review): presumably only one of the two representations is supplied
   per call - confirm against callers. */
void mat_minus_rowvec2
(
real_t *restrict Xfull,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict b, int_t m, int_t n, int_t nthreads
);
void mat_minus_colvec2
(
real_t *restrict Xfull,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict b, int_t m, int_t n, int_t nthreads
);
/* Element-wise utilities over flat arrays of length 'n' (size_t), most of
   them taking an 'nthreads' argument. Their exact semantics live in
   helpers.c and are not visible from this header. */
void nan_to_zero(real_t *restrict arr, real_t *restrict comp, size_t n, int_t nthreads);
void mult_if_non_nan(real_t *restrict arr, real_t *restrict comp, real_t *restrict w, size_t n, int_t nthreads);
void mult_elemwise(real_t *restrict inout, real_t *restrict other, size_t n, int_t nthreads);
real_t sum_squares(real_t *restrict arr, size_t n, int_t nthreads);
/* NOTE(review): the '_large' suffix together with the size_t length suggests
   these are axpy/scal replacements for arrays whose length may not fit in
   the BLAS integer type - confirm in helpers.c. */
void taxpy_large(real_t *restrict A, real_t x, real_t *restrict Y, size_t n, int_t nthreads);
void tscal_large(real_t *restrict arr, real_t alpha, size_t n, int_t nthreads);
/* Random number helpers. 'rnorm' is the only one returning a value (int_t);
   NOTE(review): presumably an error code - confirm. The seed array has four
   entries, which matches the 'iseed' argument of LAPACK's ?larnv. */
int_t rnorm(real_t *restrict arr, size_t n, int_t seed, int_t nthreads);
void rnorm_preserve_seed(real_t *restrict arr, size_t n, int_t seed_arr[4]);
void process_seed_for_larnv(int_t seed_arr[4]);
void reduce_mat_sum(real_t *restrict outp, size_t lda, real_t *restrict inp,
int_t m, int_t n, int_t nthreads);
void exp_neg_x(real_t *restrict arr, size_t n, int_t nthreads);
/* Takes no 'nthreads' argument, unlike most functions in this group. */
void add_to_diag(real_t *restrict A, real_t val, size_t n);
real_t sum_sq_div_w(real_t *restrict arr, real_t *restrict w, size_t n, bool compensated, int_t nthreads);
/* Products mixing sparse and dense operands.
   'tgemm_sp_dense' receives the sparse operand as three arrays
   ('indptr', 'indices', 'values') and is the only function of the group
   that takes 'nthreads'.
   NOTE(review): the array names suggest a CSR layout - confirm. */
void tgemm_sp_dense
(
int_t m, int_t n, real_t alpha,
size_t indptr[], int_t indices[], real_t values[],
real_t DenseMat[], size_t ldb,
real_t OutputMat[], size_t ldc,
int_t nthreads
);
/* The 'tgemv_dense_sp*' family receives the sparse vector as an index array
   'ixB' plus values 'vec_sp' with 'nnz' entries, and writes to 'OutputVec'. */
void tgemv_dense_sp
(
int_t m, int_t n,
real_t alpha, real_t DenseMat[], size_t lda,
int_t ixB[], real_t vec_sp[], size_t nnz,
real_t OutputVec[]
);
/* Same signature as above except that 'alpha' is an array. */
void tgemv_dense_sp_weighted
(
int_t m, int_t n,
real_t alpha[], real_t DenseMat[], size_t lda,
int_t ixB[], real_t vec_sp[], size_t nnz,
real_t OutputVec[]
);
/* As the weighted version, with an additional scalar 'alpha2'. */
void tgemv_dense_sp_weighted2
(
int_t m, int_t n,
real_t alpha[], real_t alpha2, real_t DenseMat[], size_t lda,
int_t ixB[], real_t vec_sp[], size_t nnz,
real_t OutputVec[]
);
/* No 'alpha' argument here.
   NOTE(review): 'lda' is int_t in this prototype but size_t in the three
   siblings above - check whether the difference is intentional. */
void tgemv_dense_sp_notrans
(
int_t m, int_t n,
real_t DenseMat[], int_t lda,
int_t ixB[], real_t vec_sp[], size_t nnz,
real_t OutputVec[]
);
/* Dense-matrix copy, sum and transpose helpers. None takes 'nthreads'.
   Note the mixed integer types: 'copy_mat' uses int_t for dimensions and
   leading dimensions, the remaining functions use size_t. */
void copy_mat
(
int_t m, int_t n,
real_t *restrict A, int_t lda,
real_t *restrict B, int_t ldb
);
void sum_mat
(
size_t m, size_t n,
real_t *restrict A, size_t lda,
real_t *restrict B, size_t ldb
);
/* Three transpose variants: the first receives a scratch buffer, the second
   an output array, the third an output array plus leading dimensions for
   both input and output.
   NOTE(review): presumably the first one transposes 'A' in place using the
   buffer - confirm in helpers.c. */
void transpose_mat(real_t *restrict A, size_t m, size_t n, real_t *restrict buffer_real_t);
void transpose_mat2(real_t *restrict A, size_t m, size_t n, real_t *restrict outp);
void transpose_mat3
(
real_t *restrict A, size_t lda,
size_t m, size_t n,
real_t *restrict outp, size_t ldb
);
/* Conversions from row/column/value triplets ('Xrow', 'Xcol', 'Xval', with
   'nnz' entries and an accompanying array 'W') to compressed formats.
   The '_plus_alloc' variant receives pointers-to-pointers for its outputs
   and returns an int_t.
   NOTE(review): presumably it allocates the output arrays itself and the
   return value is an error code (e.g. out of memory); the caller would then
   own the allocations - confirm in helpers.c. */
int_t coo_to_csr_plus_alloc
(
int_t *restrict Xrow, int_t *restrict Xcol, real_t *restrict Xval,
real_t *restrict W,
int_t m, int_t n, size_t nnz,
size_t *restrict *csr_p, int_t *restrict *csr_i, real_t *restrict *csr_v,
real_t *restrict *csr_w
);
/* Same inputs, writing into arrays supplied by the caller. */
void coo_to_csr
(
int_t *restrict Xrow, int_t *restrict Xcol, real_t *restrict Xval,
real_t *restrict W,
int_t m, int_t n, size_t nnz,
size_t *restrict csr_p, int_t *restrict csr_i, real_t *restrict csr_v,
real_t *restrict csr_w
);
/* Produces both a CSR and a CSC set of arrays; the only variant that takes
   'nthreads'. */
void coo_to_csr_and_csc
(
int_t *restrict Xrow, int_t *restrict Xcol, real_t *restrict Xval,
real_t *restrict W, int_t m, int_t n, size_t nnz,
size_t *restrict csr_p, int_t *restrict csr_i, real_t *restrict csr_v,
size_t *restrict csc_p, int_t *restrict csc_i, real_t *restrict csc_v,
real_t *restrict csr_w, real_t *restrict csc_w,
int_t nthreads
);
/* Takes only the index-pointer and value arrays of a sparse matrix with
   'm' rows, and writes to 'output'. */
void row_means_csr(size_t indptr[], real_t *restrict values,
real_t *restrict output, int_t m, int_t nthreads);
/* Global flag declared here and defined in one of the .c files.
   NOTE(review): the names and the <signal.h> include suggest that
   'set_interrup_global_variable' is installed as a signal handler which
   raises this flag. If so: (1) a handler passed to signal() must have type
   'void (*)(int)', which this prototype only matches when int_t is 'int'
   (not in the USE_INT64 / MKL_ILP64 builds); (2) the portable type for a
   flag written from a handler is 'volatile sig_atomic_t'. Confirm against
   the definition and the call sites. The identifier is spelled 'interrup'
   (sic); it is part of the interface and must be kept as is. */
extern bool should_stop_procedure;
void set_interrup_global_variable(int_t s);
/* Two functions with identical signatures, shaped like an L-BFGS progress
   callback: opaque 'instance', current variables 'x', gradient 'g',
   function value, norms, step size, number of variables 'n', and two
   counters 'k' and 'ls'.
   NOTE(review): presumably registered with the optimizer declared in
   "lbfgs.h", one for the model in collective.c and one for offsets.c -
   confirm. */
int_t lbfgs_printer_collective
(
void *instance,
const real_t *x,
const real_t *g,
const real_t fx,
const real_t xnorm,
const real_t gnorm,
const real_t step,
size_t n,
int_t k,
int_t ls
);
int_t lbfgs_printer_offsets
(
void *instance,
const real_t *x,
const real_t *g,
const real_t fx,
const real_t xnorm,
const real_t gnorm,
const real_t step,
size_t n,
int_t k,
int_t ls
);
/* Miscellaneous helpers; semantics are defined in helpers.c. */
bool check_is_sorted(int_t arr[], int_t n);
/* NOTE(review): the name suggests a quickselect-style partial ordering of
   'arr' driven by 'values' around position 'k' - confirm in helpers.c. */
void qs_argpartition(int_t arr[], real_t values[], int_t n, int_t k);
/* NOTE(review): presumably 'outp' must have room for m rows of n+1
   columns - confirm before sizing buffers. */
void append_ones_last_col
(
real_t *restrict orig, size_t m, size_t n,
real_t *restrict outp
);
void fill_lower_triangle(real_t A[], size_t n, size_t lda);
void print_oom_message(void);
/* Only declared when compiling for R. */
#ifdef _FOR_R
void R_nan_to_C_nan(real_t arr[], size_t n);
#endif
/* common.c */
/* Returns a real_t and receives, next to the factor matrices 'A' / 'B' and
   their gradient arrays 'g_A' / 'g_B', the data 'X' in several alternative
   representations: triplets ('ixA', 'ixB', 'X', 'nnz'), dense ('Xfull'),
   and two compressed sets ('Xcsr*', 'Xcsc*'). Optional biases have their
   own value and gradient arrays. Two scratch buffers are passed in.
   NOTE(review): presumably the return value is the objective value and the
   gradient arrays are outputs; which representations may be NULL is not
   visible from this header - confirm in common.c. The identifier is
   spelled 'cannonical' (sic) and must be kept as is. */
real_t fun_grad_cannonical_form
(
real_t *restrict A, int_t lda, real_t *restrict B, int_t ldb,
real_t *restrict g_A, real_t *restrict g_B,
int_t m, int_t n, int_t k,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict Xfull, bool full_dense,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
size_t Xcsc_p[], int_t Xcsc_i[], real_t *restrict Xcsc,
bool user_bias, bool item_bias,
real_t *restrict biasA, real_t *restrict biasB,
real_t *restrict g_biasA, real_t *restrict g_biasB,
real_t *restrict weight, real_t *restrict weightR, real_t *restrict weightC,
real_t scaling,
real_t *restrict buffer_real_t,
real_t *restrict buffer_mt,
int_t nthreads
);
/* Operates on a single factor vector 'a_vec' of length 'k' against the
   matrix 'B'. The observed data can be passed dense ('Xa_dense') or sparse
   ('Xa', 'ixB', 'nnz'). Several optional precomputed matrices can be
   supplied, together with flags describing their state ('BtB_has_diag',
   'BtB_is_scaled', 'scale_BtB', 'n_BtB').
   As the inline comment notes, the conjugate-gradient path ('use_cg')
   should not be used for new data.
   NOTE(review): which precomputed inputs may be NULL, and the required size
   of 'buffer_real_t', are not visible here - confirm in common.c. */
void factors_closed_form
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict Xa_dense, bool full_dense,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict weight,
real_t *restrict buffer_real_t,
real_t lam, real_t lam_last,
real_t *restrict precomputedTransBtBinvBt,
real_t *restrict precomputedBtB, int_t cnt_NA, int_t ld_BtB,
bool BtB_has_diag, bool BtB_is_scaled, real_t scale_BtB, int_t n_BtB,
real_t *restrict precomputedBtBchol, bool NA_as_zero,
bool use_cg, int_t max_cg_steps, /* <- 'cg' should not be used for new data */
bool force_add_diag
);
/* Three conjugate-gradient variants for a single factor vector 'a_vec',
   each bounded by 'max_cg_steps'. They differ in the inputs they accept:
   - factors_explicit_cg: sparse data only, no precomputed matrix;
   - ..._NA_as_zero_weighted: sparse data plus 'precomputedBtB';
   - ..._dense: dense data with a missing-value count 'cnt_NA', plus
     'precomputedBtB'.
   NOTE(review): presumably 'a_vec' is used as the starting point and
   overwritten with the result - confirm in common.c. */
void factors_explicit_cg
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict weight,
real_t *restrict buffer_real_t,
real_t lam, real_t lam_last,
int_t max_cg_steps
);
void factors_explicit_cg_NA_as_zero_weighted
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict weight,
real_t *restrict precomputedBtB, int_t ld_BtB,
real_t *restrict buffer_real_t,
real_t lam, real_t lam_last,
int_t max_cg_steps
);
void factors_explicit_cg_dense
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict Xa_dense, int_t cnt_NA,
real_t *restrict weight,
real_t *restrict precomputedBtB, int_t ld_BtB,
real_t *restrict buffer_real_t,
real_t lam, real_t lam_last,
int_t max_cg_steps
);
/* Single-vector routines for the implicit-feedback case (see reference (c)
   and (d) in the file header). All three take sparse data ('Xa', 'ixB',
   'nnz'), a single 'lam', and 'precomputedBtB' with its leading dimension.
   Here 'ldb' is a size_t, whereas the explicit-feedback counterparts above
   declare it as int_t.
   'factors_implicit' carries the union of the arguments of the '_cg' and
   '_chol' variants plus a 'use_cg' switch.
   NOTE(review): presumably it dispatches to one of the two - confirm in
   common.c. */
void factors_implicit_cg
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, size_t ldb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t lam,
real_t *restrict precomputedBtB, int_t ld_BtB,
int_t max_cg_steps,
real_t *restrict buffer_real_t
);
void factors_implicit_chol
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, size_t ldb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t lam,
real_t *restrict precomputedBtB, int_t ld_BtB,
bool zero_out,
real_t *restrict buffer_real_t,
bool force_add_diag
);
void factors_implicit
(
real_t *restrict a_vec, int_t k,
real_t *restrict B, size_t ldb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t lam,
real_t *restrict precomputedBtB, int_t ld_BtB,
bool zero_out, bool use_cg, int_t max_cg_steps,
real_t *restrict buffer_real_t,
bool force_add_diag
);
/* Returns a real_t and receives a gradient array 'g_A' for the matrix 'A',
   with dense data 'Xfull' and an optional 'weight' array. Two flags,
   'do_B' and 'reset_grad', modify its behavior.
   NOTE(review): presumably the return value is the objective, 'do_B'
   indicates that the roles of the two matrices are swapped and
   'reset_grad' that 'g_A' is zeroed first - confirm in common.c. */
real_t fun_grad_Adense
(
real_t *restrict g_A,
real_t *restrict A, int_t lda,
real_t *restrict B, int_t ldb,
int_t m, int_t n, int_t k,
real_t *restrict Xfull, real_t *restrict weight,
real_t lam, real_t w, real_t lam_last,
bool do_B, bool reset_grad,
int_t nthreads,
real_t *restrict buffer_real_t
);
/* Receives the function value and gradient by pointer, so both can be
   updated in place.
   NOTE(review): presumably adds the regularization term for 'A' scaled by
   'lam' to each - confirm in common.c. */
void add_lam_to_grad_and_fun
(
real_t *restrict fun,
real_t *restrict grad,
real_t *restrict A,
int_t m, int_t k, int_t lda,
real_t lam, int_t nthreads
);
/* Argument bundle whose fields mirror the parameters of 'fun_grad_Adense',
   leaving out the variable and gradient arrays ('A', 'g_A') and the
   'do_B' / 'reset_grad' flags.
   NOTE(review): presumably this is what 'wrapper_fun_grad_Adense' receives
   through its 'void *instance' argument - confirm in common.c. */
typedef struct data_fun_grad_Adense {
    int_t   lda;
    real_t *B;
    int_t   ldb;
    int_t   m;
    int_t   n;
    int_t   k;
    real_t *Xfull;
    real_t *weight;
    real_t  lam;
    real_t  w;
    real_t  lam_last;
    int_t   nthreads;
    real_t *buffer_real_t;
} data_fun_grad_Adense;
/* Counterpart of 'data_fun_grad_Adense' for the other factor matrix: it
   stores 'A' together with 'lda', and only the leading dimension 'ldb'
   for 'B'.
   NOTE(review): presumably this is what 'wrapper_fun_grad_Bdense' receives
   through its 'void *instance' argument - confirm in common.c. */
typedef struct data_fun_grad_Bdense {
    real_t *A;
    int_t   lda;
    int_t   ldb;
    int_t   m;
    int_t   n;
    int_t   k;
    real_t *Xfull;
    real_t *weight;
    real_t  lam;
    real_t  w;
    real_t  lam_last;
    int_t   nthreads;
    real_t *buffer_real_t;
} data_fun_grad_Bdense;
/* Two functions with identical signatures, shaped like an L-BFGS
   evaluation callback: opaque 'instance', variables 'x', gradient 'g',
   number of variables 'n' and current 'step'.
   NOTE(review): presumably 'instance' points to a 'data_fun_grad_Adense' /
   'data_fun_grad_Bdense' struct and the call is forwarded to
   'fun_grad_Adense' - confirm in common.c. */
real_t wrapper_fun_grad_Adense
(
void *instance,
real_t *x,
real_t *g,
const size_t n,
const real_t step
);
real_t wrapper_fun_grad_Bdense
(
void *instance,
real_t *x,
real_t *g,
const size_t n,
const real_t step
);
/* Return a size_t computed from problem dimensions and the same kind of
   option flags that 'optimizeA' / 'optimizeA_implicit' take.
   NOTE(review): presumably the result is the number of real_t elements
   (not bytes) required for the 'buffer_real_t' argument of the matching
   optimize function - confirm the unit in common.c before allocating. */
size_t buffer_size_optimizeA
(
size_t n, bool full_dense, bool near_dense, bool do_B,
bool has_dense, bool has_weights, bool NA_as_zero,
size_t k, size_t nthreads,
bool pass_allocated_BtB, bool keep_precomputedBtB,
bool use_cg, bool finalize_chol
);
size_t buffer_size_optimizeA_implicit
(
size_t k, size_t nthreads,
bool pass_allocated_BtB,
bool use_cg, bool finalize_chol
);
/* Matrix-level routines taking the full factor matrices 'A' and 'B', the
   data in compressed-sparse form ('Xcsr_p', 'Xcsr_i', 'Xcsr') and, for
   'optimizeA', also in dense form ('Xfull' with 'ldX'). 'optimizeA'
   additionally exchanges a precomputed matrix with the caller through
   'precomputedBtB' and the output flag 'filled_BtB'.
   Leading dimensions are int_t in 'optimizeA' and size_t in
   'optimizeA_implicit'.
   NOTE(review): presumably 'A' is overwritten with the updated factors
   while 'B' is held fixed, and 'do_B' signals that the caller passed the
   matrices swapped - confirm in common.c. */
void optimizeA
(
real_t *restrict A, int_t lda,
real_t *restrict B, int_t ldb,
int_t m, int_t n, int_t k,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
real_t *restrict Xfull, int_t ldX, bool full_dense, bool near_dense,
int_t cnt_NA[], real_t *restrict weight, bool NA_as_zero,
real_t lam, real_t lam_last,
bool do_B, bool is_first_iter,
int_t nthreads,
bool use_cg, int_t max_cg_steps,
bool keep_precomputedBtB,
real_t *restrict precomputedBtB, bool *filled_BtB,
real_t *restrict buffer_real_t
);
void optimizeA_implicit
(
real_t *restrict A, size_t lda,
real_t *restrict B, size_t ldb,
int_t m, int_t n, int_t k,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
real_t lam,
int_t nthreads,
bool use_cg, int_t max_cg_steps, bool force_set_to_zero,
real_t *restrict precomputedBtB,
real_t *restrict buffer_real_t
);
/* Pre-processing routines that receive the data in every representation
   used by the library (triplets, dense, CSR and CSC).
   'initialize_biases' writes through 'glob_mean', 'biasA' and 'biasB';
   'center_by_cols' writes through 'col_means'. Both return an int_t.
   NOTE(review): presumably the return value is an error code and the data
   arrays are centered in place - confirm in common.c. */
int_t initialize_biases
(
real_t *restrict glob_mean, real_t *restrict biasA, real_t *restrict biasB,
bool user_bias, bool item_bias,
real_t lam_user, real_t lam_item,
int_t m, int_t n,
int_t m_bias, int_t n_bias,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict Xfull, real_t *restrict Xtrans,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
size_t Xcsc_p[], int_t Xcsc_i[], real_t *restrict Xcsc,
int_t nthreads
);
int_t center_by_cols
(
real_t *restrict col_means,
real_t *restrict Xfull, int_t m, int_t n,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
size_t Xcsc_p[], int_t Xcsc_i[], real_t *restrict Xcsc,
int_t nthreads
);
/* Takes two sparse vectors (values + indices + count) and two bounds
   'n' and 'p'.
   NOTE(review): presumably validates that the indices fall inside
   [0, p) and [0, n) respectively; the meaning of the returned bool
   (valid vs. invalid) is not visible here - confirm in common.c. */
bool check_sparse_indices
(
int_t n, int_t p,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
real_t *restrict Xa, int_t ixB[], size_t nnz
);
/* Writes 'nnz' values to 'outp', one per (predA[i], predB[i]) pair, using
   the factor matrices, optional biases and the global mean.
   NOTE(review): how out-of-range indices are handled is not visible from
   this header - confirm in common.c. */
void predict_multiple
(
real_t *restrict A, int_t k_user,
real_t *restrict B, int_t k_item,
real_t *restrict biasA, real_t *restrict biasB,
real_t glob_mean,
int_t k, int_t k_main,
int_t m, int_t n,
int_t predA[], int_t predB[], size_t nnz,
real_t *restrict outp,
int_t nthreads
);
/* Comparators with the argument types expected by qsort(). Note that
   qsort() requires a comparator returning 'int'; these return int_t, which
   is a different type in the USE_INT64 / MKL_ILP64 builds - verify how they
   are passed at the call sites. */
int_t cmp_int(const void *a, const void *b);
/* Global pointer declared here and defined in one of the .c files.
   NOTE(review): presumably the array that 'cmp_argsort' compares through,
   since qsort() comparators cannot receive extra context - confirm. */
extern real_t *ptr_real_t_glob;
#if !defined(_WIN32) && !defined(_WIN64)
#pragma omp threadprivate(ptr_real_t_glob)
/* Note: will not be used inside OMP, this is a precaution just in case */
#endif
int_t cmp_argsort(const void *a, const void *b);
/* Ranking routine for a single factor vector 'a_vec': writes up to 'n_top'
   entries into 'outp_ix' and 'outp_score'. Candidate items can be limited
   through 'include_ix' or 'exclude_ix'.
   NOTE(review): presumably at most one of the two lists is supplied, either
   output array may be NULL, and the return value is an error code - confirm
   in common.c. */
int_t topN
(
real_t *restrict a_vec, int_t k_user,
real_t *restrict B, int_t k_item,
real_t *restrict biasB,
real_t glob_mean, real_t biasA,
int_t k, int_t k_main,
int_t *restrict include_ix, int_t n_include,
int_t *restrict exclude_ix, int_t n_exclude,
int_t *restrict outp_ix, real_t *restrict outp_score,
int_t n_top, int_t n, int_t nthreads
);
/* Bias-only model: this group works exclusively with 'biasA', 'biasB' and
   'glob_mean' - no factor matrices appear in any of the signatures.
   'fit_most_popular' receives the data as triplets or dense, plus flags
   'implicit' and 'adjust_weight' and an output 'w_main_multiplier'.
   NOTE(review): presumably all three return an error code - confirm in
   common.c. */
int_t fit_most_popular
(
real_t *restrict biasA, real_t *restrict biasB,
real_t *restrict glob_mean,
real_t lam_user, real_t lam_item,
real_t alpha,
int_t m, int_t n,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict Xfull,
real_t *restrict weight,
bool implicit, bool adjust_weight,
real_t *restrict w_main_multiplier,
int_t nthreads
);
/* The user bias can be given either directly ('a_bias') or as an array
   plus a row index ('biasA', 'row_index').
   NOTE(review): which of the two takes precedence is not visible here. */
int_t topN_old_most_popular
(
bool user_bias,
real_t a_bias,
real_t *restrict biasA, int_t row_index,
real_t *restrict biasB,
real_t glob_mean,
int_t *restrict include_ix, int_t n_include,
int_t *restrict exclude_ix, int_t n_exclude,
int_t *restrict outp_ix, real_t *restrict outp_score,
int_t n_top, int_t n
);
/* Writes 'n_predict' values to 'predicted', one per (row[i], col[i]) pair. */
int_t predict_X_old_most_popular
(
int_t row[], int_t col[], real_t *restrict predicted, size_t n_predict,
real_t *restrict biasA, real_t *restrict biasB,
real_t glob_mean,
int_t m, int_t n
);
/* collective.c */
/* Computes three sizes and returns them through the output pointers
   'nvars', 'nbuffer' and 'nbuffer_mt'. The data pointers ('X', 'Xfull',
   'U', ...) are passed without any accompanying contents being needed by
   the signature.
   NOTE(review): presumably they are only tested for NULL to learn which
   inputs are present, and the sizes are element counts (not bytes) for the
   'values' / 'buffer_real_t' / 'buffer_mt' arguments of
   'collective_fun_grad' - confirm in collective.c. */
void nvars_collective_fun_grad
(
size_t m, size_t n, size_t m_u, size_t n_i, size_t m_ubin, size_t n_ibin,
size_t p, size_t q, size_t pbin, size_t qbin,
size_t nnz, size_t nnz_U, size_t nnz_I,
size_t k, size_t k_main, size_t k_user, size_t k_item,
bool user_bias, bool item_bias, size_t nthreads,
real_t *X, real_t *Xfull,
real_t *U, real_t *Ub, real_t *II, real_t *Ib,
real_t *U_sp, real_t *U_csr, real_t *I_sp, real_t *I_csr,
size_t *nvars, size_t *nbuffer, size_t *nbuffer_mt
);
/* Function-and-gradient routine for the full collective model: all
   optimization variables arrive packed in 'values' with a matching 'grad'
   array. Inputs are the main data 'X' (triplets, dense, CSR, CSC), the
   side-information matrices 'U' / 'II' and their binary counterparts
   'Ub' / 'Ib' (dense, with '*_has_NA' flags), and the sparse forms of the
   side information (triplets, CSR, CSC).
   NOTE(review): the layout of 'values' (order of biases and matrices) and
   which inputs may be NULL are not visible from this header; presumably the
   return value is the objective - confirm in collective.c. */
real_t collective_fun_grad
(
real_t *restrict values, real_t *restrict grad,
int_t m, int_t n, int_t k,
int_t ixA[], int_t ixB[], real_t *restrict X, size_t nnz,
real_t *restrict Xfull,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
size_t Xcsc_p[], int_t Xcsc_i[], real_t *restrict Xcsc,
real_t *restrict weight, real_t *restrict weightR, real_t *restrict weightC,
bool user_bias, bool item_bias,
real_t lam, real_t *restrict lam_unique,
real_t *restrict U, int_t m_u, int_t p, bool U_has_NA,
real_t *restrict II, int_t n_i, int_t q, bool I_has_NA,
real_t *restrict Ub, int_t m_ubin, int_t pbin, bool Ub_has_NA,
real_t *restrict Ib, int_t n_ibin, int_t qbin, bool Ib_has_NA,
int_t U_row[], int_t U_col[], real_t *restrict U_sp, size_t nnz_U,
int_t I_row[], int_t I_col[], real_t *restrict I_sp, size_t nnz_I,
size_t U_csr_p[], int_t U_csr_i[], real_t *restrict U_csr,
size_t U_csc_p[], int_t U_csc_i[], real_t *restrict U_csc,
size_t I_csr_p[], int_t I_csr_i[], real_t *restrict I_csr,
size_t I_csc_p[], int_t I_csc_i[], real_t *restrict I_csc,
real_t *restrict buffer_real_t, real_t *restrict buffer_mt,
int_t k_main, int_t k_user, int_t k_item,
real_t w_main, real_t w_user, real_t w_item,
int_t nthreads
);
/* Function-and-gradient routine for the binary side-information matrix
   'Ub' (m-by-pbin going by the argument names), with gradient arrays for
   both 'A' and 'Cb'.
   NOTE(review): 'w_user' is declared as 'double' here, whereas every other
   weight in this header is a 'real_t'. In USE_FLOAT builds that means an
   implicit float<->double conversion at this boundary; the type must match
   the definition in collective.c, so check both before changing it. */
real_t collective_fun_grad_bin
(
real_t *restrict A, int_t lda, real_t *restrict Cb, int_t ldc,
real_t *restrict g_A, real_t *restrict g_Cb,
real_t *restrict Ub,
int_t m, int_t pbin, int_t k,
bool Ub_has_NA, double w_user,
real_t *restrict buffer_real_t,
int_t nthreads
);
/* Function-and-gradient routine for a single factor vector 'a_vec' with
   gradient 'g_A'. Side information for that row can be given dense
   ('u_vec'), sparse ('u_vec_ixB', 'u_vec_sp', 'nnz_u_vec') and/or binary
   ('u_bin_vec'); the main data dense ('Xa_dense') or sparse ('Xa', 'ixB',
   'nnz'). No 'nthreads' argument is taken.
   NOTE(review): presumably the return value is the objective for this one
   row - confirm in collective.c. */
real_t collective_fun_grad_single
(
real_t *restrict a_vec, real_t *restrict g_A,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict u_vec, int_t p,
int_t u_vec_ixB[], real_t *restrict u_vec_sp, size_t nnz_u_vec,
real_t *restrict u_bin_vec, int_t pbin,
bool u_vec_has_NA, bool u_bin_vec_has_NA,
real_t *restrict B, int_t n,
real_t *restrict C, real_t *restrict Cb,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict Xa_dense,
real_t *restrict weight,
real_t *restrict buffer_real_t,
real_t lam, real_t w_main, real_t w_user, real_t lam_last
);
/* Argument bundle whose fields mirror the parameters of
   'collective_fun_grad_single', leaving out the variable and gradient
   arrays ('a_vec', 'g_A').
   NOTE(review): presumably this is what 'wrapper_factors_fun_grad' receives
   through its 'void *instance' argument - confirm in collective.c. */
typedef struct data_factors_fun_grad {
    /* factor dimensions */
    int_t   k;
    int_t   k_user;
    int_t   k_item;
    int_t   k_main;
    /* side information for the row: dense, sparse and binary forms */
    real_t *u_vec;
    int_t   p;
    int_t  *u_vec_ixB;
    real_t *u_vec_sp;
    size_t  nnz_u_vec;
    real_t *u_bin_vec;
    int_t   pbin;
    bool    u_vec_has_NA;
    bool    u_bin_vec_has_NA;
    /* fixed model matrices */
    real_t *B;
    int_t   n;
    real_t *C;
    real_t *Cb;
    /* main data for the row: sparse and dense forms */
    real_t *Xa;
    int_t  *ixB;
    real_t *weight;
    size_t  nnz;
    real_t *Xa_dense;
    /* scratch space and hyperparameters */
    real_t *buffer_real_t;
    real_t  lam;
    real_t  w_main;
    real_t  w_user;
    real_t  lam_last;
} data_factors_fun_grad;
/* Shaped like an L-BFGS evaluation callback (opaque 'instance', variables
   'x', gradient 'g', count 'n', current 'step').
   NOTE(review): presumably 'instance' points to a 'data_factors_fun_grad'
   and the call is forwarded to 'collective_fun_grad_single' - confirm in
   collective.c. */
real_t wrapper_factors_fun_grad
(
void *instance,
real_t *x,
real_t *g,
const size_t n,
const real_t step
);
/* Takes the same inputs as 'collective_fun_grad_single' except the gradient
   array, and returns an int_t.
   NOTE(review): presumably runs the L-BFGS optimizer from "lbfgs.h" to
   obtain 'a_vec' and returns an error code - confirm in collective.c. */
int_t collective_factors_lbfgs
(
real_t *restrict a_vec,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict u_vec, int_t p,
int_t u_vec_ixB[], real_t *restrict u_vec_sp, size_t nnz_u_vec,
real_t *restrict u_bin_vec, int_t pbin,
bool u_vec_has_NA, bool u_bin_vec_has_NA,
real_t *restrict B, int_t n,
real_t *restrict C, real_t *restrict Cb,
real_t *restrict Xa, int_t ixB[], real_t *restrict weight, size_t nnz,
real_t *restrict Xa_dense,
real_t *restrict buffer_real_t,
real_t lam, real_t w_main, real_t w_user, real_t lam_last
);
/* Single-vector routine for the collective model that combines the main
   data (dense 'Xa_dense' or sparse 'Xa' / 'ixB' / 'nnz') with side
   information (dense 'u_vec' or sparse 'u_vec_ixB' / 'u_vec_sp'), plus an
   optional implicit-features part ('Bi', 'Xones', 'add_implicit_features').
   Several optional precomputed matrices can be supplied. The flags 'add_X'
   and 'add_U' are passed by the caller.
   As the inline comment notes, the conjugate-gradient path ('use_cg')
   should not be used for new data.
   NOTE(review): the expected contents of each precomputed matrix and the
   size of 'buffer_real_t' are not visible here - confirm in collective.c. */
void collective_closed_form_block
(
real_t *restrict a_vec,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict Xa_dense,
real_t *restrict Xa, int_t ixB[], size_t nnz,
int_t u_vec_ixB[], real_t *restrict u_vec_sp, size_t nnz_u_vec,
real_t *restrict u_vec,
bool NA_as_zero_X, bool NA_as_zero_U,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict C, int_t p,
real_t *restrict Bi, int_t k_main_i, bool add_implicit_features,
real_t *restrict Xones, int_t incXones,
real_t *restrict weight,
real_t lam, real_t w_user, real_t w_implicit, real_t lam_last,
real_t *restrict precomputedBtB, int_t cnt_NA_x,
real_t *restrict precomputedCtCw, int_t cnt_NA_u,
real_t *restrict precomputedBeTBeChol, int_t n_BtB,
real_t *restrict precomputedBiTBi,
bool add_X, bool add_U,
bool use_cg, int_t max_cg_steps,/* <- 'cg' should not be used for new data*/
real_t *restrict buffer_real_t
);
/* Implicit-feedback counterpart of 'collective_closed_form_block': the main
   data is sparse only ('Xa', 'ixB', 'nnz') and there is a single 'lam'.
   As the inline comment notes, 'precomputedBtB' is the matrix used by the
   conjugate-gradient path and must NOT have lambda added to it.
   NOTE(review): the meaning of 'shapes_match' is not visible from this
   header - confirm in collective.c. */
void collective_closed_form_block_implicit
(
real_t *restrict a_vec,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict B, int_t n, real_t *restrict C, int_t p,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict u_vec, int_t cnt_NA_u,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
bool NA_as_zero_U,
real_t lam, real_t w_user,
real_t *restrict precomputedBeTBe,
real_t *restrict precomputedBtB, /* for cg, should NOT have lambda added */
real_t *restrict precomputedBeTBeChol,
real_t *restrict precomputedCtCw,
bool add_U, bool shapes_match,
bool use_cg, int_t max_cg_steps,
real_t *restrict buffer_real_t
);
/* Conjugate-gradient routine for a single factor vector in the collective
   model, bounded by 'max_cg_steps'. Accepts the same data representations
   as 'collective_closed_form_block'.
   As the inline comment notes, 'precomputedCtC' must NOT be multiplied by
   'w_user' (in contrast with the 'precomputedCtCw' argument of the
   closed-form routine, which carries a trailing 'w' in its name). */
void collective_block_cg
(
real_t *restrict a_vec,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict Xa_dense,
real_t *restrict Xa, int_t ixB[], size_t nnz,
int_t u_vec_ixB[], real_t *restrict u_vec_sp, size_t nnz_u_vec,
real_t *restrict u_vec,
bool NA_as_zero_X, bool NA_as_zero_U,
real_t *restrict B, int_t n, int_t ldb,
real_t *restrict C, int_t p,
bool add_implicit_features,
real_t *restrict Xones, int_t incXones,
real_t * restrict Bi, real_t *restrict precomputedBiTBi, int_t k_main_i,
real_t *restrict weight,
real_t lam, real_t w_user, real_t w_implicit, real_t lam_last,
int_t cnt_NA_x, int_t cnt_NA_u,
real_t *restrict precomputedBtB,
real_t *restrict precomputedCtC, /* should NOT be multiplied by 'w_user' */
int_t max_cg_steps,
real_t *restrict buffer_real_t
);
/* Prototype of collective_block_cg_implicit (defined elsewhere).

   Returns void; results come back through the (non-const) pointer arguments.
   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.

   Compared with collective_block_cg, this signature drops the dense-X
   argument, 'NA_as_zero_X', 'ldb', 'weight', 'w_implicit', 'lam_last',
   'cnt_NA_x' and all implicit-features arguments, and places
   'max_cg_steps' BEFORE the two precomputed matrices instead of after them.

   NOTE(review): by analogy with the inline notes on the sibling prototypes,
   'precomputedBtB' / 'precomputedCtC' presumably must come without lambda
   and without the 'w_user' scaling; this declaration itself does not say
   so - confirm against the definition. */
void collective_block_cg_implicit
(
real_t *restrict a_vec,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t *restrict Xa, int_t ixB[], size_t nnz,
int_t u_vec_ixB[], real_t *restrict u_vec_sp, size_t nnz_u_vec,
real_t *restrict u_vec,
bool NA_as_zero_U,
real_t *restrict B, int_t n,
real_t *restrict C, int_t p,
real_t lam, real_t w_user,
int_t cnt_NA_u,
int_t max_cg_steps,
real_t *restrict precomputedBtB,
real_t *restrict precomputedCtC,
real_t *restrict buffer_real_t
);
/* Prototype of optimizeA_collective_implicit (defined elsewhere).

   Returns void; results come back through the (non-const) pointer arguments.
   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias;
   the index arrays and the three 'bool *filled_*' pointers are not.

   X is passed only as a ('Xcsr_p', 'Xcsr_i', 'Xcsr') triplet, with
   'Xcsr_p' typed as size_t[] and 'Xcsr_i' as int_t[]. U is passed both as
   the analogous ('U_csr_p', 'U_csr_i', 'U_csr') triplet and as a plain
   'U' pointer, together with a per-entry 'cnt_NA_u' array.

   Per the inline note below, 'precomputedBtB' does not carry lambda when
   the CG path is used.

   NOTE(review): going by names only - confirm against the definition:
     - 'A' is presumably the matrix being optimized (m rows), with 'B' and
       'C' held fixed;
     - the triplets presumably follow the usual CSR layout (row pointers,
       column indices, values);
     - 'filled_BeTBe', 'filled_BeTBeChol', 'filled_CtC' presumably are
       output flags telling the caller which precomputed buffers were
       populated;
     - 'nthreads' presumably is the number of threads to use. */
void optimizeA_collective_implicit
(
real_t *restrict A, real_t *restrict B, real_t *restrict C,
int_t m, int_t m_u, int_t n, int_t p,
int_t k, int_t k_main, int_t k_user, int_t k_item,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
size_t U_csr_p[], int_t U_csr_i[], real_t *restrict U_csr,
real_t *restrict U, int_t cnt_NA_u[],
bool full_dense_u, bool near_dense_u, bool NA_as_zero_U,
real_t lam, real_t w_user,
int_t nthreads,
bool use_cg, int_t max_cg_steps, bool is_first_iter,
real_t *restrict precomputedBtB, /* will not have lambda with CG */
real_t *restrict precomputedBeTBe,
real_t *restrict precomputedBeTBeChol,
real_t *restrict precomputedCtC,
bool *filled_BeTBe,
bool *filled_BeTBeChol,
bool *filled_CtC,
real_t *restrict buffer_real_t
);
/* Prototype of collective_factors_cold (defined elsewhere).

   Returns an 'int_t'.
   NOTE(review): presumably a status code (0 on success); the meaning of the
   return value is not visible from this declaration - confirm.

   The signature takes no X data at all: only side information, given as a
   dense 'u_vec' (with 'p'), a sparse (values 'u_vec_sp', indices
   'u_vec_ixB', count 'nnz_u_vec') triplet, and 'u_bin_vec' (with 'pbin'),
   plus the matrices 'C' and 'Cb' and precomputed 'TransCtCinvCt' / 'CtCw'.
   Note the argument order 'k, k_user, k_main' - there is no 'k_item' here,
   unlike in the warm-start and implicit variants.

   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.

   NOTE(review): 'a_vec' is presumably the output factor vector, 'col_means'
   presumably the column means used to center 'u_vec', and 'u_bin_vec' / 'Cb'
   presumably relate to binary side-information columns - confirm against
   the definition. */
int_t collective_factors_cold
(
real_t *restrict a_vec,
real_t *restrict u_vec, int_t p,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
real_t *restrict u_bin_vec, int_t pbin,
real_t *restrict C, real_t *restrict Cb,
real_t *restrict TransCtCinvCt,
real_t *restrict CtCw,
real_t *restrict col_means,
int_t k, int_t k_user, int_t k_main,
real_t lam, real_t w_main, real_t w_user,
bool NA_as_zero_U
);
/* Prototype of collective_factors_cold_implicit (defined elsewhere).

   Returns an 'int_t'.
   NOTE(review): presumably a status code (0 on success); the meaning of the
   return value is not visible from this declaration - confirm.

   As in collective_factors_cold, no X data is passed; side information
   comes as a dense 'u_vec' (with 'p') and a sparse (values, indices, count)
   triplet. Differences from collective_factors_cold visible in the
   signature: there are no binary-column arguments ('u_bin_vec', 'pbin',
   'Cb'), the matrix 'B' (with 'n') is required, the precomputed inputs are
   'BeTBe', 'BtB' and 'BeTBeChol', 'k_item' is present, and there is an
   extra scalar 'w_main_multiplier'.

   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.

   NOTE(review): 'a_vec' is presumably the output factor vector - confirm
   against the definition. */
int_t collective_factors_cold_implicit
(
real_t *restrict a_vec,
real_t *restrict u_vec, int_t p,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
real_t *restrict B, int_t n,
real_t *restrict C,
real_t *restrict BeTBe,
real_t *restrict BtB,
real_t *restrict BeTBeChol,
real_t *restrict col_means,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t lam, real_t w_main, real_t w_user, real_t w_main_multiplier,
bool NA_as_zero_U
);
/* Prototype of collective_factors_warm (defined elsewhere).

   Returns an 'int_t'.
   NOTE(review): presumably a status code (0 on success); the meaning of the
   return value is not visible from this declaration - confirm.

   In addition to the side-information arguments that collective_factors_cold
   takes ('u_vec', sparse-U triplet, 'u_bin_vec', 'C', 'Cb', 'col_means'),
   this signature also takes X data in two forms - ('Xa', 'ixB', 'nnz') and
   'Xa_dense' (with 'n') - plus 'weight', the matrices 'B' and 'Bi', bias
   related arguments ('a_bias', 'glob_mean', 'biasB', 'lam_bias',
   'B_plus_bias'), and several precomputed matrices.

   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.

   NOTE(review): going by names only - confirm against the definition:
     - 'a_vec' and 'a_bias' presumably are outputs (factor vector and its
       scalar bias);
     - 'Xa_dense' vs. ('Xa', 'ixB', 'nnz') presumably are alternative dense
       and sparse forms of the same X row;
     - 'n_max' and 'include_all_X' presumably control how many columns of
       'B' take part in the solution;
     - the trailing precomputed pointers presumably may be omitted (NULL),
       but nullability is not expressed in this declaration. */
int_t collective_factors_warm
(
real_t *restrict a_vec, real_t *restrict a_bias,
real_t *restrict u_vec, int_t p,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
real_t *restrict u_bin_vec, int_t pbin,
real_t *restrict C, real_t *restrict Cb,
real_t glob_mean, real_t *restrict biasB,
real_t *restrict col_means,
real_t *restrict Xa, int_t ixB[], size_t nnz,
real_t *restrict Xa_dense, int_t n,
real_t *restrict weight,
real_t *restrict B,
real_t *restrict Bi, bool add_implicit_features,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t lam, real_t w_main, real_t w_user, real_t w_implicit,real_t lam_bias,
int_t n_max, bool include_all_X,
real_t *restrict TransBtBinvBt,
real_t *restrict BtB,
real_t *restrict BeTBeChol,
real_t *restrict BiTBi,
real_t *restrict CtCw,
bool NA_as_zero_U, bool NA_as_zero_X,
real_t *restrict B_plus_bias
);
/* Prototype of collective_factors_warm_implicit (defined elsewhere).

   Returns an 'int_t'.
   NOTE(review): presumably a status code (0 on success); the meaning of the
   return value is not visible from this declaration - confirm.

   Differences from collective_factors_warm visible in the signature: X is
   accepted only as the ('Xa', 'ixB', 'nnz') triplet (no dense form, no
   'weight'), there are no bias or binary-column arguments, there is an
   extra scalar 'alpha' and an extra 'w_main_multiplier', and the precomputed
   inputs are 'BeTBe', 'BtB' and 'BeTBeChol'.

   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.
   'Xa' is not const-qualified, so the declaration does not rule out the
   callee modifying the caller's values in place.

   NOTE(review): 'a_vec' is presumably the output factor vector and 'alpha'
   presumably the confidence multiplier applied to the values in 'Xa' -
   confirm against the definition. */
int_t collective_factors_warm_implicit
(
real_t *restrict a_vec,
real_t *restrict u_vec, int_t p,
real_t *restrict u_vec_sp, int_t u_vec_ixB[], size_t nnz_u_vec,
bool NA_as_zero_U,
real_t *restrict col_means,
real_t *restrict B, int_t n, real_t *restrict C,
real_t *restrict Xa, int_t ixB[], size_t nnz,
int_t k, int_t k_user, int_t k_item, int_t k_main,
real_t lam, real_t alpha, real_t w_main, real_t w_user,
real_t w_main_multiplier,
real_t *restrict BeTBe,
real_t *restrict BtB,
real_t *restrict BeTBeChol
);
/* Prototype of fun_grad_A_collective (defined elsewhere).

   Returns a 'real_t'.
   NOTE(review): presumably the value of the objective function, with the
   gradient with respect to 'A' written into 'g_A' - confirm against the
   definition.

   X can be passed as 'Xfull' (with the flag 'full_dense') and/or as the
   ('Xcsr_p', 'Xcsr_i', 'Xcsr') triplet; U likewise as 'U' (with
   'full_dense_u') and/or ('U_csr_p', 'U_csr_i', 'U_csr'). The '*_p' arrays
   are size_t[] and the '*_i' arrays are int_t[].

   The struct data_fun_grad_Adense_col declared right after this prototype
   has fields with the same names, in the same order, as the parameters
   from 'B' through 'buffer_real_t' (i.e. everything except 'A' and 'g_A').

   All 'real_t' pointers are restrict-qualified, i.e. not expected to alias.

   NOTE(review): 'padding' and 'do_B' are not self-explanatory from the
   declaration; 'do_B' presumably signals that the roles of A and B are
   swapped by the caller - confirm against the definition. */
real_t fun_grad_A_collective
(
real_t *restrict A, real_t *restrict g_A,
real_t *restrict B, real_t *restrict C,
int_t m, int_t m_u, int_t n, int_t p,
int_t k, int_t k_main, int_t k_user, int_t k_item, int_t padding,
real_t *restrict Xfull, bool full_dense,
size_t Xcsr_p[], int_t Xcsr_i[], real_t *restrict Xcsr,
real_t *restrict weight,
size_t U_csr_p[], int_t U_csr_i[], real_t *restrict U_csr,
real_t *restrict U, bool full_dense_u,
real_t lam, real_t w_main, real_t w_user, real_t lam_last,
bool do_B,
int_t nthreads,
real_t *restrict buffer_real_t
);
typedef struct data_fun_grad_Adense_col {
real_t *B; real_t *C;
int_t m; int_t m_u; int_t n; int_t p;
int_t k; int_t k_main; int_t k_user; int_t k_item; int_t padding;
real_t *Xfull; bool full_dense;
size_t *Xcsr_p; int_t *Xcsr_i; real_t *Xcsr;
real_t *weight;
size_t *U_csr_p; int_t *U_csr_i; real_t *U_csr;
real_t *U; bool full_dense_u;
real_t lam; real_t w_main; real_t w_user; real_t lam_last;
bool do_B;
int_t nthreads;
real_t *buffer_real_t;