From a6e534af58754c84717bf4f571c04cf9ea7cae94 Mon Sep 17 00:00:00 2001
From: Julian Knoll
Date: Fri, 2 Dec 2016 09:46:20 +0000
Subject: [PATCH] version 0.1

---
 DESCRIPTION                      |  16 +++++
 MD5                              |  19 ++++++
 NAMESPACE                        |  10 +++
 R/FM.train.R                     |  15 +++++
 R/HoFM.train.R                   |  14 ++++
 R/RcppExports.R                  |  11 ++++
 R/SVM.train.R                    |  13 ++++
 R/learn.FM.model.R               |  29 +++++++++
 R/predict.FMmodel.R              |  13 ++++
 R/print.FMmodel.R                |   6 ++
 R/summary.FMmodel.R              |  13 ++++
 build/partial.rdb                | Bin 0 -> 5254 bytes
 man/010-FactoRizationMachines.Rd | 106 ++++++++++++++++++++++++++++++
 man/020-SVM.train.Rd             |  97 ++++++++++++++++++++++++++++
 man/030-FM.train.Rd              | 105 ++++++++++++++++++++++++++++++
 man/040-HoFM.train.Rd            | 107 +++++++++++++++++++++++++++++++
 man/050-predict.FMmodel.Rd       |  63 ++++++++++++++++++
 man/050-summary.FMmodel.Rd       |  55 ++++++++++++++++
 src/FactoRizationMachines.cpp    |   6 ++
 src/RcppExports.cpp              |  29 +++++++++
 20 files changed, 727 insertions(+)
 create mode 100644 DESCRIPTION
 create mode 100644 MD5
 create mode 100644 NAMESPACE
 create mode 100644 R/FM.train.R
 create mode 100644 R/HoFM.train.R
 create mode 100644 R/RcppExports.R
 create mode 100644 R/SVM.train.R
 create mode 100644 R/learn.FM.model.R
 create mode 100644 R/predict.FMmodel.R
 create mode 100644 R/print.FMmodel.R
 create mode 100644 R/summary.FMmodel.R
 create mode 100644 build/partial.rdb
 create mode 100644 man/010-FactoRizationMachines.Rd
 create mode 100644 man/020-SVM.train.Rd
 create mode 100644 man/030-FM.train.Rd
 create mode 100644 man/040-HoFM.train.Rd
 create mode 100644 man/050-predict.FMmodel.Rd
 create mode 100644 man/050-summary.FMmodel.Rd
 create mode 100644 src/FactoRizationMachines.cpp
 create mode 100644 src/RcppExports.cpp

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..eb14838
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: FactoRizationMachines
+Type: Package
+Title: Machine Learning with Higher-Order Factorization Machines
+Version: 0.1
+Date: 2016-12-01
+Author: Julian Knoll
+Maintainer: Julian Knoll
+Description: Implementation of three machine learning approaches: Support Vector Machines (SVM) with a linear kernel, second-order Factorization Machines (FM), and higher-order Factorization Machines (HoFM).
+License: CC BY-NC-ND 4.0
+Imports: Rcpp (>= 0.12.1), methods, Matrix
+LinkingTo: Rcpp
+Suggests: MASS
+NeedsCompilation: yes
+Packaged: 2016-12-01 21:23:31 UTC; Administrator
+Repository: CRAN
+Date/Publication: 2016-12-02 10:46:20
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..09432fa
--- /dev/null
+++ b/MD5
@@ -0,0 +1,19 @@
+c8cdf596c8ed5c0ff537702e46a1e6b3 *DESCRIPTION
+e3e91ea4b50daf14bf225933068871ec *NAMESPACE
+c7d441f68c9979353067e9be799eeee2 *R/FM.train.R
+eae319297f64ca4db1da8f44468f9967 *R/HoFM.train.R
+04f486702ca68e0ca3818b54888b5933 *R/RcppExports.R
+2258bbaea49855bcd8468eb9ee33c03d *R/SVM.train.R
+ffd4ff94bd20e44734d8104978ec67b2 *R/learn.FM.model.R
+9362d97edc11fac048f09c76dcdd681c *R/predict.FMmodel.R
+7ae86c7274231a96199b745b8656d20c *R/print.FMmodel.R
+55fe3612feb72bb55f90d288517272b1 *R/summary.FMmodel.R
+a9cd8206f8f4ecca8ce4cc703bab4187 *build/partial.rdb
+35ad0f3f15e32fe37ab3625d0a1f0cf8 *man/010-FactoRizationMachines.Rd
+30546a06c11d9128dd63b0245c3c99d7 *man/020-SVM.train.Rd
+bfe5bc4e4d533199975edef2efb6bbc4 *man/030-FM.train.Rd
+070336b9d1914eefd2f79e54b797669d *man/040-HoFM.train.Rd
+be251cba1dccb4a05eb19d800cba62d2 *man/050-predict.FMmodel.Rd
+bec595a6073baa2fa64d933ee8b09200 *man/050-summary.FMmodel.Rd
+7e330e6cb461806302e0be0c788412be *src/FactoRizationMachines.cpp
+b4a13b87b84867b1266789fc62901f6b *src/RcppExports.cpp
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..510119e
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,10 @@
+useDynLib(FactoRizationMachines)
+importFrom(Rcpp, evalCpp)
+importFrom(methods, as)
+import(Matrix)
+S3method(print, FMmodel)
+S3method(summary, FMmodel)
+S3method(predict, FMmodel)
+export(SVM.train)
+export(FM.train)
+export(HoFM.train)
diff --git a/R/FM.train.R b/R/FM.train.R
new file mode 100644
index 0000000..18b7e83
--- /dev/null
+++ b/R/FM.train.R
@@ -0,0 +1,15 @@
+FM.train <-
+function(data, target, factors=c(1,10), intercept=T, iter=100, regular=0, stdev=0.1){
+
+  object=list()
+  if(length(factors)>2) object$vK=factors[1:2] else object$vK=factors
+  if(length(regular)==1) regular=rep(regular,length(factors))
+  object$vLambda=regular
+  length(object$vLambda)=length(factors)
+
+
+  if(length(factors)>2) warning("FM.train only supports second-order factors -> parameter factors partly ignored\nsee command HoFM.train for higher-order support")
+
+  return(learn.FM.model(data=data, target=target, intercept=intercept, iter=iter, stdev=stdev, object=object))
+
+}
diff --git a/R/HoFM.train.R b/R/HoFM.train.R
new file mode 100644
index 0000000..29f442f
--- /dev/null
+++ b/R/HoFM.train.R
@@ -0,0 +1,14 @@
+HoFM.train <-
+function(data, target, factors=c(1,10,5), intercept=T, iter=100, regular=0, stdev=0.1){
+
+  object=list()
+  object$vK=factors
+  if(length(regular)==1) regular=rep(regular,length(factors))
+  object$vLambda=regular
+  length(object$vLambda)=length(factors)
+
+  if(length(factors)>3) warning("HoFM.train only supports up to third-order factors -> parameter factors partly ignored")
+
+  return(learn.FM.model(data=data, target=target, intercept=intercept, iter=iter, stdev=stdev, object=object))
+
+}
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..87b72f4
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,11 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+trainFM <- function(j14924k) {
+  .Call('FactoRizationMachines_trainFM', PACKAGE = 'FactoRizationMachines', j14924k)
+}
+
+predictFM <- function(j14924k) {
+  .Call('FactoRizationMachines_predictFM', PACKAGE = 'FactoRizationMachines', j14924k)
+}
+
diff --git a/R/SVM.train.R b/R/SVM.train.R
new file mode 100644
index 0000000..6bde04c
--- /dev/null
+++ b/R/SVM.train.R
@@ -0,0 +1,13 @@
+SVM.train <-
+function(data, target, factors=1, intercept=T, iter=100, regular=0, stdev=0.1){
+
+  object=list()
+  object$vK=factors[1]
+  if(object$vK==0) intercept=T
+  object$vLambda=regular[1]
+
+  if(factors[1]!=1&factors[1]!=0) warning("SVM.train does not allow factors -> parameter factors ignored")
+
+  return(learn.FM.model(data=data, target=target, intercept=intercept, iter=iter, stdev=stdev, object=object))
+
+}
diff --git a/R/learn.FM.model.R b/R/learn.FM.model.R
new file mode 100644
index 0000000..2e573a8
--- /dev/null
+++ b/R/learn.FM.model.R
@@ -0,0 +1,29 @@
+learn.FM.model <-
+function(data, target, intercept, iter, stdev, silent, object){
+
+  if(object$vK[1]>1 | object$vK[1]<0) warning("first element of factors must either be 0 or 1")
+  if(object$vK[1]>1) object$vK[1]=1
+  if(object$vK[1]<1) object$vK[1]=0
+
+  object=c(object,bIntercept=intercept,iIter=iter,dStdev=stdev)
+
+  if(is.data.frame(data)) data=as.matrix(data)
+  data=as(data,"dgTMatrix")
+  object$mX=cbind(data@i,data@j,data@x)
+  object$vY=target
+
+  if(length(target)!=nrow(data)) stop("number of training cases does not match between feature matrix and target vector")
+
+  if(!is.numeric(object$mX)) stop("feature matrix contains non-numeric elements")
+  if(!is.numeric(object$vY)) stop("target vector contains non-numeric elements")
+
+  if(any(is.na(object$vY)) | any(is.nan(object$vY)) | any(is.infinite(object$vY)) ) stop("target vector contains NA, NaN, or Inf elements")
+  if(any(is.na(object$mX)) | any(is.nan(object$mX)) | any(is.infinite(object$mX)) ) warning("feature matrix contains NA, NaN, or Inf elements")
+
+  object=trainFM(object)
+
+  if(any(is.na(object$weights)) | any(is.nan(object$weights)) | any(is.infinite(object$weights)) ) warning("model parameters contain NA, NaN, or Inf elements")
+
+  return(object)
+
+}
diff --git a/R/predict.FMmodel.R b/R/predict.FMmodel.R
new file mode 100644
index 0000000..3bc43ea
--- /dev/null
+++ b/R/predict.FMmodel.R
@@ -0,0 +1,13 @@
+predict.FMmodel <-
+function(object, newdata, truncate=T, ...){
+
+  if(is.data.frame(newdata)) newdata=as.matrix(newdata)
+  newdata=as(newdata,"dgTMatrix")
+  object$mX=cbind(newdata@i,newdata@j,newdata@x)
+  object$truncate=truncate
+
+  if(object$variables!=ncol(newdata)) stop(paste0("number of features (p=",ncol(newdata),") does not match with model (p=",object$variables,")"))
+
+  return(predictFM(object))
+
+}
diff --git a/R/print.FMmodel.R b/R/print.FMmodel.R
new file mode 100644
index 0000000..98dc67b
--- /dev/null
+++ b/R/print.FMmodel.R
@@ -0,0 +1,6 @@
+print.FMmodel <-
+function(x, ...){
+
+  summary(x)
+
+}
diff --git a/R/summary.FMmodel.R b/R/summary.FMmodel.R
new file mode 100644
index 0000000..ae90bf0
--- /dev/null
+++ b/R/summary.FMmodel.R
@@ -0,0 +1,13 @@
+summary.FMmodel <-
+function(object, ...){
+
+  cat(c(paste("\nfactorization machine model"),
+    paste("\nnumber of training examples: ",object$traincases),
+    paste("\nnumber of variables: ",object$variables),
+    paste("\nminimum of target vector: ",object$min.target),
+    paste("\nmaximum of target vector: ",object$max.target),
+    paste("\nnumber of factors: ",paste(object$factors,collapse=" "),
+    ""
+  )))
+
+}
diff --git a/build/partial.rdb b/build/partial.rdb
new file mode 100644
index
0000000000000000000000000000000000000000..c6344352061a643220bbae3b84337aadccfc1218 GIT binary patch literal 5254 zcmV;16nX0(iwFP!000002Gw2rdeg?XH@T1-;gYtrxj?2#3pVvN2{%IuxkA{aNu9u^ z8?%dSjS~@BGLjs~=A+;Kqc3t^;^>B^O7sOu;VUJ1tPy1f36_%YXb*-$?3hQ(fK3N^c1`sDvo^f>$rN@P zv;O_d(WG}xDLeY_f3drsy60#&NR_fGN{W+g%+YShkg8NSgldiL`yza?^uL$CPye%} zVSiZ`bRGBBV-w!q+SzzjHtbQG9}On`+ert8r3h8(p!0bppLwBEP0uWeRY{R_LlX>D z%PiGutAZh9o~va+&g{}^EmJV6wd}ojTfZD;3Z=Rvi{hQ^?Mmftj@}hBuGY&`1tpWs zWmB%-rVFBjWX}iDL9f_JdX}1ORSo{(=$mr}uO-*!h-8MzixnU0;eY^caLgranp;I z0K9_omF`;wx*y{Zw{FwuLk-x7TR`XN4@;*%W`(DXxP{b84V$9gA6!)(D5{G9&eEykPJ)DuF=I;vVnrV$=)O4 z5i3um^15sm)}vh5gx1mk#-5Emp<*yFjX@lfjo~un<*;<%yNev+7E&wE3E8ODWLl+) zVMUl)A;!K&DXH**mnsQ56-gE2Rkq_*sMR!;{gLW<;&(*Ds8qn`C_fK9qF&;g$Ov{0 z$W;j;iCiNjMRw}7nyMM(B?VBYEF$U>y5yxuBPs_G zB`6}Aa^<_!muh4vR>B=7Ewe}DymjBBS|MhNv#`R;U#F4 zu#);v5?Q%jI%2gr`z+L`rn8-xpIC=fqe_|Tz7;-(V*@OqT#C%$E)+$omo=#dBl4KR zK#v%^AtqG(x4tJ*(0vbwXEeS15;b6BxasBhVd;DovnFCT;ucaX8A5jVC7s}TAm+kl zxz4=AbmWRE%jyT1MOLY?uZnu!FX^kOH^2RI4=8S@(uP5$erW; zvrH6$))mBYl*c=Nq=oKA5GPQc==_luy6Z=rM0ql_TO{*j(+n75TEILF<8h)P>DQ5g z=@jA44&b$TZPNEZR8s2&f4RqKRfX zq*vT?3#gONF!&BGU_6OfK)KNU%3~6p*v4q;upptuEnnzhXa%1aPGLYzH3wR!gKG){ zY6^`s5({|P@E2TD7*JEqawmf8}7H*maC6K!P&qe2iVkxB~nZ_6hDJ_1LYf$d&g)^la!#@7O2f5K0^6XcYGs-_!#BK-SLf^h)XC3 zx?a{~fNY-4w#@)Fon#x#(I)TuK1&O1ml5Bi{630w=R5I!Yq_6^;nE%d?R`RaRU!%o zHBSRJ){(+#4(uM1j$MH4ef8Sp;J_d~^Flh@4%vOeSGl7*G6 z)in=Jj}ccHI&yvTXHgCMgUmR5cC70(LH7-9m@D1wpvGgi}KsZY}GrX z1+vs0qJ*;49ZLy_?@)dh+EP~%MAC_Fog}1U+IK1dItPfKQ2rE})-oY0s-n~Pb*dQh zQ4&0;E(?vxpvie}kLrvwT>q-7EFG9D)F>$ndVot{vzB+5xQz00=y9wN@)P~YGL=DV z)-^msC6c|c&ZN7kDlFlx2%4<2Ij(k~9+H)P0mfvdJ--3-9g+uJK{?p!2)BSaKT|DN z%f4d<$UH;bLV2t6F$3MLB5tF+9oneZ6JSLh%qUfk9Iolx@qpXUhziO|XpT1<*O;rS zMw7>>*u3vcs<0_xkZk-tIuzhdlmjhP_Q=NmJqwtm5wj>~JMUT0T^uooa~tmHG6;cjl+`0epIF z?5C7EE9Xm8+e?e|N5A|Ns6xOz%K6CLdY~xO*}+%yP#ko2>ukOH7J9OcXXMpCpayIV z=hb~-=_F&;qGB6y3#pagiMhD(S=#dCBq`9cT4e>C$lsZC!i`DrE1k_~%lqckXq6sP zS*@8>AWQHYtOk^2wiEVo=ZIB{Rh_(g^=e~dBT446xjVkoKDS0W_B+bIht@=ZbF^vf z95A|bv<5L(aqpy=RuxfZ1&+tEInz3_R9fFmlBJbRva1P-E|jeWcdFv1cQDT`E$=g1 zt9xwfF>jh+&bJ9NI9fmhWh1f_HBvlY@8=N-&|x-oV1^1v=)Q-;GfGq3+TsY(RLi2L z#xgdaw4fFnMjS(VEW+CCfF6BNHPNHzbBOk`%P+cb7wG;=4snafE?>JR1;19BN6Xl! 
z$A@E4UcYE(rFparAa}~O+g5_^XE?+yA}d{Gb}=;Rqr+G}IWC}K0H;x&?uwBX&@chO{_7yiHjggi+vVHQqjYa{SL^-;(EA0$)8;t^ZhR;QT?yqr(TSNvL zJuXUF3F?#D9kd7FRg|xG#Wr`)9)R3h>2BKwy8nhl+`3KUE^5HWaAnG9SUUI66>N-V z>A(MHZ`?;!0QR69tcdK$(siJ93Gr)`1NpFPub!9Ap}Qf(Z%~eMfv?AjPw1`}aUA9G z&`#0o|K3X=Fo9^V!wjF+{d*^O_Njy38FU$V9vu?!D$3ENc2DQbqeJE~CWa!82@mJY zqeJE~CVGR$L|Es{V{`ycq8$DP5C6{j03Gr{N1ZeHLh1+Tk`K^emx3D1pL;$)mwbSx z8H#n~E`0^RWJDAVk~SC9!dd=7!o%2 z1P+Z*G=4-4*cfg~(tBDuo|pR{#jFLyHsThLPvX?-@pzKxPhdX)Plaz$++U2~BI-;&T*tee7YvFG+5V6n{fONRpRrlGe#h z->g=V$!ep$iQRUrHSF5PXdG)A9cxoPl<0G+=p>iT9>B_9xV_TwOL85Ftw(CXH&q!} zEed+NYT7WaQ9(=_LJ6L~xo;S?d?o`=bk@^*np&^PY#eD-+snW(*eLW&6@~|Y&Q#?T z>?>1G!R6o66?ce=~{i z;1@sS66Q$E;rSlQGo=5lIV!uI5dMN)+Zatk_+i$%Fb@yIab^WWB^4^b+Bdj~qUx70 z_W((LwMt0BTM);KdQH%DYGwGiw5ZWqJeOBA^#l4tA~~N(zFRDt&YLvto-}5BFH#dY7#u!#c!+(LOP^q|HJLEEE7Bl6~bCxoAn3*Z>aW8HUBP9+gKX?&LK)JrV0 zTL&uUm&Z3Q!_5vRE8x?TV#Lj80j(vH$KtK@J>QHA$P&mh>Y~Et&Kj}a4**A+il$bN4>bhOY_j>UU&4Pwl7P|GMYr|$ z8moq>pv2?IEpDnj*~nCh6c=-ezt8!f482CWfZXDaZUYY8DouwW&L^%HVL z61gvC(>dQh{425rDC~FJA~qS2){zQNpfm>}yOpxM$ud$8*W~WG)dZflwq4 z_z2}kf93>IL6VjKyacj`bob6Sf$U?~HU>^0uD!x5(iB6hD=s}h#@?i`J%_yd2mSYO zh+9M&7%I|_=9vk%8*1zrd)FNmcwLWuhoiPJlm=Q_^%UF|>DvDeaM-aRLx&a@pv5Te z<-GN`O7&{Y6@>?YAn(ZEO1BaIqjB zQcX9XDc|_D)2l{`=GmOG(cH{w(!QctQCdkfwc!?x@Wb~c*i$T=;Alfd{YX)3x}-Oc zprzqRQK27Tc|R+DxTasXDxalrILx&%;b@T^C#|;*7_^<&4HT^>kTk2Uh1>b!p&-|J zb6;|6H4C@PR=8HPE>U;r9>AyahkoXH?|B33Vco(X;Gwo zH@UMx z9$DL3SKIjEsH;Ns8ZAqLN0MsW!C=u4N-%wIj$nsdur@L8^mN{O|1Z~K$W_|~ajVvp zpoUvDWj}}KL=Rw&Qga+WoVjWKl(|W&P+8}#bG2oFb=`ayIA15b6bDkJeMadmHq{Pp zZ2F3|gHs%!JU7_qH4j^VVZX3=m9HRr?5@T=CUMPWTXRlk&x@bFJso<2cSbZc%EDOpZ`AkuYcNq zZ%hsC7heyU3#ZD4?EvOS<5=@U&JMCG`OJ1<>q%khw_K)>DWvA-f6JxkGWx!t(M&zeo%<=W{k+iGv>~%3R+#}V^!wWq-C1!GJoAJ1advW+2mG> zqcu4feD~{iAwHXPb*HQ7LVzxpUP`Av5pjw{yHZ&C$qeo&hJD8@PCmf`PCcK=z`oOk zR83Rip}gh-P7JHX+1%$(kW@Y~!A%9+gEROQS;17`Jb3Di1?5KkP4E#4$*xudP`Y9S+1USZW9K7|x zE#GrH#-3guO?tR!>tP8`BSfKsNB&h{oyG7T{S1y6kXsxPK|btb69Fljao<1vxXmLV z>)3n585|{`%bV&0+I!5l_wln7qZ#{(f16hu6uh;-KNOa`wi +} + +\references{ + [1] J. Knoll, Recommending with Higer-Order Factorization Machines, Research and Development in Intelligent Systems XXXIII, 2016. + + [2] S. Rendle, Factorization Machines with libFM, ACM Transactions onIntelligent Systems and Technology, 3, 2012. 
+}
+
+\keyword{ package }
+\keyword{ Factorization Machine }
+\keyword{ Matrix Factorization }
+\keyword{ Machine Learning }
+\keyword{ Recommender }
+
+\seealso{
+  \code{\link{SVM.train}},
+  \code{\link{FM.train}},
+  \code{\link{HoFM.train}},
+  \code{\link{predict.FMmodel}}
+}
+
+\examples{
+\dontrun{
+
+# Load libraries
+library(FactoRizationMachines)
+library(Matrix)
+
+# Load MovieLens 100k data set
+ml100k=as.matrix(read.table("http://files.grouplens.org/datasets/movielens/ml-100k/u.data"))
+user=ml100k[,1]
+items=ml100k[,2]+max(user)
+wdays=(as.POSIXlt(ml100k[,4],origin="1970-01-01")$wday+1)+max(items)
+
+# Transform MovieLens 100k to feature form
+data=sparseMatrix(i=rep(1:nrow(ml100k),3),j=c(user,items,wdays),giveCsparse=F)
+target=ml100k[,3]
+
+# Subset data to training and test data
+set.seed(123)
+subset=sample.int(nrow(data),nrow(data)*.8)
+data.train=data[subset,]
+data.test=data[-subset,]
+target.train=target[subset]
+target.test=target[-subset]
+
+# Predict ratings with a Support Vector Machine with a linear kernel
+model=SVM.train(data.train,target.train)
+# RMSE resulting from test data prediction
+sqrt(mean((predict(model,data.test)-target.test)^2))
+
+# Predict ratings with a second-order Factorization Machine
+# with 10 second-order factors (default) and regularization
+model=FM.train(data.train,target.train,regular=0.1)
+# RMSE resulting from test data prediction
+sqrt(mean((predict(model,data.test)-target.test)^2))
+
+# Predict ratings with a higher-order Factorization Machine
+# with 3 second-order and 1 third-order factor and regularization
+model=HoFM.train(data.train,target.train,c(1,3,1),regular=0.1)
+# RMSE resulting from test data prediction
+sqrt(mean((predict(model,data.test)-target.test)^2))
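+
+# Further illustration: inspect the fitted model and compare truncated
+# with untruncated predictions (by default predict() truncates to the
+# observed target range, here ratings 1 to 5)
+summary(model)
+head(predict(model,data.test))
+head(predict(model,data.test,truncate=FALSE))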
+ } + \item{iter}{ + \code{integer}: the number of iterations the learning method is applied. + } + \item{regular}{ + \code{numeric}: regularization value for the linear weights. + } + \item{stdev}{ + \code{numeric}: the standard deviation used to initialize the model parameters. + } +} + + +\seealso{ + \code{\link{FactoRizationMachines}} +} + +\examples{ + +### Example to illustrate the usage of the method +### Data set very small and not sparse, results not representative +### Please study major example in general help 'FactoRizationMachines' + +# Load data set +library(FactoRizationMachines) +library(MASS) +data("Boston") + +# Subset data to training and test data +set.seed(123) +subset=sample.int(nrow(Boston),nrow(trees)*.8) +data.train=Boston[subset,-ncol(Boston)] +target.train=Boston[subset,ncol(Boston)] +data.test=Boston[-subset,-ncol(Boston)] +target.test=Boston[-subset,ncol(Boston)] + + +# Predict with linear weights and intercept +model=SVM.train(data.train,target.train) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with linear weights but without intercept +model=SVM.train(data.train,target.train,intercept=FALSE) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with linear weights and regularization +model=SVM.train(data.train,target.train,regular=0.1) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + +} diff --git a/man/030-FM.train.Rd b/man/030-FM.train.Rd new file mode 100644 index 0000000..de58094 --- /dev/null +++ b/man/030-FM.train.Rd @@ -0,0 +1,105 @@ +\name{FM.train} +\alias{FM.train} + +\title{ + Method training a second-order Factorization Machine +} + +\description{ + \code{FM.train} is a method training a second-order Factorization Machine. + + \code{factors} specifies the model parameters of the Factorization Machine: + the first element specifies whether linear weights are used (\code{1}) or not (\code{0}), + the second element specifies the number of parameters factorizing the second-order. + + To date the learning method alternating least squares (\code{"als"}) and the task regression (\code{"r"})is supported. + Consequently, regularization is suggested in most of the cases. + Next steps are to implement the Monte Carlo Markov Chain method (\code{"mcmc"}) to simplify regularization. + Furthermore, the task classifiction (\code{"c"}) will be supported in the future. +} + +\usage{ + FM.train(data, target, factors = c(1, 10), intercept = T, + iter = 100, regular = 0, stdev = 0.1) +} + +\arguments{ + \item{data}{ + an object of class \code{dgTMatrix}, \code{matrix} or \code{data.frame} (or an object coercible to \code{dgTMatrix}): + a matrix containing training data, each row representing a training example and each column representing a feature. + } + \item{target}{ + \code{numeric}: vector specifying the target value of each training example (length must match rows of object data). + } + \item{factors}{ + \code{numeric}: vector specifying the number of factors for each considered order: + the first element specifies whether linear weights are used (\code{1}) or not (\code{0}), + the second element specifies the number of parameters factorizing the second-order. + } + \item{intercept}{ + \code{logical}: specifying whether a global intercept is used (\code{TRUE}) or not (\code{FALSE}). + } + \item{iter}{ + \code{integer}: the number of iterations the learning method is applied. 
+ } + \item{regular}{ + \code{numeric}: regularization value for each order corresponding to factors. If one value, each order is regularized with this value, + otherwise the first element of the vector specifies the regularization value for the linear weights and the second the regularization value for the second-order factors. + } + \item{stdev}{ + \code{numeric}: the standard deviation used to initialize the model parameters. + } +} + +\references{ + [1] J. Knoll, Recommending with Higer-Order Factorization Machines, Research and Development in Intelligent Systems XXXIII, 2016. + + [2] S. Rendle, Factorization Machines with libFM, ACM Transactions onIntelligent Systems and Technology (TIST), 3, 2012. +} + +\seealso{ + \code{\link{FactoRizationMachines}} +} + +\examples{ + +### Example to illustrate the usage of the method +### Data set very small and not sparse, results not representative +### Please study major example in general help 'FactoRizationMachines' + +# Load data set +library(FactoRizationMachines) +library(MASS) +data("Boston") + +# Subset data to training and test data +set.seed(123) +subset=sample.int(nrow(Boston),nrow(trees)*.8) +data.train=Boston[subset,-ncol(Boston)] +target.train=Boston[subset,ncol(Boston)] +data.test=Boston[-subset,-ncol(Boston)] +target.test=Boston[-subset,ncol(Boston)] + + +# Predict with 3 second-order factors +model=FM.train(data.train,target.train,c(1,3)) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with 10 second-order factor +model=FM.train(data.train,target.train) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with 10 second-order factor +# and regularization +model=FM.train(data.train,target.train,regular=0.1) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + +} diff --git a/man/040-HoFM.train.Rd b/man/040-HoFM.train.Rd new file mode 100644 index 0000000..d643c9a --- /dev/null +++ b/man/040-HoFM.train.Rd @@ -0,0 +1,107 @@ +\name{HoFM.train} +\alias{HoFM.train} + +\title{ + Method training a higher-order Factorization Machine +} + +\description{ + \code{HoFM.train} is a method training a higher-order Factorization Machine. + + \code{factors} specifies the model parameters of the Factorization Machine: + the first element specifies whether linear weights are used (\code{1}) or not (\code{0}), + the second element specifies the number of parameters factorizing the second-order, + the third element specifies the number of parameters factorizing the third-order. + + To date the learning method alternating least squares (\code{"als"}) and the task regression (\code{"r"})is supported. + Consequently, regularization is suggested in most of the cases. + Next steps are to implement the Monte Carlo Markov Chain method (\code{"mcmc"}) to simplify regularization. + Furthermore, the task classifiction (\code{"c"}) will be supported in the future. +} + +\usage{ + HoFM.train(data, target, factors = c(1, 10, 5), intercept = T, + iter = 100, regular = 0, stdev = 0.1) +} + +\arguments{ + \item{data}{ + an object of class \code{dgTMatrix}, \code{matrix} or \code{data.frame} (or an object coercible to \code{dgTMatrix}): + a matrix containing training data, each row representing a training example and each column representing a feature. + } + \item{target}{ + \code{numeric}: vector specifying the target value of each training example (length must match rows of object data). 
+ } + \item{factors}{ + \code{numeric}: vector specifying the number of factors for each considered order: + the first element specifies whether linear weights are used (\code{1}) or not (\code{0}), + the second element specifies the number of parameters factorizing the second-order, + the third element specifies the number of parameters factorizing the third-order. + } + \item{intercept}{ + \code{logical}: specifying whether a global intercept is used (\code{TRUE}) or not (\code{FALSE}). + } + \item{iter}{ + \code{integer}: the number of iterations the learning method is applied. + } + \item{regular}{ + \code{numeric}: regularization value for each order corresponding to factors. If one value, each order is regularized with this value, + otherwise each element of the vector specifies the regularization value of the corresponding order. + } + \item{stdev}{ + \code{numeric}: the standard deviation used to initialize the model parameters. + } +} + +\references{ + [1] J. Knoll, Recommending with Higer-Order Factorization Machines, Research and Development in Intelligent Systems XXXIII, 2016. + + [2] S. Rendle, Factorization Machines with libFM, ACM Transactions onIntelligent Systems and Technology (TIST), 3, 2012. +} + +\seealso{ + \code{\link{FactoRizationMachines}} +} + +\examples{ + +### Example to illustrate the usage of the method +### Data set very small and not sparse, results not representative +### Please study major example in general help 'FactoRizationMachines' + +# Load data set +library(FactoRizationMachines) +library(MASS) +data("Boston") + +# Subset data to training and test data +set.seed(123) +subset=sample.int(nrow(Boston),nrow(trees)*.8) +data.train=Boston[subset,-ncol(Boston)] +target.train=Boston[subset,ncol(Boston)] +data.test=Boston[-subset,-ncol(Boston)] +target.test=Boston[-subset,ncol(Boston)] + + +# Predict with 7 second-order and 2 third-order factors +model=HoFM.train(data.train,target.train,c(1,7,2)) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with 10 second-order and 5 third-order factor +model=HoFM.train(data.train,target.train) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + + +# Predict with 10 second-order and 5 third-order factor +# and regulariztion +model=HoFM.train(data.train,target.train,regular=0.1) + +# RMSE resulting from test data prediction +sqrt(mean((predict(model,data.test)-target.test)^2)) + +} diff --git a/man/050-predict.FMmodel.Rd b/man/050-predict.FMmodel.Rd new file mode 100644 index 0000000..5afc493 --- /dev/null +++ b/man/050-predict.FMmodel.Rd @@ -0,0 +1,63 @@ +\name{predict.FMmodel} +\alias{predict.FMmodel} + +\title{ + Predict Method for FMmodel Objects +} + +\description{ + Function for predicting new data based on a FMmodel object +} + +\usage{ + \method{predict}{FMmodel}(object, newdata, truncate = T, ...) 
+
+}
diff --git a/man/050-predict.FMmodel.Rd b/man/050-predict.FMmodel.Rd
new file mode 100644
index 0000000..5afc493
--- /dev/null
+++ b/man/050-predict.FMmodel.Rd
@@ -0,0 +1,63 @@
+\name{predict.FMmodel}
+\alias{predict.FMmodel}
+
+\title{
+  Predict Method for FMmodel Objects
+}
+
+\description{
+  Function for predicting new data based on an FMmodel object.
+}
+
+\usage{
+  \method{predict}{FMmodel}(object, newdata, truncate = T, ...)
+}
+
+\arguments{
+  \item{object}{
+    an FMmodel object (output of \code{\link{SVM.train}}, \code{\link{FM.train}}, or \code{\link{HoFM.train}})
+  }
+  \item{newdata}{
+    new data for prediction based on the FMmodel object (number of features must match the features of the training data)
+  }
+  \item{truncate}{
+    \code{logical}: indicating whether the output should be truncated to the range of the training target (\code{TRUE}) or not (\code{FALSE})
+  }
+  \item{\dots}{
+    additional arguments
+  }
+}
+
+\seealso{
+  \code{\link{SVM.train}},
+  \code{\link{FM.train}},
+  \code{\link{HoFM.train}}
+}
+
+\examples{
+
+### Example to illustrate the usage of the method
+### Data set very small and not sparse, results not representative
+### Please study the main example in the general help 'FactoRizationMachines'
+
+# Load data set
+library(FactoRizationMachines)
+library(MASS)
+data("Boston")
+
+# Subset data to training and test data
+set.seed(123)
+subset=sample.int(nrow(Boston),nrow(Boston)*.8)
+data.train=Boston[subset,-ncol(Boston)]
+target.train=Boston[subset,ncol(Boston)]
+data.test=Boston[-subset,-ncol(Boston)]
+target.test=Boston[-subset,ncol(Boston)]
+
+
+# Predict with 10 second-order and 5 third-order factors
+model=HoFM.train(data.train,target.train)
+
+# RMSE resulting from test data prediction
+sqrt(mean((predict(model,data.test)-target.test)^2))
+
+}
diff --git a/man/050-summary.FMmodel.Rd b/man/050-summary.FMmodel.Rd
new file mode 100644
index 0000000..8122c91
--- /dev/null
+++ b/man/050-summary.FMmodel.Rd
@@ -0,0 +1,55 @@
+\name{summary.FMmodel}
+\alias{summary.FMmodel}
+\alias{print.FMmodel}
+
+\title{
+  Summary and Print Method for FMmodel Objects
+}
+
+\description{
+  Function generating the summary of an FMmodel object.
+}
+
+\details{
+  The summary contains, for instance:
+
+  - the number of training examples the model was built with,
+
+  - the number of variables (features) the model considers,
+
+  - the minimum value of the target vector elements (to truncate the prediction),
+
+  - the maximum value of the target vector elements (to truncate the prediction),
+
+  - the number of factors for each considered order:
+    the first element specifies whether linear weights are used (\code{1}) or not (\code{0}),
+    the second element specifies the number of parameters factorizing the second-order,
+    the third element specifies the number of parameters factorizing the third-order.
+}
+
+\usage{
+\method{summary}{FMmodel}(object, ...)
+
+\method{print}{FMmodel}(x, ...)
+} + +\arguments{ + + \item{object}{ + a FMmodel object (output of \code{\link{SVM.train}}, \code{\link{FM.train}}, or \code{\link{HoFM.train}}) + } + + \item{x}{ + a FMmodel object (output of \code{\link{SVM.train}}, \code{\link{FM.train}}, or \code{\link{HoFM.train}}) + } + + \item{\dots}{ + additional arguments + } +} + +\seealso{ + \code{\link{SVM.train}}, + \code{\link{FM.train}}, + \code{\link{HoFM.train}} +} \ No newline at end of file diff --git a/src/FactoRizationMachines.cpp b/src/FactoRizationMachines.cpp new file mode 100644 index 0000000..72d2ac5 --- /dev/null +++ b/src/FactoRizationMachines.cpp @@ -0,0 +1,6 @@ +#include +using namespace Rcpp; +// [[Rcpp::export]] +List trainFM(List j14924k) { int j17031k=(j14924k["iIter"]); double j14311k=(j14924k["dStdev"]); std::vector j12681k = as< std::vector >(j14924k["vK"]); bool j11769k=(j14924k["bIntercept"]); std::vector j19342k = as< std::vector >(j14924k["vLambda"]); NumericMatrix j17949k = as< NumericMatrix >(j14924k["mX"]); std::vector j19422k = as< std::vector >(j14924k["vY"]); if(j12681k.size()<10) j12681k.resize(10,0); int *j14133k; double* j11938k; double* j14250k; double* j11476k; double* j17049k; std::vector* j17565k; double j19107k, j17166k, j11365k; int j14913k; int j16079k=j12681k.size(); std::vector j14850k(j17949k.nrow()); std::vector< std::vector > j11344k(j16079k); std::vector< std::vector > j11546k(j16079k); for(int j11407k=0;j11407k >j17696k(j12924k); std::vector< std::vector >j15146k(j12924k); for(int j17168k = 0; j17168k < j17949k.nrow(); j17168k++ ){ j17696k[j17949k(j17168k,1)].push_back(j17949k(j17168k,0)); j15146k[j17949k(j17168k,1)].push_back(j17949k(j17168k,2)); } double j15306k=*(std::min_element(j19422k.begin(),j19422k.end())); double j19648k=*(std::max_element(j19422k.begin(),j19422k.end())); j19107k=0; for(int j12046k=0;j12046k j11469k(j19107k*j12924k+1,0); std::vector j16615k(j11469k.size(),0); for(unsigned int j11407k=0;j11407k 0.27846) || ((j14250k2*j14250k2) > (-4.0*j18591k*j18591k*std::log(j18591k)))); j11469k[j11407k]=j16026k+(j17655k*(j14250k2/j18591k)); } std::vector< std::vector< std::vector< std::vector > > > j12192k; j12192k.resize(j16079k); for(int j11407k = 0; j11407k < j16079k; j11407k++ ) { j12192k[j11407k].resize(j12681k[j11407k]); for(int j17883k = 0; j17883k < j12681k[j11407k]; j17883k++ ) { j12192k[j11407k][j17883k].resize(j17156k); for(int j10054k = 0; j10054k < j17156k; j10054k++ ){ j12192k[j11407k][j17883k][j10054k].resize(j11407k+1); for(int j19554k = 0; j19554k < j11407k+1; j19554k++ ) j12192k[j11407k][j17883k][j10054k][j19554k]=0; } } } for(unsigned int j11407k = 0; j11407k < j12192k.size(); j11407k++ ) { for(unsigned int j17883k = 0; j17883k < j12192k[j11407k].size(); j17883k++ ) { for(unsigned int j10054k = 0; j10054k < j12192k[j11407k][j17883k].size(); j10054k++ ){ for(unsigned int j19554k = 0; j19554k < j12192k[j11407k][j17883k][j10054k].size(); j19554k++ ) j12192k[j11407k][j17883k][j10054k][j19554k]=0; } } } j14913k=0; for(int j12046k = 0; j12046k < j16079k; j12046k++ ) { for( int j19452k = 0; j19452k < j12681k[j12046k]; j19452k++ ) { for( int j16856k = 0; j16856k < j12924k; j16856k++ ) { j14913k++; j14250k=&j11469k[j14913k]; for( unsigned int j17168k = 0; j17168k < j17696k[j16856k].size(); j17168k++ ) { j14133k=&j17696k[j16856k][j17168k]; j11938k=&j15146k[j16856k][j17168k]; j17565k=&j12192k[j12046k][j19452k][*j14133k]; for( unsigned int j19543k = 1; j19543k <= (*j17565k).size(); j19543k++ ) { (*j17565k)[j19543k-1]+=pow(*j11938k,j19543k)*pow(*j14250k,j19543k); } } } } } 
std::vector< std::vector >* j19954k; std::vector j15571k(j17156k,j11469k[0]); for(int j12046k = 0; j12046k < j16079k; j12046k++ ) { for( int j19452k = 0; j19452k < j12681k[j12046k]; j19452k++ ) { j19954k=&j12192k[j12046k][j19452k]; if(j12046k==0) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(*j19954k)[j17168k][0]; if(j12046k==1) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(1.0/2.0)*((1*pow((*j19954k)[j17168k][0],2))+(-1*pow((*j19954k)[j17168k][1],1))); if(j12046k==2) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(1.0/6.0)*((1*pow((*j19954k)[j17168k][0],3))+(-3*pow((*j19954k)[j17168k][1],1)*pow((*j19954k)[j17168k][0],1))+(2*pow((*j19954k)[j17168k][2],1))); } } std::vector j16390k(j17156k,0); for(int j17168k=0;j17168k0;j11407k--) if(j12681k[j11407k]==0) j12681k.resize(j11407k); else break; List j11556k=Rcpp::List::create(Rcpp::Named("weights") = j11469k, Rcpp::Named("factors") = j12681k, Rcpp::Named("variables") = j12924k, Rcpp::Named("traincases") = j17156k, Rcpp::Named("min.target") = j15306k, Rcpp::Named("max.target") = j19648k); j11556k.attr("class") = "FMmodel"; return j11556k; } +// [[Rcpp::export]] +NumericVector predictFM(List j14924k) { NumericMatrix j17949k = as< NumericMatrix >(j14924k["mX"]); std::vector j12681k = as< std::vector >(j14924k["factors"]); std::vector j11469k = as< std::vector >(j14924k["weights"]); bool j18933k=(j14924k["truncate"]); double j15306k=(j14924k["min.target"]); double j19648k=(j14924k["max.target"]); double j12924k=(j14924k["variables"]); std::vector* j17565k; int j16079k=j12681k.size(); int *j14133k; double* j11938k; double* j14250k; int j14913k; int j17156k=0; for(int j17168k = 0; j17168k < j17949k.nrow(); j17168k++ ){ if(j17156k >j17696k(j12924k); std::vector< std::vector >j15146k(j12924k); for(int j17168k = 0; j17168k < j17949k.nrow(); j17168k++ ){ j17696k[j17949k(j17168k,1)].push_back(j17949k(j17168k,0)); j15146k[j17949k(j17168k,1)].push_back(j17949k(j17168k,2)); } std::vector< std::vector< std::vector< std::vector > > > j12192k; j12192k.resize(j16079k); for(int j11407k = 0; j11407k < j16079k; j11407k++ ) { j12192k[j11407k].resize(j12681k[j11407k]); for(int j17883k = 0; j17883k < j12681k[j11407k]; j17883k++ ) { j12192k[j11407k][j17883k].resize(j17156k); for(int j10054k = 0; j10054k < j17156k; j10054k++ ){ j12192k[j11407k][j17883k][j10054k].resize(j11407k+1); for(int j19554k = 0; j19554k < j11407k+1; j19554k++ ) j12192k[j11407k][j17883k][j10054k][j19554k]=0; } } } for(unsigned int j11407k = 0; j11407k < j12192k.size(); j11407k++ ) { for(unsigned int j17883k = 0; j17883k < j12192k[j11407k].size(); j17883k++ ) { for(unsigned int j10054k = 0; j10054k < j12192k[j11407k][j17883k].size(); j10054k++ ){ for(unsigned int j19554k = 0; j19554k < j12192k[j11407k][j17883k][j10054k].size(); j19554k++ ) j12192k[j11407k][j17883k][j10054k][j19554k]=0; } } } j14913k=0; for(int j12046k = 0; j12046k < j16079k; j12046k++ ) { for( int j19452k = 0; j19452k < j12681k[j12046k]; j19452k++ ) { for( int j16856k = 0; j16856k < j12924k; j16856k++ ) { j14913k++; j14250k=&j11469k[j14913k]; for( unsigned int j17168k = 0; j17168k < j17696k[j16856k].size(); j17168k++ ) { j14133k=&j17696k[j16856k][j17168k]; j11938k=&j15146k[j16856k][j17168k]; j17565k=&j12192k[j12046k][j19452k][*j14133k]; for( unsigned int j19543k = 1; j19543k <= (*j17565k).size(); j19543k++ ) { (*j17565k)[j19543k-1]+=pow(*j11938k,j19543k)*pow(*j14250k,j19543k); } } } } } NumericVector 
j15571k(j17156k,j11469k[0]); std::vector< std::vector >* j19954k; for(int j12046k = 0; j12046k < j16079k; j12046k++ ) { for( int j19452k = 0; j19452k < j12681k[j12046k]; j19452k++ ) { j19954k=&j12192k[j12046k][j19452k]; if(j12046k==0) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(*j19954k)[j17168k][0]; if(j12046k==1) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(1.0/2.0)*((1*pow((*j19954k)[j17168k][0],2))+(-1*pow((*j19954k)[j17168k][1],1))); if(j12046k==2) for(unsigned int j17168k = 0; j17168k < (*j19954k).size(); j17168k++ ) j15571k[j17168k]+=(1.0/6.0)*((1*pow((*j19954k)[j17168k][0],3))+(-3*pow((*j19954k)[j17168k][1],1)*pow((*j19954k)[j17168k][0],1))+(2*pow((*j19954k)[j17168k][2],1))); } } if(j18933k){ for(int j17168k=0;j17168kj19648k) j15571k[j17168k]=j19648k; if(j15571k[j17168k] do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// trainFM +List trainFM(List j14924k); +RcppExport SEXP FactoRizationMachines_trainFM(SEXP j14924kSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< List >::type j14924k(j14924kSEXP); + rcpp_result_gen = Rcpp::wrap(trainFM(j14924k)); + return rcpp_result_gen; +END_RCPP +} +// predictFM +NumericVector predictFM(List j14924k); +RcppExport SEXP FactoRizationMachines_predictFM(SEXP j14924kSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< List >::type j14924k(j14924kSEXP); + rcpp_result_gen = Rcpp::wrap(predictFM(j14924k)); + return rcpp_result_gen; +END_RCPP +}