Skip to content

Commit

Permalink
version 0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Julian Knoll authored and cran-robot committed Dec 2, 2016
0 parents commit a6e534a
Show file tree
Hide file tree
Showing 20 changed files with 727 additions and 0 deletions.
16 changes: 16 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,16 @@
Package: FactoRizationMachines
Type: Package
Title: Machine Learning with Higher-Order Factorization Machines
Version: 0.1
Date: 2016-12-01
Author: Julian Knoll
Maintainer: Julian Knoll <julian.knoll@th-nuernberg.de>
Description: Implementation of three machine learning approaches: Support Vector Machines (SVM) with a linear kernel, second-order Factorization Machines (FM), and higher-order Factorization Machines (HoFM).
License: CC BY-NC-ND 4.0
Imports: Rcpp (>= 0.12.1), methods, Matrix
LinkingTo: Rcpp
Suggests: MASS
NeedsCompilation: yes
Packaged: 2016-12-01 21:23:31 UTC; Administrator
Repository: CRAN
Date/Publication: 2016-12-02 10:46:20
19 changes: 19 additions & 0 deletions MD5
@@ -0,0 +1,19 @@
c8cdf596c8ed5c0ff537702e46a1e6b3 *DESCRIPTION
e3e91ea4b50daf14bf225933068871ec *NAMESPACE
c7d441f68c9979353067e9be799eeee2 *R/FM.train.R
eae319297f64ca4db1da8f44468f9967 *R/HoFM.train.R
04f486702ca68e0ca3818b54888b5933 *R/RcppExports.R
2258bbaea49855bcd8468eb9ee33c03d *R/SVM.train.R
ffd4ff94bd20e44734d8104978ec67b2 *R/learn.FM.model.R
9362d97edc11fac048f09c76dcdd681c *R/predict.FMmodel.R
7ae86c7274231a96199b745b8656d20c *R/print.FMmodel.R
55fe3612feb72bb55f90d288517272b1 *R/summary.FMmodel.R
a9cd8206f8f4ecca8ce4cc703bab4187 *build/partial.rdb
35ad0f3f15e32fe37ab3625d0a1f0cf8 *man/010-FactoRizationMachines.Rd
30546a06c11d9128dd63b0245c3c99d7 *man/020-SVM.train.Rd
bfe5bc4e4d533199975edef2efb6bbc4 *man/030-FM.train.Rd
070336b9d1914eefd2f79e54b797669d *man/040-HoFM.train.Rd
be251cba1dccb4a05eb19d800cba62d2 *man/050-predict.FMmodel.Rd
bec595a6073baa2fa64d933ee8b09200 *man/050-summary.FMmodel.Rd
7e330e6cb461806302e0be0c788412be *src/FactoRizationMachines.cpp
b4a13b87b84867b1266789fc62901f6b *src/RcppExports.cpp
10 changes: 10 additions & 0 deletions NAMESPACE
@@ -0,0 +1,10 @@
useDynLib(FactoRizationMachines)
importFrom(Rcpp, evalCpp)
importFrom(methods, as)
import(Matrix)
S3method(print, FMmodel)
S3method(summary, FMmodel)
S3method(predict, FMmodel)
export(SVM.train)
export(FM.train)
export(HoFM.train)
15 changes: 15 additions & 0 deletions R/FM.train.R
@@ -0,0 +1,15 @@
FM.train <-
function(data, target, factors = c(1, 10), intercept = TRUE, iter = 100,
         regular = 0, stdev = 0.1) {
  # Train a second-order Factorization Machine (FM).
  #
  # Args:
  #   data:      feature matrix (dgTMatrix, matrix, or data.frame), one row
  #              per training example.
  #   target:    numeric target vector; length must equal nrow(data).
  #   factors:   c(k1, k2): k1 in {0, 1} toggles linear weights, k2 is the
  #              number of second-order factors.
  #   intercept: use a global intercept?
  #   iter:      number of learning iterations.
  #   regular:   regularization value(s); a scalar is recycled per order.
  #   stdev:     standard deviation used to initialize model parameters.
  #
  # Returns: the trained model object produced by learn.FM.model().

  # Warn up front, before silently truncating to second order below.
  if (length(factors) > 2) {
    warning("FM.train only supports second-order factors -> parameter factors partly ignored\nsee command HoFM.train for higher-order support")
  }

  object <- list()
  object$vK <- if (length(factors) > 2) factors[1:2] else factors
  if (length(regular) == 1) regular <- rep(regular, length(factors))
  object$vLambda <- regular
  # NOTE(review): vLambda keeps length(factors) even when vK was truncated to
  # two entries — preserved from the original; confirm the C++ side expects it.
  length(object$vLambda) <- length(factors)

  learn.FM.model(data = data, target = target, intercept = intercept,
                 iter = iter, stdev = stdev, object = object)
}
14 changes: 14 additions & 0 deletions R/HoFM.train.R
@@ -0,0 +1,14 @@
HoFM.train <-
function(data, target, factors = c(1, 10, 5), intercept = TRUE, iter = 100,
         regular = 0, stdev = 0.1) {
  # Train a higher-order (up to third-order) Factorization Machine.
  #
  # Args:
  #   data:      feature matrix (dgTMatrix, matrix, or data.frame), one row
  #              per training example.
  #   target:    numeric target vector; length must equal nrow(data).
  #   factors:   c(k1, k2, k3): k1 in {0, 1} toggles linear weights, k2 the
  #              number of second-order factors, k3 the third-order factors.
  #   intercept: use a global intercept?
  #   iter:      number of learning iterations.
  #   regular:   regularization value(s); a scalar is recycled per order.
  #   stdev:     standard deviation used to initialize model parameters.
  #
  # Returns: the trained model object produced by learn.FM.model().

  if (length(factors) > 3) {
    warning("HoFM.train only supports up to third-order factors -> parameter factors partly ignored")
  }

  object <- list()
  object$vK <- factors
  if (length(regular) == 1) regular <- rep(regular, length(factors))
  object$vLambda <- regular
  length(object$vLambda) <- length(factors)

  learn.FM.model(data = data, target = target, intercept = intercept,
                 iter = iter, stdev = stdev, object = object)
}
11 changes: 11 additions & 0 deletions R/RcppExports.R
@@ -0,0 +1,11 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Rcpp-generated wrapper (do not edit by hand; see header note): forwards the
# model list built by learn.FM.model() to the compiled C++ training routine.
trainFM <- function(j14924k) {
.Call('FactoRizationMachines_trainFM', PACKAGE = 'FactoRizationMachines', j14924k)
}

# Rcpp-generated wrapper (do not edit by hand; see header note): forwards the
# model list prepared by predict.FMmodel() to the compiled C++ prediction routine.
predictFM <- function(j14924k) {
.Call('FactoRizationMachines_predictFM', PACKAGE = 'FactoRizationMachines', j14924k)
}

13 changes: 13 additions & 0 deletions R/SVM.train.R
@@ -0,0 +1,13 @@
SVM.train <-
function(data, target, factors = 1, intercept = TRUE, iter = 100, regular = 0,
         stdev = 0.1) {
  # Train a Support Vector Machine with a linear kernel.
  #
  # Args:
  #   data:      feature matrix (dgTMatrix, matrix, or data.frame), one row
  #              per training example.
  #   target:    numeric target vector; length must equal nrow(data).
  #   factors:   0 or 1 — whether linear weights are used.
  #   intercept: use a global intercept? (forced to TRUE when factors == 0)
  #   iter:      number of learning iterations.
  #   regular:   regularization value for the linear weights.
  #   stdev:     standard deviation used to initialize model parameters.
  #
  # Returns: the trained model object produced by learn.FM.model().

  # Scalar condition -> use && (short-circuit), not elementwise &.
  if (factors[1] != 1 && factors[1] != 0) {
    warning("SVM.train does not allow factors -> parameter factors ignored")
  }

  object <- list()
  object$vK <- factors[1]
  # Without linear weights the model must at least have a global intercept.
  if (object$vK == 0) intercept <- TRUE
  object$vLambda <- regular[1]

  learn.FM.model(data = data, target = target, intercept = intercept,
                 iter = iter, stdev = stdev, object = object)
}
29 changes: 29 additions & 0 deletions R/learn.FM.model.R
@@ -0,0 +1,29 @@
learn.FM.model <-
function(data, target, intercept, iter, stdev, silent, object) {
  # Internal work-horse shared by SVM.train / FM.train / HoFM.train:
  # validates inputs, converts the feature matrix to sparse-triplet form,
  # and hands everything to the compiled trainFM() routine.
  #
  # Args:
  #   data:      feature matrix (coercible to dgTMatrix).
  #   target:    numeric target vector, one value per row of data.
  #   intercept: logical, global intercept flag (stored as bIntercept).
  #   iter:      number of learning iterations (stored as iIter).
  #   stdev:     parameter-initialization standard deviation (stored as dStdev).
  #   silent:    accepted but never evaluated (no caller passes it); kept only
  #              for interface compatibility — lazy evaluation makes the
  #              missing argument harmless.
  #   object:    list carrying vK (factor counts) and vLambda (regularization).
  #
  # Returns: the trained model object as returned by trainFM().

  # Bug fix: warning message read "fist" instead of "first".
  if (object$vK[1] > 1 || object$vK[1] < 0) {
    warning("first element of factors must either be 0 or 1")
  }
  # Clamp the linear-weight flag to {0, 1}.
  if (object$vK[1] > 1) object$vK[1] <- 1
  if (object$vK[1] < 1) object$vK[1] <- 0

  object <- c(object, bIntercept = intercept, iIter = iter, dStdev = stdev)

  if (is.data.frame(data)) data <- as.matrix(data)
  # Triplet representation: the C++ backend consumes (i, j, x) columns
  # taken from the dgTMatrix slots (0-based row/column indices).
  data <- as(data, "dgTMatrix")

  # Fail early, before building the triplet matrix.
  if (length(target) != nrow(data)) {
    stop("number of training cases does not match between feature matrix and target vector")
  }

  object$mX <- cbind(data@i, data@j, data@x)
  object$vY <- target

  if (!is.numeric(object$mX)) stop("feature matrix contains non-numeric elements")
  if (!is.numeric(object$vY)) stop("target vector contains non-numeric elements")

  # is.na() already covers NaN for numeric vectors; is.infinite() adds +/-Inf.
  if (any(is.na(object$vY)) || any(is.infinite(object$vY))) {
    stop("target vector contains na, nan, or inf element")
  }
  if (any(is.na(object$mX)) || any(is.infinite(object$mX))) {
    warning("feature matrix contains na, nan, or inf element")
  }

  object <- trainFM(object)

  if (any(is.na(object$weights)) || any(is.infinite(object$weights))) {
    warning("model parameter contain na, nan, or inf element")
  }

  object
}
13 changes: 13 additions & 0 deletions R/predict.FMmodel.R
@@ -0,0 +1,13 @@
predict.FMmodel <-
function(object, newdata, truncate = TRUE, ...) {
  # Predict target values for new data with a trained FMmodel.
  #
  # Args:
  #   object:   a trained FMmodel (as returned by the *.train functions).
  #   newdata:  feature matrix (coercible to dgTMatrix); column count must
  #             match the number of features the model was trained on.
  #   truncate: passed through to the compiled routine (presumably clips
  #             predictions to the training target range — confirm in C++).
  #   ...:      ignored; present for S3 predict() compatibility.
  #
  # Returns: the prediction result of the compiled predictFM() routine.

  if (is.data.frame(newdata)) newdata <- as.matrix(newdata)
  newdata <- as(newdata, "dgTMatrix")

  # Fail early, before attaching the triplet matrix to the model object.
  if (object$variables != ncol(newdata)) {
    stop(paste0("number of features (p=", ncol(newdata),
                ") does not match with model (p=", object$variables, ")"))
  }

  object$mX <- cbind(newdata@i, newdata@j, newdata@x)
  object$truncate <- truncate

  predictFM(object)
}
6 changes: 6 additions & 0 deletions R/print.FMmodel.R
@@ -0,0 +1,6 @@
print.FMmodel <-
function(x, ...) {
  # Print method for FMmodel objects: delegates to summary() for the output.
  # S3 print methods should return their argument invisibly so the object
  # pipes/assigns cleanly; the original returned summary()'s value instead.
  summary(x)
  invisible(x)
}
13 changes: 13 additions & 0 deletions R/summary.FMmodel.R
@@ -0,0 +1,13 @@
summary.FMmodel <-
function(object, ...) {
  # Print a human-readable summary of a trained FMmodel.
  # Bug fix: in the original, a misplaced parenthesis nested the trailing ""
  # inside the last paste() call instead of making it the final element of
  # the character vector passed to cat().
  cat(c(paste("\nfactorization machine model"),
        paste("\nnumber of training examples: ", object$traincases),
        paste("\nnumber of variables: ", object$variables),
        paste("\nminimum of target vector: ", object$min.target),
        paste("\nmaximum of target vector: ", object$max.target),
        paste("\nnumber of factors: ", paste(object$factors, collapse = " ")),
        ""))
  # Summary methods conventionally return their input invisibly.
  invisible(object)
}
Binary file added build/partial.rdb
Binary file not shown.
106 changes: 106 additions & 0 deletions man/010-FactoRizationMachines.Rd
@@ -0,0 +1,106 @@
\name{FactoRizationMachines}
\alias{FactoRizationMachines}
\docType{package}

\title{
\packageTitle{FactoRizationMachines}
}

\description{
Implementation of three factorization-based machine learning approaches:

- Support Vector Machines (\code{\link{SVM.train}}) with a linear kernel,

- second-order Factorization Machines [2] (\code{\link{FM.train}}),

- and higher-order Factorization Machines [1] (\code{\link{HoFM.train}}).

Further information about Factorization Machines is provided by the papers in the references.
}

\details{
This package includes the following methods:

- \code{\link{SVM.train}}: Method training a Support Vector Machine,

- \code{\link{FM.train}}: Method training a second-order Factorization Machine,

- \code{\link{HoFM.train}}: Method training a higher-order Factorization Machine,

- \code{\link{predict.FMmodel}}: Predict Method for FMmodel Objects,

- \code{\link{summary.FMmodel}} and \code{\link{print.FMmodel}}: Summary and Print Method for FMmodel Objects.

To date the learning method alternating least squares (\code{"als"}) and the task regression (\code{"r"}) is supported.
Consequently, regularization is suggested in most of the cases.
Next steps are to implement the Monte Carlo Markov Chain method (\code{"mcmc"}) to simplify regularization.
Furthermore, the task classification (\code{"c"}) will be supported in the future.
}

\author{
Maintainer: Julian Knoll <julian.knoll@th-nuernberg.de>
}

\references{
[1] J. Knoll, Recommending with Higher-Order Factorization Machines, Research and Development in Intelligent Systems XXXIII, 2016.

[2] S. Rendle, Factorization Machines with libFM, ACM Transactions on Intelligent Systems and Technology, 3, 2012.
}

\keyword{ package }
\keyword{ Factorization Machine }
\keyword{ Matrix Factorization }
\keyword{ Machine Learning }
\keyword{ Recommender }

\seealso{
\code{\link{SVM.train}},
\code{\link{FM.train}},
\code{\link{HoFM.train}},
\code{\link{predict.FMmodel}}
}

\examples{
\dontrun{

# Load libraries
library(FactoRizationMachines)
library(Matrix)

# Load MovieLens 100k data set
ml100k=as.matrix(read.table("http://files.grouplens.org/datasets/movielens/ml-100k/u.data"))
user=ml100k[,1]
items=ml100k[,2]+max(user)
wdays=(as.POSIXlt(ml100k[,4],origin="1970-01-01")$wday+1)+max(items)

# Transform MovieLens 100k to feature form
data=sparseMatrix(i=rep(1:nrow(ml100k),3),j=c(user,items,wdays),giveCsparse=F)
target=ml100k[,3]

# Subset data to training and test data
set.seed(123)
subset=sample.int(nrow(data),nrow(data)*.8)
data.train=data[subset,]
data.test=data[-subset,]
target.train=target[subset]
target.test=target[-subset]

# Predict ratings with Support Vector Machine with linear kernel
model=SVM.train(data.train,target.train)
# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))

# Predict ratings with second-order Factorization Machine
# with second-order 10 factors (default) and regularization
model=FM.train(data.train,target.train,regular=0.1)
# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))

# Predict ratings with higher-order Factorization Machine
# with 3 second-order and 1 third-order factor and regularization
model=HoFM.train(data.train,target.train,c(1,3,1),regular=0.1)
# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))

}
}
97 changes: 97 additions & 0 deletions man/020-SVM.train.Rd
@@ -0,0 +1,97 @@
\name{SVM.train}
\alias{SVM.train}

\title{
Method training a Support Vector Machine
}

\description{
\code{SVM.train} is a method training a Support Vector Machine with a linear kernel.

\code{factors} specifies whether linear weights are used (\code{1}) or not (\code{0}).
If linear weights are not used \code{intercept} is set to \code{TRUE}.

To date the learning method alternating least squares (\code{"als"}) and the task regression (\code{"r"}) is supported.
Consequently, regularization is suggested in most of the cases.
Next steps are to implement the Monte Carlo Markov Chain method (\code{"mcmc"}) to simplify regularization.
Furthermore, the task classification (\code{"c"}) will be supported in the future.
}

\usage{
SVM.train(data, target, factors = 1, intercept = T,
iter = 100, regular = 0, stdev = 0.1)
}


\arguments{
\item{data}{
an object of class \code{dgTMatrix}, \code{matrix} or \code{data.frame} (or an object coercible to \code{dgTMatrix}):
a matrix containing training data, each row representing a training example and each column representing a feature.
}
\item{target}{
\code{numeric}: vector specifying the target value of each training example (length must match rows of object data).
}
\item{factors}{
either \code{0} or \code{1}: specifying whether linear weights are used (\code{1}) or not (\code{0}).
If linear weights are not used \code{intercept} is set to \code{TRUE}.
}
\item{intercept}{
\code{logical}: specifying whether a global intercept is used (\code{TRUE}) or not (\code{FALSE}).
}
\item{iter}{
\code{integer}: the number of iterations the learning method is applied.
}
\item{regular}{
\code{numeric}: regularization value for the linear weights.
}
\item{stdev}{
\code{numeric}: the standard deviation used to initialize the model parameters.
}
}


\seealso{
\code{\link{FactoRizationMachines}}
}

\examples{

### Example to illustrate the usage of the method
### Data set very small and not sparse, results not representative
### Please study major example in general help 'FactoRizationMachines'

# Load data set
library(FactoRizationMachines)
library(MASS)
data("Boston")

# Subset data to training and test data
set.seed(123)
subset=sample.int(nrow(Boston),nrow(Boston)*.8)
data.train=Boston[subset,-ncol(Boston)]
target.train=Boston[subset,ncol(Boston)]
data.test=Boston[-subset,-ncol(Boston)]
target.test=Boston[-subset,ncol(Boston)]


# Predict with linear weights and intercept
model=SVM.train(data.train,target.train)

# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))


# Predict with linear weights but without intercept
model=SVM.train(data.train,target.train,intercept=FALSE)

# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))


# Predict with linear weights and regularization
model=SVM.train(data.train,target.train,regular=0.1)

# RMSE resulting from test data prediction
sqrt(mean((predict(model,data.test)-target.test)^2))

}

0 comments on commit a6e534a

Please sign in to comment.