GSoC 8-9 #10

Merged
merged 12 commits on Aug 7, 2020
13 changes: 0 additions & 13 deletions CMake/GenerateGoBinding.cmake

This file was deleted.

11 changes: 8 additions & 3 deletions CMakeLists.txt
@@ -19,6 +19,11 @@ option(DOWNLOAD_STB_IMAGE "Download stb_image for image loading." ON)
option(BUILD_PYTHON_BINDINGS "Build Python bindings." ON)
option(BUILD_GO_SHLIB "Build Go shared library." OFF)

# Set minimum library version required by mlpack.
set(ARMADILLO_VERSION "8.400.0")
set(ENSMALLEN_VERSION "2.10.0")
set(BOOST_VERSION "1.58")

if (WIN32)
option(BUILD_SHARED_LIBS
"Compile shared libraries (if OFF, static libraries are compiled)." OFF)
@@ -278,7 +283,7 @@ endif()
# ENSMALLEN_INCLUDE_DIR - include directory for ensmallen
# STB_IMAGE_INCLUDE_DIR - include directory for STB image library
# MATHJAX_ROOT - root of MathJax installation
-find_package(Armadillo 8.400.0 REQUIRED)
+find_package(Armadillo "${ARMADILLO_VERSION}" REQUIRED)

# Include directories for the previous dependencies.
set(MLPACK_INCLUDE_DIRS ${MLPACK_INCLUDE_DIRS} ${ARMADILLO_INCLUDE_DIRS})
@@ -344,7 +349,7 @@ endif ()
# Find ensmallen.
# Once ensmallen is readily available in package repos, the automatic downloader
# here can be removed.
-find_package(Ensmallen 2.10.0)
+find_package(Ensmallen "${ENSMALLEN_VERSION}")
if (NOT ENSMALLEN_FOUND)
if (DOWNLOAD_ENSMALLEN)
file(DOWNLOAD http://www.ensmallen.org/files/ensmallen-latest.tar.gz
@@ -424,7 +429,7 @@ set(Boost_ADDITIONAL_VERSIONS
# TODO for the brave: transition all mlpack's CMake to 'target-based modern
# CMake'. Good luck! You'll need it.
set(Boost_NO_BOOST_CMAKE 1)
-find_package(Boost 1.58
+find_package(Boost "${BOOST_VERSION}"
COMPONENTS
unit_test_framework
serialization
18 changes: 17 additions & 1 deletion README.md
@@ -32,7 +32,7 @@ bindings to other languages. It is meant to be a machine learning analog to
LAPACK, and aims to implement a wide array of machine learning methods and
functions as a "swiss army knife" for machine learning researchers. In addition
to its powerful C++ interface, mlpack also provides command-line programs,
-Python bindings, and Julia bindings.
+Python bindings, Julia bindings, Go bindings, and R bindings.
This list is getting longer and longer... 🎉


[//]: # (numfocus-fiscal-sponsor-attribution)

@@ -121,6 +121,20 @@ following Python packages are installed:
If you would like to build the Julia bindings, make sure that Julia >= 1.3.0 is
installed.

If you would like to build the Go bindings, make sure that Go >= 1.11.0 is
installed with this package:

Gonum

If you would like to build the R bindings, make sure that R >= 4.0 is
installed with these R packages:

Rcpp >= 0.12.12
RcppArmadillo >= 0.8.400.0
RcppEnsmallen >= 0.2.10.0
BH >= 1.58
roxygen2
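
For reference, here is a minimal sketch of installing those build dependencies
from CRAN (it assumes the package names listed above; the versions you get are
whatever CRAN currently provides, so check them against the listed minimums):

```R
# Install the R packages needed to build the mlpack R bindings.
install.packages(c("Rcpp", "RcppArmadillo", "RcppEnsmallen", "BH", "roxygen2"))
```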

If the STB library headers are available, image loading support will be
compiled.

@@ -190,6 +204,8 @@ Options are specified with the -D flag. The allowed options include:
BUILD_GO_BINDINGS=(ON/OFF): whether or not to build Go bindings
GO_EXECUTABLE=(/path/to/go): Path to specific Go executable
BUILD_GO_SHLIB=(ON/OFF): whether or not to build shared libraries required by Go bindings
BUILD_R_BINDINGS=(ON/OFF): whether or not to build R bindings
R_EXECUTABLE=(/path/to/R): Path to specific R executable
BUILD_TESTS=(ON/OFF): whether or not to build tests
BUILD_SHARED_LIBS=(ON/OFF): compile shared libraries as opposed to
static libraries
8 changes: 3 additions & 5 deletions doc/guide/bindings.hpp
@@ -147,8 +147,7 @@ PROGRAM_INFO("Mean Shift Clustering",
"\n\n"
"The output labels may be saved with the " + PRINT_PARAM_STRING("output") +
" output parameter and the centroids of each cluster may be saved with the"
" " + PRINT_PARAM_STRING("centroid") + " output parameter."
"\n\n"
" " + PRINT_PARAM_STRING("centroid") + " output parameter.",
"For example, to run mean shift clustering on the dataset " +
PRINT_DATASET("data") + " and store the centroids to " +
PRINT_DATASET("centroids") + ", the following command may be used: "
@@ -286,7 +285,7 @@ is of the form

@code
PROGRAM_INFO("program name", "short documentation", "long documentation",
SEE_ALSO("link", "description"), ...)
"examples", SEE_ALSO("link", "description"), ...)
@endcode

The short documentation should be two sentences indicating what the program
@@ -425,8 +424,7 @@ Input C++ (full program, 'random_numbers_main.cpp'):
"The output random numbers can be saved with the " +
PRINT_PARAM_STRING("output") + " output parameter. In addition, a "
"randomly generated linear regression model can be saved with the " +
PRINT_PARAM_STRING("output_model") + " output parameter."
"\n\n"
PRINT_PARAM_STRING("output_model") + " output parameter.",
"For example, to generate 100 random numbers with 3 subtracted from them "
"and save the output to " + PRINT_DATASET("rand") + " and the random "
"model to " + PRINT_MODEL("rand_lr") + ", use the following "
5 changes: 3 additions & 2 deletions doc/guide/cli_quickstart.hpp
@@ -10,8 +10,9 @@ This page describes how you can quickly get started using mlpack from the
command-line and gives a few examples of usage, and pointers to deeper
documentation.

-This quickstart guide is also available for @ref python_quickstart "Python" and
-@ref julia_quickstart "Julia".
+This quickstart guide is also available for @ref python_quickstart "Python",
+@ref r_quickstart "R", @ref julia_quickstart "Julia", and
+@ref go_quickstart "Go".

@section cli_quickstart_install Installing mlpack

6 changes: 3 additions & 3 deletions doc/guide/go_quickstart.hpp
@@ -9,9 +9,9 @@
This page describes how you can quickly get started using mlpack from Go and
gives a few examples of usage, and pointers to deeper documentation.

-This quickstart guide is also available for
-@ref cli_quickstart "the command-line", @ref python_quickstart "Python"
-and @ref julia_quickstart "Julia".
+This quickstart guide is also available for @ref python_quickstart "Python",
+@ref cli_quickstart "the command-line", @ref julia_quickstart "Julia", and
+@ref r_quickstart "R".

@section go_quickstart_install Installing mlpack

5 changes: 3 additions & 2 deletions doc/guide/julia_quickstart.hpp
@@ -9,8 +9,9 @@
This page describes how you can quickly get started using mlpack from Julia and
gives a few examples of usage, and pointers to deeper documentation.

-This quickstart guide is also available for
-@ref cli_quickstart "the command-line" and @ref python_quickstart "Python".
+This quickstart guide is also available for @ref python_quickstart "Python",
+@ref cli_quickstart "the command-line", @ref go_quickstart "Go", and
+@ref r_quickstart "R".

@section julia_quickstart_install Installing mlpack

193 changes: 193 additions & 0 deletions doc/guide/r_quickstart.hpp
@@ -0,0 +1,193 @@
/**
* @file r_quickstart.hpp
* @author Yashwant Singh Parihar

@page r_quickstart mlpack in R quickstart guide

@section r_quickstart_intro Introduction

This page describes how you can quickly get started using mlpack from R and
gives a few examples of usage, and pointers to deeper documentation.

This quickstart guide is also available for @ref python_quickstart "Python",
@ref cli_quickstart "the command-line", @ref julia_quickstart "Julia", and
@ref go_quickstart "Go".

@section r_quickstart_install Installing mlpack binary package

Installing the mlpack bindings for R is straightforward; you can just use
CRAN:

@code{.R}
install.packages('mlpack')
@endcode
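
As a quick sanity check (a hedged sketch, not part of the official guide), you
can load the installed package and list a few of the exported bindings:

@code{.R}
# Load the installed package and peek at what it exports.
library(mlpack)
head(ls("package:mlpack"))
@endcode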

@section r_quickstart_install_source Installing the mlpack package from source

Building the R bindings from scratch is a little more in-depth, though. For
information on that, follow the instructions on the @ref build page, and be sure
to specify @c -DBUILD_R_BINDINGS=ON to CMake; you may need to also set the
location of the R program with @c -DR_EXECUTABLE=/path/to/R.

Correct me if I'm wrong, but we are already packaging the mlpack source inside of the package in the mlpack-r repository? I think there might be two "from source" categories here.

So actually I'm not sure it's needed to have an mlpack-r repository. Based on my understanding, an upload to CRAN happens not by, e.g., giving the location of a Github repository (like in Julia), but instead by directly uploading a .tar.gz. This means that instead of an mlpack-r repository, we could do one of the following:

(1) at every mlpack release, manually build with make r, package what's in build/src/mlpack/bindings/r/ and upload to CRAN
(2) whenever a new release is tagged, fire off a Travis or Azure job that does (1) automatically
(3) same as (2) except use Jenkins
(4) same as (2) except use Github Actions

Does that work? Or will there be people who... don't want to use CRAN, but install directly from a Github URL or something?

I guess we may need some discussion here. As far as I know, it's good to run R CMD check on the package in the "proper" environment before releasing/publishing it. If we run R CMD check using ctest while building with mlpack, it's possible that, when building RcppMLPACK, R may pick up the installed boost/cereal serialization files required to build mlpack, or some mlpack files which might not have travelled correctly to RcppMLPACK during the CMake step. Hence I thought it may be helpful to go for mlpack-r with an R CMD check in a proper environment; GitHub Actions/Azure might then help us with the package release after a successful build on mlpack-r.
Or we could do these checks locally after every release in the proper environment and then go for a CRAN upload.
Just an idea; I am really sorry if I am missing something or making some blunder here.

👍 thanks for the explanation @Yashwants19. In that case, if we were using the mlpack-r approach you described, would the mlpack-r repository just contain a clone of the mlpack repository at the appropriate tag?

I do agree that having some kind of system set up to run R CMD check before submitting to CRAN would be great, although it seems like CRAN will automatically run R CMD check too---so maybe we can just depend on CRAN failing the uploaded package as our "CI check" for a new release?

@eddelbuettel commented on Aug 4, 2020:

> so maybe we can just depend on CRAN failing

Please don't. CRAN really is just a few overworked humans. It is not a test bed. We have e.g. Rhub for that.

Consider CRAN uploads as a manual, explicit step. Think Journal submission (which is exaggerating somewhat to make the point), not "random copy of a file to Dropbox" or another web service.

Eek, I didn't realize that. Yes, in this case, let's not overwork the humans even more. :)


@section r_quickstart_example Simple mlpack quickstart example

As a really simple example of how to use mlpack from R, let's do some
simple classification on a subset of the standard machine learning @c covertype
dataset. We'll first split the dataset into a training set and a testing set,
then we'll train an mlpack random forest on the training data, and finally we'll
print the accuracy of the random forest on the test dataset.

You can copy-paste this code directly into R to run it.

@code{.R}
if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
suppressMessages({
  library("mlpack")
  library("data.table")
})

# Load the dataset from an online URL. Replace with 'covertype.csv.gz' if you
# want to use the full dataset.
df <- fread("https://www.mlpack.org/datasets/covertype-small.csv.gz")

# Split the labels.
labels <- df[, .(label)]
dataset <- df[, label:=NULL]

# Split the dataset using mlpack.
prepdata <- preprocess_split(input = dataset,
                             input_labels = labels,
                             test_ratio = 0.3,
                             verbose = TRUE)

# Train a random forest.
output <- random_forest(training = prepdata$training,
                        labels = prepdata$training_labels,
                        print_training_accuracy = TRUE,
                        num_trees = 10,
                        minimum_leaf_size = 3,
                        verbose = TRUE)
rf_model <- output$output_model

# Predict the labels of the test points.
output <- random_forest(input_model = rf_model,
                        test = prepdata$test,
                        verbose = TRUE)

# Now print the accuracy. The third return value ('probabilities'), which we
# ignored here, could also be used to generate an ROC curve.
correct <- sum(output$predictions == prepdata$test_labels)
cat(correct, "out of", length(prepdata$test_labels), "test points correct",
correct / length(prepdata$test_labels) * 100.0, "%\n")
@endcode
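
If you want a closer per-class look at those results, here is a small
follow-up sketch (it assumes the `output` and `prepdata` objects from the
example above are still in your R session):

@code{.R}
# Cross-tabulate predicted labels against the true test labels.
confusion <- table(predicted = output$predictions,
                   actual = prepdata$test_labels)
print(confusion)
@endcode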

We can see that we achieve reasonably good accuracy on the test dataset (80%+);
if we use the full @c covertype.csv.gz, the accuracy should increase
significantly (but training will take longer).

It's easy to modify the code above to do more complex things, or to use
different mlpack learners, or to interface with other machine learning toolkits.

@section r_quickstart_whatelse What else does mlpack implement?

The example above has only shown a little bit of the functionality of mlpack.
Lots of other commands are available with different functionality. A full list
of each of these commands and full documentation can be found on the following
page:

- <a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html">R documentation</a>

For more information on what mlpack does, see https://www.mlpack.org/.
Next, let's go through another example for providing movie recommendations with
mlpack.

@section r_quickstart_movierecs Using mlpack for movie recommendations

In this example, we'll train a collaborative filtering model using mlpack's
<tt><a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html#cf">cf()</a></tt> method. We'll train this on the MovieLens dataset from
https://grouplens.org/datasets/movielens/, and then we'll use the model that we
train to give recommendations.

You can copy-paste this code directly into R to run it.

@code{.R}
if(!requireNamespace("data.table", quietly = TRUE)) { install.packages("data.table") }
suppressMessages({
  library("mlpack")
  library("data.table")
})

# First, load the MovieLens dataset. This is taken from files.grouplens.org/
# but reposted on mlpack.org as unpacked and slightly preprocessed data.
ratings <- fread("http://www.mlpack.org/datasets/ml-20m/ratings-only.csv.gz")
movies <- fread("http://www.mlpack.org/datasets/ml-20m/movies.csv.gz")

# Hold out 10% of the dataset into a test set so we can evaluate performance.
predata <- preprocess_split(input = ratings,
                            test_ratio = 0.1,
                            verbose = TRUE)

# Train the model. Change the rank to increase/decrease the complexity of the
# model.
output <- cf(training = predata$training,
             test = predata$test,
             rank = 10,
             verbose = TRUE,
             max_iteration = 2,
             algorithm = "RegSVD")
cf_model <- output$output_model

# Now query the 5 top movies for user 1.
output <- cf(input_model = cf_model,
             query = matrix(1),
             recommendations = 10,
             verbose = TRUE)

# Get the names of the movies for user 1.
cat("Recommendations for user 1:\n")
for (i in 1:10) {
  cat(" ", i, ":", as.character(movies[output$output[i], 3]), "\n")

Shouldn't this be cat(" ", i - 1, ":", ...) to match with a 0-based index returned from C++?

Good point @coatless! @Yashwants19 I took a look through the code in this repository and in the RcppMLPACK repository and I think that we aren't correctly handling the difference between R's 1-indexing and C++'s 0-indexing. I believe that we will need to modify, e.g., SetParamUCol() and SetParamURow() to subtract 1 from the values when converting to C++, and add 1 to the values when converting back to R. That is because when we represent labels in C++, we expect them to take values between 0 and (the number of classes - 1), whereas in R it seems like we would expect a user to provide data that has classes between 1 and the number of classes. Let me know if I overlooked something and the code already does that. 👍

Thank you for noticing the issue; I have tried to resolve it as suggested in the last commit.

}
@endcode
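
The review discussion above concerns R's 1-based indexing versus the 0-based
indices used by the C++ core. As a purely illustrative sketch (the real fix
belongs in the binding conversion helpers such as SetParamUCol() and
SetParamURow(), not in user scripts), the shift being described looks like
this:

@code{.R}
# Hypothetical illustration of the index shift discussed in the review:
# values are shifted down by one going into C++ and back up coming out,
# so 1-based R labels map onto 0-based C++ labels and round-trip cleanly.
to_cpp <- function(x) x - 1
to_r   <- function(x) x + 1
to_r(to_cpp(c(1, 2, 3)))  # returns 1 2 3
@endcode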

Here is some example output, showing that user 1 seems to have good taste in
movies:

@code{.unparsed}
Recommendations for user 1:
0: Casablanca (1942)
1: Pan's Labyrinth (Laberinto del fauno, El) (2006)
2: Godfather, The (1972)
3: Answer This! (2010)
4: Life Is Beautiful (La Vita è bella) (1997)
5: Adventures of Tintin, The (2011)
6: Dark Knight, The (2008)
7: Out for Justice (1991)
8: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
9: Schindler's List (1993)
@endcode
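
As a small variation on the example above (a hedged sketch that reuses the
`cf_model` object already trained in this session), you could query
recommendations for a different user in the same way:

@code{.R}
# Query the top 5 recommendations for user 2 instead of user 1.
output2 <- cf(input_model = cf_model,
              query = matrix(2),
              recommendations = 5,
              verbose = TRUE)
print(output2$output)
@endcode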

@section r_quickstart_nextsteps Next steps with mlpack

After working through this overview of `mlpack`'s R package, we hope you are
inspired to use `mlpack` in your data science workflow. As a next step, we
recommend looking at more documentation for the R mlpack bindings:

- <a href="https://www.mlpack.org/doc/mlpack-git/r_documentation.html">R mlpack
binding documentation</a>

Also, mlpack is much more flexible from C++ and allows much greater
functionality. So, more complicated tasks are possible if you are willing to
write C++ (or perhaps Rcpp). To get started learning about mlpack in C++, the
following resources might be helpful:

- <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/tutorials.html">mlpack
C++ tutorials</a>
- <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/build.html">mlpack
build and installation guide</a>
- <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/sample.html">Simple
sample C++ mlpack programs</a>
- <a href="https://www.mlpack.org/doc/mlpack-git/doxygen/index.html">mlpack
Doxygen documentation homepage</a>

*/
1 change: 1 addition & 0 deletions doc/tutorials/tutorials.txt
@@ -15,6 +15,7 @@ get started with mlpack in different languages.
- \ref cli_quickstart
- \ref julia_quickstart
- \ref go_quickstart
- \ref r_quickstart

@section introd_tut Introductory Tutorials

10 changes: 5 additions & 5 deletions src/mlpack/bindings/R/CMakeLists.txt
@@ -19,15 +19,15 @@ if (BUILD_R_BINDINGS)
include(${CMAKE_SOURCE_DIR}/CMake/FindRModule.cmake)

# If mlpack upgrades the version of a dependency, then we also have to update the version here.
set(RcppArmadillo_Version "0.8.400.0.0")
set(RcppEnsmallen_Version "0.2.10.0")
set(BH_Version "1.58.0")
set(RcppArmadillo_Version "0.${ARMADILLO_VERSION}")
set(RcppEnsmallen_Version "0.${ENSMALLEN_VERSION}")
set(BH_Version "${BOOST_VERSION}")

## We need to check here if R is even available. Although actually
## technically, I'm not sure if we even need to know! For the tests though we
## do. So it's probably a good idea to check.
if (FORCE_BUILD_R_BINDINGS)
-find_package(R 3.5)
+find_package(R 4.0)
find_r_module(roxygen2)
find_r_module(Rcpp 0.12.12)
find_r_module(RcppArmadillo "${RcppArmadillo_Version}")
@@ -38,7 +38,7 @@ if (BUILD_R_BINDINGS)
message(FATAL_ERROR "Could not Build R Bindings")
endif()
else ()
-find_package(R 3.5)
+find_package(R 4.0)
find_r_module(roxygen2)
find_r_module(Rcpp 0.12.12)
find_r_module(RcppArmadillo "${RcppArmadillo_Version}")