From 95b5383fae4da22aa0552e969c05b9488accb1a1 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Fri, 7 Apr 2017 11:37:33 -0700 Subject: [PATCH 1/2] update logistic regression example --- examples/src/main/r/ml/glm.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/src/main/r/ml/glm.R b/examples/src/main/r/ml/glm.R index ee13910382c58..3651154a556a6 100644 --- a/examples/src/main/r/ml/glm.R +++ b/examples/src/main/r/ml/glm.R @@ -27,7 +27,7 @@ sparkR.session(appName = "SparkR-ML-glm-example") # $example on$ training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") # Fit a generalized linear model of family "gaussian" with spark.glm -df_list <- randomSplit(training, c(7,3), 2) +df_list <- randomSplit(training, c(7, 3), 2) gaussianDF <- df_list[[1]] gaussianTestDF <- df_list[[2]] gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian") @@ -44,8 +44,9 @@ gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian") summary(gaussianGLM2) # Fit a generalized linear model of family "binomial" with spark.glm -training2 <- read.df("data/mllib/sample_binary_classification_data.txt", source = "libsvm") -df_list2 <- randomSplit(training2, c(7,3), 2) +training2 <- read.df("/data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") +training2 <- transform(training2, label = cast(training2$label > 1, "integer")) +df_list2 <- randomSplit(training2, c(7, 3), 2) binomialDF <- df_list2[[1]] binomialTestDF <- df_list2[[2]] binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial") From f7e71ea8c01d44852fde9c1a6a930e09cc95d2e6 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Fri, 7 Apr 2017 11:50:19 -0700 Subject: [PATCH 2/2] fix path issue --- examples/src/main/r/ml/glm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/r/ml/glm.R b/examples/src/main/r/ml/glm.R index 3651154a556a6..3feb21ad482dc 100644 --- a/examples/src/main/r/ml/glm.R +++ b/examples/src/main/r/ml/glm.R @@ -44,7 +44,7 @@ gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian") summary(gaussianGLM2) # Fit a generalized linear model of family "binomial" with spark.glm -training2 <- read.df("/data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") +training2 <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") training2 <- transform(training2, label = cast(training2$label > 1, "integer")) df_list2 <- randomSplit(training2, c(7, 3), 2) binomialDF <- df_list2[[1]]