Skip to content
This repository has been archived by the owner on Sep 20, 2022. It is now read-only.

Commit

Permalink
hotfix
Browse files Browse the repository at this point in the history
  • Loading branch information
maropu committed Jul 12, 2017
1 parent 89c7538 commit 2d036aa
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
13 changes: 9 additions & 4 deletions docs/gitbook/spark/binaryclass/a9a_df.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ $ wget http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a9a.t

```scala
scala> :paste
val trainDf = spark.read.format("libsvm").load("a9a")
.select(
val rawTrainDf = spark.read.format("libsvm").load("a9a")

val (max, min) = rawTrainDf.selectExpr("max(label)", "min(label)").collect.map {
case Row(max: Double, min: Double) => (max, min)
}.head

val trainDf = rawTrainDf.select(
// `label` must be [0.0, 1.0]
rescale($"label", lit(-1.0f), lit(1.0f)).as("label"),
rescale($"label", lit(min), lit(max)).as("label"),
$"features"
)

Expand All @@ -45,7 +50,7 @@ root

scala> :paste
val testDf = spark.read.format("libsvm").load("a9a.t")
.select(rowid(), rescale($"label", lit(-1.0f), lit(1.0f)).as("label"), $"features")
.select(rowid(), rescale($"label", lit(min), lit(max)).as("label"), $"features")
.explode_vector($"features")
.select($"rowid", $"label".as("target"), $"feature", $"weight".as("value"))
.cache
Expand Down
13 changes: 9 additions & 4 deletions docs/gitbook/spark/regression/e2006_df.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ $ wget http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.t

```scala
scala> :paste
val trainDf = spark.read.format("libsvm").load("E2006.train.bz2")
.select(
val rawTrainDf = spark.read.format("libsvm").load("E2006.train.bz2")

val (max, min) = rawTrainDf.selectExpr("max(label)", "min(label)").collect.map {
case Row(max: Double, min: Double) => (max, min)
}.head

val trainDf = rawTrainDf.select(
// `label` must be [0.0, 1.0]
rescale($"label", lit(-7.899578f), lit(-0.51940954f)).as("label"),
rescale($"label", lit(min), lit(max)).as("label"),
$"features"
)

Expand All @@ -45,7 +50,7 @@ root

scala> :paste
val testDf = spark.read.format("libsvm").load("E2006.test.bz2")
.select(rowid(), rescale($"label", lit(-7.899578f), lit(-0.51940954f)).as("label"), $"features")
.select(rowid(), rescale($"label", lit(min), lit(max)).as("label"), $"features")
.explode_vector($"features")
.select($"rowid", $"label".as("target"), $"feature", $"weight".as("value"))
.cache
Expand Down

0 comments on commit 2d036aa

Please sign in to comment.