Skip to content

Commit

Permalink
Reorganized
Browse files Browse the repository at this point in the history
Moved the minCount variable to the top of the class, removed its Scaladoc comment, and moved setMinCount below the other setter methods.
  • Loading branch information
ganonp committed Dec 19, 2014
1 parent 854958b commit 5110a6f
Showing 1 changed file with 10 additions and 15 deletions.
25 changes: 10 additions & 15 deletions mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ class Word2Vec extends Serializable with Logging {
private var numPartitions = 1
private var numIterations = 1
private var seed = Utils.random.nextLong()

private var minCount = 5

/**
* Sets vector size (default: 100).
*/
Expand Down Expand Up @@ -114,20 +115,6 @@ class Word2Vec extends Serializable with Logging {
this
}

private val EXP_TABLE_SIZE = 1000
private val MAX_EXP = 6
private val MAX_CODE_LENGTH = 40
private val MAX_SENTENCE_LENGTH = 1000

/** context words from [-window, window] */
private val window = 5

/**
* The minimum number of times a token must occur in the training corpus to be
* included in the word2vec model (default: 5).
*/
private var minCount = 5

/**
* Sets minCount, the minimum number of times a token must appear to be included in the word2vec
* model's vocabulary (default: 5).
Expand All @@ -136,6 +123,14 @@ class Word2Vec extends Serializable with Logging {
this.minCount = minCount
this
}

private val EXP_TABLE_SIZE = 1000
private val MAX_EXP = 6
private val MAX_CODE_LENGTH = 40
private val MAX_SENTENCE_LENGTH = 1000

/** context words from [-window, window] */
private val window = 5

private var trainWordsCount = 0
private var vocabSize = 0
Expand Down

0 comments on commit 5110a6f

Please sign in to comment.