Skip to content

Commit

Permalink
Changes related to GC-463
Browse files Browse the repository at this point in the history
  - a few corrections based on @ezhulenev's comments in the pull request
  • Loading branch information
jpocalan-collective committed Jul 29, 2015
1 parent 0c09c2e commit fae4fa1
Show file tree
Hide file tree
Showing 26 changed files with 73 additions and 114 deletions.
16 changes: 0 additions & 16 deletions README.md
Expand Up @@ -51,22 +51,6 @@ Schema migrations managed by [Flyway](http://flywaydb.org),
If you want to add a test that expects the model matrix schema and tables to be present, please implement the trait `com.collective.modelmatrix.catalog.InstallSchemaBefore`


<!--schema DDL and migrations located in: `modelmatrix-cli/src/main/resources/db/migration`-->

<!--Install schema for development:-->

<!-- sbt> project modelmatrix-cli -->
<!-- sbt> flywayMigrate -->

<!--If you need to install schema into different database, you have to provide flyway properties at sbt startup-->

<!-- sbt -Dflyway.url=myUrl \-->
<!-- -Dflyway.user=myUser \-->
<!-- -Dflyway.password=mySecretPwd \-->
<!-- -Dflyway.schemas=schema1,schema2,schema3 \-->
<!-- -Dflyway.placeholders.keyABC=valueXYZ \-->
<!-- -Dflyway.placeholders.otherplaceholder=value123-->

## Testing

Unit and integration tests automatically create/update the schema and use H2 by default
Expand Down
1 change: 0 additions & 1 deletion build.sbt
Expand Up @@ -78,7 +78,6 @@ lazy val modelmatrixCore =
lazy val modelmatrixCli =
ModelMatrixProject("modelmatrix-cli")
.dependsOn(modelmatrixCore)
// .settings(flywaySettings: _*)

lazy val modelmatrixUdf =
ModelMatrixProject("modelmatrix-udf")
Expand Up @@ -4,7 +4,7 @@ import com.collective.modelmatrix.cli.Script
import com.collective.modelmatrix.db.{DefaultDBConfigWrapper, DatabaseConfigWrapper, SchemaInstaller}
import org.slf4j.LoggerFactory

case class InstallSchema() extends SchemaInstaller with Script {
case class InstallSchema() extends Script with SchemaInstaller{

private val log = LoggerFactory.getLogger(classOf[InstallSchema])

Expand Down
Expand Up @@ -3,7 +3,7 @@ package com.collective.modelmatrix.cli.definition
import java.nio.file.Path
import java.time.Instant

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.{ModelConfigurationParser, Script}
import com.typesafe.config.{ConfigFactory, ConfigResolveOptions}
import org.slf4j.LoggerFactory
Expand All @@ -15,7 +15,7 @@ case class AddDefinition(
configPath: String,
name: Option[String],
comment: Option[String]
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[AddDefinition])

Expand Down
@@ -1,12 +1,12 @@
package com.collective.modelmatrix.cli.definition

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.Script
import org.slf4j.LoggerFactory

case class ListDefinitions(
name: Option[String]
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ListDefinitions])

Expand Down
@@ -1,12 +1,12 @@
package com.collective.modelmatrix.cli.definition

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.Script
import org.slf4j.LoggerFactory

case class ViewFeatures(
modelDefinitionId: Int
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ViewFeatures])

Expand Down
@@ -1,12 +1,12 @@
package com.collective.modelmatrix.cli.definition

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.Script
import org.slf4j.LoggerFactory

case class ViewSource(
modelDefinitionId: Int
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ViewFeatures])

Expand Down
@@ -1,9 +1,9 @@
package com.collective.modelmatrix.cli.featurize

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.{Source, _}
import com.collective.modelmatrix.transform.Transformer
import com.collective.modelmatrix.{Featurization, Labeling, ModelMatrix}
import com.collective.modelmatrix.{Featurization, Labeling, ModelMatrixAccess}
import org.apache.spark.mllib.linalg.{DenseVector, SparseVector}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
Expand All @@ -18,7 +18,7 @@ case class SparseFeaturization(
idColumn: String,
repartitionSource: Option[Int],
cacheSource: Boolean
) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext {
) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext {

private val log = LoggerFactory.getLogger(classOf[ValidateInputData])

Expand All @@ -38,7 +38,7 @@ case class SparseFeaturization(
s"Featurized sink: $sink. " +
s"Id column: $idColumn")

implicit val sqlContext = ModelMatrix.hiveContext(sc)
implicit val sqlContext = ModelMatrixAccess.hiveContext(sc)

val features = blockOn(db.run(modelInstanceFeatures.features(modelInstanceId)))
require(features.nonEmpty, s"No features are defined for model instance: $modelInstanceId. " +
Expand Down
@@ -1,9 +1,9 @@
package com.collective.modelmatrix.cli.featurize

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.{SourceTransformation, CliSparkContext, Script, Source}
import com.collective.modelmatrix.transform.Transformer
import com.collective.modelmatrix.{Featurization, ModelMatrix}
import com.collective.modelmatrix.{Featurization, ModelMatrixAccess}
import org.slf4j.LoggerFactory

import scalaz._
Expand All @@ -13,7 +13,7 @@ case class ValidateInputData(
source: Source,
repartitionSource: Option[Int],
cacheSource: Boolean
) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext {
) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext {

private val log = LoggerFactory.getLogger(classOf[ValidateInputData])

Expand All @@ -24,7 +24,7 @@ case class ValidateInputData(
log.info(s"Validate input data against Model Matrix instance: $modelInstanceId. " +
s"Data source: $source")

implicit val sqlContext = ModelMatrix.hiveContext(sc)
implicit val sqlContext = ModelMatrixAccess.hiveContext(sc)

val features = blockOn(db.run(modelInstanceFeatures.features(modelInstanceId)))
require(features.nonEmpty, s"No features are defined for model instance: $modelInstanceId. " +
Expand Down
@@ -1,6 +1,6 @@
package com.collective.modelmatrix.cli.instance

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix._
import com.collective.modelmatrix.cli.{SourceTransformation, CliSparkContext, Script, Source}
import com.collective.modelmatrix.transform._
Expand All @@ -19,14 +19,14 @@ case class AddInstance(
cacheSource: Boolean
)
extends Script with SourceTransformation
with DbModelMatrixCatalog
with ModelMatrixCatalogAccess
with CliSparkContext
with Transformers
with TransformationProcess {

private val log = LoggerFactory.getLogger(classOf[AddInstance])

private implicit lazy val sqlContext = ModelMatrix.hiveContext(sc)
private implicit lazy val sqlContext = ModelMatrixAccess.hiveContext(sc)

import com.collective.modelmatrix.cli.ASCIITableFormat._
import com.collective.modelmatrix.cli.ASCIITableFormats._
Expand Down
@@ -1,13 +1,13 @@
package com.collective.modelmatrix.cli.instance

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.Script
import org.slf4j.LoggerFactory


case class ListInstances(
modelDefinitionId: Option[Int], name: Option[String]
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ListInstances])

Expand Down
@@ -1,7 +1,7 @@
package com.collective.modelmatrix.cli.instance

import com.collective.modelmatrix.ModelMatrix
import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.{Source, _}
import com.collective.modelmatrix.transform._
import org.slf4j.LoggerFactory
Expand All @@ -13,11 +13,11 @@ case class ValidateInputData(
source: Source,
repartitionSource: Option[Int],
cacheSource: Boolean
) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext with Transformers {
) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext with Transformers {

private val log = LoggerFactory.getLogger(classOf[ValidateInputData])

private implicit lazy val sqlContext = ModelMatrix.hiveContext(sc)
private implicit lazy val sqlContext = ModelMatrixAccess.hiveContext(sc)

import com.collective.modelmatrix.cli.ASCIITableFormat._
import com.collective.modelmatrix.cli.ASCIITableFormats._
Expand Down
@@ -1,6 +1,6 @@
package com.collective.modelmatrix.cli.instance

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.catalog._
import com.collective.modelmatrix.cli._
import com.collective.modelmatrix.{CategoricalColumn, BinColumn}
Expand All @@ -11,7 +11,7 @@ import scalaz._

case class ViewColumns(
modelInstanceId: Int, group: Option[String], feature: Option[String]
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ViewColumns])

Expand Down
@@ -1,13 +1,13 @@
package com.collective.modelmatrix.cli.instance

import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
import com.collective.modelmatrix.cli.Script
import org.slf4j.LoggerFactory


case class ViewFeatures(
modelInstanceId: Int
) extends Script with DbModelMatrixCatalog {
) extends Script with ModelMatrixCatalogAccess {

private val log = LoggerFactory.getLogger(classOf[ViewFeatures])

Expand Down
5 changes: 0 additions & 5 deletions modelmatrix-core/src/it/resources/pg.conf

This file was deleted.

Expand Up @@ -12,7 +12,7 @@ import scalaz.{-\/, \/, \/-}

class FeaturizationSpec extends FlatSpec with GivenWhenThen with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("auction_id", LongType),
Expand Down
@@ -1,17 +1,17 @@
package com.collective.modelmatrix.transform

import com.collective.modelmatrix.{ModelMatrix, ModelFeature, TestSparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import com.collective.modelmatrix.{ModelFeature, ModelMatrixAccess, TestSparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.FlatSpec

import scala.util.Random
import scalaz.{\/-, -\/}
import scalaz.syntax.either._
import scalaz.{-\/, \/-}

class BinsTransformerSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("adv_site", StringType),
Expand Down
Expand Up @@ -5,7 +5,7 @@ import java.time.{DayOfWeek, Instant, ZoneOffset}
import java.util.UUID

import com.collective.modelmatrix.CategoricalColumn.CategoricalValue
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelFeature, ModelMatrix, TestSparkContext}
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelFeature, ModelMatrixAccess, TestSparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.FlatSpec
Expand All @@ -17,7 +17,7 @@ import scalaz.syntax.either._

class ExtractExpressionsSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val rnd = new Random()

Expand Down
@@ -1,6 +1,6 @@
package com.collective.modelmatrix.transform

import com.collective.modelmatrix.{ModelMatrix, ModelFeature, TestSparkContext}
import com.collective.modelmatrix.{ModelMatrixAccess, ModelFeature, TestSparkContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}
import org.scalatest.FlatSpec
Expand All @@ -10,7 +10,7 @@ import scalaz.syntax.either._

class IdentityTransformerSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("adv_site", StringType),
Expand Down
@@ -1,7 +1,7 @@
package com.collective.modelmatrix.transform

import com.collective.modelmatrix.CategoricalColumn.{AllOther, CategoricalValue}
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrix, ModelFeature, TestSparkContext}
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrixAccess, ModelFeature, TestSparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.scalatest.FlatSpec
Expand All @@ -13,7 +13,7 @@ import scalaz.syntax.either._

class IndexTransformerSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("adv_site", StringType)
Expand Down
@@ -1,7 +1,7 @@
package com.collective.modelmatrix.transform

import com.collective.modelmatrix.CategoricalColumn.{AllOther, CategoricalValue}
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrix, ModelFeature, TestSparkContext}
import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrixAccess, ModelFeature, TestSparkContext}
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.sql.types._
import org.scalatest.FlatSpec
Expand All @@ -11,7 +11,7 @@ import scalaz.syntax.either._

class TopTransformerSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("adv_site", StringType)
Expand Down
@@ -1,6 +1,6 @@
package com.collective.modelmatrix.transform

import com.collective.modelmatrix.{ModelFeature, ModelMatrix, TestSparkContext}
import com.collective.modelmatrix.{ModelFeature, ModelMatrixAccess, TestSparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FlatSpec
Expand All @@ -9,7 +9,7 @@ import scalaz.{\/-, -\/}

class TransformerSpec extends FlatSpec with TestSparkContext {

val sqlContext = ModelMatrix.sqlContext(sc)
val sqlContext = ModelMatrixAccess.sqlContext(sc)

val schema = StructType(Seq(
StructField("adv_site", StringType),
Expand Down

0 comments on commit fae4fa1

Please sign in to comment.