diff --git a/README.md b/README.md
index c8bd5f6..b44acb6 100644
--- a/README.md
+++ b/README.md
@@ -51,22 +51,6 @@ Schema migrations managed by [Flyway](http://flywaydb.org),
 If you want to add test that excepect modelmatrix matrix schema and tables to be present please implement trait `com.collective.modelmatrix.catalog.InstallSchemaBefore`
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

 ## Testing

 Unit and Integration test are automatically creating/updating schema and using by default H2
diff --git a/build.sbt b/build.sbt
index ec7e5ee..379a28c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -78,7 +78,6 @@ lazy val modelmatrixCore =
 lazy val modelmatrixCli =
   ModelMatrixProject("modelmatrix-cli")
     .dependsOn(modelmatrixCore)
-//    .settings(flywaySettings: _*)

 lazy val modelmatrixUdf =
   ModelMatrixProject("modelmatrix-udf")
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/db/InstallSchema.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/db/InstallSchema.scala
index c0537d0..6160289 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/db/InstallSchema.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/db/InstallSchema.scala
@@ -4,7 +4,7 @@ import com.collective.modelmatrix.cli.Script
 import com.collective.modelmatrix.db.{DefaultDBConfigWrapper, DatabaseConfigWrapper, SchemaInstaller}
 import org.slf4j.LoggerFactory

-case class InstallSchema() extends SchemaInstaller with Script {
+case class InstallSchema() extends Script with SchemaInstaller{

   private val log = LoggerFactory.getLogger(classOf[InstallSchema])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/AddDefinition.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/AddDefinition.scala
index fee5886..490d360 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/AddDefinition.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/AddDefinition.scala
@@ -3,7 +3,7 @@ package com.collective.modelmatrix.cli.definition
 import java.nio.file.Path
 import java.time.Instant

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.{ModelConfigurationParser, Script}
 import com.typesafe.config.{ConfigFactory, ConfigResolveOptions}
 import org.slf4j.LoggerFactory
@@ -15,7 +15,7 @@ case class AddDefinition(
   configPath: String,
   name: Option[String],
   comment: Option[String]
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[AddDefinition])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ListDefinitions.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ListDefinitions.scala
index 9ed5f38..eb9c9a6 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ListDefinitions.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ListDefinitions.scala
@@ -1,12 +1,12 @@
 package com.collective.modelmatrix.cli.definition

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.Script
 import org.slf4j.LoggerFactory

 case class ListDefinitions(
   name: Option[String]
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ListDefinitions])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewFeatures.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewFeatures.scala
index 69b219f..7d2ecb9 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewFeatures.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewFeatures.scala
@@ -1,12 +1,12 @@
 package com.collective.modelmatrix.cli.definition

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.Script
 import org.slf4j.LoggerFactory

 case class ViewFeatures(
   modelDefinitionId: Int
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ViewFeatures])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewSource.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewSource.scala
index cae1050..6833da7 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewSource.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/definition/ViewSource.scala
@@ -1,12 +1,12 @@
 package com.collective.modelmatrix.cli.definition

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.Script
 import org.slf4j.LoggerFactory

 case class ViewSource(
   modelDefinitionId: Int
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ViewFeatures])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/SparseFeaturization.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/SparseFeaturization.scala
index c1771c3..70d6bd1 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/SparseFeaturization.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/SparseFeaturization.scala
@@ -1,9 +1,9 @@
 package com.collective.modelmatrix.cli.featurize

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.{Source, _}
 import com.collective.modelmatrix.transform.Transformer
-import com.collective.modelmatrix.{Featurization, Labeling, ModelMatrix}
+import com.collective.modelmatrix.{Featurization, Labeling, ModelMatrixAccess}
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.types._
@@ -18,7 +18,7 @@ case class SparseFeaturization(
   idColumn: String,
   repartitionSource: Option[Int],
   cacheSource: Boolean
-) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext {
+) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext {

   private val log = LoggerFactory.getLogger(classOf[ValidateInputData])
@@ -38,7 +38,7 @@ case class SparseFeaturization(
       s"Featurized sink: $sink. " +
       s"Id column: $idColumn")

-    implicit val sqlContext = ModelMatrix.hiveContext(sc)
+    implicit val sqlContext = ModelMatrixAccess.hiveContext(sc)

     val features = blockOn(db.run(modelInstanceFeatures.features(modelInstanceId)))
     require(features.nonEmpty, s"No features are defined for model instance: $modelInstanceId. " +
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/ValidateInputData.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/ValidateInputData.scala
index 4aef1f4..1a34820 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/ValidateInputData.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/featurize/ValidateInputData.scala
@@ -1,9 +1,9 @@
 package com.collective.modelmatrix.cli.featurize

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.{SourceTransformation, CliSparkContext, Script, Source}
 import com.collective.modelmatrix.transform.Transformer
-import com.collective.modelmatrix.{Featurization, ModelMatrix}
+import com.collective.modelmatrix.{Featurization, ModelMatrixAccess}
 import org.slf4j.LoggerFactory

 import scalaz._
@@ -13,7 +13,7 @@ case class ValidateInputData(
   source: Source,
   repartitionSource: Option[Int],
   cacheSource: Boolean
-) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext {
+) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext {

   private val log = LoggerFactory.getLogger(classOf[ValidateInputData])
@@ -24,7 +24,7 @@ case class ValidateInputData(
     log.info(s"Validate input data against Model Matrix instance: $modelInstanceId. " +
       s"Data source: $source")

-    implicit val sqlContext = ModelMatrix.hiveContext(sc)
+    implicit val sqlContext = ModelMatrixAccess.hiveContext(sc)

     val features = blockOn(db.run(modelInstanceFeatures.features(modelInstanceId)))
     require(features.nonEmpty, s"No features are defined for model instance: $modelInstanceId. " +
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/AddInstance.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/AddInstance.scala
index e34544e..f3ddc0c 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/AddInstance.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/AddInstance.scala
@@ -1,6 +1,6 @@
 package com.collective.modelmatrix.cli.instance

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix._
 import com.collective.modelmatrix.cli.{SourceTransformation, CliSparkContext, Script, Source}
 import com.collective.modelmatrix.transform._
@@ -19,14 +19,14 @@ case class AddInstance(
   cacheSource: Boolean
 ) extends Script
   with SourceTransformation
-  with DbModelMatrixCatalog
+  with ModelMatrixCatalogAccess
   with CliSparkContext
   with Transformers
   with TransformationProcess {

   private val log = LoggerFactory.getLogger(classOf[AddInstance])

-  private implicit lazy val sqlContext = ModelMatrix.hiveContext(sc)
+  private implicit lazy val sqlContext = ModelMatrixAccess.hiveContext(sc)

   import com.collective.modelmatrix.cli.ASCIITableFormat._
   import com.collective.modelmatrix.cli.ASCIITableFormats._
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ListInstances.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ListInstances.scala
index 34d8932..9a03c5f 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ListInstances.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ListInstances.scala
@@ -1,13 +1,13 @@
 package com.collective.modelmatrix.cli.instance

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.Script
 import org.slf4j.LoggerFactory

 case class ListInstances(
   modelDefinitionId: Option[Int],
   name: Option[String]
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ListInstances])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ValidateInputData.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ValidateInputData.scala
index 0913be6..c13edad 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ValidateInputData.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ValidateInputData.scala
@@ -1,7 +1,7 @@
 package com.collective.modelmatrix.cli.instance

-import com.collective.modelmatrix.ModelMatrix
-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.{Source, _}
 import com.collective.modelmatrix.transform._
 import org.slf4j.LoggerFactory
@@ -13,11 +13,11 @@ case class ValidateInputData(
   source: Source,
   repartitionSource: Option[Int],
   cacheSource: Boolean
-) extends Script with SourceTransformation with DbModelMatrixCatalog with CliSparkContext with Transformers {
+) extends Script with SourceTransformation with ModelMatrixCatalogAccess with CliSparkContext with Transformers {

   private val log = LoggerFactory.getLogger(classOf[ValidateInputData])

-  private implicit lazy val sqlContext = ModelMatrix.hiveContext(sc)
+  private implicit lazy val sqlContext = ModelMatrixAccess.hiveContext(sc)

   import com.collective.modelmatrix.cli.ASCIITableFormat._
   import com.collective.modelmatrix.cli.ASCIITableFormats._
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewColumns.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewColumns.scala
index fa55092..e1c79ed 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewColumns.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewColumns.scala
@@ -1,6 +1,6 @@
 package com.collective.modelmatrix.cli.instance

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.catalog._
 import com.collective.modelmatrix.cli._
 import com.collective.modelmatrix.{CategoricalColumn, BinColumn}
@@ -11,7 +11,7 @@ import scalaz._

 case class ViewColumns(
   modelInstanceId: Int, group: Option[String], feature: Option[String]
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ViewColumns])
diff --git a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewFeatures.scala b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewFeatures.scala
index 27f1bfd..0d44588 100644
--- a/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewFeatures.scala
+++ b/modelmatrix-cli/src/main/scala/com/collective/modelmatrix/cli/instance/ViewFeatures.scala
@@ -1,13 +1,13 @@
 package com.collective.modelmatrix.cli.instance

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.cli.Script
 import org.slf4j.LoggerFactory

 case class ViewFeatures(
   modelInstanceId: Int
-) extends Script with DbModelMatrixCatalog {
+) extends Script with ModelMatrixCatalogAccess {

   private val log = LoggerFactory.getLogger(classOf[ViewFeatures])
diff --git a/modelmatrix-core/src/it/resources/pg.conf b/modelmatrix-core/src/it/resources/pg.conf
deleted file mode 100644
index 2343543..0000000
--- a/modelmatrix-core/src/it/resources/pg.conf
+++ /dev/null
@@ -1,5 +0,0 @@
-pgdev = {
-  url = "jdbc:postgresql://localhost/modelmatrix?user=modelmatrix&password=modelmatrix"
-  driver = org.postgresql.Driver
-  keepAliveConnection = true
-}
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/FeaturizationSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/FeaturizationSpec.scala
index e410c23..bb380d9 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/FeaturizationSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/FeaturizationSpec.scala
@@ -12,7 +12,7 @@ import scalaz.{-\/, \/, \/-}

 class FeaturizationSpec extends FlatSpec with GivenWhenThen with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("auction_id", LongType),
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/BinsTransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/BinsTransformerSpec.scala
index 9eb7617..2e62e4f 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/BinsTransformerSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/BinsTransformerSpec.scala
@@ -1,17 +1,17 @@
 package com.collective.modelmatrix.transform

-import com.collective.modelmatrix.{ModelMatrix, ModelFeature, TestSparkContext}
-import org.apache.spark.sql.{Row, SQLContext}
+import com.collective.modelmatrix.{ModelFeature, ModelMatrixAccess, TestSparkContext}
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.types._
 import org.scalatest.FlatSpec

 import scala.util.Random
-import scalaz.{\/-, -\/}
 import scalaz.syntax.either._
+import scalaz.{-\/, \/-}

 class BinsTransformerSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("adv_site", StringType),
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/ExtractExpressionsSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/ExtractExpressionsSpec.scala
index 87906b0..2b9bfda 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/ExtractExpressionsSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/ExtractExpressionsSpec.scala
@@ -5,7 +5,7 @@ import java.time.{DayOfWeek, Instant, ZoneOffset}
 import java.util.UUID

 import com.collective.modelmatrix.CategoricalColumn.CategoricalValue
-import com.collective.modelmatrix.{ModelMatrixEncoding, ModelFeature, ModelMatrix, TestSparkContext}
+import com.collective.modelmatrix.{ModelMatrixEncoding, ModelFeature, ModelMatrixAccess, TestSparkContext}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.types._
 import org.scalatest.FlatSpec
@@ -17,7 +17,7 @@ import scalaz.syntax.either._

 class ExtractExpressionsSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val rnd = new Random()
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IdentityTransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IdentityTransformerSpec.scala
index de7c5dd..1c40768 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IdentityTransformerSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IdentityTransformerSpec.scala
@@ -1,6 +1,6 @@
 package com.collective.modelmatrix.transform

-import com.collective.modelmatrix.{ModelMatrix, ModelFeature, TestSparkContext}
+import com.collective.modelmatrix.{ModelMatrixAccess, ModelFeature, TestSparkContext}
 import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
 import org.apache.spark.sql.{Row, SQLContext}
 import org.scalatest.FlatSpec
@@ -10,7 +10,7 @@ import scalaz.syntax.either._

 class IdentityTransformerSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("adv_site", StringType),
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IndexTransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IndexTransformerSpec.scala
index a790bd0..3610bad 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IndexTransformerSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/IndexTransformerSpec.scala
@@ -1,7 +1,7 @@
 package com.collective.modelmatrix.transform

 import com.collective.modelmatrix.CategoricalColumn.{AllOther, CategoricalValue}
-import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrix, ModelFeature, TestSparkContext}
+import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrixAccess, ModelFeature, TestSparkContext}
 import org.apache.spark.sql.{Row, SQLContext}
 import org.apache.spark.sql.types.{StringType, StructField, StructType}
 import org.scalatest.FlatSpec
@@ -13,7 +13,7 @@ import scalaz.syntax.either._

 class IndexTransformerSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("adv_site", StringType)
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TopTransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TopTransformerSpec.scala
index a63abec..7973f41 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TopTransformerSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TopTransformerSpec.scala
@@ -1,7 +1,7 @@
 package com.collective.modelmatrix.transform

 import com.collective.modelmatrix.CategoricalColumn.{AllOther, CategoricalValue}
-import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrix, ModelFeature, TestSparkContext}
+import com.collective.modelmatrix.{ModelMatrixEncoding, ModelMatrixAccess, ModelFeature, TestSparkContext}
 import org.apache.spark.sql.{DataFrame, Row, SQLContext}
 import org.apache.spark.sql.types._
 import org.scalatest.FlatSpec
@@ -11,7 +11,7 @@ import scalaz.syntax.either._

 class TopTransformerSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("adv_site", StringType)
diff --git a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala
index 3073e02..3c2020a 100644
--- a/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala
+++ b/modelmatrix-core/src/it/scala/com/collective/modelmatrix/transform/TransformerSpec.scala
@@ -1,6 +1,6 @@
 package com.collective.modelmatrix.transform

-import com.collective.modelmatrix.{ModelFeature, ModelMatrix, TestSparkContext}
+import com.collective.modelmatrix.{ModelFeature, ModelMatrixAccess, TestSparkContext}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
 import org.scalatest.FlatSpec
@@ -9,7 +9,7 @@ import scalaz.{\/-, -\/}

 class TransformerSpec extends FlatSpec with TestSparkContext {

-  val sqlContext = ModelMatrix.sqlContext(sc)
+  val sqlContext = ModelMatrixAccess.sqlContext(sc)

   val schema = StructType(Seq(
     StructField("adv_site", StringType),
diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrix.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrixAccess.scala
similarity index 91%
rename from modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrix.scala
rename to modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrixAccess.scala
index 7715740..13aef87 100644
--- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrix.scala
+++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/ModelMatrixAccess.scala
@@ -2,7 +2,7 @@ package com.collective.modelmatrix

 import java.util.concurrent.Executors

-import com.collective.modelmatrix.ModelMatrix.DbModelMatrixCatalog
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.catalog.{ModelDefinitionFeature, ModelInstanceFeature, _}
 import com.collective.modelmatrix.db.DefaultDBConfigWrapper
 import com.collective.modelmatrix.transform.Transformer.FeatureExtractionError
@@ -30,7 +30,7 @@ class ModelMatrixFeaturizationException(errors: Seq[FeaturizationError])
 class ModelMatrixFeatureTransformationException(errors: Seq[(ModelDefinitionFeature, FeatureTransformationError)])
   extends RuntimeException(s"Failed to run transformation. Bad features [${errors.map(_._1.feature).mkString(", ")}]")

-object ModelMatrix extends ModelMatrixUDF {
+object ModelMatrixAccess extends ModelMatrixUDF {

   def sqlContext(sc: SparkContext): SQLContext = {
     val sqlContext = new SQLContext(sc)
@@ -44,17 +44,19 @@ object ModelMatrix extends ModelMatrixUDF {
     sqlContext
   }

-  trait ModelMatrixCatalogAccess {
-    protected val driver: JdbcProfile
+  trait ModelMatrixCatalogAccess {
+    protected val driver = DefaultDBConfigWrapper.getSlickDriver
     import driver.api._

-    protected val db: Database
+    protected lazy val db = Database.forConfig("", DefaultDBConfigWrapper.dbConfig)
+    protected lazy val catalog = new ModelMatrixCatalog(driver)

-    protected def modelDefinitions: ModelDefinitions
-    protected def modelDefinitionFeatures: ModelDefinitionFeatures
-    protected def modelInstances: ModelInstances
-    protected def modelInstanceFeatures: ModelInstanceFeatures
+    protected lazy val modelDefinitions = new ModelDefinitions(catalog)
+    protected lazy val modelDefinitionFeatures = new ModelDefinitionFeatures(catalog)
+
+    protected lazy val modelInstances = new ModelInstances(catalog)
+    protected lazy val modelInstanceFeatures = new ModelInstanceFeatures(catalog)

     protected def blockOn[T](f: Future[T], duration: FiniteDuration = 10.seconds) = {
       Await.result(f, duration)
@@ -71,23 +73,6 @@ object ModelMatrix extends ModelMatrixUDF {
       setNameFormat(s"$prefix-%d").
       build()
   }
-
-
-  trait DbModelMatrixCatalog extends ModelMatrixCatalogAccess {
-
-    protected val driver = DefaultDBConfigWrapper.getSlickDriver
-    import driver.api._
-
-    protected lazy val db = Database.forConfig("", DefaultDBConfigWrapper.dbConfig)
-    protected lazy val catalog = new ModelMatrixCatalog(driver)
-
-    protected lazy val modelDefinitions = new ModelDefinitions(catalog)
-    protected lazy val modelDefinitionFeatures = new ModelDefinitionFeatures(catalog)
-
-    protected lazy val modelInstances = new ModelInstances(catalog)
-    protected lazy val modelInstanceFeatures = new ModelInstanceFeatures(catalog)
-  }
-
 }

 /**
@@ -96,10 +81,10 @@ object ModelMatrix extends ModelMatrixUDF {
  *
  * @param sqlContext Spark SQL Context
  */
-class ModelMatrix(sqlContext: SQLContext) extends DbModelMatrixCatalog with Transformers with TransformationProcess {
-  private val log = LoggerFactory.getLogger(classOf[ModelMatrix])
+class ModelMatrixAccess(sqlContext: SQLContext) extends ModelMatrixCatalogAccess with Transformers with TransformationProcess {
+  private val log = LoggerFactory.getLogger(classOf[ModelMatrixAccess])

-  def this(sc: SparkContext) = this(ModelMatrix.hiveContext(sc))
+  def this(sc: SparkContext) = this(ModelMatrixAccess.hiveContext(sc))

   private implicit val _sqlContext = sqlContext
diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/TransformationProcess.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/TransformationProcess.scala
index cefc014..1e0946a 100644
--- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/TransformationProcess.scala
+++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/TransformationProcess.scala
@@ -2,7 +2,7 @@ package com.collective.modelmatrix

 import java.time.Instant

-import com.collective.modelmatrix.ModelMatrix.ModelMatrixCatalogAccess
+import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
 import com.collective.modelmatrix.catalog.ModelDefinitionFeature
 import com.collective.modelmatrix.transform._
 import org.apache.spark.sql.types.DataType
diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/DatabaseConfig.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/DatabaseConfig.scala
index f0941b0..fb591dd 100644
--- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/DatabaseConfig.scala
+++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/DatabaseConfig.scala
@@ -3,31 +3,27 @@ package com.collective.modelmatrix.db
 import com.typesafe.config.{Config, ConfigFactory}
 import slick.driver.{H2Driver, JdbcProfile, PostgresDriver}

-/**
- * Created by jpocalan on 7/27/15.
- */
-// TODO: replace by some sort of enum
-// Differentiate between the different types of databse supported
-class DatabaseConfig(n: String, dc: String, sd: JdbcProfile) {
-  val name = n;
-  val driverClass = dc;
-  val slickDriver = sd;
-}

 // database configuration wrapper class that read the configuration files and
 // determines the proper slick driver to use
-case class DatabaseConfigWrapper(configFilePath: String = "") {
-  val PG = new DatabaseConfig("pg", "org.postgresql.Driver", PostgresDriver)
-  val H2 = new DatabaseConfig("h2", "org.h2.Driver", H2Driver)
-  lazy val currentDB: DatabaseConfig = getCurrentDB
+class DatabaseConfigWrapper(configFilePath: String = "") {
+  // Differentiate between the different types of database supported
+  private [DatabaseConfigWrapper] case class DatabaseConfig(val name: String,
+                                                            val driverClass: String,
+                                                            val slickDriver: JdbcProfile)
+
+  private[this] val PG = DatabaseConfig("pg", "org.postgresql.Driver", PostgresDriver)
+  private[this] val H2 = DatabaseConfig("h2", "org.h2.Driver", H2Driver)

-  val dbConfigPath: String = "modelmatrix.catalog.db"
+  private[this] val dbConfigPath: String = "modelmatrix.catalog.db"
+
+  lazy val currentDB: DatabaseConfig = getCurrentDB

   lazy val dbConfig: Config =
     if (configFilePath.isEmpty) ConfigFactory.load().getConfig(dbConfigPath)
     else ConfigFactory.systemProperties().withFallback(ConfigFactory.load(configFilePath)).getConfig(dbConfigPath)

   // do not need to expose this as user have access to the current db config using attribute currentDB
-  private def getCurrentDB = {
+  private[this] def getCurrentDB: DatabaseConfig = {
     dbConfig.getString("driver") match {
       case PG.driverClass => PG
       case H2.driverClass => H2
diff --git a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/SchemaInstaller.scala b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/SchemaInstaller.scala
index d7f9d60..e5004d4 100644
--- a/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/SchemaInstaller.scala
+++ b/modelmatrix-core/src/main/scala/com/collective/modelmatrix/db/SchemaInstaller.scala
@@ -8,7 +8,7 @@ import org.flywaydb.core.Flyway
 trait SchemaInstaller {

   val dbConfigWrapper: DatabaseConfigWrapper

-  def installOrMigrate: Boolean = {
+  def installOrMigrate(): Boolean = {
     val config = dbConfigWrapper.dbConfig
     val url = config.getString("url")
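
Not part of the diff above: the short sketch below illustrates what the refactoring buys on the CLI side. It assumes only the trait members visible in the diff (`db`, `modelInstanceFeatures`, `blockOn`) and a hypothetical `run()` entry point on `Script`; the command itself is invented for illustration. Because `ModelMatrixCatalogAccess` now carries concrete lazy wiring for the driver, database and catalog, a command simply mixes in the trait instead of implementing the abstract members the old `ModelMatrixCatalogAccess`/`DbModelMatrixCatalog` split required:

    package com.collective.modelmatrix.cli.example

    import com.collective.modelmatrix.ModelMatrixAccess.ModelMatrixCatalogAccess
    import com.collective.modelmatrix.cli.Script

    // Hypothetical command (illustration only): counts the features of one
    // model instance using the catalog access inherited from the trait.
    case class CountInstanceFeatures(modelInstanceId: Int)
      extends Script with ModelMatrixCatalogAccess {

      // `run()` is assumed to be the Script entry point; adapt to the real trait.
      def run(): Unit = {
        // db, modelInstanceFeatures and blockOn all come from ModelMatrixCatalogAccess
        val features = blockOn(db.run(modelInstanceFeatures.features(modelInstanceId)))
        require(features.nonEmpty, s"No features are defined for model instance: $modelInstanceId")
        Console.out.println(s"Model instance $modelInstanceId has ${features.size} features")
      }
    }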