Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert codebase to scala 2.13 #499

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>

<scala.major.version>2.12</scala.major.version>
<scala.version>${scala.major.version}.10</scala.version>
<scala.major.version>2.13</scala.major.version>
<scala.version>${scala.major.version}.11</scala.version>
<artifact.scala.version>${scala.major.version}</artifact.scala.version>
<scala-maven-plugin.version>4.8.1</scala-maven-plugin.version>

Expand Down Expand Up @@ -103,7 +103,7 @@
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.major.version}</artifactId>
<version>0.13.2</version>
<version>1.2</version>
</dependency>

<dependency>
Expand Down
10 changes: 9 additions & 1 deletion src/main/scala/com/amazon/deequ/VerificationResult.scala
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ object VerificationResult {
}
}

private[this] case class SimpleCheckResultOutput(checkDescription: String, checkLevel: String,
// remove private as it breaks something with scala 2.13
// java.lang.RuntimeException: Error while encoding: java.util.concurrent.ExecutionException:
// org.codehaus.commons.compiler.CompileException:
// File 'generated.java', Line 105, Column 25: failed to compile:
// org.codehaus.commons.compiler.CompileException: File 'generated.java',
// Line 105, Column 25: No applicable constructor/method found for zero actual parameters; candidates are:
// "public java.lang.String com.amazon.deequ.VerificationResult$SimpleCheckResultOutput.constraintStatus()"
// private[this]
case class SimpleCheckResultOutput(checkDescription: String, checkLevel: String,
checkStatus: String, constraint: String, constraintStatus: String, constraintMessage: String)
}
2 changes: 1 addition & 1 deletion src/main/scala/com/amazon/deequ/analyzers/Analyzer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ private[deequ] object Analyzers {
if (nullInResult) {
None
} else {
Option(func(Unit))
Option(func(()))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class QuantileNonSample[T](
this.shrinkingFactor = shrinkingFactor
compactors = ArrayBuffer.fill(data.length)(new NonSampleCompactor[T])
for (i <- data.indices) {
compactors(i).buffer = data(i).to[ArrayBuffer]
compactors(i).buffer = ArrayBuffer.from(data(i)) // data(i).to[ArrayBuffer]
}
curNumOfCompactors = data.length
compactorActualSize = getCompactorItemsCount
Expand Down
5 changes: 3 additions & 2 deletions src/main/scala/com/amazon/deequ/analyzers/StateProvider.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.aggregate.{ApproximatePercentile, DeequHyperLogLogPlusPlusUtils}
import org.apache.spark.sql.SaveMode

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._
import scala.util.hashing.MurmurHash3

private object StateInformation {
Expand Down Expand Up @@ -58,7 +58,8 @@ case class InMemoryStateProvider() extends StateLoader with StatePersister {

override def toString: String = {
val buffer = new StringBuilder()
statesByAnalyzer.foreach { case (analyzer, state) =>

statesByAnalyzer.asScala.foreach { case (analyzer, state) =>
buffer.append(analyzer.toString)
buffer.append(" => ")
buffer.append(state.toString)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,19 @@ object AnalyzerContext {
}
}

private[this] case class SimpleMetricOutput(
// when using private, leads to
// No applicable constructor/method
// private[this]
case class SimpleMetricOutput(
entity: String,
instance: String,
name: String,
value: Double)

private[this] object SimpleMetricOutput {
// when using private, leads to
// No applicable constructor/method
// private[this]
object SimpleMetricOutput {

def apply(doubleMetric: DoubleMetric): SimpleMetricOutput = {
SimpleMetricOutput(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ case class OnlineNormalStrategy(
ret += OnlineCalculationResult(currentMean, stdDev, isAnomaly = true)
}
}
ret
ret.toSeq
}


/**
* Search for anomalies in a series of data points.
* Search for anomalies in a series of data points.
*
* @param dataSeries The data contained in a Vector of Doubles
* @param searchInterval The indices between which anomalies should be detected. [a, b).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class HoltWinters(
}
val forecasted = Y.drop(series.size)

ModelResults(forecasted, level, trend, seasonality, residuals)
ModelResults(forecasted.toSeq, level.toSeq, trend.toSeq, seasonality.toSeq, residuals.toSeq)
}

private def modelSelectionFor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ object ColumnProfiler {
Histogram(histogram.column).equals(histogram)
case _ => false
}
analyzerContextExistingValues = AnalyzerContext(relevantEntries)
analyzerContextExistingValues = AnalyzerContext(relevantEntries.toMap)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,13 @@ import com.google.gson._
import com.google.gson.reflect.TypeToken

import scala.collection._
import scala.collection.JavaConverters._
import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
import JsonSerializationConstants._
import com.amazon.deequ.analyzers.Histogram.{AggregateFunction, Count => HistogramCount, Sum => HistogramSum}
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.expr

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._

private[repository] object JsonSerializationConstants {

Expand Down Expand Up @@ -70,6 +69,7 @@ private[deequ] object SimpleResultSerde {
.asInstanceOf[JArrayList[JMap[String, String]]]
.asScala
.map(map => immutable.Map(map.asScala.toList: _*))
.toSeq
}
}

Expand Down Expand Up @@ -422,22 +422,22 @@ private[deequ] object AnalyzerDeserializer
getOptionalWhereParam(json))

case "CountDistinct" =>
CountDistinct(getColumnsAsSeq(context, json))
CountDistinct(getColumnsAsSeq(context, json).toSeq)

case "Distinctness" =>
Distinctness(getColumnsAsSeq(context, json))
Distinctness(getColumnsAsSeq(context, json).toSeq)

case "Entropy" =>
Entropy(json.get(COLUMN_FIELD).getAsString)

case "MutualInformation" =>
MutualInformation(getColumnsAsSeq(context, json))
MutualInformation(getColumnsAsSeq(context, json).toSeq)

case "UniqueValueRatio" =>
UniqueValueRatio(getColumnsAsSeq(context, json))
UniqueValueRatio(getColumnsAsSeq(context, json).toSeq)

case "Uniqueness" =>
Uniqueness(getColumnsAsSeq(context, json))
Uniqueness(getColumnsAsSeq(context, json).toSeq)

case "Histogram" =>
Histogram(
Expand Down Expand Up @@ -598,7 +598,7 @@ private[deequ] object MetricDeserializer extends JsonDeserializer[Metric[_]] {
val instance = jsonObject.get("instance").getAsString
if (jsonObject.has("value")) {
val entries = jsonObject.get("value").getAsJsonObject
val values = entries.entrySet().map { entry =>
val values = entries.entrySet().asScala.map { entry =>
entry.getKey -> entry.getValue.getAsDouble
}
.toMap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ private[repository] object MetricsRepositoryMultipleResultsLoader {

def jsonUnion(jsonOne: String, jsonTwo: String): String = {

val objectOne: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonOne)
val objectTwo: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonTwo)
val objectOne: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonOne).toSeq
val objectTwo: Seq[Map[String, Any]] = SimpleResultSerde.deserialize(jsonTwo).toSeq

val columnsTotal = objectOne.headOption.getOrElse(Map.empty).keySet ++
objectTwo.headOption.getOrElse(Map.empty).keySet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,11 @@ class FileSystemMetricsRepositoryMultipleResultsLoader(
.metricMap
.filterKeys(analyzer => forAnalyzers.isEmpty || forAnalyzers.get.contains(analyzer))

val requestedAnalyzerContext = AnalyzerContext(requestedMetrics)
val requestedAnalyzerContext = AnalyzerContext(requestedMetrics.toMap)

AnalysisResult(analysisResult.resultKey, requestedAnalyzerContext)
}
.toSeq
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import com.amazon.deequ.metrics.Metric
import com.amazon.deequ.repository._
import com.amazon.deequ.analyzers.runners.AnalyzerContext

import scala.collection.JavaConversions._
import scala.jdk.CollectionConverters._
import java.util.concurrent.ConcurrentHashMap

/** A simple Repository implementation backed by a concurrent hash map */
Expand Down Expand Up @@ -118,6 +118,7 @@ class LimitedInMemoryMetricsRepositoryMultipleResultsLoader(
/** Get the AnalysisResult */
def get(): Seq[AnalysisResult] = {
resultsRepository
.asScala
.filterKeys(key => after.isEmpty || after.get <= key.dataSetDate)
.filterKeys(key => before.isEmpty || key.dataSetDate <= before.get)
.filterKeys(key => tagValues.isEmpty || tagValues.get.toSet.subsetOf(key.tags.toSet))
Expand All @@ -129,7 +130,7 @@ class LimitedInMemoryMetricsRepositoryMultipleResultsLoader(
.metricMap
.filterKeys(analyzer => forAnalyzers.isEmpty || forAnalyzers.get.contains(analyzer))

AnalysisResult(analysisResult.resultKey, AnalyzerContext(requestedMetrics))
AnalysisResult(analysisResult.resultKey, AnalyzerContext(requestedMetrics.toMap))
}
.toSeq
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class ConstraintSuggestionRunner {
groupedSuggestionsWithColumnNames.map { case (_, suggestion) => suggestion } }

ConstraintSuggestionResult(columnProfiles.profiles, columnProfiles.numRecords,
columnsWithSuggestions, verificationResult)
columnsWithSuggestions.toMap, verificationResult)
}

private[this] def splitTrainTestSets(
Expand Down
6 changes: 3 additions & 3 deletions src/test/scala/com/amazon/deequ/KLL/KLLDistanceTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
val sample2 = scala.collection.mutable.Map(
"a" -> 22L, "b" -> 20L, "c" -> 25L, "d" -> 12L, "e" -> 13L, "f" -> 15L)
val distance = Distance.categoricalDistance(sample1, sample2, method = LInfinityMethod(alpha = Some(0.003)))
assert(distance == 0.2726338046550349)
assert(distance == 0.27263380465503484)
}

"Categorial distance should compute correct linf_robust with different alpha value .1" in {
Expand Down Expand Up @@ -137,7 +137,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
"a" -> 100L, "b" -> 22L, "c" -> 25L, "d" -> 5L, "e" -> 13L, "f" -> 2L)
val distance = Distance.categoricalDistance(
sample1, sample2, correctForLowNumberOfSamples = true, method = ChisquareMethod())
assert(distance == 8.789790456457125)
assert(distance == 8.789790456457123)
}

"Categorical distance should compute correct chisquare distance (low samples) with regrouping (yates)" in {
Expand Down Expand Up @@ -170,7 +170,7 @@ class KLLDistanceTest extends WordSpec with SparkContextSpec
"a" -> 100L, "b" -> 4L, "c" -> 3L, "d" -> 27L, "e" -> 20L, "f" -> 20L, "g" -> 20L, "h" -> 20L)
val distance = Distance.categoricalDistance(
sample, baseline, correctForLowNumberOfSamples = true, method = ChisquareMethod())
assert(distance == 6.827423492761593)
assert(distance == 6.827423492761592)
}

"Categorical distance should compute correct chisquare distance (low samples) " +
Expand Down
18 changes: 10 additions & 8 deletions src/test/scala/com/amazon/deequ/VerificationResultTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,13 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
val successMetricsResultsJson = VerificationResult.successMetricsAsJson(results)

val expectedJson =
"""[{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scala 2.13 collections break the previous ordering of elements

"""[
|{"entity":"Multicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
{"entity":"Column","instance":"item","name":"Distinctness","value":1.0},
|{"entity": "Column", "instance":"att2","name":"Completeness","value":1.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25},
|{"entity":"Dataset","instance":"*","name":"Size","value":4.0}]"""
|{"entity":"Dataset","instance":"*","name":"Size","value":4.0},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0}
|]"""
.stripMargin.replaceAll("\n", "")

assertSameResultsJson(successMetricsResultsJson, expectedJson)
Expand All @@ -102,9 +103,10 @@ class VerificationResultTest extends WordSpec with Matchers with SparkContextSpe
VerificationResult.successMetricsAsJson(results, metricsForAnalyzers)

val expectedJson =
"""[{"entity":"Column","instance":"att1","name":"Completeness","value":1.0},
|{"entity":"Multicolumn","instance":"att1,att2",
|"name":"Uniqueness","value":0.25}]"""
"""[
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scala 2.13 collections break the previous ordering of elements

|{"entity":"Multicolumn","instance":"att1,att2","name":"Uniqueness","value":0.25},
|{"entity":"Column","instance":"att1","name":"Completeness","value":1.0}
|]"""
.stripMargin.replaceAll("\n", "")

assertSameResultsJson(successMetricsResultsJson, expectedJson)
Expand Down
Loading