Skip to content

Commit

Permalink
[WX-1615] Stop converting metadata values to strings before inserting…
Browse files Browse the repository at this point in the history
… them into the database (#7427)
  • Loading branch information
rsaperst committed May 9, 2024
1 parent e03b417 commit 7d5d5fd
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import cromwell.core.Dispatcher.ServiceDispatcher
import cromwell.core.Mailbox.PriorityMailbox
import cromwell.core.WorkflowId
import cromwell.core.instrumentation.InstrumentationPrefixes
import cromwell.services.metadata.{MetadataEvent, MetadataValue}
import cromwell.services.metadata.{MetadataEvent, MetadataString, MetadataValue}
import cromwell.services.metadata.MetadataService._
import cromwell.services.metadata.impl.MetadataStatisticsRecorder.MetadataStatisticsRecorderSettings
import cromwell.services.{EnhancedBatchActor, MetadataServicesStore}
Expand Down Expand Up @@ -65,7 +65,12 @@ class WriteMetadataActor(override val batchSize: Int,
val metadataEvents =
metadataWriteAction.events.map { event =>
event.value match {
case Some(eventVal) => event.copy(value = Option(MetadataValue(StringUtil.cleanUtf8mb4(eventVal.value))))
case Some(eventVal) =>
if (eventVal.valueType == MetadataString && metadataKeysToClean.contains(event.key.key)) {
event.copy(value = Option(MetadataValue(StringUtil.cleanUtf8mb4(eventVal.value))))
} else {
event
}
case None => event
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package cromwell.services.metadata.impl

import java.sql.{Connection, Timestamp}

import akka.actor.ActorRef
import akka.testkit.{TestFSMRef, TestProbe}
import cats.data.NonEmptyVector
Expand All @@ -19,7 +18,7 @@ import cromwell.services.metadata.MetadataService.{
}
import cromwell.services.metadata.impl.MetadataStatisticsRecorder.MetadataStatisticsDisabled
import cromwell.services.metadata.impl.WriteMetadataActorSpec.BatchSizeCountingWriteMetadataActor
import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue}
import cromwell.services.metadata.{MetadataEvent, MetadataInt, MetadataKey, MetadataValue}
import org.scalatest.concurrent.Eventually
import org.scalatest.flatspec.AnyFlatSpecLike
import org.scalatest.matchers.should.Matchers
Expand Down Expand Up @@ -178,7 +177,7 @@ class WriteMetadataActorSpec extends TestKitSuite with AnyFlatSpecLike with Matc

sanitizedWriteActions.map { writeAction =>
writeAction.events.map { event =>
if (event.value.getOrElse(fail("Removed value from metadata event")).value.matches("[\\x{10000}-\\x{FFFFF}]")) {
if (event.value.getOrElse(fail("Removed value from metadata event")).value.contains("\uD83C\uDF89")) {
fail("Metadata event contains emoji")
}

Expand Down Expand Up @@ -220,7 +219,7 @@ class WriteMetadataActorSpec extends TestKitSuite with AnyFlatSpecLike with Matc

sanitizedWriteActions.map { writeAction =>
writeAction.events.map { event =>
if (event.value.getOrElse(fail("Removed value from metadata event")).value.matches("[\\x{10000}-\\x{FFFFF}]")) {
if (event.value.getOrElse(fail("Removed value from metadata event")).value.contains("\uD83C\uDF89")) {
fail("Metadata event contains emoji")
}

Expand All @@ -231,6 +230,90 @@ class WriteMetadataActorSpec extends TestKitSuite with AnyFlatSpecLike with Matc
}
}

it should s"test sanitize inputs does not modify metadata values whose keys are not included in the keys to clean" in {
val registry = TestProbe().ref
val writeActor =
TestFSMRef(new BatchSizeCountingWriteMetadataActor(10, 10.millis, registry, Int.MaxValue, List("some_key")) {
override val metadataDatabaseInterface = mockDatabaseInterface(100)
})

def metadataEvent(index: Int, probe: ActorRef) = PutMetadataActionAndRespond(
List(
MetadataEvent(MetadataKey(WorkflowId.randomId(), None, "metadata_key"), MetadataValue(s"🎉_$index"))
),
probe
)

val probes = (0 until 43)
.map { _ =>
val probe = TestProbe()
probe
}
.zipWithIndex
.map { case (probe, index) =>
probe -> metadataEvent(index, probe.ref)
}

val metadataWriteActions = probes.map(probe => probe._2).toVector
val metadataWriteActionNE = NonEmptyVector(metadataWriteActions.head, metadataWriteActions.tail)

val sanitizedWriteActions = writeActor.underlyingActor.sanitizeInputs(metadataWriteActionNE)

sanitizedWriteActions.map { writeAction =>
writeAction.events.map { event =>
if (!event.value.getOrElse(fail("Removed value from metadata event")).value.contains("\uD83C\uDF89")) {
fail("Metadata event was incorrectly sanitized")
}

if (event.value.getOrElse(fail("Removed value from metadata event")).value.contains("\uFFFD")) {
fail("Incorrectly replaced character in metadata event")
}
}
}
}

it should s"test sanitize inputs does not modify metadata values that are not strings" in {
val registry = TestProbe().ref
val writeActor =
TestFSMRef(new BatchSizeCountingWriteMetadataActor(10, 10.millis, registry, Int.MaxValue, List("metadata_key")) {
override val metadataDatabaseInterface = mockDatabaseInterface(100)
})

def metadataEvent(index: Int, probe: ActorRef) = PutMetadataActionAndRespond(
List(
MetadataEvent(MetadataKey(WorkflowId.randomId(), None, "metadata_key"), MetadataValue(100))
),
probe
)

val probes = (0 until 43)
.map { _ =>
val probe = TestProbe()
probe
}
.zipWithIndex
.map { case (probe, index) =>
probe -> metadataEvent(index, probe.ref)
}

val metadataWriteActions = probes.map(probe => probe._2).toVector
val metadataWriteActionNE = NonEmptyVector(metadataWriteActions.head, metadataWriteActions.tail)

val sanitizedWriteActions = writeActor.underlyingActor.sanitizeInputs(metadataWriteActionNE)

sanitizedWriteActions.map { writeAction =>
writeAction.events.map { event =>
if (!(event.value.getOrElse(fail("Removed value from metadata event")).valueType == MetadataInt)) {
fail("Changed metadata type")
}

if (!event.value.getOrElse(fail("Removed value from metadata event")).value.equals("100")) {
fail("Modified metadata value")
}
}
}
}

// Mock database interface.
// A customizable number of failures occur between each success
def mockDatabaseInterface(failuresBetweenEachSuccess: Int) = new MetadataSqlDatabase with SqlDatabase {
Expand Down

0 comments on commit 7d5d5fd

Please sign in to comment.