From b649b6ef5a6f68d494460d58a99fe84a5d3162ca Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Wed, 19 Jun 2024 20:56:25 +0200 Subject: [PATCH 1/8] Bump SBT version to 1.9.0 --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index f344c14..8fd7d2e 100755 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version = 1.8.2 +sbt.version = 1.9.0 From 1afa96571c39c7d9af99076b7f96e802b4368fcd Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Wed, 19 Jun 2024 20:57:36 +0200 Subject: [PATCH 2/8] Introduce a service for embeddings The PineconeEmbeddingsService has a single generate embeddings method. --- .../cequence/pineconescala/JsonFormats.scala | 7 ++- .../pineconescala/service/EndPoint.scala | 14 ++--- .../PineconeEmbeddingsServiceImpl.scala | 53 +++++++++++++++++++ .../response/GenerateEmbeddingsResponse.scala | 16 ++++++ .../settings/GenerateEmbeddingsSettings.scala | 21 ++++++++ .../service/PineconeEmbeddingsService.scala | 25 +++++++++ .../service/PineconeServiceConsts.scala | 9 ++-- 7 files changed, 134 insertions(+), 11 deletions(-) create mode 100644 pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala create mode 100644 pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala create mode 100644 pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala create mode 100644 pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala index c52f2c8..9da8b95 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala +++ 
b/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala @@ -3,7 +3,7 @@ package io.cequence.pineconescala import io.cequence.pineconescala.domain.response._ import io.cequence.pineconescala.domain.{Metric, PVector, PodType, SparseVector} import io.cequence.wsclient.JsonUtil.enumFormat -import play.api.libs.json.{Format, Json} +import play.api.libs.json.{Format, Json, Reads} object JsonFormats { // vector-stuff formats @@ -77,4 +77,9 @@ object JsonFormats { Json.format[ServerlessIndexSpec] implicit val serverlessIndexInfoFormat: Format[ServerlessIndexInfo] = Json.format[ServerlessIndexInfo] + + implicit val embeddingUsageInfoReads: Reads[EmbeddingsUsageInfo] = + Json.reads[EmbeddingsUsageInfo] + implicit val embeddingInfoReads: Reads[EmbeddingsInfo] = Json.reads[EmbeddingsInfo] + implicit val embeddingResponseReads: Reads[GenerateEmbeddingsResponse] = Json.reads[GenerateEmbeddingsResponse] } diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/EndPoint.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/EndPoint.scala index a8a2845..62a920a 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/EndPoint.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/EndPoint.scala @@ -1,16 +1,13 @@ package io.cequence.pineconescala.service -import io.cequence.pineconescala.domain.Metric -import io.cequence.pineconescala.domain.settings.IndexSettings.{ - CreatePodBasedIndexSettings, - CreateServerlessIndexSettings -} -import io.cequence.wsclient.domain.NamedEnumValue +import io.cequence.pineconescala.domain.settings.IndexSettings.{CreatePodBasedIndexSettings, CreateServerlessIndexSettings} +import io.cequence.wsclient.domain.{EnumValue, NamedEnumValue} sealed abstract class EndPoint(value: String = "") extends NamedEnumValue(value) object EndPoint { case object describe_index_stats extends EndPoint + case object embed extends EndPoint case object query extends 
EndPoint case object vectors_delete extends EndPoint("vectors/delete") case object vectors_fetch extends EndPoint("vectors/fetch") @@ -56,6 +53,11 @@ object Tag { case object region extends Tag case object spec extends Tag case object shards extends Tag + case object inputs extends Tag + case object input_type extends Tag + case object model extends Tag + case object parameters extends Tag + case object truncate extends Tag // TODO: move elsewhere def fromCreatePodBasedIndexSettings( diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala new file mode 100644 index 0000000..ae1911b --- /dev/null +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala @@ -0,0 +1,53 @@ +package io.cequence.pineconescala.service + +import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse +import io.cequence.pineconescala.domain.settings.GenerateEmbeddingsSettings +import io.cequence.wsclient.JsonUtil.{JsonOps, toJson} +import io.cequence.wsclient.service.ws.WSRequestHelper +import play.api.libs.json.{JsObject, JsValue} +import io.cequence.pineconescala.JsonFormats._ + +import scala.concurrent.Future + +abstract class PineconeEmbeddingsServiceImpl extends PineconeEmbeddingsService with WSRequestHelper { + + override protected type PEP = EndPoint + override protected type PT = Tag + + /** + * Uses the specified model to generate embeddings for the input sequence. + * + * @param inputs + * Input sequence for which to generate embeddings. 
+ * @param settings + * @return + * list of embeddings inside an envelope + */ + override def createEmbeddings(inputs: Seq[String], settings: GenerateEmbeddingsSettings): Future[GenerateEmbeddingsResponse] = { + val basicParams: Seq[(Tag, Option[JsValue])] = jsonBodyParams( + Tag.inputs -> Some(inputs), + Tag.model -> Some(settings.model) + ) + val otherParams: (Tag, Option[JsValue]) = { + Tag.parameters -> Some( + JsObject( + Seq( + Tag.input_type.toString() -> toJson(settings.input_type), + Tag.truncate.toString() -> toJson(settings.truncate) + ) + ) + ) + } + execPOST( + EndPoint.embed, + bodyParams = basicParams :+ otherParams + ).map( + _.asSafe[GenerateEmbeddingsResponse] + ) + + } + + override def close(): Unit = + client.close() + +} diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala new file mode 100644 index 0000000..a1b4170 --- /dev/null +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala @@ -0,0 +1,16 @@ +package io.cequence.pineconescala.domain.response + +case class GenerateEmbeddingsResponse( + data: Seq[Double], + model: String, + usage: EmbeddingsUsageInfo +) + +case class EmbeddingsInfo( + embedding: Seq[Double], + index: Int +) + +case class EmbeddingsUsageInfo( + total_tokens: Int +) diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala new file mode 100644 index 0000000..3e2d85d --- /dev/null +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala @@ -0,0 +1,21 @@ +package io.cequence.pineconescala.domain.settings + +import io.cequence.wsclient.domain.EnumValue + +case class 
GenerateEmbeddingsSettings( + // ID of the model to use. + model: String, + + // Common property used to distinguish between types of data. + input_type: Option[String] = None, + + // The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. + truncate: String = "END" +) + +sealed trait EmbeddingsEncodingFormat extends EnumValue + +object EmbeddingsEncodingFormat { + case object float extends EmbeddingsEncodingFormat + case object base64 extends EmbeddingsEncodingFormat +} \ No newline at end of file diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala new file mode 100644 index 0000000..f5b0c2c --- /dev/null +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala @@ -0,0 +1,25 @@ +package io.cequence.pineconescala.service + +import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse +import io.cequence.pineconescala.domain.settings.GenerateEmbeddingsSettings +import io.cequence.wsclient.service.CloseableService + +import scala.concurrent.Future + +trait PineconeEmbeddingsService extends CloseableService with PineconeServiceConsts { + + /** + * Uses the specified model to generate embeddings for the input sequence. + * + * @param inputs + * Input sequence for which to generate embeddings. 
+ * @param settings + * @return + * list of embeddings inside an envelope + */ + def createEmbeddings( + inputs: Seq[String], + settings: GenerateEmbeddingsSettings = DefaultSettings.GenerateEmbeddings + ): Future[GenerateEmbeddingsResponse] + +} diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeServiceConsts.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeServiceConsts.scala index 32746d1..c7755bc 100644 --- a/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeServiceConsts.scala +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeServiceConsts.scala @@ -1,9 +1,6 @@ package io.cequence.pineconescala.service -import io.cequence.pineconescala.domain.settings.IndexSettings.{ - CreatePodBasedIndexSettings, - CreateServerlessIndexSettings -} +import io.cequence.pineconescala.domain.settings.IndexSettings.{CreatePodBasedIndexSettings, CreateServerlessIndexSettings} import io.cequence.pineconescala.domain.{Metric, PodType} import io.cequence.pineconescala.domain.settings._ @@ -38,5 +35,9 @@ trait PineconeServiceConsts { CloudProvider.AWS, Region.EUWest1 ) + + val GenerateEmbeddings = GenerateEmbeddingsSettings( + model = "multilingual-e5-large" + ) } } From 3e66e3267a40b30e3e1051b42334cdb5c9d993c5 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Sun, 30 Jun 2024 12:36:18 +0200 Subject: [PATCH 3/8] Rename embeddings to inference service --- ...ingsServiceImpl.scala => PineconeInferenceServiceImpl.scala} | 2 +- ...neEmbeddingsService.scala => PineconeInferenceService.scala} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename pinecone-client/src/main/scala/io/cequence/pineconescala/service/{PineconeEmbeddingsServiceImpl.scala => PineconeInferenceServiceImpl.scala} (93%) rename pinecone-core/src/main/scala/io/cequence/pineconescala/service/{PineconeEmbeddingsService.scala => PineconeInferenceService.scala} (89%) diff --git 
a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala similarity index 93% rename from pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala rename to pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala index ae1911b..9849dee 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsServiceImpl.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala @@ -9,7 +9,7 @@ import io.cequence.pineconescala.JsonFormats._ import scala.concurrent.Future -abstract class PineconeEmbeddingsServiceImpl extends PineconeEmbeddingsService with WSRequestHelper { +abstract class PineconeInferenceServiceImpl extends PineconeInferenceService with WSRequestHelper { override protected type PEP = EndPoint override protected type PT = Tag diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceService.scala similarity index 89% rename from pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala rename to pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceService.scala index f5b0c2c..73de298 100644 --- a/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeEmbeddingsService.scala +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceService.scala @@ -6,7 +6,7 @@ import io.cequence.wsclient.service.CloseableService import scala.concurrent.Future -trait PineconeEmbeddingsService extends CloseableService with PineconeServiceConsts { +trait PineconeInferenceService extends CloseableService with PineconeServiceConsts 
{ /** * Uses the specified model to generate embeddings for the input sequence. From ad47e14822c479251a8e27152ca115202d9d5c24 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Sun, 30 Jun 2024 13:03:49 +0200 Subject: [PATCH 4/8] Provide factory for the inference service --- .../PineconeInferenceServiceImpl.scala | 50 +++++++++++++++++-- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala index 9849dee..d5d3c0a 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala @@ -1,15 +1,26 @@ package io.cequence.pineconescala.service +import akka.stream.Materializer import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse import io.cequence.pineconescala.domain.settings.GenerateEmbeddingsSettings import io.cequence.wsclient.JsonUtil.{JsonOps, toJson} -import io.cequence.wsclient.service.ws.WSRequestHelper +import io.cequence.wsclient.service.ws.{Timeouts, WSRequestHelper} import play.api.libs.json.{JsObject, JsValue} import io.cequence.pineconescala.JsonFormats._ +import io.cequence.pineconescala.PineconeScalaClientException +import play.api.libs.ws.StandaloneWSRequest -import scala.concurrent.Future +import scala.concurrent.{ExecutionContext, Future} -abstract class PineconeInferenceServiceImpl extends PineconeInferenceService with WSRequestHelper { +private class PineconeInferenceServiceImpl( + apiKey: String, + override val coreUrl: String, + explicitTimeouts: Option[Timeouts] = None +)( + implicit val ec: ExecutionContext, + val materializer: Materializer +) extends PineconeInferenceService + with WSRequestHelper { override protected type PEP = EndPoint override protected type 
PT = Tag @@ -23,7 +34,10 @@ abstract class PineconeInferenceServiceImpl extends PineconeInferenceService wit * @return * list of embeddings inside an envelope */ - override def createEmbeddings(inputs: Seq[String], settings: GenerateEmbeddingsSettings): Future[GenerateEmbeddingsResponse] = { + override def createEmbeddings( + inputs: Seq[String], + settings: GenerateEmbeddingsSettings + ): Future[GenerateEmbeddingsResponse] = { val basicParams: Seq[(Tag, Option[JsValue])] = jsonBodyParams( Tag.inputs -> Some(inputs), Tag.model -> Some(settings.model) @@ -47,7 +61,35 @@ abstract class PineconeInferenceServiceImpl extends PineconeInferenceService wit } + override def addHeaders(request: StandaloneWSRequest) = { + val apiKeyHeader = ("Api-Key", apiKey) + request.addHttpHeaders(apiKeyHeader) + } + + override protected def handleErrorCodes( + httpCode: Int, + message: String + ): Nothing = + throw new PineconeScalaClientException(s"Code ${httpCode} : ${message}") + override def close(): Unit = client.close() } + +object PineconeInferenceServiceFactory extends PineconeServiceFactoryHelper { + + def apply( + apiKey: String, + indexHostURL: String, + timeouts: Option[Timeouts] + )( + implicit ec: ExecutionContext, + materializer: Materializer + ): PineconeInferenceService = { + val indexHostURLWithEndingSlash = + if (indexHostURL.endsWith("/")) indexHostURL else s"$indexHostURL/" + new PineconeInferenceServiceImpl(apiKey, indexHostURLWithEndingSlash, timeouts) + } + +} From 08087bd06747f89b9053501e9912983f4ee8dfa4 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Mon, 1 Jul 2024 09:58:25 +0200 Subject: [PATCH 5/8] Test the inference API --- .../PineconeInferenceServiceImpl.scala | 28 +++++++++---- ...lessPineconeInferenceServiceImplSpec.scala | 41 +++++++++++++++++++ .../settings/GenerateEmbeddingsSettings.scala | 7 +++- 3 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 
pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala index d5d3c0a..0a70781 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala @@ -1,20 +1,21 @@ package io.cequence.pineconescala.service import akka.stream.Materializer +import com.typesafe.config.Config import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse -import io.cequence.pineconescala.domain.settings.GenerateEmbeddingsSettings +import io.cequence.pineconescala.domain.settings.{GenerateEmbeddingsSettings, IndexSettings} import io.cequence.wsclient.JsonUtil.{JsonOps, toJson} import io.cequence.wsclient.service.ws.{Timeouts, WSRequestHelper} import play.api.libs.json.{JsObject, JsValue} import io.cequence.pineconescala.JsonFormats._ import io.cequence.pineconescala.PineconeScalaClientException +import io.cequence.wsclient.domain.WsRequestContext import play.api.libs.ws.StandaloneWSRequest import scala.concurrent.{ExecutionContext, Future} private class PineconeInferenceServiceImpl( apiKey: String, - override val coreUrl: String, explicitTimeouts: Option[Timeouts] = None )( implicit val ec: ExecutionContext, @@ -24,6 +25,8 @@ private class PineconeInferenceServiceImpl( override protected type PEP = EndPoint override protected type PT = Tag + override val coreUrl: String = "https://api.pinecone.io/" + override protected val requestContext = WsRequestContext(explTimeouts = explicitTimeouts) /** * Uses the specified model to generate embeddings for the input sequence. 
@@ -79,17 +82,28 @@ private class PineconeInferenceServiceImpl( object PineconeInferenceServiceFactory extends PineconeServiceFactoryHelper { - def apply( + def apply[S <: IndexSettings]( apiKey: String, - indexHostURL: String, timeouts: Option[Timeouts] )( implicit ec: ExecutionContext, materializer: Materializer ): PineconeInferenceService = { - val indexHostURLWithEndingSlash = - if (indexHostURL.endsWith("/")) indexHostURL else s"$indexHostURL/" - new PineconeInferenceServiceImpl(apiKey, indexHostURLWithEndingSlash, timeouts) + new PineconeInferenceServiceImpl(apiKey, timeouts) + } + + def apply( + config: Config + )( + implicit ec: ExecutionContext, + materializer: Materializer + ): PineconeInferenceService = { + val timeouts = loadTimeouts(config) + + apply( + apiKey = config.getString(s"$configPrefix.apiKey"), + timeouts = timeouts.toOption + ) } } diff --git a/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala b/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala new file mode 100644 index 0000000..e844dab --- /dev/null +++ b/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala @@ -0,0 +1,41 @@ +package io.cequence.pineconescala.service + +import akka.actor.ActorSystem +import akka.stream.Materializer +import com.typesafe.config.{Config, ConfigFactory} +import org.scalatest.matchers.must.Matchers +import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper +import org.scalatest.wordspec.AsyncWordSpec +import org.scalatest.GivenWhenThen + +import scala.concurrent.{ExecutionContext, Future} + +class ServerlessPineconeInferenceServiceImplSpec + extends AsyncWordSpec + with GivenWhenThen + with ServerlessFixtures with Matchers with PineconeServiceConsts{ + + implicit val ec: ExecutionContext = ExecutionContext.global + implicit val materializer: Materializer = 
Materializer(ActorSystem()) + + val serverlessConfig: Config = ConfigFactory.load("serverless.conf") + + def inferenceServiceBuilder: PineconeInferenceService = + PineconeInferenceServiceFactory(serverlessConfig) + + "Pinecone Inference Service" when { + + "create embeddings should provide embeddings for input data" in { + val service = inferenceServiceBuilder + for { + embeddings <- service.createEmbeddings(Seq("The quick brown fox jumped over the lazy dog"), + settings = DefaultSettings.GenerateEmbeddings.withPassageInputType.withEndTruncate) + } yield { + println(embeddings) + embeddings.data should not be empty + } + } + + + } +} diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala index 3e2d85d..926312e 100644 --- a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala @@ -11,7 +11,12 @@ case class GenerateEmbeddingsSettings( // The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. 
truncate: String = "END" -) +) { + def withPassageInputType = copy(input_type = Some("passage")) + def withQueryInputType = copy(input_type = Some("query")) + def withoutTruncate = copy(truncate = "none") + def withEndTruncate = copy(truncate = "end") +} sealed trait EmbeddingsEncodingFormat extends EnumValue From 91ec7dd1147fa772fec8f4498b8d607c3ad2e679 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Mon, 1 Jul 2024 12:17:16 +0200 Subject: [PATCH 6/8] Fix request/response protocol Additionally, provide a strongly-typed model for embedding's parameters (input type & truncate) --- .../cequence/pineconescala/JsonFormats.scala | 16 ++++++++++- .../PineconeInferenceServiceImpl.scala | 13 +++++---- ...lessPineconeInferenceServiceImplSpec.scala | 5 ++-- .../response/GenerateEmbeddingsResponse.scala | 4 ++- .../settings/GenerateEmbeddingsSettings.scala | 28 ++++++++++++++----- 5 files changed, 50 insertions(+), 16 deletions(-) diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala index 9da8b95..023aaf5 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/JsonFormats.scala @@ -1,9 +1,11 @@ package io.cequence.pineconescala import io.cequence.pineconescala.domain.response._ +import io.cequence.pineconescala.domain.settings.{EmbeddingsInputType, EmbeddingsTruncate} +import io.cequence.pineconescala.domain.settings.EmbeddingsInputType.{Passage, Query} import io.cequence.pineconescala.domain.{Metric, PVector, PodType, SparseVector} import io.cequence.wsclient.JsonUtil.enumFormat -import play.api.libs.json.{Format, Json, Reads} +import play.api.libs.json.{Format, JsString, Json, Reads, Writes} object JsonFormats { // vector-stuff formats @@ -78,8 +80,20 @@ object JsonFormats { implicit val serverlessIndexInfoFormat: Format[ServerlessIndexInfo] = 
Json.format[ServerlessIndexInfo] + // embeddings implicit val embeddingUsageInfoReads: Reads[EmbeddingsUsageInfo] = Json.reads[EmbeddingsUsageInfo] implicit val embeddingInfoReads: Reads[EmbeddingsInfo] = Json.reads[EmbeddingsInfo] + implicit val embeddingValuesReads: Reads[EmbeddingsValues] = Json.reads[EmbeddingsValues] implicit val embeddingResponseReads: Reads[GenerateEmbeddingsResponse] = Json.reads[GenerateEmbeddingsResponse] + + implicit val embeddingsInputTypeWrites: Writes[EmbeddingsInputType] = enumFormat( + Query, + Passage + ) + + implicit val embeddingsTruncateWrites: Writes[EmbeddingsTruncate] = enumFormat( + EmbeddingsTruncate.None, + EmbeddingsTruncate.End + ) } diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala index 0a70781..1013af2 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala @@ -6,7 +6,7 @@ import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse import io.cequence.pineconescala.domain.settings.{GenerateEmbeddingsSettings, IndexSettings} import io.cequence.wsclient.JsonUtil.{JsonOps, toJson} import io.cequence.wsclient.service.ws.{Timeouts, WSRequestHelper} -import play.api.libs.json.{JsObject, JsValue} +import play.api.libs.json.{JsArray, JsObject, JsValue, Json} import io.cequence.pineconescala.JsonFormats._ import io.cequence.pineconescala.PineconeScalaClientException import io.cequence.wsclient.domain.WsRequestContext @@ -42,15 +42,15 @@ private class PineconeInferenceServiceImpl( settings: GenerateEmbeddingsSettings ): Future[GenerateEmbeddingsResponse] = { val basicParams: Seq[(Tag, Option[JsValue])] = jsonBodyParams( - Tag.inputs -> Some(inputs), + Tag.inputs -> 
Some(JsArray(inputs.map(input => JsObject(Seq("text" -> toJson(input)))))), Tag.model -> Some(settings.model) ) val otherParams: (Tag, Option[JsValue]) = { Tag.parameters -> Some( JsObject( Seq( - Tag.input_type.toString() -> toJson(settings.input_type), - Tag.truncate.toString() -> toJson(settings.truncate) + Tag.input_type.toString() -> Json.toJson(settings.input_type), + Tag.truncate.toString() -> Json.toJson(settings.truncate) ) ) ) @@ -66,7 +66,10 @@ private class PineconeInferenceServiceImpl( override def addHeaders(request: StandaloneWSRequest) = { val apiKeyHeader = ("Api-Key", apiKey) - request.addHttpHeaders(apiKeyHeader) + val versionHeader = ("X-Pinecone-API-Version", "2024-07") + request + .addHttpHeaders(apiKeyHeader) + .addHttpHeaders(versionHeader) } override protected def handleErrorCodes( diff --git a/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala b/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala index e844dab..30b1b06 100644 --- a/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala +++ b/pinecone-client/src/test/scala/io/cequence/pineconescala/service/ServerlessPineconeInferenceServiceImplSpec.scala @@ -31,8 +31,9 @@ class ServerlessPineconeInferenceServiceImplSpec embeddings <- service.createEmbeddings(Seq("The quick brown fox jumped over the lazy dog"), settings = DefaultSettings.GenerateEmbeddings.withPassageInputType.withEndTruncate) } yield { - println(embeddings) - embeddings.data should not be empty + embeddings.data.size should be(1) + embeddings.data(0).values should not be empty + embeddings.usage.total_tokens should be(16) } } diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala 
b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala index a1b4170..e827e60 100644 --- a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/response/GenerateEmbeddingsResponse.scala @@ -1,11 +1,13 @@ package io.cequence.pineconescala.domain.response case class GenerateEmbeddingsResponse( - data: Seq[Double], + data: Seq[EmbeddingsValues], model: String, usage: EmbeddingsUsageInfo ) +case class EmbeddingsValues(values: Seq[Double]) + case class EmbeddingsInfo( embedding: Seq[Double], index: Int diff --git a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala index 926312e..acfd725 100644 --- a/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala +++ b/pinecone-core/src/main/scala/io/cequence/pineconescala/domain/settings/GenerateEmbeddingsSettings.scala @@ -1,21 +1,35 @@ package io.cequence.pineconescala.domain.settings -import io.cequence.wsclient.domain.EnumValue +import io.cequence.wsclient.domain.{EnumValue, NamedEnumValue} case class GenerateEmbeddingsSettings( // ID of the model to use. model: String, // Common property used to distinguish between types of data. - input_type: Option[String] = None, + input_type: Option[EmbeddingsInputType] = None, // The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. 
- truncate: String = "END" + truncate: EmbeddingsTruncate = EmbeddingsTruncate.End ) { - def withPassageInputType = copy(input_type = Some("passage")) - def withQueryInputType = copy(input_type = Some("query")) - def withoutTruncate = copy(truncate = "none") - def withEndTruncate = copy(truncate = "end") + def withPassageInputType = copy(input_type = Some(EmbeddingsInputType.Passage)) + def withQueryInputType = copy(input_type = Some(EmbeddingsInputType.Query)) + def withoutTruncate = copy(truncate = EmbeddingsTruncate.None) + def withEndTruncate = copy(truncate = EmbeddingsTruncate.End) +} + +sealed abstract class EmbeddingsInputType(name: String) extends NamedEnumValue(name) + +object EmbeddingsInputType { + case object Passage extends EmbeddingsInputType(name = "passage") + case object Query extends EmbeddingsInputType(name = "query") +} + +sealed abstract class EmbeddingsTruncate(name: String) extends NamedEnumValue(name) + +object EmbeddingsTruncate { + case object None extends EmbeddingsTruncate(name = "NONE") + case object End extends EmbeddingsTruncate(name = "END") } sealed trait EmbeddingsEncodingFormat extends EnumValue From cf2b9800227b52b1a7b137857fb01d2a3d1515b4 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Mon, 1 Jul 2024 12:28:59 +0200 Subject: [PATCH 7/8] Provide default None value for timeouts --- .../pineconescala/service/PineconeInferenceServiceImpl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala index 1013af2..cc3f0cd 100644 --- a/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala +++ b/pinecone-client/src/main/scala/io/cequence/pineconescala/service/PineconeInferenceServiceImpl.scala @@ -87,7 +87,7 @@ object PineconeInferenceServiceFactory extends 
PineconeServiceFactoryHelper { def apply[S <: IndexSettings]( apiKey: String, - timeouts: Option[Timeouts] + timeouts: Option[Timeouts] = None )( implicit ec: ExecutionContext, materializer: Materializer From 648c79414dfbe6af90a1f38b7d522fc5656f2495 Mon Sep 17 00:00:00 2001 From: Branislav Burdiliak Date: Mon, 1 Jul 2024 12:29:13 +0200 Subject: [PATCH 8/8] Add service init and usage into README --- README.md | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fb1fe66..5a7978b 100755 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ Then you can obtain a service (pod or serverless-based) in one of the following - Custom config ```scala val config = ConfigFactory.load("path_to_my_custom_config") - val service = PineconeIndexServiceFactory(config) + val service = PineconeIndexServiceFactory(config) ``` - Without config for pod-based service (with env) @@ -81,7 +81,7 @@ Then you can obtain a service (pod or serverless-based) in one of the following **Ib. Obtaining `PineconeVectorService`** -Same as with `PineconeIndexService`, you need to first provide implicit execution context and akka materializer. Then you can obtain a service in one of the following ways. +Same as with `PineconeIndexService`, you need to first provide implicit execution context and Akka materializer. Then you can obtain a service in one of the following ways. - Default config (expects env. variable(s) to be set as defined in `Config` section). Note that if the index with a given name is not available, the factory will return `None`. ```scala @@ -93,6 +93,23 @@ Same as with `PineconeIndexService`, you need to first provide implicit executio } ``` +**Ic.
Obtaining `PineconeInferenceService`** + +Same as with `PineconeIndexService`, you need to first provide implicit execution context and Akka materializer. Then you can obtain a service in one of the following ways. + +With config +```scala + val config = ConfigFactory.load("path_to_my_custom_config") + val service = PineconeInferenceServiceFactory(config) +``` + +Directly with an API key +```scala + val service = PineconeInferenceServiceFactory( + apiKey = "your_api_key" + ) +``` + - Custom config ```scala val config = ConfigFactory.load("path_to_my_custom_config") @@ -374,6 +391,19 @@ Examples: println(stats) ) ``` + +**Inference Operations** + +- Generate embeddings + +```scala + pineconeInferenceService.createEmbeddings( + Seq("The quick brown fox jumped over the lazy dog") + ).map { embeddings => + println(embeddings.data.mkString("\n")) + } +``` + ## Demo For ready-to-run demos pls. refer to separate seed projects: