Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Then you can obtain a service (pod or serverless-based) in one of the following
- Custom config
```scala
val config = ConfigFactory.load("path_to_my_custom_config")
val service = PineconeIndexServiceFactory(config)
val service = PineconeIndexServiceFactory(config)
```

- Without config for pod-based service (with env)
Expand All @@ -81,7 +81,7 @@ Then you can obtain a service (pod or serverless-based) in one of the following

**Ib. Obtaining `PineconeVectorService`**

Same as with `PineconeIndexService`, you need to first provide implicit execution context and akka materializer. Then you can obtain a service in one of the following ways.
Same as with `PineconeIndexService`, you need to first provide implicit execution context and Akka materializer. Then you can obtain a service in one of the following ways.

- Default config (expects env. variable(s) to be set as defined in `Config` section). Note that if the index with a given name is not available, the factory will return `None`.
```scala
Expand All @@ -93,6 +93,23 @@ Same as with `PineconeIndexService`, you need to first provide implicit executio
}
```

**Ic. Obtaining `PineconeInferenceService`**

Same as with `PineconeIndexService`, you need to first provide implicit execution context and Akka materializer. Then you can obtain a service in one of the following ways.

With config
```scala
val config = ConfigFactory.load("path_to_my_custom_config")
val service = PineconeInferenceServiceFactory(config)
```

Directly with api-key
```scala
val service = PineconeInferenceServiceFactory(
apiKey = "your_api_key"
)
```

- Custom config
```scala
val config = ConfigFactory.load("path_to_my_custom_config")
Expand Down Expand Up @@ -374,6 +391,19 @@ Examples:
println(stats)
)
```

**Inference Operations**

- Generate embeddings

```scala
pineconeInferenceService.createEmbeddings(Seq("The quick brown fox jumped over the lazy dog")).map { embeddings =>
  println(embeddings.data.mkString("\n"))
}
```

## Demo

For ready-to-run demos pls. refer to separate seed projects:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package io.cequence.pineconescala

import io.cequence.pineconescala.domain.response._
import io.cequence.pineconescala.domain.settings.{EmbeddingsInputType, EmbeddingsTruncate}
import io.cequence.pineconescala.domain.settings.EmbeddingsInputType.{Passage, Query}
import io.cequence.pineconescala.domain.{Metric, PVector, PodType, SparseVector}
import io.cequence.wsclient.JsonUtil.enumFormat
import play.api.libs.json.{Format, Json}
import play.api.libs.json.{Format, JsString, Json, Reads, Writes}

object JsonFormats {
// vector-stuff formats
Expand Down Expand Up @@ -77,4 +79,21 @@ object JsonFormats {
Json.format[ServerlessIndexSpec]
implicit val serverlessIndexInfoFormat: Format[ServerlessIndexInfo] =
Json.format[ServerlessIndexInfo]

// embeddings
implicit val embeddingUsageInfoReads: Reads[EmbeddingsUsageInfo] =
Json.reads[EmbeddingsUsageInfo]
implicit val embeddingInfoReads: Reads[EmbeddingsInfo] = Json.reads[EmbeddingsInfo]
implicit val embeddingValuesReads: Reads[EmbeddingsValues] = Json.reads[EmbeddingsValues]
implicit val embeddingResponseReads: Reads[GenerateEmbeddingsResponse] = Json.reads[GenerateEmbeddingsResponse]

implicit val embeddingsInputTypeWrites: Writes[EmbeddingsInputType] = enumFormat(
Query,
Passage
)

implicit val embeddingsTruncateWrites: Writes[EmbeddingsTruncate] = enumFormat(
EmbeddingsTruncate.None,
EmbeddingsTruncate.End
)
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
package io.cequence.pineconescala.service

import io.cequence.pineconescala.domain.Metric
import io.cequence.pineconescala.domain.settings.IndexSettings.{
CreatePodBasedIndexSettings,
CreateServerlessIndexSettings
}
import io.cequence.wsclient.domain.NamedEnumValue
import io.cequence.pineconescala.domain.settings.IndexSettings.{CreatePodBasedIndexSettings, CreateServerlessIndexSettings}
import io.cequence.wsclient.domain.{EnumValue, NamedEnumValue}

sealed abstract class EndPoint(value: String = "") extends NamedEnumValue(value)

object EndPoint {
case object describe_index_stats extends EndPoint
case object embed extends EndPoint
case object query extends EndPoint
case object vectors_delete extends EndPoint("vectors/delete")
case object vectors_fetch extends EndPoint("vectors/fetch")
Expand Down Expand Up @@ -56,6 +53,11 @@ object Tag {
case object region extends Tag
case object spec extends Tag
case object shards extends Tag
case object inputs extends Tag
case object input_type extends Tag
case object model extends Tag
case object parameters extends Tag
case object truncate extends Tag

// TODO: move elsewhere
def fromCreatePodBasedIndexSettings(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package io.cequence.pineconescala.service

import akka.stream.Materializer
import com.typesafe.config.Config
import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse
import io.cequence.pineconescala.domain.settings.{GenerateEmbeddingsSettings, IndexSettings}
import io.cequence.wsclient.JsonUtil.{JsonOps, toJson}
import io.cequence.wsclient.service.ws.{Timeouts, WSRequestHelper}
import play.api.libs.json.{JsArray, JsObject, JsValue, Json}
import io.cequence.pineconescala.JsonFormats._
import io.cequence.pineconescala.PineconeScalaClientException
import io.cequence.wsclient.domain.WsRequestContext
import play.api.libs.ws.StandaloneWSRequest

import scala.concurrent.{ExecutionContext, Future}

/**
 * Default WS-based implementation of [[PineconeInferenceService]].
 *
 * Talks to the Pinecone inference REST API at `https://api.pinecone.io/`,
 * authenticating every request with the `Api-Key` header and pinning the
 * API version via the `X-Pinecone-API-Version` header.
 *
 * @param apiKey           Pinecone API key sent with every request
 * @param explicitTimeouts optional WS timeouts overriding the client defaults
 */
private class PineconeInferenceServiceImpl(
  apiKey: String,
  explicitTimeouts: Option[Timeouts] = None
)(
  implicit val ec: ExecutionContext,
  val materializer: Materializer
) extends PineconeInferenceService
    with WSRequestHelper {

  override protected type PEP = EndPoint
  override protected type PT = Tag

  // Base URL of the Pinecone API (inference endpoints live under it).
  override val coreUrl: String = "https://api.pinecone.io/"

  override protected val requestContext = WsRequestContext(explTimeouts = explicitTimeouts)

  /**
   * Uses the specified model to generate embeddings for the input sequence.
   *
   * @param inputs
   *   Input sequence for which to generate embeddings.
   * @param settings
   *   model name plus optional input-type / truncation parameters
   * @return
   *   list of embeddings inside an envelope
   */
  override def createEmbeddings(
    inputs: Seq[String],
    settings: GenerateEmbeddingsSettings
  ): Future[GenerateEmbeddingsResponse] = {
    // Each input string is wrapped as {"text": "..."} — the body shape the embed endpoint expects.
    val basicParams: Seq[(Tag, Option[JsValue])] = jsonBodyParams(
      Tag.inputs -> Some(JsArray(inputs.map(input => JsObject(Seq("text" -> toJson(input)))))),
      Tag.model -> Some(settings.model)
    )
    // Model-specific options (input_type, truncate) are nested under a "parameters" object.
    val otherParams: (Tag, Option[JsValue]) = {
      Tag.parameters -> Some(
        JsObject(
          Seq(
            Tag.input_type.toString() -> Json.toJson(settings.input_type),
            Tag.truncate.toString() -> Json.toJson(settings.truncate)
          )
        )
      )
    }
    execPOST(
      EndPoint.embed,
      bodyParams = basicParams :+ otherParams
    ).map(
      _.asSafe[GenerateEmbeddingsResponse]
    )

  }

  // Adds authentication and API-version headers to every outgoing request.
  override def addHeaders(request: StandaloneWSRequest) = {
    val apiKeyHeader = ("Api-Key", apiKey)
    // NOTE(review): pinned API version — confirm "2024-07" is still the intended version.
    val versionHeader = ("X-Pinecone-API-Version", "2024-07")
    request
      .addHttpHeaders(apiKeyHeader)
      .addHttpHeaders(versionHeader)
  }

  // Any non-success HTTP status is surfaced as a PineconeScalaClientException.
  override protected def handleErrorCodes(
    httpCode: Int,
    message: String
  ): Nothing =
    throw new PineconeScalaClientException(s"Code ${httpCode} : ${message}")

  // Releases the underlying WS client.
  override def close(): Unit =
    client.close()

}

/**
 * Factory for [[PineconeInferenceService]] instances, creatable either directly
 * from an API key or from a Typesafe `Config`.
 */
object PineconeInferenceServiceFactory extends PineconeServiceFactoryHelper {

  // NOTE(review): the type parameter S is unused in this method — it looks like a
  // copy/paste leftover from an index-service factory; confirm and consider dropping
  // it in a future (binary-incompatible) release.
  def apply[S <: IndexSettings](
    apiKey: String,
    timeouts: Option[Timeouts] = None
  )(
    implicit ec: ExecutionContext,
    materializer: Materializer
  ): PineconeInferenceService = {
    new PineconeInferenceServiceImpl(apiKey, timeouts)
  }

  /**
   * Creates the service from a config: reads the API key from
   * `<configPrefix>.apiKey` plus any timeout settings, then delegates to the
   * api-key based factory method.
   */
  def apply(
    config: Config
  )(
    implicit ec: ExecutionContext,
    materializer: Materializer
  ): PineconeInferenceService = {
    val timeouts = loadTimeouts(config)

    apply(
      apiKey = config.getString(s"$configPrefix.apiKey"),
      timeouts = timeouts.toOption
    )
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package io.cequence.pineconescala.service

import akka.actor.ActorSystem
import akka.stream.Materializer
import com.typesafe.config.{Config, ConfigFactory}
import org.scalatest.matchers.must.Matchers
import org.scalatest.matchers.should.Matchers.convertToAnyShouldWrapper
import org.scalatest.wordspec.AsyncWordSpec
import org.scalatest.GivenWhenThen

import scala.concurrent.{ExecutionContext, Future}

// Integration spec for the Pinecone inference service, configured via serverless.conf.
class ServerlessPineconeInferenceServiceImplSpec
    extends AsyncWordSpec
    with GivenWhenThen
    with ServerlessFixtures
    with Matchers
    with PineconeServiceConsts {

  implicit val ec: ExecutionContext = ExecutionContext.global
  implicit val materializer: Materializer = Materializer(ActorSystem())

  val serverlessConfig: Config = ConfigFactory.load("serverless.conf")

  // Builds a fresh service per test from the serverless config.
  def inferenceServiceBuilder: PineconeInferenceService =
    PineconeInferenceServiceFactory(serverlessConfig)

  "Pinecone Inference Service" when {

    "create embeddings should provide embeddings for input data" in {
      val service = inferenceServiceBuilder
      service
        .createEmbeddings(
          Seq("The quick brown fox jumped over the lazy dog"),
          settings = DefaultSettings.GenerateEmbeddings.withPassageInputType.withEndTruncate
        )
        .map { embeddings =>
          // one embedding vector, non-empty, with the expected token count
          embeddings.data.size should be(1)
          embeddings.data(0).values should not be empty
          embeddings.usage.total_tokens should be(16)
        }
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package io.cequence.pineconescala.domain.response

/**
 * Envelope returned by the embed endpoint.
 *
 * @param data  one embedding per input
 * @param model name of the model that produced the embeddings
 * @param usage token accounting for the request
 */
case class GenerateEmbeddingsResponse(
  data: Seq[EmbeddingsValues],
  model: String,
  usage: EmbeddingsUsageInfo
)

// A single embedding vector.
case class EmbeddingsValues(values: Seq[Double])

// NOTE(review): not referenced by GenerateEmbeddingsResponse in the visible code —
// confirm whether EmbeddingsInfo is actually used anywhere.
case class EmbeddingsInfo(
  embedding: Seq[Double],
  index: Int
)

// Token usage reported by the API (snake_case mirrors the JSON field name).
case class EmbeddingsUsageInfo(
  total_tokens: Int
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package io.cequence.pineconescala.domain.settings

import io.cequence.wsclient.domain.{EnumValue, NamedEnumValue}

/** Request settings for generating embeddings. */
case class GenerateEmbeddingsSettings(
  // ID of the model to use.
  model: String,

  // Common property used to distinguish between types of data ("query" vs "passage").
  input_type: Option[EmbeddingsInputType] = None,

  // How the API should truncate over-long inputs: EmbeddingsTruncate.End (default) or
  // EmbeddingsTruncate.None. (The previous comment about "number of dimensions" /
  // text-embedding-3 belonged to a different API and did not describe this field.)
  truncate: EmbeddingsTruncate = EmbeddingsTruncate.End
) {
  // Convenience copies for the supported input types and truncation modes.
  def withPassageInputType = copy(input_type = Some(EmbeddingsInputType.Passage))
  def withQueryInputType = copy(input_type = Some(EmbeddingsInputType.Query))
  def withoutTruncate = copy(truncate = EmbeddingsTruncate.None)
  def withEndTruncate = copy(truncate = EmbeddingsTruncate.End)
}

// Input data type; wire names are the lowercase strings "passage" and "query".
sealed abstract class EmbeddingsInputType(name: String) extends NamedEnumValue(name)

object EmbeddingsInputType {
  case object Passage extends EmbeddingsInputType(name = "passage")
  case object Query extends EmbeddingsInputType(name = "query")
}

// Truncation mode; wire names are the uppercase strings "NONE" and "END".
sealed abstract class EmbeddingsTruncate(name: String) extends NamedEnumValue(name)

object EmbeddingsTruncate {
  case object None extends EmbeddingsTruncate(name = "NONE")
  case object End extends EmbeddingsTruncate(name = "END")
}

// NOTE(review): not referenced by GenerateEmbeddingsSettings in the visible code —
// confirm this encoding-format enum is used elsewhere before keeping it.
sealed trait EmbeddingsEncodingFormat extends EnumValue

object EmbeddingsEncodingFormat {
  case object float extends EmbeddingsEncodingFormat
  case object base64 extends EmbeddingsEncodingFormat
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package io.cequence.pineconescala.service

import io.cequence.pineconescala.domain.response.GenerateEmbeddingsResponse
import io.cequence.pineconescala.domain.settings.GenerateEmbeddingsSettings
import io.cequence.wsclient.service.CloseableService

import scala.concurrent.Future

/**
 * Contract for the Pinecone inference API.
 *
 * Implementations are closeable — call `close()` to release underlying resources.
 */
trait PineconeInferenceService extends CloseableService with PineconeServiceConsts {

  /**
   * Uses the specified model to generate embeddings for the input sequence.
   *
   * @param inputs
   *   Input sequence for which to generate embeddings.
   * @param settings
   *   model plus optional input-type / truncation parameters; defaults to
   *   `DefaultSettings.GenerateEmbeddings`
   * @return
   *   list of embeddings inside an envelope
   */
  def createEmbeddings(
    inputs: Seq[String],
    settings: GenerateEmbeddingsSettings = DefaultSettings.GenerateEmbeddings
  ): Future[GenerateEmbeddingsResponse]

}
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package io.cequence.pineconescala.service

import io.cequence.pineconescala.domain.settings.IndexSettings.{
CreatePodBasedIndexSettings,
CreateServerlessIndexSettings
}
import io.cequence.pineconescala.domain.settings.IndexSettings.{CreatePodBasedIndexSettings, CreateServerlessIndexSettings}
import io.cequence.pineconescala.domain.{Metric, PodType}
import io.cequence.pineconescala.domain.settings._

Expand Down Expand Up @@ -38,5 +35,9 @@ trait PineconeServiceConsts {
CloudProvider.AWS,
Region.EUWest1
)

val GenerateEmbeddings = GenerateEmbeddingsSettings(
model = "multilingual-e5-large"
)
}
}
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version = 1.8.2
sbt.version = 1.9.0