-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
649b71f
commit 38b252d
Showing
6 changed files
with
210 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from embedbase import get_app | ||
from embedbase.embedding.base import Embedder | ||
import uvicorn | ||
from sentence_transformers import SentenceTransformer | ||
from embedbase_qdrant import Qdrant | ||
|
||
class LocalEmbedder(Embedder):
    """
    Embedder that computes embeddings in-process with a
    sentence-transformers model instead of calling a remote API
    """

    # Model loaded when the caller does not name one.
    EMBEDDING_MODEL = "all-MiniLM-L6-v2"

    def __init__(self, model: str = EMBEDDING_MODEL, **kwargs):
        """
        Load the model and cache the size of the vectors it produces
        :param model: model name or path handed to SentenceTransformer
        :param kwargs: forwarded unchanged to the Embedder base class
        """
        super().__init__(**kwargs)
        self.model = SentenceTransformer(model)
        self._dimensions = self.model.get_sentence_embedding_dimension()

    @property
    def dimensions(self) -> int:
        """
        Return the dimensions of the embeddings
        :return: dimensions of the embeddings
        """
        return self._dimensions

    def is_too_big(self, text: str) -> bool:
        """
        Check if text is too big to be embedded,
        delegating the splitting UX to the caller
        :param text: text to check
        :return: True if text is too big, False otherwise
        """
        max_length = self.model.get_max_seq_length()
        # The model may not define a maximum sequence length; in that case
        # there is no limit to exceed, and comparing against None would raise.
        if max_length is None:
            return False
        # NOTE(review): this compares a character count against what is
        # presumably a token limit, so it looks like a conservative
        # approximation rather than an exact check - confirm intent.
        return len(text) > max_length

    async def embed(self, data):
        """
        Embed a list of strings or a single string
        :param data: list of strings or a single string
        :return: list of embeddings, one per input string
        """
        embeddings = self.model.encode(data).tolist()
        # Only a lone string yields a single flat vector that needs wrapping;
        # any sequence of strings (list, tuple, ...) already yields one
        # vector per item.
        return [embeddings] if isinstance(data, str) else embeddings
|
||
|
||
# Build the application at import time so that an ASGI server can also
# import `app` from this module directly.
app = get_app().use_embedder(LocalEmbedder()).use_db(Qdrant()).run()


if __name__ == "__main__":
    # uvicorn only supports `reload=True` when the application is given as
    # an import string; with an app object it logs a warning and exits
    # instead of serving. Serve the already-built object without reload.
    uvicorn.run(app)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
embedbase | ||
sentence-transformers | ||
git+https://github.com/different-ai/embedbase-qdrant.git |
Oops, something went wrong.