Skip to content

Commit

Permalink
feat(stores): Initial vectors, ids, and delete in MemoryVectorStore
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmigloz committed Aug 18, 2023
1 parent ea090dc commit 4c17a81
Show file tree
Hide file tree
Showing 2 changed files with 229 additions and 59 deletions.
124 changes: 91 additions & 33 deletions packages/langchain/lib/src/documents/vector_stores/memory.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,65 +7,88 @@ import '../embeddings/base.dart';
import '../models/models.dart';
import 'base.dart';

/// {@template memory_vector_store}
/// Vector store that stores vectors in memory.
///
/// By default, it uses cosine similarity to compare vectors.
///
/// It iterates over all vectors in the store to find the most similar vectors.
/// This is not efficient for large vector stores as it has a time complexity
/// of O(vector_dimensionality * num_vectors).
/// {@endtemplate}
///
/// For more efficient vector stores, see [VertexAIMatchingEngine].
class MemoryVectorStore extends VectorStore {
/// {@macro memory_vector_store}
/// Main constructor for [MemoryVectorStore].
///
/// - [embeddings] is the embeddings model to use to embed the documents.
/// - [similarityFunction] is the similarity function to use when comparing
/// vectors. By default, it uses cosine similarity.
/// - [initialMemoryVectors] is an optional list of [MemoryVector] to
/// initialize the vector store with. This is useful when loading a vector
/// store from a database or file.
///
/// If you want to create and populate a [MemoryVectorStore] from a list of
/// documents or texts, use [MemoryVectorStore.fromDocuments] or
/// [MemoryVectorStore.fromText].
MemoryVectorStore({
required super.embeddings,
this.similarityFunction = cosineSimilarity,
});
final List<MemoryVector>? initialMemoryVectors,
// Spread into a new list literal: a null argument yields an empty store, and
// the store never shares its backing list with the caller.
}) : memoryVectors = [...?initialMemoryVectors];

/// Similarity function to use when comparing vectors.
final double Function(List<double> a, List<double> b) similarityFunction;

/// Vectors stored in memory.
final List<MemoryVector> memoryVectors = [];
final List<MemoryVector> memoryVectors;

/// Builds a [MemoryVectorStore] and fills it with the given documents.
///
/// - [documents] are the documents handed to [addDocuments].
/// - [embeddings] is the embeddings model the new store is created with.
///
/// The returned future completes with the store once [addDocuments] has
/// finished.
static Future<MemoryVectorStore> fromDocuments({
  required final List<Document> documents,
  required final Embeddings embeddings,
}) async {
  final vectorStore = MemoryVectorStore(embeddings: embeddings);
  await vectorStore.addDocuments(documents: documents);
  return vectorStore;
}

/// Creates a vector store from a list of texts.
///
/// - [ids] is a list of ids to add to the vector store.
/// - [texts] is a list of texts to add to the vector store.
/// - [metadatas] is a list of metadata to add to the vector store.
/// - [embeddings] is the embeddings model to use to embed the texts.
///
/// Each text becomes one [Document]; entries of [ids] and [metadatas] are
/// matched to [texts] by index.
static Future<MemoryVectorStore> fromText({
final List<String>? ids,
required final List<String> texts,
required final List<Map<String, dynamic>> metadatas,
final List<Map<String, dynamic>>? metadatas,
required final Embeddings embeddings,
}) async {
// Length checks are asserts, so they only run when assertions are enabled.
assert(
ids == null || ids.length == texts.length,
'ids and texts must have the same length',
);
assert(
metadatas == null || metadatas.length == texts.length,
'metadatas and texts must have the same length',
);
final vs = MemoryVectorStore(embeddings: embeddings);
await vs.addDocuments(
documents: texts
.mapIndexed(
(final i, final text) => Document(
// Null when no ids were supplied.
id: ids?[i],
pageContent: text,
metadata: i < metadatas.length ? metadatas[i] : const {},
// Falls back to an empty map when no metadata list was supplied.
metadata: metadatas?[i] ?? const {},
),
)
.toList(growable: false),
);
return vs;
}

/// Builds a [MemoryVectorStore] and fills it with the given documents.
///
/// - [documents] are the documents handed to [addDocuments].
/// - [embeddings] is the embeddings model the new store is created with.
///
/// The returned future completes with the store once [addDocuments] has
/// finished.
static Future<MemoryVectorStore> fromDocuments({
  required final List<Document> documents,
  required final Embeddings embeddings,
}) async {
  final vectorStore = MemoryVectorStore(embeddings: embeddings);
  await vectorStore.addDocuments(documents: documents);
  return vectorStore;
}

@override
Future<List<String>> addVectors({
required final List<List<double>> vectors,
Expand All @@ -75,18 +98,20 @@ class MemoryVectorStore extends VectorStore {
vectors.mapIndexed((final i, final vector) {
final doc = documents[i];
return MemoryVector(
content: doc.pageContent,
document: doc,
embedding: vector,
metadata: doc.metadata,
);
}),
);
return const [];
}

@override
Future<bool> delete({required final List<String> ids}) {
throw UnimplementedError();
Future<bool> delete({required final List<String> ids}) async {
// Drop every stored entry whose document id appears in ids.
memoryVectors.removeWhere(
(final vector) => ids.contains(vector.document.id),
);
// Always reports true, whether or not any entry was actually removed.
return true;
}

@override
Expand All @@ -109,10 +134,7 @@ class MemoryVectorStore extends VectorStore {
return searches
.map(
(final search) => (
Document(
pageContent: memoryVectors[search.key].content,
metadata: memoryVectors[search.key].metadata,
),
memoryVectors[search.key].document,
search.value,
),
)
Expand All @@ -121,20 +143,56 @@ class MemoryVectorStore extends VectorStore {
}

/// {@template memory_vector}
/// Represents a vector in memory.
/// Represents an entry of [MemoryVectorStore].
/// {@endtemplate}
@immutable
class MemoryVector {
/// {@macro memory_vector}
const MemoryVector({
required this.content,
required this.document,
required this.embedding,
required this.metadata,
});

final String content;
/// Document associated with the vector.
final Document document;

/// Vector embedding.
final List<double> embedding;
final Map<String, dynamic> metadata;

/// Creates a vector from a map.
///
/// [map] must contain a `document` entry (passed to [Document.fromMap]) and
/// an `embedding` entry holding a list of numbers.
///
/// The embedding is converted element by element, so lists typed as
/// `List<dynamic>` (for example, the output of `jsonDecode`) and integer
/// components are accepted.
factory MemoryVector.fromMap(final Map<String, dynamic> map) {
  return MemoryVector(
    document: Document.fromMap(map['document'] as Map<String, dynamic>),
    // A direct `as List<double>` cast throws for `List<dynamic>` inputs, so
    // build a properly typed list instead.
    embedding: [
      for (final value in map['embedding'] as List<dynamic>)
        (value as num).toDouble(),
    ],
  );
}

/// Serializes this entry into a map.
///
/// The result has a `document` key holding `document.toMap()` and an
/// `embedding` key holding the embedding list itself (not a copy).
Map<String, dynamic> toMap() {
  final serialized = <String, dynamic>{
    'document': document.toMap(),
    'embedding': embedding,
  };
  return serialized;
}

/// Whether [other] is a [MemoryVector] of the same runtime type with an equal
/// document and an element-wise equal embedding.
///
/// Accepts any [Object], so comparing against an unrelated type returns
/// `false` instead of throwing a `TypeError`.
@override
bool operator ==(final Object other) {
  return identical(this, other) ||
      other is MemoryVector &&
          runtimeType == other.runtimeType &&
          document == other.document &&
          // Lists compare by identity by default; compare the contents.
          const ListEquality<double>().equals(embedding, other.embedding);
}

/// Hash code derived from the document and the embedding's contents, so that
/// entries that compare equal hash alike.
@override
int get hashCode => Object.hash(
      document,
      // Hash the elements, since a list's own hashCode is identity-based.
      const ListEquality<double>().hash(embedding),
    );

/// Returns a short description containing the document and the number of
/// embedding components (not the components themselves).
@override
String toString() {
  final dimensions = embedding.length;
  return 'MemoryVector{'
      'document: $document, '
      'embedding: $dimensions}';
}
}

/// Measures the cosine of the angle between two vectors in a vector space.
Expand Down
Loading

0 comments on commit 4c17a81

Please sign in to comment.