From 41c66fa83fec83e8e393a4870f86472c48eb52d9 Mon Sep 17 00:00:00 2001 From: Jon Craton Date: Sat, 20 Apr 2024 22:01:14 -0400 Subject: [PATCH] Add chunk test --- languagemodels/embeddings.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/languagemodels/embeddings.py b/languagemodels/embeddings.py index 7368e10..1b15139 100644 --- a/languagemodels/embeddings.py +++ b/languagemodels/embeddings.py @@ -100,7 +100,7 @@ def chunk_doc(doc, name="", chunk_size=64, chunk_overlap=8): >>> chunk_doc("") [] - >>> chunk_doc("" + >>> chunk_doc( ... "It was the best of times, it was the worst of times, it was the age " ... "of wisdom, it was the age of foolishness, it was the epoch of belief, " ... "it was the epoch of incredulity, it was the season of Light, it was " @@ -112,6 +112,16 @@ def chunk_doc(doc, name="", chunk_size=64, chunk_overlap=8): ... "good or for evil, in the superlative degree of comparison only.") ['It was the best of times...'] + >>> chunk_doc( + ... "One morning, when Gregor Samsa woke from troubled dreams, he found " + ... "himself transformed in his bed into a horrible vermin. He lay on his " + ... "armour-like back, and if he lifted his head a little he could see " + ... "his brown belly, slightly domed and divided by arches into stiff " + ... "sections. The bedding was hardly able to cover it and seemed ready " + ... "to slide off any moment. His many legs, pitifully thin compared with " + ... "the size of the rest of him, waved about helplessly as he looked.") + ['One morning, ...'] + >>> chunk_doc("Hello") ['Hello']