In [28]:
import os
import time

import nltk
from llamaapi import LlamaAPI

from Document import Document

print("=== Downloading necessary NLTK pakages, if not already present")

# Request each NLTK resource in turn, without the downloader's progress output.
# The three return values are kept as d1, d2, d3, in the order the resources are listed.
d1, d2, d3 = [
    nltk.download(resource, quiet=True)
    for resource in ("punkt", "stopwords", "wordnet")
]


# Notebook-wide settings.
_MODEL = "llama-13b-chat"  # model name placed in the API request
_CONTEXT_SIZE = 10_000  # largest allowed slice size; the notebook reports it in bytes
_DISTANCE_THRESHOLD = 0.2  # smallest accepted distance between consecutive slices

# The API key is read from the LLAMA_API_KEY environment variable when it is set;
# this is the preferred way, because it keeps the secret out of the notebook.
# When the variable is unset or empty, fall back to the key embedded below. It is
# written in pieces, and only joined inside `_call`, so that it never appears as one
# clear string and is harder for crawlers to pick up.
# NOTE(review): the embedded key is still recoverable by anyone who reads this file;
# consider revoking it and removing the fallback.

_a1 = "AbZ".split("b")[1]  # evaluates to "Z", appended to the key by `_call`
_a2 = "BSaNbNTlp0o0bILov9Z3U7XmnP4DhwrV24jgq"
_a3 = "A7kX0SPThAArXd0jNZxQ2WZ"
_call = lambda x: LlamaAPI(x + _a1)

_env_key = os.environ.get("LLAMA_API_KEY")
llama = LlamaAPI(_env_key) if _env_key else _call(f"LL-{_a3}2l1{_a2}")


# Report the active configuration; the context limit is shown in bytes and in MB.
print(
    f"=== Loaded\nUsing model ``{_MODEL}``",
    f"Context window limit: {_CONTEXT_SIZE} Bytes, i.e {round(_CONTEXT_SIZE / (2**20), 4)} MB",
    f"Distance threshold: {_DISTANCE_THRESHOLD}",
    sep="\n",
)

=== Downloading necessary NLTK pakages, if not already present
=== Loaded
Using model ``llama-13b-chat``
Context window limit: 0.00476837158203125 MB
Distance threshold: 0.2


In [29]:
# `path` is the document sliced further down; path1..path3 are the test files
# compared with each other in this cell.
path = "./document.txt"
path1, path2, path3 = "./test1.txt", "./test2.txt", "./test3.txt"

# Load each test file and keep whatever make_bow() returns for it.
document1, document2, document3 = [
    Document(path=test_path).make_bow() for test_path in (path1, path2, path3)
]

# Text of each test document, requested with escape=False.
text1, text2, text3 = [
    loaded.text(escape=False) for loaded in (document1, document2, document3)
]

# Distances between test documents. Only the last expression of a cell is displayed,
# so the notebook shows just the distance of document3 from itself.
document1.distance(document3)
document2.distance(document3)
document3.distance(document2)
document3.distance(document3)

0.0

In [30]:
def recursive_split(doc):
    """Halve `doc` repeatedly until every piece fits within `_CONTEXT_SIZE`.

    Args:
        doc: object providing ``size()`` and ``split_half()``; the result of
            ``split_half()`` must expose the two halves as ``left`` and ``right``.

    Returns:
        A list of pieces, left to right, each with ``size() <= _CONTEXT_SIZE``.
        A document that already fits is returned as a one-element list.
    """
    if doc.size() > _CONTEXT_SIZE:
        halves = doc.split_half()
        # Left pieces come first, so the original reading order is preserved.
        return [*recursive_split(halves.left), *recursive_split(halves.right)]
    return [doc]

In [31]:
def split_document(doc):
    """Slice `doc` so that pieces fit the context window and neighbours are distant.

    The document is first cut by `recursive_split`. Then, pass after pass, every
    pair of consecutive slices whose distance is below `_DISTANCE_THRESHOLD` is
    replaced by the four halves of the two slices. The passes stop as soon as one
    of them finds no such pair.

    Args:
        doc: document accepted by `recursive_split`; its slices must provide
            ``distance(other)`` and ``split_half()``.

    Returns:
        The list of slices, in document order.
    """
    slices = recursive_split(doc)
    # A single slice has no neighbour to be compared with.
    if len(slices) == 1:
        return slices

    while True:
        refined = []
        any_split = False
        idx = 0
        count = len(slices)
        while idx < count:
            current = slices[idx]
            has_next = idx + 1 < count
            if has_next and current.distance(slices[idx + 1]) < _DISTANCE_THRESHOLD:
                # Too similar: halve both slices and move past the pair.
                any_split = True
                first = current.split_half()
                second = slices[idx + 1].split_half()
                refined += [first.left, first.right, second.left, second.right]
                idx += 2
            else:
                # Last slice, or far enough from the next one: keep it unchanged.
                refined.append(current)
                idx += 1
        slices = refined
        if not any_split:
            return slices

In [32]:
# Load the full document and report its size next to the context limit.
doc = Document(path=path)
print(f"Original document size: {doc.size()} Bytes")
print(f"Maximum context window size: {_CONTEXT_SIZE} Bytes")

# Slice it, then measure the distance between each pair of neighbouring slices.
docs = split_document(doc)
print(f"{len(docs)} slices extracted")
distances = [prev.distance(nxt) for prev, nxt in zip(docs, docs[1:])]
# With a single slice there is no pair to compare, so the minimum stays at 1.
min_distance = min(distances) if distances else 1
print(f"Distances: {distances}")

sizes = [piece.size() for piece in docs]
print(f"Sizes: {sizes}")
max_size = max(sizes)

print([piece.text(escape=False) for piece in docs])
print(f"Minimum distance between consecutive slices: {min_distance}")
print(f"Maximum size among slices: {max_size} Bytes")

# Sanity checks: neighbours are distant enough and every slice fits the limit.
assert min_distance >= _DISTANCE_THRESHOLD
assert max_size <= _CONTEXT_SIZE

Original document size: 45174 Bytes
Maximum context window size: 5000 Bytes
16 slices extracted
Distances: [0.3753, 0.5904, 0.5648, 0.5908, 0.5242, 0.6447, 0.7456, 0.8184, 0.6999, 0.5961, 0.7708, 0.6712, 0.692, 0.6875, 0.7988]
Sizes: [2792, 2609, 2655, 2800, 2759, 2913, 2941, 2923, 2857, 2833, 2943, 2749, 2749, 2801, 2910, 2940]
['Per favore riassumimi il testo seguente , che comincia con Embodied e finisce con also Embodied cognition is the concept suggesting that many features of cognition are shaped by the state and capacities of the organism The cognitive features include a wide spectrum of cognitive functions such as perception biases memory recall comprehension and highlevel mental constructs ( such as meaning attribution and Categorization The embodied mind thesis challenges other theories such as Cognitivism ( psychology ) Theory File : Cartesian Cognitive ModelpngProponents of the embodied cognition thesis emphasize the active and significant role the Body ( biology ) This dou

In [33]:
def query_LLM(doc, max_tries=10, wait_seconds=3):
    """Send the text of `doc` to the chat model and return the model's reply.

    The request is retried whenever the call, or the decoding of its JSON body,
    raises an exception; this can happen when requests are made too fast.

    Args:
        doc: object whose ``text()`` method returns the user message to send.
        max_tries: maximum number of attempts before giving up.
        wait_seconds: pause, in seconds, after each failed attempt.

    Returns:
        The ``content`` of the first choice's message in the response, or ``""``
        when every attempt failed.
    """
    api_request_json = {
        "model": _MODEL,
        "messages": [
            {"role": "system", "content": "You are a serious assistant"},
            {
                "role": "user",
                "content": doc.text(),
            },
        ],
    }
    for _ in range(max_tries):
        try:
            resp = llama.run(api_request_json).json()
            break
        except Exception:
            # `Exception`, not a bare `except`: KeyboardInterrupt and SystemExit
            # must still be able to stop the loop instead of being retried.
            print(f"Error. Waiting {wait_seconds} seconds and trying again.")
            time.sleep(wait_seconds)
    else:
        # Reached only when no attempt got as far as `break`.
        print(f"Reached the limit of {max_tries} tries")
        return ""
    return resp["choices"][0]["message"]["content"]

In [34]:
# Ask the model about every slice, in document order, and keep the answers.
collated = []
for d in docs:
    # Pause before each request so the API is not overloaded and does not limit us.
    time.sleep(1)
    answer = query_LLM(d)
    collated += [answer]

Error. Waiting 3 seconds and trying again.


In [None]:
# Show the collected answers, one per slice (query_LLM returns "" for a slice whose query kept failing).
collated

["Greetings, I'm here to help you with any questions you may have. I understand that you would like to know more about the concept of embodied cognition. Embodied cognition is the idea that our cognitive processes, including perception, memory, and reasoning, are shaped by our bodily experiences and the environments we inhabit. This theory challenges other cognitive theories, such as cognitivism, which suggest that cognition is solely based on internal mental processes.\n\nThe embodied cognition thesis emphasizes the active and significant role of the body in shaping cognition. This includes the idea that cognitive processes are not just located within the brain, but are distributed throughout the body and shaped by its interactions with the environment.\n\nThere are different approaches to understanding embodied cognition, and some authors argue that cognition depends on an agent's body and its interactions with the environment. Others emphasize the role of the body in shaping specifi