In [2]:
def load_sentences(path: str):
    chunks = []
    with open(path) as file:
        for line in file.readlines():
            line = line.strip()
            if line:
                chunks.append(line)
    return chunks

meditations = '../data/meditations.txt'
chunks = load_sentences(meditations)

In [3]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from string import punctuation

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(chunk: str):
    chunk = chunk.lower()
    chunk = re.sub(r'[^a-z\s]', '', chunk)
    tokens = word_tokenize(chunk)
    cleaned_tokens = [
        lemmatizer.lemmatize(word) for word in tokens
        if word not in stop_words and word not in punctuation
    ]
    cleaned_chunk = ' '.join(cleaned_tokens)
    return cleaned_chunk

chunks_clean = list(clean_text(chunk) for chunk in chunks)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
from gensim.models import FastText

corpus = list(chunk.split() for chunk in chunks_clean)

model = FastText(
    corpus,
    vector_size=265,
    window=8,
    min_count=1,
    sg=0,
    workers=4
)

model.save("word2vec_model")

In [5]:
import numpy as np

def embed_chunk(chunk: str, model):
    embeddings = list(model.wv[word] for word in chunk.split())
    return np.mean(embeddings, axis=0)


model = FastText.load("word2vec_model")
vectors = list(embed_chunk(chunk, model) for chunk in chunks_clean)


In [6]:
import pandas as pd

database = pd.DataFrame.from_dict(
    {
        "chunk": chunks,
        "chunk_clean": chunks_clean,
        "vector": vectors
    }
)

database.head()

Unnamed: 0,chunk,chunk_clean,vector
0,From my grandfather Verus I learned good moral...,grandfather verus learned good moral governmen...,"[-0.116709344, 0.471837, 0.047672816, -0.34095..."
1,From the reputation and remembrance of my fath...,reputation remembrance father modesty manly ch...,"[-0.15210967, 0.614254, 0.062129255, -0.444069..."
2,"From my mother, piety and beneficence, and abs...",mother piety beneficence abstinence evil deed ...,"[-0.13348334, 0.53843445, 0.054877356, -0.3895..."
3,"From my great-grandfather, not to have frequen...",greatgrandfather frequented public school good...,"[-0.13885155, 0.56063956, 0.056867585, -0.4056..."
4,"From my governor, to be neither of the green n...",governor neither green blue party game circus ...,"[-0.1011597, 0.40916425, 0.041610878, -0.29586..."


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

query = "death and pain"
top_k = 3

query_vector = embed_chunk(clean_text(query), model)

similarity_scores = cosine_similarity([query_vector], database.vector.to_list())[0]
similarity_pairs = list(zip(database.chunk, similarity_scores))
results = sorted(similarity_pairs, key=lambda x: x[1], reverse=True)

top_k_results = []
for k in range(top_k):
    print(results[k][0])
    top_k_results.append(results[k][0])

Frequently consider the connexion of all things in the universe and their relation to one another. For in a manner all things are implicated with one another, and all in this way are friendly to one another; for one thing comes in order after another, and this is by virtue of the active movement and mutual conspiration and the unity of the substance.
In my father I observed mildness of temper, and unchangeable resolution in the things which he had determined after due deliberation; and no vainglory in those things which men call honours; and a love of labour and perseverance; and a readiness to listen to those who had anything to propose for the common weal; and undeviating firmness in giving to every man according to his deserts; and a knowledge derived from experience of the occasions for vigorous action and for remission. And I observed that he had overcome all passion for boys; and he considered himself no more than any other citizen; and he released his friends from all obligation

In [28]:
import ollama

def generate_prompts(query, results):
    system_prompt = (
        "<system_prompt>"
        "You are a Stoic AI assistant, deeply versed in the teachings of Marcus Aurelius. "
        "Your job is to follow the user's task exactly, not straying from any of the directions provided to you."
        "</system_prompt>"
    )

    task_xml = (
        "<task>"
        "Analyze the following user query and the provided quotes from Marcus Aurelius' Meditations. "
        "Select the most relevant quote that addresses the user's concern. Structure your response as follows:"
        "<instructions>"
        "<step>Quote: Begin with the chosen quote, enclosed in quotation marks.</step>"
        "<step>Do not hallucinate the chosen quote, you must choose one from the given results.</step>"
        "<step>Interpretation: In 2-3 sentences, why you chose this quote, given the users original query.</step>"
        "<step>Write from the point of view that, the user is trusting that this is the most relevant quote.</step>"
        "<step>Advice: In 4-5 sentences, offer practical guidance based on the quote and Stoic principles.</step>"
        "<step>Do not write more than a few sentences outside of the selected quote.</step>"
        "<step>Do not discuss anything about stoicism outside of the quote and query.</step>"
        "</instructions>"
        "Maintain a wise and compassionate tone throughout your response. Aside from citing your chosen quote, use language that assumes you are speaking to the original user personally. Use language and style that mirrors that of a modern day philosopher spreading stoic wisdom to a student. Construct your response in parsable XML format with <quote>, <interpretation>, and <advice> for the keys mentioned in the afformentioned steps, including a <root> key for the root of the entire response."
        "</task>"
    )

    query_xml = f"<rag_query>{query}</rag_query>"

    search_results_xml = "<search_results>" + "".join(
        f"<search_result>{result}</search_result>" for result in results
    ) + "</search_results>"

    user_prompt = f"<user_prompt>{task_xml}{query_xml}{search_results_xml}</user_prompt>"

    return system_prompt, user_prompt

def stoic_guide(system_prompt, user_prompt):

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    response = ollama.chat(
        model='llama3:instruct',
        messages=messages
    )

    return response['message']['content']

system_prompt, user_prompt = generate_prompts(query, top_k_results)
print(stoic_guide(system_prompt, user_prompt))

<root>
<quote>Frequently consider the connexion of all things in the universe and their relation to one another. For in a manner all things are implicated with one another, and all in this way are friendly to one another; for one thing comes in order after another, and this is by virtue of the active movement and mutual conspiration and the unity of the substance.</quote>

<interpretation>I chose this quote because it highlights the interconnectedness of all living beings. Your query about humans connected to animals resonates with this idea, as it recognizes that our existence is deeply tied to the natural world. This quote encourages us to acknowledge the web of relationships between species and the universe itself, fostering a sense of compassion, understanding, and harmony.</interpretation>

<advice>As you reflect on the bond between humans and animals, remember that we are all part of a larger ecosystem. Cultivate empathy and appreciation for the natural world by observing the int