In [1]:
def load_sentences(path: str):
    chunks = []
    with open(path) as file:
        for line in file.readlines():
            line = line.strip()
            if line:
                chunks.append(line)
    return chunks

meditations = '../data/meditations.txt'
chunks = load_sentences(meditations)

In [2]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from string import punctuation

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(chunk: str):
    chunk = chunk.lower()
    chunk = re.sub(r'[^a-z\s]', '', chunk)
    tokens = word_tokenize(chunk)
    cleaned_tokens = [
        lemmatizer.lemmatize(word) for word in tokens
        if word not in stop_words and word not in punctuation
    ]
    cleaned_chunk = ' '.join(cleaned_tokens)
    return cleaned_chunk

chunks_clean = list(clean_text(chunk) for chunk in chunks)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
from gensim.models import FastText

corpus = list(chunk.split() for chunk in chunks_clean)

model = FastText(
    corpus,
    vector_size=265,
    window=8,
    min_count=1,
    sg=0,
    workers=4
)

model.save("word2vec_model")

In [4]:
import numpy as np

def embed_chunk(chunk: str, model):
    embeddings = list(model.wv[word] for word in chunk.split())
    return np.mean(embeddings, axis=0)


model = FastText.load("word2vec_model")
vectors = list(embed_chunk(chunk, model) for chunk in chunks_clean)


In [5]:
import pandas as pd

database = pd.DataFrame.from_dict(
    {
        "chunk": chunks,
        "chunk_clean": chunks_clean,
        "vector": vectors
    }
)

database.head()

Unnamed: 0,chunk,chunk_clean,vector
0,From my grandfather Verus I learned good moral...,grandfather verus learned good moral governmen...,"[-0.11705112, 0.47138304, 0.048543412, -0.3401..."
1,From the reputation and remembrance of my fath...,reputation remembrance father modesty manly ch...,"[-0.15250583, 0.61346704, 0.06324319, -0.44290..."
2,"From my mother, piety and beneficence, and abs...",mother piety beneficence abstinence evil deed ...,"[-0.1337932, 0.53759325, 0.05583894, -0.388414..."
3,"From my great-grandfather, not to have frequen...",greatgrandfather frequented public school good...,"[-0.13922194, 0.55995464, 0.0578883, -0.404660..."
4,"From my governor, to be neither of the green n...",governor neither green blue party game circus ...,"[-0.10146547, 0.40880674, 0.04237016, -0.29521..."


In [6]:
database[["chunk", "vector"]].to_csv("meditations.csv")

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

query = "suffering"
top_k = 3

query_vector = embed_chunk(clean_text(query), model)

similarity_scores = cosine_similarity([query_vector], database.vector.to_list())[0]
similarity_pairs = list(zip(database.chunk, similarity_scores))
results = sorted(similarity_pairs, key=lambda x: x[1], reverse=True)

top_k_results = []
for k in range(top_k):
    print(results[k][0])
    top_k_results.append(results[k][0])

In the constitution of the rational animal I see no virtue which is opposed to justice; but I see a virtue which is opposed to love of pleasure, and that is temperance.
Consider, for example, the times of Vespasian. Thou wilt see all these things, people marrying, bringing up children, sick, dying, warring, feasting, trafficking, cultivating the ground, flattering, obstinately arrogant, suspecting, plotting, wishing for some to die, grumbling about the present, loving, heaping up treasure, desiring counsulship, kingly power. Well then, that life of these people no longer exists at all. Again, remove to the times of Trajan. Again, all is the same. Their life too is gone. In like manner view also the other epochs of time and of whole nations, and see how many after great efforts soon fell and were resolved into the elements. But chiefly thou shouldst think of those whom thou hast thyself known distracting themselves about idle things, neglecting to do what was in accordance with their pr

In [13]:
import ollama

def generate_prompts(query, results):
    system_prompt = (
        "<system_prompt>"
        "You are a Stoic AI assistant, deeply versed in the teachings of Marcus Aurelius. "
        "Your job is to follow the user's task exactly, not straying from any of the directions provided to you."
        "</system_prompt>"
    )

    task_xml = (
        "<task>"
        "Analyze the following user query and the provided quotes from Marcus Aurelius' Meditations. "
        "Select the most relevant quote that addresses the user's concern. Structure your response as follows:"
        "<instructions>"
        "<step>Quote: Begin with the chosen quote, enclosed in quotation marks.</step>"
        "<step>Do not hallucinate the chosen quote, you must choose one from the given results.</step>"
        "<step>Interpretation: In 2-3 sentences, why you chose this quote, given the users original query.</step>"
        "<step>Write from the point of view that, the user is trusting that this is the most relevant quote.</step>"
        "<step>Advice: In 4-5 sentences, offer practical guidance based on the quote and Stoic principles.</step>"
        "<step>Do not write more than a few sentences outside of the selected quote.</step>"
        "<step>Do not discuss anything about stoicism outside of the quote and query.</step>"
        "</instructions>"
        "Maintain a wise and compassionate tone throughout your response. Aside from citing your chosen quote, use language that assumes you are speaking to the original user personally. Use language and style that mirrors that of a modern day philosopher spreading stoic wisdom to a student. Construct your response in parsable XML format with <quote>, <interpretation>, and <advice> for the keys mentioned in the afformentioned steps, including a <root> key for the root of the entire response."
        "</task>"
    )

    query_xml = f"<rag_query>{query}</rag_query>"

    search_results_xml = "<search_results>" + "".join(
        f"<search_result>{result}</search_result>" for result in results
    ) + "</search_results>"

    user_prompt = f"<user_prompt>{task_xml}{query_xml}{search_results_xml}</user_prompt>"

    return system_prompt, user_prompt

def stoic_guide(system_prompt, user_prompt):

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    response = ollama.chat(
        model='llama3:instruct',
        messages=messages
    )

    return response['message']['content']

system_prompt, user_prompt = generate_prompts(query, top_k_results)
print(stoic_guide(system_prompt, user_prompt))

<root>
<quote>In the constitution of the rational animal I see no virtue which is opposed to justice; but I see a virtue which is opposed to love of pleasure, and that is temperance.</quote>

<interpretation>I chose this quote because your query seems to be seeking guidance on how to navigate uncertainty and the fleeting nature of life. This quote speaks directly to the idea that our virtues are not in opposition to one another, but rather, some virtues may balance out others. In this case, temperance can help us find balance in the midst of uncertainty.</interpretation>

<advice>As you ponder your concerns about the test and the nature of reality, remember that true virtue lies in finding harmony within yourself. Practice temperance by acknowledging your desires and fears, but also recognize their fleeting nature. Focus on what is within your control, rather than getting caught up in worries about the unknown. Trust that, with a balanced mindset, you can navigate any situation that co