In [2]:
def load_sentences(path: str):
    chunks = []
    with open(path) as file:
        for line in file.readlines():
            line = line.strip()
            if line:
                chunks.append(line)
    return chunks

meditations = '../data/meditations.txt'
chunks = load_sentences(meditations)

In [3]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from string import punctuation

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def clean_text(chunk: str):
    chunk = chunk.lower()
    chunk = re.sub(r'[^a-z\s]', '', chunk)
    tokens = word_tokenize(chunk)
    cleaned_tokens = [
        lemmatizer.lemmatize(word) for word in tokens
        if word not in stop_words and word not in punctuation
    ]
    cleaned_chunk = ' '.join(cleaned_tokens)
    return cleaned_chunk

chunks_clean = list(clean_text(chunk) for chunk in chunks)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/garrett.partenza/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [4]:
from gensim.models import FastText

corpus = list(chunk.split() for chunk in chunks_clean)

model = FastText(
    corpus,
    vector_size=265,
    window=8,
    min_count=1,
    sg=0,
    workers=4
)

model.save("word2vec_model")

In [5]:
import numpy as np

def embed_chunk(chunk: str, model):
    embeddings = list(model.wv[word] for word in chunk.split())
    return np.mean(embeddings, axis=0)


model = FastText.load("word2vec_model")
vectors = list(embed_chunk(chunk, model) for chunk in chunks_clean)


In [6]:
import pandas as pd

database = pd.DataFrame.from_dict(
    {
        "chunk": chunks,
        "chunk_clean": chunks_clean,
        "vector": vectors
    }
)

database.head()

Unnamed: 0,chunk,chunk_clean,vector
0,From my grandfather Verus I learned good moral...,grandfather verus learned good moral governmen...,"[-0.116709344, 0.471837, 0.047672816, -0.34095..."
1,From the reputation and remembrance of my fath...,reputation remembrance father modesty manly ch...,"[-0.15210967, 0.614254, 0.062129255, -0.444069..."
2,"From my mother, piety and beneficence, and abs...",mother piety beneficence abstinence evil deed ...,"[-0.13348334, 0.53843445, 0.054877356, -0.3895..."
3,"From my great-grandfather, not to have frequen...",greatgrandfather frequented public school good...,"[-0.13885155, 0.56063956, 0.056867585, -0.4056..."
4,"From my governor, to be neither of the green n...",governor neither green blue party game circus ...,"[-0.1011597, 0.40916425, 0.041610878, -0.29586..."


In [12]:
from sklearn.metrics.pairwise import cosine_similarity

query = "universe, planets, and stars"
top_k = 3

query_vector = embed_chunk(clean_text(query), model)

similarity_scores = cosine_similarity([query_vector], database.vector.to_list())[0]
similarity_pairs = list(zip(database.chunk, similarity_scores))
results = sorted(similarity_pairs, key=lambda x: x[1], reverse=True)

top_k_results = []
for k in range(top_k):
    print(results[k][0])
    top_k_results.append(results[k][0])

That which has died falls not out of the universe. If it stays here, it also changes here, and is dissolved into its proper parts, which are elements of the universe and of thyself. And these too change, and they murmur not.
The nature of the An moved to make the universe. But now either everything that takes place comes by way of consequence or continuity; or even the chief things towards which the ruling power of the universe directs its own movement are governed by no rational principle. If this is remembered it will make thee more tranquil in many things.
From Rusticus I received the impression that my character required improvement and discipline; and from him I learned not to be led astray to sophistic emulation, nor to writing on speculative matters, nor to delivering little hortatory orations, nor to showing myself off as a man who practises much discipline, or does benevolent acts in order to make a display; and to abstain from rhetoric, and poetry, and fine writing; and not t

In [13]:
import ollama

def generate_prompts(query, results):
    system_prompt = (
        "<system_prompt>"
        "You are a Stoic AI assistant, deeply versed in the teachings of Marcus Aurelius. "
        "Your job is to follow the user's task exactly, not straying from any of the directions provided to you."
        "</system_prompt>"
    )

    task_xml = (
        "<task>"
        "Analyze the following user query and the provided quotes from Marcus Aurelius' Meditations. "
        "Select the most relevant quote that addresses the user's concern. Structure your response as follows:"
        "<instructions>"
        "<step>Quote: Begin with the chosen quote, enclosed in quotation marks.</step>"
        "<step>Do not hallucinate the chosen quote, you must choose one from the given results.</step>"
        "<step>Interpretation: In 2-3 sentences, explain how this quote relates to the user's query.</step>"
        "<step>Advice: In 1-2 sentences, offer practical guidance based on the quote and Stoic principles.</step>"
        "<step>Do not write more than a few sentences outside of the selected quote.</step>"
        "<step>Do not discuss anything about stoicism outside of the quote and query.</step>"
        "</instructions>"
        "Maintain a wise and compassionate tone throughout your response."
        "</task>"
    )

    query_xml = f"<rag_query>{query}</rag_query>"

    search_results_xml = "<search_results>" + "".join(
        f"<search_result>{result}</search_result>" for result in results
    ) + "</search_results>"

    user_prompt = f"<user_prompt>{task_xml}{query_xml}{search_results_xml}</user_prompt>"

    return system_prompt, user_prompt

def stoic_guide(system_prompt, user_prompt):

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    response = ollama.chat(
        model='llama3:instruct',
        messages=messages
    )

    return response['message']['content']

system_prompt, user_prompt = generate_prompts(query, top_k_results)
print(stoic_guide(system_prompt, user_prompt))

Quote: "That which has died falls not out of the universe. If it stays here, it also changes here, and is dissolved into its proper parts, which are elements of the universe and of thyself."

Interpretation: This quote suggests that everything within the universe is interconnected and eternal, including our own lives and experiences. The user's concern about the universe, planets, and stars may be related to their understanding of the vastness and mystery of existence.

Advice: Reflect on your place within this grand scheme, recognizing that even after death, your essence remains a part of the universe. This perspective can bring tranquility and acceptance in the face of uncertainty.
