# Quote Generation/Retrieval



In [9]:
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
import os
import openai

# import utils
import configparser
from pprint import pprint
from llama_index import SimpleDirectoryReader


import os
import openai


import jupyter_black

# Auto-format notebook cells with black as they are executed.
jupyter_black.load()

import sys

# Make the sibling ../app package importable; this must run before
# `import conn_utils` below.
sys.path.append("../app")
import conn_utils

# NOTE(review): presumably reads the OpenAI key from ../config.ini -- confirm
# against conn_utils.get_open_ai_key.
OPENAI_API_KEY = conn_utils.get_open_ai_key("../config.ini")

# Export the key through the environment so it never has to be hard-coded
# in a cell.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Folder name used for the persisted vector index.
directory = "index_store"

In [11]:
# List the source text files available under ../data.
!ls ./../data/

Amours.txt               Heroides.txt             MetamorphosesVIII_XV.txt
Fasti.txt                MetamorphosesI_VII.txt   RemediaAmoris.txt


In [14]:
# Paths of the Ovid source texts to index.
f1 = "./../data/RemediaAmoris.txt"
f3 = "./../data/Heroides.txt"
f4 = "./../data/Amours.txt"
f5 = "./../data/Fasti.txt"
f6 = "./../data/MetamorphosesI_VII.txt"
f7 = "./../data/MetamorphosesVIII_XV.txt"
docs = [f1, f3, f4, f5, f6, f7]

# Load the file contents.
documents = SimpleDirectoryReader(input_files=docs).load_data()

# Chunk the loaded *contents*. `create_documents` expects raw text strings;
# passing `docs` (the path list) would embed the file names instead of the
# works themselves.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=25)
texts = text_splitter.create_documents([document.text for document in documents])

# Embed the chunks and persist the FAISS index to disk.
directory = "index_store"
vector_index = FAISS.from_documents(texts, OpenAIEmbeddings())
vector_index.save_local(directory)

# Reload from disk (round-trips the persisted index) and expose a retriever
# that returns the 6 most similar chunks per query.
vector_index = FAISS.load_local(directory, OpenAIEmbeddings())
retriever = vector_index.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [17]:
# Retrieval-augmented QA: the retrieved chunks are "stuffed" into a single
# prompt, and the source chunks are returned alongside the answer.
chat_model = ChatOpenAI()
qa_interface = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# The request sent to the chain.
quote_request = """
I am a big fan of ovid. 
Please recommend memorable quotes to me.
"""
response = qa_interface(quote_request)

print(response["result"])

Sure! Here are some memorable quotes from Ovid's works:

1. "Dripping water hollows out stone, not through force but through persistence." - From Metamorphoses
2. "The cause is hidden, but the result is well known." - From Metamorphoses
3. "Fortune and love favor the brave." - From Amores
4. "Let others praise ancient times; I am glad I was born in these." - From Amores
5. "Love is a kind of warfare." - From Remedia Amoris
6. "Time flies, never to be recalled." - From Heroides
7. "The day, water, sun, moon, night - I do not have to purchase these things with money." - From Fasti

These are just a few examples, but Ovid's works are filled with many memorable quotes. Enjoy exploring his writings!


### Compare stored quotes to newly generated ones

In [37]:
import re

import pandas as pd
import spacy

nlp = spacy.load("en_core_web_sm")

# Read in stored quotes
df = pd.read_json("./../ovid_quotes.json")

input_string = response["result"]

# Matches the "<number>." prefix of a list item, plus any spaces after it.
numbered_prefix = re.compile(r"^\d+\.\s*")

# Extract lines starting with a number and strip the number.
# Each line is stripped *before* it is tested, so an indented list item
# ("  3. ...") is kept rather than silently dropped by the filter.
lines_with_stripped_numbers = [
    numbered_prefix.sub("", stripped)
    for stripped in (line.strip() for line in input_string.splitlines())
    if numbered_prefix.match(stripped)
]

# Print the extracted lines
for line in lines_with_stripped_numbers:
    print(line)

"Dripping water hollows out stone, not through force but through persistence." - From Metamorphoses
"The cause is hidden, but the result is well known." - From Metamorphoses
"Fortune and love favor the brave." - From Amores
"Let others praise ancient times; I am glad I was born in these." - From Amores
"Love is a kind of warfare." - From Remedia Amoris
"Time flies, never to be recalled." - From Heroides
"The day, water, sun, moon, night - I do not have to purchase these things with money." - From Fasti


In [44]:
from itertools import product

# Compare to quotes
generated_quotes = lines_with_stripped_numbers

# `nlp` is the spaCy pipeline loaded in the previous cell; it is reused here
# instead of loading the model a second time.
# NOTE(review): spaCy's small models ship without static word vectors, so
# Doc.similarity() emits a warning and the scores are of limited value --
# consider a model with vectors (e.g. en_core_web_md) if it is installed.

# Parse every quote exactly once. Parsing inside the product loop would
# re-run the pipeline len(generated) * len(stored) times for each side.
generated_docs = [(quote, nlp(quote)) for quote in generated_quotes]
actual_docs = [(quote, nlp(quote)) for quote in df["Quote"]]

# Score every (generated, stored) pairing; product() keeps the same row
# order as a nested loop over generated quotes, then stored quotes.
similarity_data = [
    {
        "generated_quotes": generated_quote,
        "actual_quotes": actual_quote,
        "similarity_score": doc_generated.similarity(doc_actual),
    }
    for (generated_quote, doc_generated), (actual_quote, doc_actual) in product(
        generated_docs, actual_docs
    )
]

# Create a DataFrame from the similarity data
similarity_df = pd.DataFrame(similarity_data)

  similarity_score = doc_generated.similarity(doc_actual)


In [45]:
# Show every pairing, ordered from least to most similar.
ranked_similarity = similarity_df.sort_values(by="similarity_score")
display(ranked_similarity)

Unnamed: 0,generated_quotes,actual_quotes,similarity_score
110,"""The day, water, sun, moon, night - I do not h...",We take no pleasure in permitted joys. But wha...,0.063819
97,"""The day, water, sun, moon, night - I do not h...",Now are fields of corn where Troy once stood.,0.067263
4,"""Dripping water hollows out stone, not through...","Let him who loves, where love success may find...",0.075733
36,"""Fortune and love favor the brave."" - From Amores","Let him who loves, where love success may find...",0.095973
33,"""Fortune and love favor the brave."" - From Amores",Now are fields of corn where Troy once stood.,0.101306
...,...,...,...
105,"""The day, water, sun, moon, night - I do not h...","The mind, conscious of rectitude, laughed to s...",0.531386
30,"""The cause is hidden, but the result is well k...",We take no pleasure in permitted joys. But wha...,0.549881
70,"""Love is a kind of warfare."" - From Remedia Am...",Love yields to business. If you seek a way out...,0.588051
54,"""Let others praise ancient times; I am glad I ...",Love yields to business. If you seek a way out...,0.592658


In [None]:
# Sanity check: how well does similarity_score work for retrieving a "memorable quote"?

In [50]:
print("highest score comparison:")
# Look up the best-scoring pair rather than hard-coding its row number: the
# LLM output (and therefore the row holding the maximum) changes between runs.
idx = similarity_df["similarity_score"].idxmax()
best_pair = similarity_df.loc[idx]
print("GENERATED:", best_pair["generated_quotes"])
print("ACTUAL:", best_pair["actual_quotes"])

highest score comparison:
GENERATED: "Let others praise ancient times; I am glad I was born in these." - From Amores
ACTUAL: Love yields to business. If you seek a way out of love, be busy; you'll be safe then.


### Storage of Additional Quotes