# Load the dataset

In [22]:
import pandas as pd

df = pd.read_json("hf://datasets/Abirate/english_quotes/quotes.jsonl", lines=True)
df.head()

Unnamed: 0,quote,author,tags
0,‚ÄúBe yourself; everyone else is already taken.‚Äù,Oscar Wilde,"[be-yourself, gilbert-perreira, honesty, inspi..."
1,"‚ÄúI'm selfish, impatient and a little insecure....",Marilyn Monroe,"[best, life, love, mistakes, out-of-control, t..."
2,‚ÄúTwo things are infinite: the universe and hum...,Albert Einstein,"[human-nature, humor, infinity, philosophy, sc..."
3,"‚ÄúSo many books, so little time.‚Äù",Frank Zappa,"[books, humor]"
4,‚ÄúA room without books is like a body without a...,Marcus Tullius Cicero,"[books, simile, soul]"


# Clean & Preprocess the Data

In [23]:
# Drop missing values
df = df.dropna(subset=["quote", "author", "tags"])
# Normalize text
df["quote"] = df["quote"].str.lower()
df["author"] = df["author"].str.lower()
df["tags"] = df["tags"].apply(lambda x: [tag.lower() for tag in x])

# Create a combined search text
df["search_text"] = df.apply(lambda row: f"{row['quote']} by {row['author']} - {' '.join(row['tags'])}", axis=1)


# Encode Quotes using Sentence Transformers

In [24]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the pre-trained model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode the quotes
embeddings = model.encode(df["search_text"].tolist(), show_progress_bar=True)


Batches:   0%|          | 0/79 [00:00<?, ?it/s]

# Build the FAISS Index

In [25]:
!pip install faiss-cpu



In [26]:
import faiss

# Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(embeddings))

# Optionally save the index
faiss.write_index(index, "quotes_index.faiss")


# Define Search Function

In [27]:
def search_quotes(query, top_k=5):
    query_embedding = model.encode([query])
    distances, indices = index.search(np.array(query_embedding), top_k)

    results = []
    for idx in indices[0]:
        results.append({
            "quote": df.iloc[idx]["quote"],
            "author": df.iloc[idx]["author"],
            "tags": df.iloc[idx]["tags"],
            "score": float(distances[0][list(indices[0]).index(idx)])
        })
    return results


# Build the Streamlit App (app.py)

In [28]:
!pip install streamlit



In [29]:
import streamlit as st

st.title("üìú Semantic Quote Finder")

query = st.text_input("üîç Enter your query (e.g., 'quotes about courage by women authors'):")

if query:
    results = search_quotes(query, top_k=5)
    st.subheader("Top Matches:")
    for i, result in enumerate(results, 1):
        st.markdown(f"**{i}.** *\"{result['quote']}\"* ‚Äî **{result['author']}**")
        st.write(f"Tags: {', '.join(result['tags'])}")
        st.write(f"Similarity Score: {result['score']:.2f}")




# RAG Evaluation

In [30]:
test_queries = [
    "Quotes about courage by women authors",
    "Funny quotes about death",
    "All Oscar Wilde quotes tagged with humor"
]

for query in test_queries:
    print(f"\nQuery: {query}")
    for res in search_quotes(query, top_k=3):
        print(f"‚Üí \"{res['quote']}\" ‚Äî {res['author']} | Tags: {', '.join(res['tags'])}")



Query: Quotes about courage by women authors
‚Üí "‚Äúwe believe in ordinary acts of bravery, in the courage that drives one person to stand up for another.‚Äù" ‚Äî veronica roth, | Tags: inspirational-quotes, strength-and-courage
‚Üí "‚Äúwell-behaved women seldom make history.‚Äù" ‚Äî laurel thatcher ulrich, | Tags: inspirational, misattributed-eleanor-roosevelt, women
‚Üí "‚Äúmen are afraid that women will laugh at them. women are afraid that men will kill them.‚Äù" ‚Äî margaret atwood | Tags: apocryphal, feminism

Query: Funny quotes about death
‚Üí "‚Äúdon't think of it as dying, said death. just think of it as leaving early to avoid the rush.‚Äù" ‚Äî terry pratchett, | Tags: death, humor
‚Üí "‚Äúit is a curious thing, the death of a loved one. we all know that our time in this world is limited, and that eventually all of us will end up underneath some sheet, never to wake up. and yet it is always a surprise when it happens to someone we know. it is like walking up the stairs to yo