In [1]:
!pip install --upgrade pip
!pip install transformers sentence-transformers faiss-cpu accelerate bitsandbytes




In [3]:
import json

# Read the pre-chunked corpus from disk and parse it as JSON.
with open("./content/shakespeare_chunked.json", encoding="utf-8") as f:
    raw_chunks = json.loads(f.read())

# Quick sanity check: how many chunks were loaded and what one looks like.
print(f"Total chunks: {len(raw_chunks)}")
print("Sample entry:", raw_chunks[0])

Total chunks: 5621
Sample entry: {'Play': 'Henry IV', 'PlayerLine': "ACT I\nSCENE I. London. The palace.\nEnter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR WALTER BLUNT, and others\n\nKING HENRY IV: So shaken as we are, so wan with care,\nFind we a time for frighted peace to pant,\nAnd breathe short-winded accents of new broils\nTo be commenced in strands afar remote.\nNo more the thirsty entrance of this soil\nShall daub her lips with her own children's blood,\nNor more shall trenching war channel her fields,\nNor bruise her flowerets with the armed hoofs\nOf hostile paces: those opposed eyes,\nWhich, like the meteors of a troubled heaven,\nAll of one nature, of one substance bred,\nDid lately meet in the intestine shock\nAnd furious close of civil butchery\nShall now, in mutual well-beseeming ranks,\nMarch all one way and be no more opposed\nAgainst acquaintance, kindred and allies:", 'Act': '1', 'Scene': '1', 'Speakers': ['KING HENRY IV'], 'firstLine': '1', 'la

In [4]:
def normalize(text: str) -> str:
    """Trim every line of ``text`` and drop lines that are empty after trimming.

    Args:
        text: Raw multi-line passage.

    Returns:
        The remaining lines, each stripped of surrounding whitespace and
        joined with a single ``"\\n"``.
    """
    kept_lines = []
    for raw_line in text.strip().splitlines():
        trimmed = raw_line.strip()
        if trimmed:
            kept_lines.append(trimmed)
    return "\n".join(kept_lines)

# Turn every raw chunk into a retrieval document: a stable id derived from its
# position, the normalized passage text, and a flat metadata dict. List-valued
# fields are flattened to comma-separated strings.
documents = [
    {
        "id": f"chunk_{position}",
        "text": normalize(record.get("PlayerLine", "")),
        "metadata": {
            "play": record.get("Play", ""),
            "act": record.get("Act", ""),
            "scene": record.get("Scene", ""),
            "firstLine": record.get("firstLine", ""),
            "lastLine": record.get("lastLine", ""),
            "characters present": ", ".join(record.get("CharactersPresent", [])),
            "speakers": ", ".join(record.get("Speakers", [])),
        },
    }
    for position, record in enumerate(raw_chunks)
]

print(f"Built {len(documents)} documents.")
print("Example:", json.dumps(documents[0], indent=4, ensure_ascii=False))


Built 5621 documents.
Example: {
    "id": "chunk_0",
    "text": "ACT I\nSCENE I. London. The palace.\nEnter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR WALTER BLUNT, and others\nKING HENRY IV: So shaken as we are, so wan with care,\nFind we a time for frighted peace to pant,\nAnd breathe short-winded accents of new broils\nTo be commenced in strands afar remote.\nNo more the thirsty entrance of this soil\nShall daub her lips with her own children's blood,\nNor more shall trenching war channel her fields,\nNor bruise her flowerets with the armed hoofs\nOf hostile paces: those opposed eyes,\nWhich, like the meteors of a troubled heaven,\nAll of one nature, of one substance bred,\nDid lately meet in the intestine shock\nAnd furious close of civil butchery\nShall now, in mutual well-beseeming ranks,\nMarch all one way and be no more opposed\nAgainst acquaintance, kindred and allies:",
    "metadata": {
        "play": "Henry IV",
        "act": "1",
        "scene":

In [6]:
from sentence_transformers import SentenceTransformer
import numpy as np
import json

embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# Smaller alternative model:
# embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# The later cells read the embeddings from this exact path, so compute and save
# them here whenever the cached file is missing (e.g. after a session reset) or
# no longer matches the number of documents.
# NOTE: delete the cached file by hand after switching the embedding model.
embeddings_path = "./content/shakespeare_chunked_embeddings.npy"
try:
    embeddings = np.load(embeddings_path)
except FileNotFoundError:
    embeddings = None

if embeddings is None or embeddings.shape[0] != len(documents):
    embeddings = embedder.encode(
        [doc["text"] for doc in documents],
        batch_size=32,
        show_progress_bar=True,
        convert_to_numpy=True,
    )
    np.save(embeddings_path, embeddings)

print("Embeddings shape:", embeddings.shape)

# Persist the documents (text + metadata) next to the embeddings; row i of the
# embeddings corresponds to documents[i].
with open("./content/shakespeare_chunked_emb.json", "w", encoding="utf-8") as f:
    json.dump(documents, f, ensure_ascii=False, indent=2)


In [9]:
import faiss
import numpy as np

# Load the precomputed chunk embeddings from disk.
embeddings = np.load("./content/shakespeare_chunked_embeddings.npy")
D = embeddings.shape[1]  # embedding dimensionality (number of columns)
# Flat (brute-force) L2 index over vectors of dimension D.
index = faiss.IndexFlatL2(D)
# Vectors are cast to float32 before being added to the index.
index.add(embeddings.astype("float32"))

print("Indexed vectors count:", index.ntotal)

# Persist the index so that later cells can reload it with faiss.read_index.
faiss.write_index(index, "./content/shakespeare_chunked_index.bin")
print("FAISS index saved.")


Indexed vectors count: 5621
FAISS index saved.


In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "Qwen/Qwen1.5-7B-Chat"

# 4-bit NF4 quantization via bitsandbytes so the 7B model fits on a free-tier GPU.
# TODO: try a less aggressive setting (e.g. load_in_8bit=True) if memory allows.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Qwen1.5 is supported natively by transformers (>= 4.37), so there is no need
# to opt in to executing code from the model repository (trust_remote_code).
tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place the weights on the available device(s)
)

print("Model loaded with 4-bit quantization. Device(s):", model.device)


Fetching 4 files: 100%|██████████| 4/4 [01:57<00:00, 29.26s/it] 
Loading checkpoint shards: 100%|██████████| 4/4 [00:06<00:00,  1.56s/it]


Model loaded with 4-bit quantization. Device(s): cuda:0


In [11]:
import faiss
import json
import numpy as np

# Reload the persisted artefacts written by the earlier cells: the FAISS index,
# the documents (text + metadata) and the raw embedding matrix.
# These files live on session storage and are removed when the session resets.
index = faiss.read_index("./content/shakespeare_chunked_index.bin")
with open("./content/shakespeare_chunked_emb.json", "r", encoding="utf-8") as f:
    documents = json.load(f)
embeddings = np.load("./content/shakespeare_chunked_embeddings.npy")

print("Index, metadata, and embeddings reloaded. Total vectors:", index.ntotal)


Index, metadata, and embeddings reloaded. Total vectors: 5621


In [15]:
import torch

def embed_query(query: str):
    """Encode one query string with the notebook-level ``embedder``.

    Args:
        query: Text to embed.

    Returns:
        The encoder output for the one-element batch ``[query]``, cast to
        ``float32``.
    """
    query_batch = [query]
    query_matrix = embedder.encode(query_batch, convert_to_numpy=True)
    return query_matrix.astype("float32")

def retrieve_top_k(query: str, k: int = 5):
    """Return up to ``k`` documents closest to ``query`` in the FAISS index.

    Uses the notebook-level ``index`` and ``documents`` objects together with
    ``embed_query``; position ``i`` in the index is looked up as ``documents[i]``.

    Args:
        query: Natural-language question to search for.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of dicts with the keys ``id``, ``text``, ``metadata`` and
        ``score``, in the order returned by ``index.search``. ``score`` is the
        raw distance reported by the index (for the ``IndexFlatL2`` built in
        this notebook, lower means closer). The list may hold fewer than ``k``
        entries when the index contains fewer than ``k`` vectors.
    """
    print(query)  # echo the query so it appears above the retrieval output
    q_vec = embed_query(query)
    distances, indices = index.search(q_vec, k)
    results = []
    for dist, idx in zip(distances[0], indices[0]):
        idx = int(idx)
        # FAISS pads the result with label -1 when it finds fewer than k
        # neighbours; documents[-1] would silently return the last chunk.
        if idx < 0:
            continue
        doc = documents[idx]
        results.append({
            "id": doc["id"],
            "text": doc["text"],
            "metadata": doc["metadata"],
            "score": float(dist)
        })
    return results

def build_prompt(retrieved_chunks, user_question, style="shake"):
    """Assemble the text prompt sent to the language model.

    Args:
        retrieved_chunks: Iterable of dicts; only each dict's ``"text"`` entry
            is used.
        user_question: The question to place after the passages.
        style: ``"shake"`` asks for Shakespearean English; any other value asks
            for a clear expert answer.

    Returns:
        A single string: an instruction header, one numbered ``[Passage i]``
        section per chunk, then the question and an open ``### Answer:`` slot.
    """
    tone_instruction = (
        "Respond in Shakespearean English, quoting from these passages."
        if style == "shake"
        else "Respond clearly as a Shakespeare expert, quoting from these passages."
    )

    sections = [f"You are a Shakespeare expert. {tone_instruction}\n\n"]
    sections.extend(
        f"[Passage {number}]\n{passage['text']}\n\n"
        for number, passage in enumerate(retrieved_chunks, start=1)
    )
    sections.append(f"### Question:\n{user_question}\n\n### Answer:\n")
    return "".join(sections)

def generate_answer(prompt: str, max_new_tokens: int = 300, temperature: float = 0.8):
    """Generate a continuation of ``prompt`` with the notebook-level model.

    Uses the global ``tokenizer`` and ``model``. The prompt is tokenized as
    plain text and passed to ``model.generate``.
    NOTE(review): no chat template is applied to the prompt; confirm this is
    intended for a chat-tuned checkpoint.

    Args:
        prompt: Full prompt text.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A positive value samples with
            ``top_p=0.9``; ``0``, a negative value or ``None`` switches to
            greedy decoding instead of passing an invalid temperature on.

    Returns:
        The decoded text of the newly generated tokens only (the prompt tokens
        are sliced off), with special tokens skipped and whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.1,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # Sampling requires a strictly positive temperature; decode greedily.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        output_ids = model.generate(**generation_kwargs)

    # Keep only the tokens produced after the prompt.
    generated = output_ids[0][prompt_length:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()


In [16]:
def user_query(user_question: str, k: int = 5, style: str = "shake",
               max_new_tokens: int = 200, temperature: float = 0.8):
    """Run one retrieve -> prompt -> generate round trip and print the result.

    Args:
        user_question: The question to answer.
        k: Number of passages to retrieve.
        style: Passed to ``build_prompt`` to select the answer tone.
        max_new_tokens: Passed to ``generate_answer``.
        temperature: Passed to ``generate_answer``.

    Returns:
        None. A preview of each retrieved passage and the generated answer are
        printed.
    """
    top_chunks = retrieve_top_k(user_question, k=k)
    print(f"Retrieved passages (top {k}):")
    for i, c in enumerate(top_chunks, start=1):
        # One-line preview: newlines flattened, limited to 200 characters.
        snippet = c["text"].replace("\n", " ")[:200]
        print(f" {i}. [score {c['score']:.2f}] {snippet}...")

    prompt = build_prompt(top_chunks, user_question, style=style)

    print("\nGenerating answer...\n")
    answer = generate_answer(prompt, max_new_tokens=max_new_tokens, temperature=temperature)
    print("=== ShakespeareBot Answer ===")
    print(answer)
    print("#" * 50)

# Example questions. Only one call is active; uncomment another line to try it.
# user_query("Why does Hamlet kill Polonius?")

user_query("Why does Hamlet not kill Claudius, please use source from Play: Hamlet Act 3, Scene 3?")
# user_query("How and why does Hamlet get his friends Rosencrantz and Guildenstern killed?")
# user_query("How did Ophelia die?")
# user_query("Why was Hamlet angry with his mother Gertrude?")

Why does Hamlet not kill Claudius, please use source from Play: Hamlet Act 3, Scene 3?
Retrieved passages (top 5):
 1. [score 0.54] HAMLET: ACT III SCENE I. A room in the castle. Enter KING CLAUDIUS, QUEEN GERTRUDE, POLONIUS, OPHELIA, ROSENCRANTZ, and GUILDENSTERN KING CLAUDIUS: And can you, by no drift of circumstance, Get from h...
 2. [score 0.59] KING CLAUDIUS: Exeunt ROSENCRANTZ and GUILDENSTERN And, England, if my love thou hold'st at aught-- As my great power thereof may give thee sense, Since yet thy cicatrice looks raw and red After the D...
 3. [score 0.59] KING CLAUDIUS: Time qualifies the spark and fire of it. There lives within the very flame of love A kind of wick or snuff that will abate it, And nothing is at a like goodness still, For goodness, gro...
 4. [score 0.60] HAMLET: SCENE III. Another room in the castle. Enter KING CLAUDIUS, attended KING CLAUDIUS: I have sent to seek him, and to find the body. How dangerous is it that this man goes loose! Yet must not we...
 