In [1]:
import chromadb

client = chromadb.Client()

### Preparing the data for vector search

In [3]:
from pathlib import Path
from evaluator.data.file_io import read_json_from_file
from evaluator.models.qa import Concepts

# Prepara data for loading
chunk_file = Path("processed/ap_history_concepts.json")
concepts = Concepts
concepts = read_json_from_file(chunk_file, Concepts)

ids:list = []
documents:list = []
for concept_id, concept in concepts.chunks.items():
    documents.append(concept)
    ids.append(concept_id)

  from .autonotebook import tqdm as notebook_tqdm


### Using a collection with default embedder model

Model: all-MiniLM-L6-v2

In [4]:
# Create a collection with default embedder
collection_with_default_embedder = client.get_or_create_collection(name="ap_history_concepts")

# Add documents to the colleciton using the default embedder
collection = collection_with_default_embedder
collection.upsert(
    documents=documents,
    ids=ids
)

In [5]:
results = collection.query(
    query_texts=["In the period 1550-1750, most of the world’s ten largest cities were located in which of the following regions? a) The Middle East b) Western Europe c) North Africa and southern Europe d) South Asia e) East Asia"],
    n_results=5, # how many results to return
    include=['documents', 'distances', 'metadatas']
)
print("\nChromaDB Query Results:")
for i, doc in enumerate(results['documents'][0]):
    print(f"  Result {i+1}:")
    print(f"    Text: {doc}")
    print(f"    Distance: {results['distances'][0][i]}")
    print(f"    Metadata: {results['metadatas'][0][i]}")


ChromaDB Query Results:
  Result 1:
    Text: The more factories that developed in favorable locations, the larger cities would grow. In 1800, along with London, the Chinese cities of Beijing (Peking) and Canton ranked in the top three, but just 100 years later, nine of the ten largest cities in the world were in Europe or the United States.
    Distance: 0.7605158090591431
    Metadata: None
  Result 2:
    Text: Focus On: Urbanization If trade is the way you make your living, chances are you are spending lots of time in cities. Traders and merchants needed a place to meet and conduct business and this period saw the growth of urban culture throughout the world, mostly as a result of trade contacts and networks. Along with trade, cities showcased the wealth and power of the rulers who both controlled and beneted from the trade. Urban centers usually developed along trade routes or in locations necessary for strategic defense. In the early years, the most populous cities were in the M

### Preparing prompts

In [40]:
def query(collection, query: str) -> list:
        results = collection.query(
            query_texts=[query],
            n_results=3,
            include=["documents", "distances", "metadatas"],
        )
        if not results["documents"]:
            return ""        

        return results["documents"][0]

def generate_prompt(question_with_options:str, context_docs: list):
    formatted_context = "\n".join([f"[{i+1}] {doc}" for i, doc in enumerate(context_docs)])

    prompt = f""".
    Context Documents:
    {formatted_context}

    Question:
    {question_with_options}
    """
    return prompt

In [41]:
questions: list =[
    "Ghana in the 900s and France in the 1600s had which of the following characteristics in common? a) Parliamentary government b) Divine monarchy c) Matrilineal descent d) Islam e) Trade based on gold and manufacturing good",
    "During the eighteenth century, which of the following reigned, but did not rule? a) The Ottoman sultan b) The king of France c) The Chinese emperor d) The Japanese emperor e) The Russian Tzar",
    "Which of the following was the most important factor enabling the Spanish to defeat the Aztec Empire? a) The Spanish were able to field larger armies than the Aztecs b) The Spanish were able to use their understanding of Aztec culture to create effective propaganda that weakened Aztec resistance. c) The Spanish were able to exploit discontent within the Aztec state to trigger a revolt of the lower classes against the Aztec ruling class. d) The Spanish were able to form military alliances with other indigenous peoples who were enemies of the Aztecs. e) The Spanish were able to devise effective countermeasures to the horse cavalry that formed the bulk of the Aztec army."
    ]

for question in questions:
    print(generate_prompt(question, query(collection, question)))
    print("-----------------------------------------------------------------")

.
    Context Documents:
    [1] which they had little of but which existed in the Sahara. When they encountered the Islamic traders along the salt road, they started trading for a lot more than just salt. The consequence was an explosion of trade. Why were the Islamic traders so interested in trading with west African kingdoms? Because in Ghana (about 800–1000 C.E.) and Mali (about 1200–1450 C.E.), there was tons, and we mean tons, of gold. A little sand in your eyes was probably worth some gold in your hand. So the Islamic traders kept coming. The constant trade brought more than just Islamic goods to Ghana and Mali; it brought Islam. For Ghana the result was devastating. The empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the
[2] empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the Islamic forces, their empire fe

### Using a custom embedder

In [10]:
from chromadb import Documents, EmbeddingFunction, Embeddings
from litellm import embedding


class CustomEmbedder(EmbeddingFunction):
    def __call__(self, input: Documents) -> Embeddings:
        # embed the documents somehow
        response = embedding(
          model="ollama/nomic-embed-text",
        # model="gemini/text-embedding-004",
          input=["This is a embeddings test."],
        )
        return response


collection_with_custom_embedder = client.get_or_create_collection(name="ap_history_concepts_custom")

collection = collection_with_custom_embedder
collection.upsert(
    documents=documents,
    ids=ids
)

In [16]:
question_with_options ="Ghana in the 900s and France in the 1600s had which of the following characteristics in common? a) Parliamentary government b) Divine monarchy c) Matrilineal descent d) Islam e) Trade based on gold and manufacturing good"
print(generate_prompt(question_with_options, query(collection, question_with_options)))

.
    Context Documents:
    [1] which they had little of but which existed in the Sahara. When they encountered the Islamic traders along the salt road, they started trading for a lot more than just salt. The consequence was an explosion of trade. Why were the Islamic traders so interested in trading with west African kingdoms? Because in Ghana (about 800–1000 C.E.) and Mali (about 1200–1450 C.E.), there was tons, and we mean tons, of gold. A little sand in your eyes was probably worth some gold in your hand. So the Islamic traders kept coming. The constant trade brought more than just Islamic goods to Ghana and Mali; it brought Islam. For Ghana the result was devastating. The empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the
[2] empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the Islamic forces, their empire fe

### Exploring how re-ranking works

- **Vector Search:** Query with question to retrieve a broad set of candidates.
    
- **Reranking:** Use a **cross-encoder** on the retrieved candidates (question + each document) to precisely rank their relevance and select the very best ones for your LLM.

In [48]:
from sentence_transformers.cross_encoder import CrossEncoder

# Pre-trained cross encoder
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")

def query_with_reranking(collection, query: str) -> list:
    results = collection.query(
        query_texts=[query],
        n_results=50,
        include=["documents", "distances", "metadatas"],
    )
    if not results["documents"]:
        return ""        

    search_results: list(str) = []
    for result in results["documents"][0]:
        search_results.append(result)

    ranks = model.rank(query, search_results)
    # Print the scores

    final_results: list(str) =[]
    for rank in ranks[:3]:
        print(f"{rank['score']:.2f}\t{rank['corpus_id']}\t{search_results[rank['corpus_id']]}")
        final_results.append(search_results[rank['corpus_id']])
    
    return final_results        

In [49]:
questions: list =[
    "Ghana in the 900s and France in the 1600s had which of the following characteristics in common? a) Parliamentary government b) Divine monarchy c) Matrilineal descent d) Islam e) Trade based on gold and manufacturing good",
    "During the eighteenth century, which of the following reigned, but did not rule? a) The Ottoman sultan b) The king of France c) The Chinese emperor d) The Japanese emperor e) The Russian Tzar",
    "Which of the following was the most important factor enabling the Spanish to defeat the Aztec Empire? a) The Spanish were able to field larger armies than the Aztecs b) The Spanish were able to use their understanding of Aztec culture to create effective propaganda that weakened Aztec resistance. c) The Spanish were able to exploit discontent within the Aztec state to trigger a revolt of the lower classes against the Aztec ruling class. d) The Spanish were able to form military alliances with other indigenous peoples who were enemies of the Aztecs. e) The Spanish were able to devise effective countermeasures to the horse cavalry that formed the bulk of the Aztec army."
    ]

for question in questions:
    print(generate_prompt(question, query_with_reranking(collection, question)))
    print("-----------------------------------------------------------------")



-4.42	0	which they had little of but which existed in the Sahara. When they encountered the Islamic traders along the salt road, they started trading for a lot more than just salt. The consequence was an explosion of trade. Why were the Islamic traders so interested in trading with west African kingdoms? Because in Ghana (about 800–1000 C.E.) and Mali (about 1200–1450 C.E.), there was tons, and we mean tons, of gold. A little sand in your eyes was probably worth some gold in your hand. So the Islamic traders kept coming. The constant trade brought more than just Islamic goods to Ghana and Mali; it brought Islam. For Ghana the result was devastating. The empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the
-5.32	1	empire was subjected to a Holy War led by an Islamic group intent on converting (or else killing) them. While Ghana was able to defeat the Islamic forces, their empire fell into decline. By t