In [1]:
import os
import sys

def get_secret(key_name):
    """Return the secret stored under ``key_name``.

    Lookup strategy:

    * Inside Google Colab (detected by ``google.colab`` being present in
      ``sys.modules``): read the value from Colab's ``userdata`` store. If
      that fails for any reason, prompt for the value with ``getpass``.
    * Anywhere else: load a ``.env`` file through ``python-dotenv`` when that
      package is installed, then read the process environment.

    Args:
        key_name: Name of the secret / environment variable to look up.

    Returns:
        The secret value. Outside Colab the result is ``None`` when the
        variable is not defined.
    """
    if 'google.colab' in sys.modules:
        try:
            from google.colab import userdata

            return userdata.get(key_name)
        except Exception as exc:
            # Deliberately broad: whatever stops the native secret store from
            # answering, fall back to manual entry. The exception type is
            # shown so an access/permission problem is not mistaken for a
            # missing secret.
            print(
                f"⚠️ Colab Secret '{key_name}' could not be read "
                f"({type(exc).__name__}). Please set it manually."
            )
            from getpass import getpass

            return getpass(f"Enter value for {key_name}: ")

    # Local run: python-dotenv is optional. Without it, variables already
    # exported in the environment are still honoured instead of crashing.
    try:
        from dotenv import load_dotenv
    except ImportError:
        pass
    else:
        load_dotenv()  # Load .env from the local folder

    return os.getenv(key_name)

In [None]:
# Resolve both API keys up front (OpenAI first, then Cohere) through get_secret.
OPENAI_API_KEY, COHERE_API = map(get_secret, ('OPENAI_API_KEY', 'COHERE_API'))

⚠️ Colab Secret 'OPENAI_API_KEY' not found. Please set it manually.
⚠️ Colab Secret 'COHERNCE_API' not found. Please set it manually.


In [3]:
# %%capture
# !pip install langchain==0.2.5 faiss-cpu==1.8.0 cohere==5.5.8 langchain-community==0.2.5 rank_bm25==0.2.2 sentence-transformers==3.0.1
# !pip install llama-cpp-python==0.2.78  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124

## IMPORTANT: Make sure to restart the session after installing the packages above.

In [4]:
import faiss
import numpy as np

# Smoke test: index ten random 128-dimensional float32 vectors and confirm that
# FAISS reports all of them as stored.
x = np.random.rand(10, 128).astype(np.float32)
index = faiss.IndexFlatL2(x.shape[1])
index.add(x)

print("FAISS sanity OK:", index.ntotal)

FAISS sanity OK: 10


## Dense retrieval example


### 1. Getting text archive and chunking it


In [None]:
import cohere

# Cohere client shared by the embed, rerank and chat calls in this notebook;
# authenticated with the key loaded into COHERE_API earlier.
coh = cohere.Client(COHERE_API)

In [6]:
# Text archive used throughout the notebook: a short article about the film
# Interstellar. It is split into sentence-sized chunks in the next cell, so the
# sentence boundaries ('.') in this string determine the retrieval units.
text = """
Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan.
It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine.
Set in a dystopian future where humanity is struggling to survive, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for mankind.

Brothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007.
Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar.
Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm.
Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles.
Interstellar uses extensive practical and miniature effects and the company Double Negative created additional digital effects.

Interstellar premiered on October 26, 2014, in Los Angeles.
In the United States, it was first released on film stock, expanding to venues using digital projectors.
The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014.
It received acclaim for its performances, direction, screenplay, musical score, visual effects, ambition, themes, and emotional weight.
It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics. Since its premiere, Interstellar gained a cult following,[5] and now is regarded by many sci-fi experts as one of the best science-fiction films of all time.
Interstellar was nominated for five awards at the 87th Academy Awards, winning Best Visual Effects, and received numerous other accolades"""

In [7]:
# Split the article into sentence-sized chunks: cut on '.', trim surrounding
# whitespace/newlines and drop fragments that contain only whitespace.
# (The two earlier assignments to `texts` were dead code: this single
# expression overwrote their result.)
texts = [chunk.strip() for chunk in text.split('.') if chunk.strip()]

### 2. Embedding the text chunks


In [None]:
import numpy as np

# Embed every chunk as a search document, then pack the vectors into a
# C-contiguous float32 matrix with one row per chunk.
response = coh.embed(texts=texts, input_type='search_document').embeddings

embeds = np.ascontiguousarray(np.array(response), dtype=np.float32)

print(embeds.shape)

(15, 4096)


In [9]:
# Show the embedding matrix's container type, dtype, rank and shape, one per line.
print(type(embeds), embeds.dtype, embeds.ndim, embeds.shape, sep='\n')

<class 'numpy.ndarray'>
float32
2
(15, 4096)


### 3. Build the search index


In [None]:
import faiss

# Build an exact L2 index sized to the embedding width and load a float32 copy
# of the chunk embeddings into it.
dim = embeds.shape[-1]
index = faiss.IndexFlatL2(dim)
vectors = np.array(embeds, dtype=np.float32, copy=True)
index.add(vectors)

print('FAISS index size:', index.ntotal)

FAISS index size: 15


### 4. Search the index


In [11]:
import pandas as pd

def search(query, number_of_results=3):
    """Return the chunks of ``texts`` that are nearest to ``query`` in the index.

    Relies on the notebook-level ``coh`` client, FAISS ``index`` and ``texts``
    list.

    Args:
        query: Free-text search string.
        number_of_results: Maximum number of neighbours to return. Capped at
            the number of vectors stored in the index.

    Returns:
        pandas.DataFrame with a ``texts`` column (the matching chunks) and a
        ``distance`` column (the values reported by ``index.search``), one row
        per neighbour in the order the index returned them. Empty when no
        neighbours are requested or the index holds no vectors.
    """
    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with id -1, which would silently select texts[-1].
    k = min(number_of_results, index.ntotal)

    if k > 0:
        # 1. get the query's embedding
        query_embed = coh.embed(texts=[query], input_type='search_query').embeddings[0]

        # 2. retrieve the nearest neighbours
        distances, similar_item_ids = index.search(np.float32([query_embed]), k)
        hit_ids, hit_distances = similar_item_ids[0], distances[0]
    else:
        hit_ids, hit_distances = [], []

    # 3. format the results
    results = pd.DataFrame(data={
        'texts': [texts[item_id] for item_id in hit_ids],
        'distance': hit_distances
    })

    # 4. print the header; the results themselves are returned to the caller
    print(f"query: '{query}'\nNearest neighbours:")

    return results

In [12]:
# Dense-retrieval example: look up the chunks closest to a question about the
# film's scientific accuracy and print the resulting table.
query = 'how precise was the science'
results = search(query=query)

print(results)

query: 'how precise was the science'
Nearest neighbours:
                                               texts      distance
0  It has also received praise from many astronom...  10757.366211
1  Caltech theoretical physicist and 2017 Nobel l...  11566.133789
2  Interstellar uses extensive practical and mini...  11922.839844


In [14]:
from rank_bm25 import BM25Okapi
from sklearn.feature_extraction import _stop_words
import string

def bm25_tokenizer(text):
    """Turn ``text`` into the list of tokens fed to BM25.

    The text is lower-cased and split on whitespace; each piece has leading and
    trailing punctuation removed. Pieces that end up empty or that are English
    stop words are discarded.
    """
    stripped_words = (
        word.strip(string.punctuation) for word in text.lower().split()
    )

    return [
        word
        for word in stripped_words
        if word and word not in _stop_words.ENGLISH_STOP_WORDS
    ]

In [16]:
from tqdm import tqdm

# Tokenize every chunk (with a progress bar) and build the BM25 index over the
# resulting token lists.
tokenized_corpus = [bm25_tokenizer(passage) for passage in tqdm(texts)]

bm25 = BM25Okapi(tokenized_corpus)

100%|██████████| 15/15 [00:00<00:00, 57456.22it/s]


In [17]:
def keyword_search(query, top_k=3, num_candidates=15):
    """Print the best BM25 (lexical) matches for ``query``.

    Relies on the notebook-level ``bm25`` index, ``bm25_tokenizer`` and
    ``texts`` list.

    Args:
        query: Free-text search string.
        top_k: Number of hits to print.
        num_candidates: Number of highest-scoring chunks to keep before
            sorting. Capped at the corpus size; values <= 0 yield no hits.

    Returns:
        None. Results are printed.
    """
    print('Input question:', query)

    bm25_scores = bm25.get_scores(bm25_tokenizer(query))

    # np.argpartition raises when asked for more elements than exist, so cap
    # the candidate count at the number of scored chunks.
    num_candidates = min(num_candidates, len(bm25_scores))
    if num_candidates > 0:
        top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]
    else:
        top_n = []

    bm25_hits = sorted(
        ({'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n),
        key=lambda hit: hit['score'],
        reverse=True,
    )

    # Report the real top_k rather than a hard-coded "3".
    print(f'Top-{top_k} lexical search (BM25) hits')

    for hit in bm25_hits[:top_k]:
        print('\t{:.3f}\t{}'.format(hit['score'], texts[hit['corpus_id']].replace('\n', ' ')))

In [18]:
# Run the lexical (BM25) search with the same question used for the dense search above.
keyword_search(query='how precise was the science')

Input question: how precise was the science
Top-3 lexical search (BM25) hists
	1.789	Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan
	1.373	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar
	0.000	It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine


### Caveats of dense retrieval


In [19]:
# A question the archive cannot answer: search() still returns its nearest
# chunks (three by default), so the distances have to be judged before the
# hits are trusted.
query = 'What is the mass of the moon?'
results = search(query)
print(results)

query: 'What is the mass of the moon?'
Nearest neighbours:
                                               texts      distance
0  Cinematographer Hoyte van Hoytema shot it on 3...  12854.443359
1  The film had a worldwide gross over $677 milli...  13301.007812
2  It has also received praise from many astronom...  13332.000977


## Reranking example


In [20]:
# Score every chunk in `texts` against the query with Cohere's rerank endpoint,
# keep the three best and ask for the document text to be included in the response.
query = 'how precise was the science'
results = coh.rerank(query=query, documents=texts, top_n=3, return_documents=True)
results.results

[RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics'), index=12, relevance_score=0.15239799),
 RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014'), index=10, relevance_score=0.050354082),
 RerankResponseResultsItem(document=RerankResponseResultsItemDocument(text='Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan'), index=0, relevance_score=0.0350424)]

In [21]:
# Print rank position, relevance score and text for each reranked hit.
for idx, result in enumerate(results.results):
    print(idx, result.relevance_score, result.document.text)

0 0.15239799 It has also received praise from many astronomers for its scientific accuracy and portrayal of theoretical astrophysics
1 0.050354082 The film had a worldwide gross over $677 million (and $773 million with subsequent re-releases), making it the tenth-highest grossing film of 2014
2 0.0350424 Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan


In [22]:
def keyword_and_reranking_search(query, top_k=3, num_candidates=10):
    """Print BM25 hits for ``query`` and the same candidates after reranking.

    First selects up to ``num_candidates`` chunks by BM25 score, prints the
    best ``top_k`` of them, then sends all candidates to Cohere's rerank
    endpoint and prints the ``top_k`` it returns.

    Relies on the notebook-level ``bm25`` index, ``bm25_tokenizer``, ``texts``
    list and ``coh`` client.

    Args:
        query: Free-text search string.
        top_k: Number of hits to print at each stage.
        num_candidates: Number of BM25 candidates passed to the reranker.
            Capped at the corpus size; values <= 0 yield no candidates.

    Returns:
        None. Results are printed.
    """
    print('Input question:', query)

    bm25_scores = bm25.get_scores(bm25_tokenizer(query))

    # np.argpartition raises when asked for more elements than exist, so cap
    # the candidate count at the number of scored chunks.
    num_candidates = min(num_candidates, len(bm25_scores))
    if num_candidates > 0:
        top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]
    else:
        top_n = []

    bm25_hits = sorted(
        ({'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n),
        key=lambda hit: hit['score'],
        reverse=True,
    )

    # Report the real top_k rather than a hard-coded "3".
    print(f'Top-{top_k} lexical search (BM25) hits')

    for hit in bm25_hits[:top_k]:
        print('\t{:.3f}\t{}'.format(hit['score'], texts[hit['corpus_id']].replace('\n', ' ')))

    docs = [texts[hit['corpus_id']] for hit in bm25_hits]

    print(f'\nTop-{top_k} hits by rank-API ({len(bm25_hits)} BM25 hits re-ranked)')

    if not docs:
        # Nothing to rerank; skip the API call.
        return

    results = coh.rerank(query=query, documents=docs, top_n=top_k, return_documents=True)

    for hit in results.results:
        print('\t{:.3f}\t{}'.format(hit.relevance_score, hit.document.text.replace('\n', ' ')))

In [23]:
# Compare plain BM25 ranking with the reranked ordering for the same question.
keyword_and_reranking_search(query='how precise was the science')

Input question: how precise was the science
Top-3 lexical search (BM25) hits
	1.789	Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan
	1.373	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar
	0.000	Interstellar uses extensive practical and miniature effects and the company Double Negative created additional digital effects

Top-3 hits by rank-API (10 BM25 hits re-ranked)
	0.035	Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan
	0.032	It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine
	0.031	Caltech theoretical physicist and 2017 Nobel laureate in Physics[4] Kip Thorne was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of In

## Retrieval augmented generation


### Example: grounded generation with an LLM API


In [25]:
query = 'income generated'

# Step 1 - retrieval: fetch the chunks nearest to the query from the dense index.
results = search(query)

# Step 2 - grounded generation: hand the retrieved chunks to the chat endpoint
# as documents, each wrapped as {'text': ...}.
docs_dict = [{'text': chunk} for chunk in results['texts']]
response = coh.chat(message=query, documents=docs_dict)

print(response.text)

query: 'income generated'
Nearest neighbours:
The film Interstellar generated a worldwide gross of over $677 million, and $773 million with subsequent re-releases.


In [26]:
# Display the full chat response object returned by coh.chat.
response



In [27]:
# Citations link character spans of the generated answer to the ids of the supplied documents.
response.citations

[ChatCitation(start=9, end=21, text='Interstellar', document_ids=['doc_1', 'doc_2'], type='TEXT_CONTENT'),
 ChatCitation(start=34, end=70, text='worldwide gross of over $677 million', document_ids=['doc_0'], type='TEXT_CONTENT'),
 ChatCitation(start=76, end=117, text='$773 million with subsequent re-releases.', document_ids=['doc_0'], type='TEXT_CONTENT')]

### Example: RAG with local models


#### Loading the generation model


In [28]:
!wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf

--2025-12-28 09:02:10--  https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
Resolving huggingface.co (huggingface.co)... 18.239.50.49, 18.239.50.16, 18.239.50.80, ...
Connecting to huggingface.co (huggingface.co)|18.239.50.49|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cas-bridge.xethub.hf.co/xet-bridge-us/662698108f7573e6a6478546/df220524a4e4a750fe1c325e41f09ff69137f38b52d8831ba22dcbee3cc8ab6d?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=cas%2F20251228%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20251228T090210Z&X-Amz-Expires=3600&X-Amz-Signature=5cc3348d21adec6f1d7a3d0501fe1399dc2b1edfe4648d22f4f48f75ccad9fc0&X-Amz-SignedHeaders=host&X-Xet-Cas-Uid=public&response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-q4.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-q4.gguf%22%3B&x-id=GetObject&Expires=1766916130&Policy=eyJTdGF0

In [30]:
from langchain import LlamaCpp

# Local generation model: LangChain's LlamaCpp wrapper pointed at the GGUF file
# fetched by the wget cell (same filename, resolved relative to the working directory).
llm = LlamaCpp(
    model_path='Phi-3-mini-4k-instruct-q4.gguf',
    n_gpu_layers=-1,  # presumably "offload all layers to the GPU" — confirm in the llama-cpp-python docs
    max_tokens=500,   # presumably the cap on generated tokens per call — TODO confirm
    n_ctx=2048,       # presumably the context window in tokens (prompt + retrieved chunks must fit) — TODO confirm
    seed=42,          # fixed seed, presumably for repeatable sampling
    verbose=False
)

### Loading the embedding model


In [32]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Embedding model for the local RAG pipeline; it is passed to FAISS.from_texts
# when the vector store is built. The model files are downloaded on first use
# (see the progress bars in this cell's output).
embedding_model = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en-v1.5')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Prepare the vector database


In [33]:
from langchain.vectorstores import FAISS

# Embed the chunks in `texts` with the local embedding model and store them in
# a LangChain FAISS vector store; its retriever feeds the RAG chain.
db = FAISS.from_texts(texts, embedding_model)

### RAG prompt


In [34]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA

# Prompt layout: retrieved context first, then the instruction and the question.
# The <|user|> / <|end|> / <|assistant|> markers are presumably Phi-3's chat
# format — confirm against the model card before swapping in another model.
template = '''<|user|>
{context}

Provide a concise answer to the following question using the relevant information provided above:
{question}<|end|>
<|assistant|>'''

# {context} and {question} are the two placeholders filled in by the chain.
prompt = PromptTemplate(template=template, input_variables=['context', 'question'])

# Retrieval QA chain: the vector store's retriever (default settings) supplies
# the context, and the local LLM answers using the prompt above.
# NOTE(review): chain_type='stuff' presumably concatenates all retrieved
# chunks into {context} — confirm in the LangChain docs.
rag = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=db.as_retriever(),
    chain_type_kwargs={
        'prompt': prompt
    },
    verbose=True
)

In [35]:
# Run the local RAG chain on the same query used in the Cohere grounded-generation example.
# NOTE(review): the saved output shows the model answering that no financial
# information is available — presumably the box-office sentence was not among
# the retrieved chunks; inspect the retriever's results to confirm.
rag.invoke('income generated')



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'income generated',
 'result': ' The text does not provide specific information about the income generated from "Interstellar." It details aspects related to production, release methods, cinematography, and contributions of individuals like Kip Thorne but doesn\'t mention any financial earnings. To answer questions regarding its income, one would need additional data on box office earnings or sales figures.'}