## Library Imports

In [None]:
%load_ext autoreload 
%autoreload 2
import os
import nest_asyncio

# Apply the nest_asyncio patch before any of the libraries below are used
# (presumably so their async calls can run inside the notebook's own event
# loop — confirm).
nest_asyncio.apply()

After two years of reading about and testing every **time series foundation model**, my conclusion is this:

➡️ **Decoder-only** models lead in forecasting.

➡️ **Encoder models** work better for "time series understanding" tasks, e.g., imputation and anomaly detection.

➡️ **Encoder-decoder models** (e.g., Chronos) remain underexplored. TimeGPT is likely one.

This mirrors NLP: encoders for supervised tasks like text classification, decoders for text generation.

Btw, a remarkable forecasting model is Toto. Tutorials in the comments! 👇

### Variables

In [None]:
# Connection settings. Every value can be overridden through the environment.
QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333"))

# OLLAMA_HOST may hold a bare host name ("localhost"), a "host:port" pair, or
# a complete URL. The clients below receive OLLAMA_BASE_URL as `base_url`, so
# it is normalised into a full URL here; OLLAMA_PORT is only applied when the
# host value does not already carry a port.
_ollama_host = os.getenv("OLLAMA_HOST", "localhost")
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
if "://" in _ollama_host:
    # Already a full URL: use it untouched.
    OLLAMA_BASE_URL = _ollama_host
elif ":" in _ollama_host:
    # "host:port" form: only the scheme is missing.
    OLLAMA_BASE_URL = f"http://{_ollama_host}"
else:
    OLLAMA_BASE_URL = f"http://{_ollama_host}:{OLLAMA_PORT}"

# Location and file types of the documents to index.
DATA_DIR = "../docs"
REQUIRED_EXTS = [".txt"]

## Setup the Qdrant vector DB 

In [None]:
import qdrant_client

# Client for the Qdrant server at the configured host and port.
client = qdrant_client.QdrantClient(
    host=QDRANT_HOST,
    port=QDRANT_PORT,
)

# Collection name handed to the vector store when the index is created.
collection_name = "rag_cc"

### Read the documents from a DIR

In [None]:
from llama_index.core import SimpleDirectoryReader

# Walk the data directory recursively, keep only files with one of the
# required extensions, and load them as documents.
input_dir_path = DATA_DIR
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=REQUIRED_EXTS,
)
docs = loader.load_data()

In [None]:
# Sanity check: show the container type and how many documents were loaded.
type(docs), len(docs)

## Create an Index

In [None]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, StorageContext


def create_index(documents):
    """Embed ``documents`` and store them in a Qdrant-backed vector index.

    The vector store is built from the module-level ``client`` and
    ``collection_name``.

    Args:
        documents: The documents passed to ``VectorStoreIndex.from_documents``.

    Returns:
        The ``VectorStoreIndex`` created on top of the Qdrant vector store.
    """
    # Use Qdrant as the storage backend for everything the index writes.
    qdrant_backend = StorageContext.from_defaults(
        vector_store=QdrantVectorStore(
            client=client,
            collection_name=collection_name,
        )
    )

    # Building the index embeds each document and stores it in the vector store.
    return VectorStoreIndex.from_documents(
        documents=documents,
        storage_context=qdrant_backend,
    )

### Load the embedding model and index the data 

In [None]:
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings

# Register the embedding model in the global settings first, so that index
# creation below uses it.
embed_model = FastEmbedEmbedding(model_name="BAAI/bge-large-en-v1.5")
Settings.embed_model = embed_model

# Build the vector index from the documents loaded earlier.
index = create_index(docs)

### Load the LLM 

In [None]:
from llama_index.llms.ollama import Ollama

# Ollama-served model, registered as the LLM in the global settings.
llm = Ollama(
    model="gemma3n:e2b",
    base_url=OLLAMA_BASE_URL,
    request_timeout=60,
)
Settings.llm = llm

### Define the Prompt Template 

In [None]:
from llama_index.core import PromptTemplate

# QA prompt text with two placeholders, `{context_str}` and `{query_str}`.
# NOTE: the leading spaces on the continuation lines are inside the
# triple-quoted literal, so they are part of the prompt string itself.
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner,
              incase you don't know the answer say 'I don't know!'
            
              Query: {query_str}
        
              Answer:"""

# Wrap the raw string in a PromptTemplate object.
qa_prompt_tmpl = PromptTemplate(template)

### Reranking 
- In the re-ranking step, a more sophisticated model (often a cross-encoder) evaluates the initial list of retrieved chunks alongside the query and assigns a relevance score to each chunk.

In [None]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Reranker that scores retrieved chunks against the query; `top_n=2` caps how
# many chunks it returns.
rerank = SentenceTransformerRerank(
    top_n=2,
    model="BAAI/bge-reranker-base",
)

### Query the Document 

- The query engine integrates the retrieval, re-ranking, and prompt-based response generation steps.

In [None]:
# Retrieve the 10 most similar chunks, then let the reranker narrow them down
# before the answer is synthesized. The reranker only runs when it is passed
# as a node postprocessor; creating `rerank` on its own has no effect on the
# query engine.
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[rerank],
)

# Replace the default question-answering prompt with the custom template.
query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

In [None]:
# Ask a sample question. The literal is split across two lines for
# readability only; the resulting string is unchanged.
response = query_engine.query(
    "How did the structure of funding startups in batches contribute to "
    "the success and growth of the Y Combinator program and the startups involved?"
)

In [None]:
from IPython.display import Markdown, display

# Render the response text as Markdown in the cell output.
display(Markdown(str(response)))


## Generating Evaluation Dataset using Ragas

### Load the knowledge Base 

In [None]:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load the Paul Graham documents and split them into chunks
# (chunk_size=1024, chunk_overlap=20) in a single step.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1024)
loader = DirectoryLoader("../docs/paul_graham")
documents = loader.load_and_split(text_splitter=text_splitter)

### Setting up Models

In [None]:
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings

# Ollama-served models for test-set generation and evaluation: an embedding
# model plus two chat models (critic and generator).
ollama_emb = OllamaEmbeddings(base_url=OLLAMA_BASE_URL, model="nomic-embed-text")
critic_llm = ChatOllama(base_url=OLLAMA_BASE_URL, model="llama3.2:1b")
generator_llm = ChatOllama(base_url=OLLAMA_BASE_URL, model="phi3:3.8b")

### Creating Ragas Testset Generator 

In [None]:
# generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
# dataset = generator.generate_with_langchain_docs(docs, testset_size=10)

In [None]:
from ragas.testset import TestsetGenerator
import pandas as pd

In [None]:
# Build the test-set generator from the LangChain model objects, passed
# positionally as: generator LLM, critic LLM, embeddings.
# NOTE(review): the positional signature of `from_langchain` appears to differ
# between Ragas releases — confirm it matches the installed version.
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, ollama_emb)

In [None]:
# FAILS, because of some dependency issues with langchain
# distribution = {"simple":0.6, "reasoning":0.3, "multi_context":0.245}
# testset = generator.generate_with_langchain_docs(documents, testset_size=10, query_distribution=distribution, raise_exceptions=True)

In [None]:
# Load the test set from a CSV file instead of the in-notebook generator
# output (which would have been `testset.to_pandas().dropna()`), then drop
# every row that has a missing value.
test_df = pd.read_csv("../docs/paul_graham/test_data_paul_graham.csv")
test_df = test_df.dropna()

- The function below accepts the query engine and a question, and returns the answer along with the context it used to generate that answer.

In [None]:
def generate_response(query_engine, question):
    """Run ``question`` through ``query_engine`` and package the outcome.

    Args:
        query_engine: Object exposing a ``query(question)`` method.
        question: The query handed to the engine.

    Returns:
        A dict with two keys: ``"answer"`` holds the result's ``response``
        attribute, and ``"context"`` holds the content of each entry in the
        result's ``source_nodes``, in order.
    """
    result = query_engine.query(question)
    answer_text = result.response
    retrieved_texts = [hit.node.get_content() for hit in result.source_nodes]
    return {"answer": answer_text, "context": retrieved_texts}

In [None]:
from datasets import Dataset
from tqdm.auto import tqdm

# Answer every test question with the RAG pipeline; tqdm shows progress.
test_questions = test_df["question"].values

responses = [
    generate_response(query_engine, question)
    for question in tqdm(test_questions)
]

In [None]:
# Peek at the keys of the first collected response to confirm its shape.
# Direct indexing replaces the earlier for/break loop, which rebound the
# notebook-level `response` variable and carried an unused index. The guard
# keeps the cell a no-op when nothing was collected, as the loop was.
if responses:
    print(responses[0].keys())

In [None]:
# Column-wise view of the evaluation data: the questions, the generated
# answers, the retrieved contexts, and the reference answers from the test set.
dataset_dict = {
    "question": test_questions,
    "answer": [item["answer"] for item in responses],
    "contexts": [item["context"] for item in responses],
    "ground_truth": test_df["ground_truth"].values.tolist(),
}

# Convert the column mapping into a `datasets.Dataset`.
ragas_eval_dataset = Dataset.from_dict(dataset_dict)

## Metric Computation

In [None]:
# NOTE(review): these two imports rebind `OllamaEmbeddings` (imported earlier
# from `langchain_ollama`) and `Ollama` (imported earlier from
# `llama_index.llms.ollama`). Neither name is referenced again in the visible
# remainder of this file — confirm whether these imports are still needed.
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama

from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_correctness,
    context_recall,
    context_precision,
)

In [None]:
# Metrics to compute for every row of the evaluation dataset.
metrics = [faithfulness, answer_correctness, context_recall, context_precision]

# Run the evaluation, passing the critic LLM and the Ollama embeddings as the
# models to use.
evaluation_result = evaluate(
    dataset=ragas_eval_dataset,
    metrics=metrics,
    llm=critic_llm,
    embeddings=ollama_emb,
)

In [None]:
# Put the evaluation scores into a DataFrame (presumably one row per
# evaluated sample — confirm). The bare name on the last line displays it.
eval_scores_df = pd.DataFrame(evaluation_result.scores)
eval_scores_df