In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

In [1]:
!pip install transformers datasets faiss-cpu sentence-transformers

Collecting transformers
  Using cached transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
Collecting datasets
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting faiss-cpu
  Using cached faiss_cpu-1.9.0.post1-cp310-cp310-win_amd64.whl.metadata (4.5 kB)
Collecting sentence-transformers
  Using cached sentence_transformers-3.3.1-py3-none-any.whl.metadata (10 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)
  Downloading huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)
Collecting numpy>=1.17 (from transformers)
  Downloading numpy-2.2.1-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp310-cp310-win_amd64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.11.6-cp310-cp310-win_amd64.whl.metadata (41 kB)
Collecting requests (fro

In [1]:
import torch
print(torch.cuda.is_available())

True


In [2]:
sample = """
AWS Health provides improved visibility into planned lifecycle events

Posted On: Nov 9, 2023

AWS Health introduces new features to help you manage planned lifecycle events, such as Amazon EKS Kubernetes version end of standard support, Amazon RDS certificate rotations, and end of support for other open source software. AWS Health is the authoritative source of information about service events and scheduled changes affecting your AWS cloud resources.

These new features provide timely visibility into upcoming planned lifecycle events, a standardized data format that allows you to prepare and take actions, as well as the ability to dynamically track the completion of required actions at the resource-level. AWS Health also provides organization-wide visibility into planned lifecycle events for teams that manage workloads across the company.
"""

In [10]:
from sentence_transformers import SentenceTransformer
import faiss

# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the documents
documents = [sample]  # Replace with your dataset
embeddings = embedding_model.encode(documents)

# Build FAISS index
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

In [11]:
query = "What does AWS Health do?"
query_embedding = embedding_model.encode([query])
_, retrieved_docs = index.search(query_embedding, k=3)
context = " ".join([documents[i] for i in retrieved_docs[0]])

In [12]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load generative model
tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

# Prepare input
input_text = f"Context: {context} Query: {query}"
inputs = tokenizer(input_text, return_tensors="pt", truncation=True)

# Generate response
outputs = model.generate(**inputs)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


AWS Health provides organization-wide visibility into planned lifecycle events for teams that manage workloads across


In [None]:
def rag_pipeline(query, model, tokenizer, retrieval_model, faiss_index, documents):
    query_embedding = retrieval_model.encode([query])
    _, retrieved_docs = faiss_index.search(query_embedding, k=3)
    context = " ".join([documents[i] for i in retrieved_docs[0]])

    input_text = f"Context: {context} Query: {query}"
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
import gradio as gr


def generate_response(query):
    return rag_pipeline(query, model, tokenizer, retrieval_model, index, documents)


gr.Interface(fn=generate_response, inputs="text", outputs="text").launch()