# In [None]:
from sentence_transformers import SentenceTransformer
import faiss
from transformers import AutoTokenizer , AutoModelForSeq2SeqLM
import numpy as np

def build_index(docs, model_name='all-MiniLM-L6-v2'):
  """Embed ``docs`` and store the vectors in a flat L2 FAISS index.

  Args:
    docs: Non-empty sequence of document strings to embed.
    model_name: Name passed to ``SentenceTransformer`` to pick the encoder.

  Returns:
    Tuple ``(index, embeddings, model)``: the populated ``faiss.IndexFlatL2``,
    the float32 embedding matrix that was added to it, and the encoder.

  Raises:
    ValueError: If ``docs`` is empty.
  """
  # Fail early with a clear message; with nothing to encode there is no
  # embedding dimension to size the index with.
  if len(docs) == 0:
    raise ValueError("docs must contain at least one document")

  model = SentenceTransformer(model_name)
  embeddings = model.encode(docs, convert_to_numpy=True)
  # FAISS expects a C-contiguous float32 matrix; this is a no-op when the
  # encoder already returned one.
  embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

  index = faiss.IndexFlatL2(embeddings.shape[1])
  index.add(embeddings)
  return index, embeddings, model

def retrieve(index, model, query, k=2):
  """Return the ids of the stored vectors closest to ``query``.

  Args:
    index: Object exposing ``search(vectors, k)`` that returns a
      ``(distances, ids)`` pair, e.g. the FAISS index from ``build_index``.
    model: Encoder exposing ``encode(texts, convert_to_numpy=True)``.
    query: Query text to embed and look up.
    k: Maximum number of neighbours to request.

  Returns:
    1-D array of ids for the query, in the order the index returned them,
    with negative placeholder ids removed; it may hold fewer than ``k``
    entries.
  """
  query_vec = model.encode([query], convert_to_numpy=True)
  _, ids = index.search(query_vec, k)
  hits = ids[0]
  # FAISS pads the result with -1 when it holds fewer than k vectors; a caller
  # indexing a list with -1 would silently pick its last element.
  return hits[hits >= 0]


# Loaded (tokenizer, model) pairs keyed by model name, so repeated questions
# do not reload the weights on every call.
_GENERATOR_CACHE = {}


def _load_generator(model_name):
  """Return the cached ``(tokenizer, model)`` pair for ``model_name``."""
  if model_name not in _GENERATOR_CACHE:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    _GENERATOR_CACHE[model_name] = (tokenizer, model)
  return _GENERATOR_CACHE[model_name]


def answer_question(question, docs, index, embed_model, k=2,
                    model_name="google/flan-t5-small"):
  """Answer ``question`` using the documents retrieved from ``index``.

  Args:
    question: Question text.
    docs: Documents, indexable by the ids that ``retrieve`` returns.
    index: Vector index passed through to ``retrieve``.
    embed_model: Encoder passed through to ``retrieve``.
    k: Number of documents to retrieve as context.
    model_name: Name of the seq2seq checkpoint used to generate the answer.

  Returns:
    The decoded generated text, with special tokens stripped.
  """
  ids = retrieve(index, embed_model, question, k=k)
  # Skip ids outside docs (e.g. the -1 FAISS uses for "no result") instead of
  # letting a negative index silently pick the wrong document.
  passages = [docs[i] for i in ids if 0 <= i < len(docs)]
  context = "\n".join(passages)
  # NOTE(review): the prompt ends with "Answer." (period); kept unchanged so
  # generation output is not altered - confirm whether "Answer:" was intended.
  prompt = f"Context: {context}\nQuestion:{question}\nAnswer."

  tokenizer, model = _load_generator(model_name)
  # The prompt is truncated to 512 tokens and at most 64 new tokens are
  # generated.
  inputs = tokenizer(prompt, return_tensors='pt', truncation=True, max_length=512)
  out = model.generate(**inputs, max_new_tokens=64)
  return tokenizer.decode(out[0], skip_special_tokens=True)

def main():
  """Run a small end-to-end demo: index three facts, ask one question, print the answer."""
  corpus = [
      "The Nile is the longest river in Africa and flows through Egypt",
      "Mount Everest is the highest Mountain in the world located in the Himalayas",
      "Python is a popular programming language for data science."
  ]
  question = "Which River Flows Through Egypt ? "

  # build_index also returns the embedding matrix, which this demo does not use.
  faiss_index, _, encoder = build_index(corpus)
  reply = answer_question(question, corpus, faiss_index, encoder)

  print("Question:", question)
  print("Answer:", reply)