In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
import torch
import time

# Install necessary libraries if not already present
try:
    import pennylane as qml
except ImportError:
    !pip install pennylane
    import pennylane as qml


class EfficientTamilRAG:

   def __init__(self, data_path='/content/dataset_KissanVanni_tamil.csv', sample_size=20000):
        """
        Initialize RAG system with a subset of the data
        """
        # Load and sample the dataset
        self.df = pd.read_csv(data_path)
        if len(self.df) > sample_size:
            self.df = self.df.sample(n=sample_size, random_state=42).reset_index(drop=True)

        # Initialize models
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.embedding_model = SentenceTransformer(
            'paraphrase-multilingual-MiniLM-L12-v2',
            device=self.device
        )

        # Precompute embeddings in batches to save memory
        self._precompute_embeddings()

        # Initialize smaller generation model for efficiency
        self.generator = pipeline(
            'text-generation',
            model='google/flan-t5-base', # Example: Flan-T5
            device=0 if self.device == 'cuda' else -1
        )



    def _precompute_embeddings(self, batch_size=256):
      """Compute embeddings in batches to manage memory, potentially using quantum embeddings"""
      print("Precomputing embeddings...")
      start_time = time.time()

      self.question_embeddings = []
      for i in range(0, len(self.df), batch_size):
          batch = self.df['question'].iloc[i:i+batch_size].tolist()
          # Classical Embeddings
          batch_embeddings = self.embedding_model.encode(
              batch,
              convert_to_tensor=True,
              show_progress_bar=False
          )
          self.question_embeddings.append(batch_embeddings.cpu().numpy()) #Keep classical as a backup

      self.question_embeddings = np.concatenate(self.question_embeddings)
      print(f"Embeddings computed in {time.time()-start_time:.2f} seconds")


    def quantum_embedding_function(self, classical_embeddings):
        """
        This is a placeholder for a quantum embedding function.
        You would replace this with your actual quantum embedding algorithm.
        """

        dev = qml.device("default.qubit", wires=4)

        @qml.qnode(dev)
        def circuit(inputs):
          # quantum circuit goes here

          # example
          qml.RX(inputs[0], wires=0)
          qml.RY(inputs[1], wires=1)
          return [qml.expval(qml.PauliZ(i)) for i in range(4)]

        quantum_embeddings = []
        for embedding in classical_embeddings:
          quantum_embeddings.append(circuit(embedding[:2])) # replace with your quantum embedding logic

        return np.array(quantum_embeddings)


In [7]:
# prompt: any query as input and respond from rag

import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
import torch
import time

if __name__ == "__main__":
    # Initialize with limited rows
    rag = EfficientTamilRAG(sample_size=20000)

    # Get user query as input
    user_query = input("Enter your Tamil agricultural query: ")

    print(f"\n{'='*50}")
    print(f"Query: {user_query}")

    start_time = time.time()
    result = rag.generate(user_query)
    elapsed = time.time() - start_time

    print(f"Answer ({elapsed:.2f}s): {result['answer']}")
    print("\nRetrieved Context:")
    for i, ctx in enumerate(result['context'], 1):
        print(f"{i}. [Score: {ctx['score']:.3f}] {ctx['question']}")
        print(f"   {ctx['answer']}")


Precomputing embeddings...
Embeddings computed in 9.12 seconds


Device set to use cuda:0
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForCausalLM', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseF

Enter your Tamil agricultural query: அமெரிக்காவில் எந்தெந்த நாடுகளில் மீன் பண்ணைகள் உள்ளன


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.



Query: அமெரிக்காவில் எந்தெந்த நாடுகளில் மீன் பண்ணைகள் உள்ளன




Answer (0.26s): 

Retrieved Context:
1. [Score: 1.000] அமெரிக்காவில் எந்தெந்த நாடுகளில் மீன் பண்ணைகள் உள்ளன
   கலிபோர்னியா, இடாஹோ, அலபாமா, ஆர்கன்சாஸ், லூசியானா, மிசிசிப்பி மற்றும் தென்கிழக்கு யு.எஸ் கடற்கரையில்
2. [Score: 1.000] அமெரிக்காவில் எந்தெந்த நாடுகளில் மீன் பண்ணைகள் உள்ளன
   கலிபோர்னியா, இடாஹோ, அலபாமா, ஆர்கன்சாஸ், லூசியானா, மிசிசிப்பி மற்றும் தென்கிழக்கு யு.எஸ் கடற்கரையில்
3. [Score: 1.000] அமெரிக்காவில் எந்தெந்த நாடுகளில் மீன் பண்ணைகள் உள்ளன
   கலிபோர்னியா, இடாஹோ, அலபாமா, ஆர்கன்சாஸ், லூசியானா, மிசிசிப்பி மற்றும் தென்கிழக்கு யு.எஸ் கடற்கரையில்


In [9]:
# prompt: save the raq system for future use

import pickle

# Assuming 'rag' is your initialized EfficientTamilRAG object
with open('rag_system.pkl', 'wb') as f:
    pickle.dump(rag, f)


In [None]:

import pickle

# Load the saved model
with open('rag_system.pkl', 'rb') as f:
    rag = pickle.load(f)

# Get user query as input
user_query = input("Enter your Tamil agricultural query: ")

print(f"\n{'='*50}")
print(f"Query: {user_query}")

start_time = time.time()
result = rag.generate(user_query)
elapsed = time.time() - start_time

print(f"Answer ({elapsed:.2f}s): {result['answer']}")
print("\nRetrieved Context:")
for i, ctx in enumerate(result['context'], 1):
    print(f"{i}. [Score: {ctx['score']:.3f}] {ctx['question']}")
    print(f"   {ctx['answer']}")
