# Setting up the API key for<br>embedding (used for retrieval)

In [None]:
import google.generativeai as genai # For embedding for the retrieval
# Register the Gemini API key with the google.generativeai client.
# Replace the placeholder with a real key before running the notebook.
genai.configure(
    api_key="YOUR_GEMINI_API_KEY",
)

# Filtering the prompt of the user<br>and matching the requirements<br>to the available data in the dataset

In [None]:
import os
from typing import Optional
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI
import json

# Programming languages the assistant can answer questions about.
# The names are joined into the detection prompt and compared against the
# detected language further down.
TABS_COL = [
    'C++',
    'C Sharp',
    'Java',
    'JavaScript',
    'Python',
    'Mojo',
    'Rust',
]

# Schema of the structured answer requested from the LLM: a single optional
# `language` field. It is handed to PydanticOutputParser in
# create_language_detection_prompt to build the format instructions.
# NOTE(review): pydantic presumably copies the class docstring and the Field
# description into the generated JSON schema, which would make them part of
# the prompt text -- confirm before rewording either of them.
class LanguageDetector(BaseModel):
    """Pydantic model to parse the language detection result."""
    # Name of the detected language; the annotation also permits None.
    language: Optional[str] = Field(
        description="The detected programming language or None if no match"
    )

def create_language_detection_prompt() -> PromptTemplate:
    """Build the prompt template used to classify a question by language.

    The template expects two inputs at invocation time, ``question`` and
    ``language_list``. The ``format_instructions`` placeholder is pre-filled
    from a ``PydanticOutputParser`` built around ``LanguageDetector``, and the
    same parser is attached to the template as its output parser.

    Returns:
        The configured ``PromptTemplate``.
    """
    # Written as explicit "\n"-terminated pieces so the trailing spaces at the
    # end of the first two sentences stay visible and cannot be stripped by an
    # editor.
    instruction_text = (
        "You are an assistant tasked with identifying if a user's question is related to one of the following programming languages: {language_list}. \n"
        "If the question is related to one of these languages, respond with the language name; otherwise, respond with 'none'. \n"
        "Ensure your response contains only the language name or 'none' without any additional text.\n"
        "Question: {question}\n"
        "Response: {format_instructions}"
    )

    detector_parser = PydanticOutputParser(pydantic_object=LanguageDetector)
    format_instructions = detector_parser.get_format_instructions()

    return PromptTemplate(
        template=instruction_text,
        input_variables=["question", "language_list"],
        partial_variables={"format_instructions": format_instructions},
        output_parser=detector_parser,
    )

def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence, if present."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    # Drop the opening fence line ("```" or "```json"); if the whole reply is
    # on one line, just drop the three backticks.
    head, newline, body = cleaned.partition("\n")
    cleaned = body if newline else head[3:]
    cleaned = cleaned.rstrip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def detect_programming_language(user_prompt: str) -> Optional[dict]:
    """Ask the LLM which supported language, if any, a question is about.

    Args:
        user_prompt: The user's question.

    Returns:
        The JSON object decoded from the model's reply (callers read its
        ``'language'`` key), or ``None`` when the call or the decoding fails.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is not set.
    """
    # Ensure the Google API key is set
    if not os.environ.get("GOOGLE_API_KEY"):
        raise ValueError("Please set the GOOGLE_API_KEY environment variable")

    # Initialize the language detection prompt
    prompt = create_language_detection_prompt()

    # Initialize the Gemini LLM; a low temperature keeps the answer stable
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro",
        temperature=0.2
    )

    # Create the chain
    chain = prompt | llm

    # Deliberately best-effort: any failure is reported and mapped to None.
    try:
        result = chain.invoke({
            "question": user_prompt,
            "language_list": ", ".join(TABS_COL)
        })
        # Models often wrap JSON answers in a ```json fence, which json.loads
        # rejects; strip the fence before decoding.
        return json.loads(_strip_code_fence(result.content))
    except Exception as e:
        print(f"Error detecting language: {e}")
        return None
# Export the Google API key to the process environment, where
# detect_programming_language looks for it. Replace the placeholder value.
os.environ.update({"GOOGLE_API_KEY": "YOUR_GEMINI_API_KEY"})



Prompt: 'What is c ++ ?'
Detected Language: C++



# Test

In [None]:
# Test cases: run the detector on each sample prompt and print the result.
test_prompts = [
    "What is c ++ ?"
]

for prompt in test_prompts:
    detected_language = detect_programming_language(prompt)
    # detect_programming_language returns None when detection fails, so avoid
    # subscripting it blindly; print None as the language in that case.
    language = detected_language['language'] if detected_language else None
    print(f"Prompt: '{prompt}'\nDetected Language: {language}\n")

# Checking that the detected language is supported

In [None]:
# Report whether the detected language is one of the supported ones.
# detected_language is None when detection failed; treat that (and any
# non-dict result) as "no match" instead of raising on the subscript.
if isinstance(detected_language, dict) and detected_language.get('language') in TABS_COL:
    print('in')
else:
    print('None')

# Loading the matched document <br>from the prompt of the user

In [None]:
import faiss

def load_rag_system(folder_path):
    """Load the FAISS index and the document store from a folder.

    Args:
        folder_path: Directory containing ``index.faiss`` and
            ``documents.json``.

    Returns:
        A ``(index, documents)`` tuple: the index read by
        ``faiss.read_index`` and the decoded content of ``documents.json``.
    """
    # Load FAISS index
    index = faiss.read_index(os.path.join(folder_path, "index.faiss"))

    # Load documents. Decode explicitly as UTF-8 so non-ASCII text does not
    # depend on the platform's default encoding.
    with open(os.path.join(folder_path, "documents.json"), "r", encoding="utf-8") as f:
        documents = json.load(f)

    return index, documents

# Load the index and the raw document records from the folder named after the
# detected language.
index, documents_json = load_rag_system(detected_language['language'])

# Order the records by their 'doc_id'
# (presumably so list positions match the FAISS ids -- verify against the indexer).
sorted_data = list(documents_json.values())
sorted_data.sort(key=lambda record: record['doc_id'])

# Keep only the 'text' of each record, in doc_id order
documents = [record['text'] for record in sorted_data]


# Defining the retriever of the chunks

In [None]:
import numpy as np
def embed_text(text):
    """Request a retrieval-query embedding of ``text`` from embedding-001.

    Returns the result of ``genai.embed_content`` unchanged; the vector itself
    is read from its ``'embedding'`` entry by the caller.
    """
    return genai.embed_content(
        content=text,
        model='models/embedding-001',
        task_type='retrieval_query',
    )



def retriever(query, k=3):
    """Return the texts of up to ``k`` documents nearest to ``query``.

    Args:
        query: The user's question.
        k: Maximum number of documents to retrieve.

    Returns:
        A list of document texts taken from the module-level ``documents``
        list, in the order returned by the index search. It may hold fewer
        than ``k`` items when the index has fewer than ``k`` matches.
    """
    embedding_result = embed_text(query)
    # FAISS expects a 2-D float32 array: one row per query vector.
    query_embedding = np.array(embedding_result['embedding']).astype('float32').reshape(1, -1)

    # Normalize the query vector (in place)
    faiss.normalize_L2(query_embedding)

    _, indices = index.search(query_embedding, k)
    # FAISS pads missing neighbours with -1; without this filter documents[-1]
    # would silently return the last document as if it were a match.
    return [documents[i] for i in indices[0] if i >= 0]

# Prompt engineering and the final call

In [None]:
def generate_response(query, retrieved_docs):
    """Answer ``query`` with gemini-1.5-flash using the retrieved documents.

    Args:
        query: The user's question.
        retrieved_docs: Document texts to use as context; they are joined
            with single spaces.

    Returns:
        The ``text`` of the model's response.
    """
    context = ' '.join(retrieved_docs)

    # The four-space indent on the continuation lines is part of the prompt.
    instruction = (
        "Given the context and query, extract the most relevant facts:\n"
        "    Context:\n"
        f"    {context}\n"
        f"    Query: {query}\n"
        "    Provide a concise, factual response."
    )

    llm = genai.GenerativeModel('models/gemini-1.5-flash')
    return llm.generate_content(instruction).text

# Example usage: retrieve context for a sample question, generate an answer
# from it, then print both.
query = "what What do you think about c++ ?"
retrieved_docs = retriever(query)
response = generate_response(query, retrieved_docs)

print(f"Query: {query}\nResponse: {response}")