In [1]:
#import dependencies

import json
import google.generativeai as genai
import os
from dotenv import load_dotenv
import ast
import pandas
import sys
import time
import requests
import chromadb
from chromadb.utils import embedding_functions
from datasets import load_dataset
load_dotenv()

True

In [2]:
# Download dataset from HF and convert to a pandas DataFrame (placeholder: this cell runs no code; the CSV-loading lines below are commented out)



# data=pandas.read_csv('Content_Storage_df.csv')
# Content_Storage_df = pandas.DataFrame(data)


In [3]:
# Open the on-disk Chroma store and fetch the collection that the retrieval
# step queries.
CHROMA_DB_PATH = "./chroma_db"
COLLECTION_NAME = "vector_embeddings"
chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
collection = chroma_client.get_collection(COLLECTION_NAME)

# Hand the Gemini SDK the API key read from the environment.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

In [41]:
# Shared embedding function, created on first use so that it is not
# re-instantiated for every question.
_embedding_fn_cache = None


def _get_embedding_fn():
    """Return the shared default Chroma embedding function, creating it once."""
    global _embedding_fn_cache
    if _embedding_fn_cache is None:
        _embedding_fn_cache = embedding_functions.DefaultEmbeddingFunction()
    return _embedding_fn_cache


def retrieve_context(question, top_k=3):
    """Fetch the stored documents closest to ``question`` and join them.

    Args:
        question: Text to embed and look up in the module-level ``collection``.
        top_k: Number of nearest documents to request.

    Returns:
        The matching documents joined with single spaces, or
        "No relevant context found." when the query yields no usable text.
    """
    question_embedding = _get_embedding_fn()([question])[0]

    # Retrieve top-k matching documents
    results = collection.query(
        query_embeddings=[question_embedding],
        n_results=top_k
    )

    # "documents" is a list with one sub-list per query embedding. Guard
    # against a missing/None value and against None or empty entries (which
    # would make str.join raise) before flattening.
    nested_documents = results.get("documents") or []
    flat_documents = [
        doc
        for sublist in nested_documents
        for doc in (sublist or [])
        if doc
    ]
    return " ".join(flat_documents) if flat_documents else "No relevant context found."

def retrieve_grader_function(question):
    """Send ``question`` to the relevance-grader model and return its reply text.

    A new chat session is opened on ``retrieve_grader_1`` for every call,
    seeded with the module-level ``history``.
    """
    session = retrieve_grader_1.start_chat(history=history)
    return session.send_message(question).text

# Web search function using Tavily API
def web_search(query, num_results=3, timeout=10):
    """Search the web through the Tavily API and return the result snippets.

    Args:
        query: Search text sent to Tavily.
        num_results: Maximum number of results to request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``content`` of each result joined with newlines,
        "No results found." when the response carries no usable results, or
        "Error fetching data: <reason>" when the request fails.

    Raises:
        ValueError: If the ``TAVILY_API_KEY`` environment variable is not set.
    """
    api_key = os.getenv("TAVILY_API_KEY")  # Get API key from environment variable
    if not api_key:
        raise ValueError("Tavily API key is missing! Set 'TAVILY_API_KEY' as an environment variable.")

    url = "https://api.tavily.com/search"  # Tavily API endpoint

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"  # Use Bearer Token authentication
    }

    # Tavily names the result-count field "max_results".
    payload = {
        "query": query,
        "max_results": num_results
    }

    try:
        # The timeout keeps a stalled connection from blocking the notebook.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise error for bad responses (4xx, 5xx)
        data = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error fetching data: {e}"

    # Skip entries without text so one malformed result cannot raise KeyError.
    snippets = [res["content"] for res in (data.get("results") or []) if res.get("content")]
    return "\n".join(snippets) if snippets else "No results found."

def answer_generator_function(question):
    """Send ``question`` to the answer-generator model and return its reply text.

    A new chat session is opened on ``answer_generator_2`` for every call,
    seeded with the module-level ``history``.
    """
    session = answer_generator_2.start_chat(history=history)
    return session.send_message(question).text
    
def hallucination_detection_function(question):
    """Send ``question`` to the hallucination-check model and return its reply text.

    A new chat session is opened on ``hallucination_detection_3`` for every
    call, seeded with the module-level ``history``.
    """
    session = hallucination_detection_3.start_chat(history=history)
    return session.send_message(question).text
def question_resolving_detection_function(question):
    """Send ``question`` to the answer-usefulness grader and return its reply text.

    A new chat session is opened on ``question_resolving_detection_4`` for
    every call, seeded with the module-level ``history``.
    """
    session = question_resolving_detection_4.start_chat(history=history)
    return session.send_message(question).text


#Full Path
def _binary_verdict(model_text, key, check_first="yes"):
    """Extract a 'yes'/'no' verdict from a grader reply.

    The graders are prompted to answer with a JSON object, so the value stored
    under ``key`` is used when the reply contains parseable JSON. Otherwise the
    reply is scanned case-insensitively for the two words, testing
    ``check_first`` before the other one. Returns None when neither is found.
    """
    text = (model_text or "").strip()

    # Slice from the first "{" to the last "}" so extra text wrapped around
    # the JSON object (e.g. a code fence) does not defeat parsing.
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        try:
            payload = json.loads(text[start:end + 1])
        except ValueError:
            payload = None
        if isinstance(payload, dict):
            value = str(payload.get(key, "")).strip().lower()
            if value in ("yes", "no"):
                return value

    lowered = text.lower()
    other = "no" if check_first == "yes" else "yes"
    for word in (check_first, other):
        if word in lowered:
            return word
    return None


def Full_Flow(question):
    """Run one question through retrieval, grading, generation and checks.

    Steps:
        1. Retrieve context from the vector store and grade its relevance.
        2. If graded irrelevant, replace it with web-search results; stop if
           the search found nothing or failed.
        3. Generate an answer from the context.
        4. Reject the answer if the hallucination check flags it.
        5. Reject the answer if the usefulness grader says it does not
           resolve the question.

    Args:
        question: The user's question.

    Returns:
        'answer :' followed by the generated answer when every check passes,
        otherwise None. Progress messages are printed along the way.
    """
    document = retrieve_context(question)

    model_input = f"question : {question} , document : {document}"
    relevance = _binary_verdict(retrieve_grader_function(model_input), "score")

    if relevance == 'yes':
        print('document found in database')
    elif relevance == 'no':
        print('searching web.....')
        document = web_search(question)
        # web_search reports both "nothing found" and request failures as
        # plain strings; neither is usable as context.
        if document == "No results found." or document.startswith("Error fetching data:"):
            print('no result found on web')
            print('another alternative to be found for this.....')
            return None
        print('document found on web.....')
    else:
        # Unreadable grader reply: keep going with the database document.
        print('unexpeted error at web document retrieval part')

    #Generation of answer based on context
    model_input = f"question : {question},context : {document}"
    answer = answer_generator_function(model_input)
    print('answer fetched from document')

    #Hallucination detection to check the correctness of answer
    hallucination_check_input = f"context : {document}, answer : {answer}"
    hallucinated = _binary_verdict(
        hallucination_detection_function(hallucination_check_input),
        "hallucination",
    )
    if hallucinated == 'yes':
        print('hallucination detected')
        print('another alternative to be found for this.....')
        return None
    if hallucinated != 'no':
        print('error at hallucination detection output')
        return None

    print('no hallucination detected')
    question_resolver_input = f' question: {question}, answer: {answer}'
    # For a non-JSON reply, test "no" first so an ambiguous reply is rejected.
    resolved = _binary_verdict(
        question_resolving_detection_function(question_resolver_input),
        "score",
        check_first="no",
    )
    if resolved == 'no':
        print('the generated answer do not resolve the query\n')
        print('another alternative to be found for this.....')
        return None
    if resolved == 'yes':
        print('generated answer will resolve the query\n\n')
        return 'answer :'+answer

    # Neither verdict could be read from the resolver reply.
    print('error at question resolver output')
    return None

In [42]:
# Create the model
# Sampling settings shared by all four Gemini models defined below.
# NOTE(review): the three grader models are asked for a fixed yes/no JSON
# verdict; confirm that temperature 1 and plain-text output are intended
# for them.
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 40,
  "max_output_tokens": 8190,
  "response_mime_type": "text/plain",
}

# Relevance grader used by retrieve_grader_function: judges whether the
# retrieved document matches the question. Full_Flow branches on whether the
# reply says yes or no.
# NOTE(review): confirm the "gemini-1.5-flash-8b" model id is still served.
retrieve_grader_1 = genai.GenerativeModel(
  model_name="gemini-1.5-flash-8b",
  generation_config=generation_config,
  system_instruction="""You are a grader assessing the relevance of a retrieved document to a user question. 
                        Give a binary score 'yes' or 'no' to indicate whether the document is relevant.
                        Provide the binary score as JSON with a single key 'score'.
                        input format is 'question : question , document : document'. """
)

# Answer generator used by answer_generator_function: answers the question
# from the supplied context only.
answer_generator_2 = genai.GenerativeModel(
  model_name="gemini-1.5-flash-8b",
  generation_config=generation_config,
  system_instruction="""You are an AI assistant designed for question-answering tasks.  
                        Use the provided context to generate accurate and relevant answers.  
                        If the answer is not found in the context, respond with "I don't know."  
                        Keep your response concise, with a maximum of three sentences.  
                        End with a positive thought related to the question. """
)
# Hallucination checker used by hallucination_detection_function: compares the
# generated answer against the context it was generated from.
hallucination_detection_3 = genai.GenerativeModel(
  model_name="gemini-1.5-flash-8b",
  generation_config=generation_config,
  system_instruction="""You are verifying whether the model-generated answer is factually correct based on the provided context.  
                        If the answer includes information not found in the context, classify it as hallucinated.  
                        Respond with a JSON object containing a single key `"hallucination"`, with a value of `"yes"` or `"no"`.  
                        
                        Output Format:  
                        {
                          "hallucination": "yes"  // If the answer contains hallucinated information  
                        }  
                        {
                          "hallucination": "no"   // If the answer is fully supported by the context  
                        }   """
)
# Usefulness grader used by question_resolving_detection_function: judges
# whether the answer actually resolves the question.
question_resolving_detection_4 = genai.GenerativeModel(
  model_name="gemini-1.5-flash-8b",
  generation_config=generation_config,
  system_instruction="""You are a grader evaluating whether an answer is useful in resolving the given question.  
                        Assess if the answer is relevant, clear, and provides sufficient information to address the question.  
                        Respond with a JSON object containing a single key `"score"`, with a value of `"yes"` or `"no"`.  
                        
                        Input Format:  
                        question: {question}, answer: {answer}  
                        
                        Output Format:  
                        {
                          "score": "yes"  // If the answer is useful  
                        }  
                        {
                          "score": "no"   // If the answer is not useful  
                        }  
                        """
)

In [43]:
# Chat history passed to every start_chat(...) call made by the model helper
# functions; it must be defined before Full_Flow is called. It starts empty
# here.
history=[]

# Demo: run one question through the whole pipeline and print the result.
# Full_Flow returns None when it could not produce an accepted answer.
question = 'who is sachin tendulkar?'
answer = Full_Flow(question)
print(answer)

searching web.....
document found on web.....
answer fetched from document
no hallucination detected
generated answer will resolve the query


answer :Sachin Tendulkar is an Indian cricket player considered one of the greatest batsmen.  He was India's youngest Test cricketer at age 16 and scored a century on his first-class debut.  He holds numerous records, including scoring 10,000 runs in ODI competition.  His incredible career is inspiring.



In [None]:
# while True:
#     escapers= ['exit']
#     question = input('USER : ')
#     answer= 'MAHARAJ :' + generate_answer(question)
 
#     if question.lower() in escapers:
#         break
#     for char in answer:
#         sys.stdout.write(char)  # Write character without newline
#         sys.stdout.flush()      # Force immediate output
#         time.sleep(0.05)         # Adjust speed
#     print()