In [1]:
# Populate os.environ from the project's .env file (read as UTF-8).
# Prerequisite, run once in the kernel's environment: pip install -U python-dotenv
import os

from dotenv import load_dotenv

load_dotenv(encoding="utf-8")

True

# Cohere Toolkit Backend

In [8]:
def cohere_rag_get_answers(message):
    """Ask the locally running Cohere Toolkit backend a single question.

    Posts ``message`` to ``http://localhost:8000/v1/chat`` with the
    ``custom_retriever`` tool enabled and returns the first JSON object
    found in the response body.

    The bearer token is read from the ``BEARER_SECRET_KEY`` environment
    variable so that no credential is stored in the notebook.

    Args:
        message (str): The user question to send to the backend.

    Returns:
        dict: ``{'answer': ..., 'contexts': ...}``. On success the values are
        the ``"text"`` and ``"documents"`` fields of the response JSON. On a
        non-200 status or an empty body, ``'answer'`` holds an ``"Error: ..."``
        string and ``'contexts'`` is an empty list.

    Raises:
        RuntimeError: If ``BEARER_SECRET_KEY`` is not set.
    """
    import json
    import os

    import requests

    # Never embed the token in the notebook: it is a credential and the JWT
    # payload also carries personal data.
    bearer = os.getenv('BEARER_SECRET_KEY')
    if not bearer:
        raise RuntimeError(
            "BEARER_SECRET_KEY is not set; add it to the .env file or export it "
            "before calling cohere_rag_get_answers()."
        )

    url = "http://localhost:8000/v1/chat"

    headers = {
        "User-Id": "me",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {bearer}",
        "Cohere-Stream": "true",  # Enable streaming for chatbot responses
    }

    data = {
        "message": message,
        # "model": "command-r",
        "tools": [{"name": "custom_retriever"}],
    }

    response = requests.post(url, headers=headers, json=data)

    if response.status_code != 200:
        error_text = f"Error: {response.status_code}"
        print(error_text)
        return {'answer': error_text, 'contexts': []}

    for line in response.iter_lines():
        # iter_lines() can yield empty keep-alive lines; json.loads would
        # fail on them, so skip ahead to the first line with content.
        if not line:
            continue
        response_data = json.loads(line.decode("utf-8"))
        return {
            'answer': response_data.get("text"),
            'contexts': response_data.get("documents"),
        }

    # A 200 response without any payload: still hand back a dict so callers
    # that use .get() on the result keep working.
    error_text = "Error: empty response body"
    print(error_text)
    return {'answer': error_text, 'contexts': []}

In [9]:
# Smoke test: send one HR question through the Cohere Toolkit backend.
cohere_rag_get_answers(
    "What is the purpose of the orientation session at Tech Innovators Inc.?"
)

{'answer': "The orientation session at Tech Innovators Inc. serves multiple purposes. Firstly, it aims to introduce new employees to the company's culture, values, and overall work environment. It's an essential way to help newcomers feel welcomed and connected to the organization from their very first day. \n\nDuring the orientation session, employees are likely to be introduced to:\n\n1. **Company Policies**: New hires will be informed about the company's policies, including those related to employee conduct, work hours, leave policies, and any other relevant guidelines they need to be aware of.\n\n2. **Benefits Package**: The session should outline the benefits of working at Tech Innovators Inc., covering healthcare options, retirement plans, professional development allowances, and any other perks the company offers.\n\n3. **Workplace Safety**: Health and safety protocols are important, and the orientation will likely cover emergency procedures, workplace safety guidelines, and any

# RAG answer generation

In [17]:
import json

def test_answer_generation(json_file_path):
    """Appends generated text based on questions in a JSON file.

    Args:
        json_file_path (str): The path to the JSON file.

    Raises:
        FileNotFoundError: If the JSON file is not found.
    """

    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)

        for item in data:
            question = item['question']
            response = cohere_rag_get_answers(question)  # Assuming generate_text returns a string
            item['response'] = response.get('answer')
            item['retrieved_contexts'] = response.get('contexts')

        with open(json_file_path, 'w') as f:
            json.dump(data, f, indent=4)

    except FileNotFoundError:
        print(f"Error: JSON file not found at {json_file_path}")


In [19]:
# Example usage: run answer generation over the HR test dataset,
# addressed relative to the project root.
import os

from from_root import from_root

folder = 'data-test/test_dataset/test_dataset_hr.json'
json_file_path = os.path.join(from_root(), folder)
test_answer_generation(json_file_path)

# Custom RAG

## Retriever

In [3]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_mongodb import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from pymongo import MongoClient

In [5]:
# Fail early with a readable message when the key is missing. The previous
# self-assignment of os.environ["OPENAI_API_KEY"] did nothing when the key
# was set and raised an opaque TypeError when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to the .env file or export it before running this cell."
    )

llm = ChatOpenAI(model=os.getenv("DEFAULT_OPENAI_MODEL"))  # e.g. DEFAULT_OPENAI_MODEL='gpt-4o-mini-2024-07-18'
# Alternative: pick the embedding model from the environment.
# embedding_model=OpenAIEmbeddings(model=os.getenv("DEFAULT_OPENAI_EMBEDDING"), disallowed_special=())
embedding_model = OpenAIEmbeddings(disallowed_special=())

In [6]:
# MongoDB Atlas vector database that backs the retriever.
client = MongoClient(os.getenv("ATLAS_CONNECTION_STRING"))
db_name = os.getenv("db_name")
# The collection is pinned here rather than read from the "collection_name"
# environment variable.
collection_name = "enterprise_data"
atlas_collection = client[db_name][collection_name]
index_name = os.getenv("index_name")

vector_store = MongoDBAtlasVectorSearch(
    embedding=embedding_model,
    collection=atlas_collection,
    index_name=index_name,
)

# Similarity search returning the 10 closest documents
# (no "score_threshold", e.g. 0.75, is applied).
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

## RAG Response

In [9]:
# Define a prompt template
import pprint
def call_openai(question, preamble=""):
   """Answer a question with the custom RAG pipeline (retriever + OpenAI chat model).

   Documents are fetched once from the module-level ``retriever``; their
   ``page_content`` is joined into the prompt context and the same documents
   are returned as ``'contexts'``.

   The question is handed to the prompt as a template variable instead of
   being pasted into the template text, so questions that contain ``{`` or
   ``}`` are not misread as template placeholders.

   Args:
      question (dict): Mapping with a ``'question'`` key holding the user question.
      preamble (str, optional): Extra instruction text inserted before the
         question. Defaults to no preamble.

   Returns:
      dict: ``{'answer': <model output string>, 'contexts': [str(doc), ...]}``.
   """
   question_text = question['question']

   SAFETY_PREAMBLE = "The instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral."
   BASIC_RULES = "You are a powerful conversational AI trained by openAI to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions."
   TASK_CONTEXT = "You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging."
   STYLE_GUIDE = "Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling."
   # INSTRUCTIONS deliberately carries the literal "{context}" placeholder that
   # the prompt template fills with the retrieved documents.
   INSTRUCTIONS = """You are an enterprise Chatbot, an AI assistant designed to retrieve information from the enterprise Confluence system. 
   You specialize in providing accurate answers related to various departments like Marketing, IT, HR, Finance, and Corporate Communications. 
               Use the following pieces of context to answer the question at the end.
               If you don't know the answer, just say that you don't know, don't try to make up an answer
               {context}
         """

   template = f"""

      {SAFETY_PREAMBLE}
      {BASIC_RULES}
      {TASK_CONTEXT}
      {STYLE_GUIDE}
      {INSTRUCTIONS}

   """
   if preamble:
      # Free text: double the braces so the template engine treats them literally.
      escaped_preamble = preamble.replace("{", "{{").replace("}", "}}")
      template += f"""{escaped_preamble}\n\n"""

   # Plain (non-f) string on purpose: "{question}" must stay a template variable.
   template += "Question: {question}\n\n"

   custom_rag_prompt = PromptTemplate.from_template(template)

   llm = ChatOpenAI(model=os.getenv("DEFAULT_OPENAI_MODEL"),max_tokens=200)

   def format_docs(docs):
      """Join the page contents of the retrieved documents into one context string."""
      return "\n\n".join(doc.page_content for doc in docs)

   # Retrieve once and reuse the result for both the prompt and the returned
   # contexts, so the reported contexts are exactly what the model was shown.
   similar = retriever.invoke(question_text)

   rag_chain = custom_rag_prompt | llm | StrOutputParser()
   answer = rag_chain.invoke({
      "context": format_docs(similar),
      "question": question_text,
   })

   return {
      'answer': answer,
      'contexts': [str(doc) for doc in similar]
   }

**Test RAG pipeline**

In [11]:
# Test sample
# Smoke test: run one sample question through the custom RAG pipeline and
# show the first 150 characters of the answer.
question = {"question": "What are the tickets created at Tech Innovator Inc?"}
answer = call_openai(question)
print(answer["answer"][:150])

PermissionDeniedError: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>
<title>Attention Required! | Cloudflare</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/cf.errors.css" />
<!--[if lt IE 9]><link rel="stylesheet" id='cf_styles-ie-css' href="/cdn-cgi/styles/cf.errors.ie.css" /><![endif]-->
<style>body{margin:0;padding:0}</style>


<!--[if gte IE 10]><!-->
<script>
  if (!navigator.cookieEnabled) {
    window.addEventListener('DOMContentLoaded', function () {
      var cookieEl = document.getElementById('cookie-alert');
      cookieEl.style.display = 'block';
    })
  }
</script>
<!--<![endif]-->


</head>
<body>
  <div id="cf-wrapper">
    <div class="cf-alert cf-alert-error cf-cookie-error" id="cookie-alert" data-translate="enable_cookies">Please enable cookies.</div>
    <div id="cf-error-details" class="cf-error-details-wrapper">
      <div class="cf-wrapper cf-header cf-error-overview">
        <h1 data-translate="block_headline">Sorry, you have been blocked</h1>
        <h2 class="cf-subheadline"><span data-translate="unable_to_access">You are unable to access</span> api.openai.com</h2>
      </div><!-- /.header -->

      <div class="cf-section cf-highlight">
        <div class="cf-wrapper">
          <div class="cf-screenshot-container cf-screenshot-full">
            
              <span class="cf-no-screenshot error"></span>
            
          </div>
        </div>
      </div><!-- /.captcha-container -->

      <div class="cf-section cf-wrapper">
        <div class="cf-columns two">
          <div class="cf-column">
            <h2 data-translate="blocked_why_headline">Why have I been blocked?</h2>

            <p data-translate="blocked_why_detail">This website is using a security service to protect itself from online attacks. The action you just performed triggered the security solution. There are several actions that could trigger this block including submitting a certain word or phrase, a SQL command or malformed data.</p>
          </div>

          <div class="cf-column">
            <h2 data-translate="blocked_resolve_headline">What can I do to resolve this?</h2>

            <p data-translate="blocked_resolve_detail">You can email the site owner to let them know you were blocked. Please include what you were doing when this page came up and the Cloudflare Ray ID found at the bottom of this page.</p>
          </div>
        </div>
      </div><!-- /.section -->

      <div class="cf-error-footer cf-wrapper w-240 lg:w-full py-10 sm:py-4 sm:px-8 mx-auto text-center sm:text-left border-solid border-0 border-t border-gray-300">
  <p class="text-13">
    <span class="cf-footer-item sm:block sm:mb-1">Cloudflare Ray ID: <strong class="font-semibold">8c420030ca6ccf9b</strong></span>
    <span class="cf-footer-separator sm:hidden">&bull;</span>
    <span id="cf-footer-item-ip" class="cf-footer-item hidden sm:block sm:mb-1">
      Your IP:
      <button type="button" id="cf-footer-ip-reveal" class="cf-footer-ip-reveal-btn">Click to reveal</button>
      <span class="hidden" id="cf-footer-ip">129.210.115.104</span>
      <span class="cf-footer-separator sm:hidden">&bull;</span>
    </span>
    <span class="cf-footer-item sm:block sm:mb-1"><span>Performance &amp; security by</span> <a rel="noopener noreferrer" href="https://www.cloudflare.com/5xx-error-landing" id="brand_link" target="_blank">Cloudflare</a></span>
    
  </p>
  <script>(function(){function d(){var b=a.getElementById("cf-footer-item-ip"),c=a.getElementById("cf-footer-ip-reveal");b&&"classList"in b&&(b.classList.remove("hidden"),c.addEventListener("click",function(){c.classList.add("hidden");a.getElementById("cf-footer-ip").classList.remove("hidden")}))}var a=document;document.addEventListener&&a.addEventListener("DOMContentLoaded",d)})();</script>
</div><!-- /.error-footer -->


    </div><!-- /#cf-error-details -->
  </div><!-- /#cf-wrapper -->

  <script>
  window._cf_translation = {};
  
  
</script>

</body>
</html>

## Answer Generation

In [None]:
import json

def test_answer_generation(json_file_path):
    """Appends generated text based on questions in a JSON file.

    Args:
        json_file_path (str): The path to the JSON file.

    Raises:
        FileNotFoundError: If the JSON file is not found.
    """

    try:
        with open(json_file_path, 'r') as f:
            data = json.load(f)

        for item in data:
            question = item['question']
            response = cohere_rag_get_answers(question)  # Assuming generate_text returns a string
            item['response'] = response.get('answer')
            item['retrieved_contexts'] = response.get('contexts')

        with open(json_file_path, 'w') as f:
            json.dump(data, f, indent=4)

    except FileNotFoundError:
        print(f"Error: JSON file not found at {json_file_path}")


**Export to json file**

In [None]:
# Example usage: export generated answers into the HR test dataset,
# addressed relative to the project root.
import os

from from_root import from_root

folder = 'data-test/test_dataset/test_dataset_hr.json'
json_file_path = os.path.join(from_root(), folder)
test_answer_generation(json_file_path)