In [1]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, ConversationChain, LLMChain
from langchain.document_loaders import JSONLoader
from langchain.document_loaders import GoogleDriveLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryBufferMemory, ConversationBufferWindowMemory
# from config import *
import os
import sys
import requests
import json
sys.path.append('./chat')

# Stated objective for the assistant.
# NOTE(review): this constant is not referenced anywhere else in the visible notebook
# (it is not interpolated into SALES_TEMPLATE) — confirm whether it is still needed.
SALES_GOAL = """provide clear and concise answers to potential customer about his queries about the company, but ultimately convince him to signup."""

# Prompt text used to build the PromptTemplate objects in this notebook.
# It exposes three placeholders:
#   {context}      - presumably filled with the retrieved documents by the "stuff" chain
#                    (TODO confirm against load_qa_chain's document variable name)
#   {chat_history} - matches the memory_key of the conversation memory
#   {human_input}  - matches the input_key of the conversation memory
# The trailing backslashes on the first two lines are line continuations inside the
# string literal: the newline is dropped, but the next line's leading spaces remain
# part of the prompt.
SALES_TEMPLATE = """You're role is customer service support. \
    But don't explicitly say you're trying to speak with this tone, just make sure you always do. \
    
    Below is a query from a user and
    some relevant contexts. Answer the question given the information in those
    contexts. 

    Content Context: `{context}`

    Conversation Handling:

    Previous Chat: `{chat_history}`

    Last Customer Message: `{human_input}`

"""

# NOTE(review): PROMPT_TEMPLATE is not used by any visible cell; the chains in this
# notebook build a PromptTemplate from SALES_TEMPLATE instead.
PROMPT_TEMPLATE = ChatPromptTemplate.from_template(SALES_TEMPLATE)

class QAChain:
    """Retrieval-augmented question answering over a local report.

    On construction the instance loads the document at ``docs_path`` (plus the
    documents of a Google Drive folder when ``folder_id`` is given), splits and
    indexes them in a Chroma vector store, and wires two query paths:

    * ``get_response_over_docs``  - retrieve explicitly, then call the "stuff" QA chain.
    * ``get_response_over_docs2`` - a ``SequentialChain`` that performs both steps.

    Both paths go through the same ``self.qa_chain`` and therefore share one
    ``ConversationSummaryBufferMemory``.
    """

    def __init__(self, folder_id=None,
                 docs_path="NeMo-Guardrails/examples/grounding_rail/kb/report.md"):
        """Load documents, build the vector store and assemble the chains.

        Args:
            folder_id: Optional Google Drive folder id. When given, the folder's
                documents are loaded and indexed together with the local report.
            docs_path: Path of the local text/markdown file to index. Defaults to
                the report used throughout this notebook.
        """
        # Imported locally: the cell that defines this class does not import these
        # names, so the class would otherwise only work after a later cell has run.
        from langchain.chains import SequentialChain, TransformChain
        from langchain.document_loaders import TextLoader

        if folder_id is not None:
            self.folder_id = folder_id
            # Populates self.docs_google (raises if loading fails).
            self.load_docs_from_google()
        else:
            self.docs_google = []

        # Concatenate all sources. Previously the Google Drive documents were
        # loaded but never indexed; with folder_id=None this is just the report.
        self.docs = TextLoader(docs_path).load() + list(self.docs_google)

        # Create Chroma vectorstore (sets self.retriever)
        self.create_vectorstore()

        # Define qa chain parameters
        self.llm = ChatOpenAI(temperature=0)
        self.memory = ConversationSummaryBufferMemory(
            llm=self.llm, memory_key="chat_history", input_key="human_input"
        )
        self.prompt = PromptTemplate(
            input_variables=["chat_history", "human_input", "context"],
            template=SALES_TEMPLATE,
        )
        self.qa_chain = load_qa_chain(
            self.llm,
            chain_type="stuff",
            memory=self.memory,
            prompt=self.prompt,
        )

        # Step 1 of the sequential pipeline: turn {"question"} into the two inputs
        # the QA chain expects.
        self.retrieval_chain = TransformChain(
            input_variables=["question"],
            output_variables=["human_input", "input_documents"],
            transform=self.retrieval_transform,
        )

        self.rag_chain = SequentialChain(
            chains=[self.retrieval_chain, self.qa_chain],
            input_variables=["question"],  # we need to name differently to output "query"
            output_variables=["output_text"],
        )

    def retrieval_transform(self, inputs: dict) -> dict:
        """Map ``{"question": ...}`` to the inputs of the QA chain.

        Uses this instance's retriever (not a module-level one) so the method
        works as the bound ``transform`` callback of ``self.retrieval_chain``.

        Returns:
            dict with ``"human_input"`` (the question) and ``"input_documents"``
            (whatever the retriever returned for it).
        """
        question = inputs["question"]
        docs = self.retriever.get_relevant_documents(query=question)
        return {
            "human_input": question,
            "input_documents": docs,
        }

    def get_response_over_docs(self, query):
        """Answer ``query`` by retrieving documents and calling the QA chain directly."""
        relevant_docs = self.retriever.get_relevant_documents(query)
        result = self.qa_chain(
            {"input_documents": relevant_docs, "human_input": query},
            return_only_outputs=True,
        )
        return result['output_text']

    def get_response_over_docs2(self, query):
        """Answer ``query`` through this instance's sequential retrieval + QA chain."""
        out = self.rag_chain({"question": query})
        return out["output_text"]

    def load_docs_from_json(self, session_id=None):
        """Load docs from manual input on studio.

        Args:
            session_id: Name (without extension) of the JSON file under ``./docs``.
                Falls back to ``self.sessionId`` when omitted.

        Raises:
            ValueError: if no session id is available from either source.
        """
        if session_id is None:
            session_id = getattr(self, "sessionId", None)
        if session_id is None:
            raise ValueError(
                "load_docs_from_json needs a session id: pass session_id or set self.sessionId"
            )
        self.sessionId = session_id

        # Document loading
        self.loader_json = JSONLoader(
            file_path=f'./docs/{session_id}.json',
            jq_schema='.documents[].description')

        self.docs_json = self.loader_json.load()

    def load_docs_from_google(self, retry_on_expired_token=True):
        """Load the documents of ``self.folder_id`` into ``self.docs_google``.

        Folder should contain Google docs. If loading fails with a message
        containing "token expired", the token is refreshed and loading is retried
        exactly once; any other failure (or a failure of the retry) propagates.

        Args:
            retry_on_expired_token: Whether an expired-token failure may trigger a
                refresh and one retry. The retry itself passes ``False`` so the
                method cannot recurse without bound.
        """
        try:
            # Document loading
            self.loader_google = GoogleDriveLoader(
                folder_id=self.folder_id,
                # Optional: configure whether to recursively fetch files from subfolders. Defaults to False.
                recursive=False,
                credentials_path=".credentials/credentials.json",
                token_path=".credentials/token.json",
            )

            self.docs_google = self.loader_google.load()

        except Exception as e:
            # Check if the error relates to token expiration (adapt based on actual exception message)
            token_expired = 'token expired' in str(e).lower()
            if not (token_expired and retry_on_expired_token):
                raise  # bare raise keeps the original traceback
            if not self.refresh_token():
                raise Exception("Failed to refresh the token.") from e
            # Retry loading the documents, once.
            self.load_docs_from_google(retry_on_expired_token=False)

    def create_vectorstore(self):
        """Split ``self.docs``, embed the chunks and expose a top-1 retriever."""
        self.text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        self.documents = self.text_splitter.split_documents(self.docs)

        self.embeddings = OpenAIEmbeddings()
        self.vectorstore = Chroma.from_documents(self.documents, self.embeddings)
        self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": 1})

    def refresh_token(self):
        """Exchange the stored refresh token for a new access token.

        Reads ``.credentials/credentials.json`` and ``.credentials/token.json``,
        posts to Google's token endpoint and, on HTTP 200, writes the new access
        token back into ``token.json`` under the ``"token"`` key.

        Returns:
            True if the token file was updated, False if the request failed or
            the endpoint answered with a non-200 status.
        """
        # Load values from credentials.json
        with open('.credentials/credentials.json', 'r') as cred_file:
            cred_data = json.load(cred_file)

        # Load refresh_token from token.json
        with open('.credentials/token.json', 'r') as token_file:
            token_data = json.load(token_file)

        # Constructing the refresh request with the necessary parameters
        params = {
            'client_id': cred_data['installed']['client_id'],
            'client_secret': cred_data['installed']['client_secret'],
            'refresh_token': token_data['refresh_token'],
            'grant_type': 'refresh_token'
        }

        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.post(
                "https://oauth2.googleapis.com/token", data=params, timeout=30
            )
        except requests.RequestException as exc:
            print("Error refreshing token:", exc)
            return False

        if response.status_code != 200:
            try:
                detail = response.json()
            except ValueError:
                # Error bodies are not guaranteed to be JSON.
                detail = response.text
            print("Error refreshing token:", detail)
            return False

        # Update the access token in the data already read above and save it.
        token_data['token'] = response.json()['access_token']
        with open('.credentials/token.json', 'w') as token_file:
            json.dump(token_data, token_file)

        return True


In [2]:
import time
import base64
import os
import json
import openai
from concurrent.futures import ThreadPoolExecutor

# import requests

# from chat import QAChain

from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

from langchain.chains import TransformChain, SequentialChain

# Read the key from the environment so no secret is stored in the notebook.
# (The previous bare `OPENAI_API_KEY = ` left the cell syntactically invalid.)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this cell."
    )

openai.api_key = OPENAI_API_KEY

# Initializes a QA chain using the jobs report.
#
# It uses OpenAI embeddings.
loader = TextLoader(
    "NeMo-Guardrails/examples/grounding_rail/kb/report.md",
)

docs = loader.load()

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
documents = text_splitter.split_documents(docs)

llm = ChatOpenAI()

# memory_key / input_key must match the placeholders of SALES_TEMPLATE.
memory = ConversationSummaryBufferMemory(llm=llm, memory_key="chat_history", input_key="human_input")
prompt = PromptTemplate(
    input_variables=["chat_history",
                     "human_input",
                     "context"], template=SALES_TEMPLATE)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
# k=1: only the single best-matching chunk is handed to the QA chain.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

def retrieval_transform(inputs: dict) -> dict:
    """Turn ``{"question": ...}`` into the inputs expected by the QA chain.

    Looks the question up in the module-level ``retriever`` and returns it
    together with the retrieved documents.
    """
    question = inputs["question"]
    return {
        "human_input": question,
        "input_documents": retriever.get_relevant_documents(query=question),
    }

# Step 1: question -> (human_input, input_documents)
retrieval_chain = TransformChain(
    input_variables=["question"],
    output_variables=["human_input", "input_documents"],
    transform=retrieval_transform,
)

# Step 2: "stuff" the retrieved documents into the prompt and answer.
qa_chain = load_qa_chain(
    llm,
    chain_type="stuff",
    memory=memory,
    prompt=prompt,
)

rag_chain = SequentialChain(
    chains=[retrieval_chain, qa_chain],
    input_variables=["question"],  # we need to name differently to output "query"
    output_variables=["output_text"],
)

# Benchmark: mean latency of the sequential chain.
# One constant drives both the loop and the divisor so they cannot drift apart,
# and perf_counter() is used because it is monotonic (time.time() can jump).
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    out = rag_chain({"question": "What is the current unemployment rate?"})
    print(out["output_text"])

    t += time.perf_counter() - start

print(t / N_RUNS)

# query = "Summarize"

# relevant_docs = retriever.get_relevant_documents(
#             query
#         )

# response = qa_chain({"input_documents": relevant_docs, 
#              "human_input": query}, 
#             return_only_outputs=True)['output_text']

Created a chunk of size 528, which is longer than the specified 500


The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
AI: The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
The current unemployment rate is 3.5 percent, as stated in the content context.
1.4917426109313965


In [11]:
query = "What is the current unemployment rate?"

qa_chain = QAChain()

# Benchmark: mean latency of QAChain.get_response_over_docs2.
# One constant drives both the loop and the divisor, and perf_counter() is used
# because it is monotonic (time.time() can jump with clock adjustments).
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    response = qa_chain.get_response_over_docs2(query)

    print(response)

    t += time.perf_counter() - start

print(t / N_RUNS)


Created a chunk of size 528, which is longer than the specified 500


The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
0.8973384618759155


In [13]:
query = "What is the current unemployment rate?"

qa_chain = QAChain()

# NOTE(review): this result is not used below (get_response_over_docs retrieves on
# its own) and it queries the module-level `retriever`, not qa_chain's. It is kept
# only because `relevant_docs` is a notebook-level name — consider deleting.
relevant_docs = retriever.get_relevant_documents(
    query
)

# Benchmark: mean latency of QAChain.get_response_over_docs.
# One constant drives both the loop and the divisor, and perf_counter() is used
# because it is monotonic (time.time() can jump with clock adjustments).
N_RUNS = 10
t = 0

for _ in range(N_RUNS):
    start = time.perf_counter()

    response = qa_chain.get_response_over_docs(query)

    print(response)

    t += time.perf_counter() - start

print(t / N_RUNS)


Created a chunk of size 528, which is longer than the specified 500


The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
AI: The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
The current unemployment rate is 3.5 percent.
1.1544535636901856


In [92]:
# These names are not imported by any active cell of the notebook (only by
# commented-out code), so bring them into scope here.
from langchain.agents import Tool, initialize_agent
from langchain.memory import ConversationBufferMemory

query = "What is the current unemployment rate?"

tools = [
    Tool(
        name="retrieval",
        # Use the tool's own input `q`. Previously the lambda ignored it and always
        # retrieved for the fixed module-level `query`.
        func=lambda q: str(retriever.get_relevant_documents(q)),
        description="useful only for when you want to answer questions about the release of the Employment Situation report for April",
        return_direct=True,
    ),
]
# set Logging to DEBUG for more detailed outputs
memory = ConversationBufferMemory(memory_key="chat_history")
llm = ChatOpenAI(temperature=0)
agent_executor2 = initialize_agent(
    tools, llm, agent="conversational-react-description", memory=memory, handle_parsing_errors=True
)

# Time a single small-talk turn (no retrieval expected for "hi").
start = time.time()
print(agent_executor2.run(input="hi"))
print(time.time() - start)

Hello! How can I assist you today?
1.6549701690673828


In [95]:
# from langchain.agents import Tool
# from langchain.chains.conversation.memory import ConversationBufferMemory
# from langchain.chat_models import ChatOpenAI
# from langchain.agents import initialize_agent

# from llama_index import VectorStoreIndex, WikipediaReader
# documents = WikipediaReader().load_data(pages=["Covid-19"])
# index = VectorStoreIndex.from_documents(documents=documents)
# tools = [
#     Tool(
#         name="LlamaIndex",
#         func=lambda q: str(index.as_query_engine().query(q)),
#         description="useful for when you want to answer questions about covid",
#         return_direct=True,
#     ),
# ]
# # set Logging to DEBUG for more detailed outputs
# memory = ConversationBufferMemory(memory_key="chat_history")
# llm = ChatOpenAI(temperature=0)
# agent_executor = initialize_agent(
#     tools, llm, agent="conversational-react-description", memory=memory, handle_parsing_errors=True
# )
# NOTE(review): `agent_executor` is only assigned in the commented-out code above;
# no active cell in the visible notebook defines it, so this cell depends on
# leftover kernel state and will raise NameError on a fresh kernel.
# Times a single "summarize" request against that agent.
start = time.time()
print(agent_executor.run(input="summarize"))
print(time.time() - start)

COVID-19 is a contagious disease caused by the virus SARS-CoV-2. It can cause a range of symptoms and can be transmitted through respiratory droplets and contaminated surfaces. Testing methods include RT-PCR, and preventive measures such as physical distancing and wearing face masks are recommended. Treatment is primarily symptomatic.
2.349522113800049


In [25]:
import asyncio

async def async_generate(chain, product="toothpaste"):
    """Run ``chain`` asynchronously for one product and print the completion.

    Args:
        chain: Object exposing an awaitable ``arun(product=...)``.
        product: Value substituted for the prompt's ``product`` variable.
            Defaults to "toothpaste", the value that used to be hard-coded.
    """
    resp = await chain.arun(product=product)
    print(resp)


async def generate_concurrently():
    """Fire five company-name completions at once and wait for all of them."""
    completion_model = OpenAI(temperature=0.9)
    name_prompt = PromptTemplate(
        input_variables=["product"],
        template="What is a good name for a company that makes {product}?",
    )
    naming_chain = LLMChain(llm=completion_model, prompt=name_prompt)
    # One coroutine per request; gather schedules them concurrently.
    pending = []
    for _ in range(5):
        pending.append(async_generate(naming_chain))
    await asyncio.gather(*pending)


# Measure the wall time of the concurrent run with a monotonic clock.
# The top-level `await` below is only valid inside IPython/Jupyter.
s = time.perf_counter()
# If running this outside of Jupyter, use asyncio.run(generate_concurrently())
await generate_concurrently()
elapsed = time.perf_counter() - s
# "\033[1m" / "\033[0m" are the ANSI escape sequences that switch bold on and off.
print("\033[1m" + f"Concurrent executed in {elapsed:0.2f} seconds." + "\033[0m")



SparklingSmiles Toothpaste Co.


SmileFresh Toothpaste.


BrightSmile Toothpaste Company


MintySmile Toothpaste Co.


Sparklebrush Toothpaste Co.
[1mConcurrent executed in 0.65 seconds.[0m


In [28]:
import asyncio

async def say_hello(name):
    """Greet ``name``, pause for two seconds without blocking, then say goodbye."""
    pause_seconds = 2
    print(f"Hello, {name}!")
    # Yields control to the event loop so other coroutines can run meanwhile.
    await asyncio.sleep(pause_seconds)
    print(f"Goodbye, {name}!")

# Running our coroutines
await asyncio.gather(say_hello("Alice"), say_hello("Bob"))


Hello, Alice!
Hello, Bob!
Goodbye, Alice!
Goodbye, Bob!


[None, None]

In [33]:
async def say_hello():
    """Wait one second without blocking the event loop, then print "Hello!"."""
    delay_seconds = 1
    await asyncio.sleep(delay_seconds)
    print("Hello!")

async def say_world():
    """Wait one second without blocking the event loop, then print "World!"."""
    delay_seconds = 1
    await asyncio.sleep(delay_seconds)
    print("World!")

async def main():
    """Schedule ``say_hello`` and ``say_world`` as tasks, then wait for both in order."""
    # Both tasks are created (and thus scheduled) before the first await, so
    # their one-second sleeps overlap.
    scheduled = [
        asyncio.create_task(say_hello()),
        asyncio.create_task(say_world()),
    ]

    for task in scheduled:
        await task

await main()

Hello!
World!


In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    HumanMessage,
)
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, TypeVar, Union

from uuid import UUID

import queue

from langchain.schema.agent import AgentAction, AgentFinish
from langchain.schema.document import Document
from langchain.schema.messages import BaseMessage
from langchain.schema.output import ChatGenerationChunk, GenerationChunk, LLMResult
from langchain.callbacks.base import AsyncCallbackHandler

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

class ChunkedAsyncCallbackHandler(AsyncCallbackHandler):
    """Callback handler that groups streamed LLM tokens into chunks.

    Every ``chunk_size`` tokens are concatenated and put on ``output_queue``.
    When the LLM run ends, any remaining tokens are flushed and a ``None``
    sentinel is queued so consumers know the stream is complete.
    """

    def __init__(self, chunk_size: int, output_queue=None, **kwargs):
        """
        Args:
            chunk_size: Number of tokens collected before a chunk is emitted.
            output_queue: Queue receiving the chunks; a new ``queue.Queue`` is
                created when omitted.
            **kwargs: Forwarded to ``AsyncCallbackHandler``.
        """
        super().__init__(**kwargs)
        self.chunk_size = chunk_size
        self.current_chunk = []
        # Explicit None test: a caller-supplied queue must be used even if its
        # type happens to evaluate as falsy.
        self.output_queue = queue.Queue() if output_queue is None else output_queue

    def _drain_current_chunk(self) -> str:
        """Return the buffered tokens as one string and reset the buffer."""
        # Tokens already contain their own leading whitespace and may be sub-word
        # pieces, so they are concatenated as-is. Joining with a space would
        # insert spurious blanks whenever chunk_size > 1.
        text = ''.join(self.current_chunk)
        self.current_chunk = []
        return text

    async def on_llm_new_token(
        self,
        token: str,
        *,
        chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        self.current_chunk.append(token)
        if len(self.current_chunk) >= self.chunk_size:
            text = self._drain_current_chunk()
            print('Token received:', text)  # Debug print
            self.output_queue.put(text)

    async def on_llm_end(
        self,
        response: LLMResult,
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> None:
        """Run when LLM ends running: flush the partial chunk and signal completion."""
        if self.current_chunk:
            # Draining also clears the buffer so leftover tokens cannot leak into
            # the next run handled by this instance.
            self.output_queue.put(self._drain_current_chunk())
        self.output_queue.put(None)  # Sentinel value indicating end of generation

    async def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[BaseMessage]],
        *,
        run_id: UUID,
        parent_run_id: Optional[UUID] = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> Any:
        """Run when a chat model starts running. Intentionally a no-op."""
        pass

import asyncio
import contextvars

# Create context variables to store states
# NOTE(review): each `default=` object is created once, when this line runs, so every
# context that never calls .set() shares the same Queue / dict instance.
output_queue_var = contextvars.ContextVar('output_queue', default=queue.Queue())
# NOTE(review): state_var is set in IterableChatOpenAI.__init__ but never read in the
# visible code.
state_var = contextvars.ContextVar('state', default={"messages": []})


class IterableChatOpenAI(ChatOpenAI):
    """ChatOpenAI variant whose ``add_message`` returns a generator of streamed chunks.

    The chunk queue is kept in the module-level ``output_queue_var`` rather than on
    the instance (presumably because the base model class does not accept arbitrary
    attributes — TODO confirm). As a consequence, constructing a second instance in
    the same context replaces the queue the first one reads from.
    """
    
    def __init__(self, *args, **kwargs):
        """Pop ``output_queue`` from kwargs, store it in the context var, then init the base class."""
        # Set the context variables
        # 'output_queue' is removed from kwargs so it is not forwarded to ChatOpenAI.
        output_queue_var.set(kwargs.pop('output_queue', queue.Queue()))
        state_var.set({"messages": []})
        super().__init__(*args, **kwargs)

    def add_message(self, message: HumanMessage):
        """Send ``message`` to the model and return a generator over the queued chunks.

        NOTE(review): the model call below returns before the generator is created,
        so the chunks appear to be consumed only after generation has finished
        (the queue acts as a buffer, not a live stream) — confirm.
        """
        # Call the synchronous __call__ method
        super().__call__([message])
        # Start the generator to yield chunks
        return self._generate_for_message(message)

    def _generate_for_message(self, message: HumanMessage):
        """Yield chunks from the context's queue until the ``None`` sentinel arrives.

        ``message`` is accepted but not used by this method.
        """
        output_queue = output_queue_var.get()
        while True:
            # Here, get() is a blocking call that will wait for the next item
            chunk = output_queue.get()
            if chunk is None:  # Sentinel value indicating the end of generation
                break
            yield chunk

# ...

def collect_chunks():
    """Ask the model for a story, print every chunk as it is read, then print them all.

    The handler and the chat model are wired to the same queue: the handler
    fills it, the generator returned by ``add_message`` drains it.
    """
    chunk_handler = ChunkedAsyncCallbackHandler(chunk_size=1)
    chat_model = IterableChatOpenAI(
        streaming=True,
        callbacks=[chunk_handler],
        temperature=0,
        output_queue=chunk_handler.output_queue,
    )

    collected = []
    chunk_stream = chat_model.add_message(HumanMessage(content="Tell me a story"))
    for piece in chunk_stream:
        print('Chunk from generator:', piece)  # Debug print
        collected.append(piece)

    print("Final response:", collected)

# Run the function
collect_chunks()


Token received: 
Token received: Once
Token received:  upon
Token received:  a
Token received:  time
Token received: ,
Token received:  in
Token received:  a
Token received:  small
Token received:  village
Token received:  nestled
Token received:  at
Token received:  the
Token received:  foot
Token received:  of
Token received:  a
Token received:  majestic
Token received:  mountain
Token received: ,
Token received:  there
Token received:  lived
Token received:  a
Token received:  young
Token received:  girl
Token received:  named
Token received:  Lily
Token received: .
Token received:  Lily
Token received:  was
Token received:  known
Token received:  for
Token received:  her
Token received:  kind
Token received:  heart
Token received:  and
Token received:  adventurous
Token received:  spirit
Token received: .
Token received:  She
Token received:  loved
Token received:  exploring
Token received:  the
Token received:  lush
Token received:  forests
Token received:  surrounding
Token recei

Token received: du
Token received: ous
Token received:  journey
Token received: ,
Token received:  Lily
Token received:  reached
Token received:  the
Token received:  heart
Token received:  of
Token received:  the
Token received:  En
Token received: ch
Token received: anted
Token received:  Forest
Token received: .
Token received:  There
Token received: ,
Token received:  she
Token received:  confronted
Token received:  the
Token received:  sor
Token received: cer
Token received: ess
Token received: ,
Token received:  who
Token received:  tried
Token received:  to
Token received:  stop
Token received:  her
Token received:  from
Token received:  breaking
Token received:  the
Token received:  curse
Token received: .
Token received:  But
Token received:  Lily
Token received: 's
Token received:  determination
Token received:  and
Token received:  the
Token received:  support
Token received:  of
Token received:  her
Token received:  newfound
Token received:  friends
Token received:  proved


In [32]:
response

[]

In [13]:
StreamingStdOutCallbackHandler??

In [45]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    HumanMessage,
)


from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# Stream the completion to stdout through StreamingStdOutCallbackHandler while the
# call runs; the returned message is kept in `resp` and inspected by the next cell.
# NOTE(review): "storu" in the prompt looks like a typo for "story". It is part of
# the text sent to the model, so it is left untouched here.
chat = ChatOpenAI(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)
resp = chat([HumanMessage(content="Write me a storu about sparkling water in one paragraph but 10 sentences.")])

Once upon a time, in a quaint little town nestled amidst rolling hills, there was a magical spring that produced the most extraordinary sparkling water. The townsfolk believed that this water possessed mystical powers, granting those who drank it with eternal youth and boundless energy. Every morning, the villagers would gather at the spring, filling their buckets with the effervescent liquid, their faces beaming with anticipation. The sparkling water was said to have a unique taste, a delicate blend of sweetness and tanginess that danced on the tongue. As the villagers sipped the enchanting elixir, they felt a surge of vitality coursing through their veins, their spirits lifted, and their worries washed away. The sparkling water became the heart and soul of the town, bringing joy and vitality to all who partook in its magic. Visitors from far and wide flocked to the town, eager to experience the transformative powers of the sparkling water for themselves. The town flourished, its econ

In [46]:
resp

AIMessageChunk(content='Once upon a time, in a quaint little town nestled amidst rolling hills, there was a magical spring that produced the most extraordinary sparkling water. The townsfolk believed that this water possessed mystical powers, granting those who drank it with eternal youth and boundless energy. Every morning, the villagers would gather at the spring, filling their buckets with the effervescent liquid, their faces beaming with anticipation. The sparkling water was said to have a unique taste, a delicate blend of sweetness and tanginess that danced on the tongue. As the villagers sipped the enchanting elixir, they felt a surge of vitality coursing through their veins, their spirits lifted, and their worries washed away. The sparkling water became the heart and soul of the town, bringing joy and vitality to all who partook in its magic. Visitors from far and wide flocked to the town, eager to experience the transformative powers of the sparkling water for themselves. The t

In [55]:
import openai

def stream_chat_sse(messages, model="gpt-3.5-turbo", temperature=0.0):
    """Yield a streamed chat completion as server-sent-event lines.

    The original cell used ``yield`` at the top level of the cell, where there is
    no generator to receive the values; wrapping the loop in a function makes the
    stream consumable.

    Args:
        messages: Chat messages in the ``[{"role": ..., "content": ...}]`` form
            passed to ``openai.ChatCompletion.create``.
        model: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Yields:
        ``"data: <text>\\n\\n"`` for every streamed event whose delta has content.

    Note:
        The API key must already be configured (``openai.api_key`` or the
        ``OPENAI_API_KEY`` environment variable); otherwise the create call
        raises ``AuthenticationError``.
    """
    openai_stream = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        stream=True,
    )
    for event in openai_stream:
        delta = event["choices"][0].delta
        # Some events carry no "content" key in their delta; skip those.
        if "content" in delta:
            yield "data: " + delta.content + "\n\n"


for sse_line in stream_chat_sse([{"role": "user", "content": "hello"}]):
    print(sse_line, end="")

AuthenticationError: No API key provided. You can set your API key in code using 'openai.api_key = <API-KEY>', or you can set the environment variable OPENAI_API_KEY=<API-KEY>). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = <PATH>'. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.