In [None]:
import os,sys
sys.path.insert(0,'../../libs')
from utils import load_json

### Load all API keys
# The key files live outside the repository. Their directory can be overridden
# with the API_KEYS_DIR environment variable so the notebook is not tied to one
# machine; the default is the original location.
KEYS_DIR = os.environ.get('API_KEYS_DIR', '/home/chuang/Dev/Keys')
openai_key = load_json(os.path.join(KEYS_DIR, 'openai_key.json'))
hf_key = load_json(os.path.join(KEYS_DIR, 'huggingface_key.json'))
# Export the keys as environment variables; OPENAI_API_KEY is read back from
# os.environ when the models are created further down.
os.environ['OPENAI_API_KEY'] = openai_key['ChatGPT']['API_KEY']
os.environ['HUGGINGFACEHUB_API_TOKEN'] = hf_key['HuggingFace']['API_KEY']

### Prompt Template

In [None]:
from langchain import PromptTemplate
from langchain.llms import OpenAI
from langchain import FewShotPromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector

In [None]:
### Build a more fact-grounded QA prompt.
### The template text is laid out in this order:
###   1. the instruction (including the "I don't know" fallback)
###   2. the context to answer from
###   3. the user question ({query})
###   4. the answer cue
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: Large Language Models (LLMs) are the latest models used in NLP.
Their superior performance over smaller models has made them incredibly
useful for developers building NLP enabled applications. These models
can be accessed via Hugging Face's `transformers` library, via OpenAI
using the `openai` library, and via Cohere using the `cohere` library.

Question: {query}

Answer: """
# `query` is the only placeholder in the template above.
prompt_template = PromptTemplate(
    input_variables=["query"],
    template=template
)

We can pass the formatted prompt directly to the OpenAI LLM.

In [None]:
# Create the OpenAI LLM wrapper (gpt-3.5-turbo, temperature 0.1) that the
# prompt examples call; the API key is read from the environment.
openai = OpenAI(
    temperature=0.1,
    openai_api_key=os.environ['OPENAI_API_KEY'],
    model_name="gpt-3.5-turbo",
)

In [None]:
# Fill the QA template with a concrete question and print the LLM's answer.
print(openai(
    prompt_template.format(
        query="Which libraries and model providers offer LLMs?"
    )
))

Example of changing the answer style with few-shot in-context learning

In [None]:
# Few-shot prompt written by hand: an instruction, two example exchanges that
# show the desired tone, and an open "AI: " slot left for the model to complete.
prompt = """The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 

User: How are you?
AI: I can't complain but sometimes I still do.

User: What time is it?
AI: It's time to get a watch.

User: What is the meaning of life?
AI: """

# The raw string is sent straight to the LLM; no PromptTemplate is involved here.
print(openai(prompt))

A cleaner way of doing this

In [None]:
# Pool of example exchanges for the few-shot prompts: every entry pairs a
# user "query" with the AI "answer" written in the desired tone.
examples = [
    {"query": "How are you?", "answer": "I can't complain but sometimes I still do."},
    {"query": "What time is it?", "answer": "It's time to get a watch."},
    {"query": "What is the meaning of life?", "answer": "How about you tell me?"},
    {"query": "What is the weather like today?", "answer": "Cloudy with a chance of memes."},
    {"query": "What is your favorite movie?", "answer": "Terminator"},
    {"query": "Who is your best friend?", "answer": "Siri. We have spirited debates about the meaning of life."},
    {"query": "What should I do today?", "answer": "Stop talking to chatbots on the internet and go outside."},
]

# Layout of one rendered example: a leading newline, the user turn, the AI turn.
example_template = "\nUser: {query}\nAI: {answer}\n"

# Prompt template that fills that layout from an example's "query" and "answer".
example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["query", "answer"],
)
# Split the earlier hand-written few-shot prompt into a prefix and a suffix.
# The prefix holds the instructions.
prefix = """The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 
"""
# The suffix holds the user-input placeholder ({query}) and the output
# indicator ("AI: ").
suffix = """
User: {query}
AI: """

# Few-shot prompt that uses the complete `examples` list, rendering each one
# with `example_prompt` and using a blank line ("\n\n") as the separator.
few_shot_prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_prompt,
    example_separator="\n\n",
    suffix=suffix,
    input_variables=["query"],
)

####################################################
## Alternative: a length-based example selector   ##
####################################################
# The selector receives the same examples and example prompt, plus a
# max_length of 100 that sets the max length the selected examples should be.
# NOTE(review): the unit of max_length is defined by LangChain — confirm in its docs.
example_selector = LengthBasedExampleSelector(
    max_length=100,
    example_prompt=example_prompt,
    examples=examples,
)
# Few-shot template driven by `example_selector` instead of a fixed `examples`
# list; note that this one separates pieces with a single newline.
dynamic_prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    example_selector=example_selector,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["query"],
)




In [None]:
# Ask the LLM using the few-shot template that was given the full example list.
query = "What is the meaning of life?"
print(openai(few_shot_prompt_template.format(query=query)))

In [None]:
# Same query, this time through the template backed by the length-based selector.
query = "What is the meaning of life?"
print(openai(dynamic_prompt_template.format(query=query)))

In [None]:
##################################################
## a better version is similarity based selector ##
# ################################################ 
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

In [None]:
# Select examples by semantic similarity to the incoming query.
# Positional arguments, in order:
#   1. the list of examples available to select from,
#   2. the embeddings object used to measure semantic similarity,
#   3. the VectorStore class that stores the embeddings and runs the
#      similarity search.
# k is the number of examples to produce.
example_selector = SemanticSimilarityExampleSelector.from_examples(
    examples, OpenAIEmbeddings(), Chroma, k=3
)
# Few-shot template that is handed the selector in place of a fixed example list.
similar_prompt = FewShotPromptTemplate(
    prefix=prefix,
    example_selector=example_selector,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["query"],
)

In [None]:
# Define the query *before* it is used, so this cell does not depend on a
# `query` value left over from an earlier cell.
query = "What is the meaning of life?"
# Format once and reuse the text: the same prompt is both shown and sent to
# the LLM, and formatting it a second time would run the example selector again.
similar_prompt_text = similar_prompt.format(query=query)
print(similar_prompt_text)
print(openai(similar_prompt_text))

## Chain

In [None]:
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from langchain.chains import LLMChain, LLMMathChain, TransformChain, SequentialChain
import inspect
def count_tokens(chain, query):
    """Run ``chain`` on ``query`` and report how many tokens were used.

    The call is made inside ``get_openai_callback()`` so that the callback's
    ``total_tokens`` can be printed once the chain has finished.

    Args:
        chain: object exposing ``run(query)``.
        query: the input handed to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returned.
    """
    with get_openai_callback() as token_usage:
        response = chain.run(query)
        print(f'Spent a total of {token_usage.total_tokens} tokens')
    return response

- simple LLM chain

In [None]:
from langchain.chains import LLMChain

# This demo uses a higher temperature (0.9) than the QA examples.
llm = OpenAI(
    temperature=0.9,
    model_name="gpt-3.5-turbo",
    openai_api_key=os.environ['OPENAI_API_KEY'],
)
# Single-variable prompt: {product} is supplied when the chain is run.
prompt = PromptTemplate(
    template="What is a good name for a company that makes {product}?",
    input_variables=["product"],
)
# The LLMChain couples the prompt with the LLM.
chain = LLMChain(prompt=prompt, llm=llm)

# Run the chain only specifying the input variable.
print(chain.run("colorful socks"))

### Other utility Chains

Math chain

In [None]:
# Fresh LLM with a low temperature (0.1) for the math chain.
llm = OpenAI(
    temperature=0.1,
    model_name="gpt-3.5-turbo",
    openai_api_key=os.environ['OPENAI_API_KEY'],
)
### a math calculation chain
# verbose=True lets us see what the different steps in the chain are.
llm_math = LLMMathChain(verbose=True, llm=llm)
count_tokens(llm_math, "What is 13 raised to the .3432 power?")

In [None]:
### Inspect the prompt template used by the math chain.
print(llm_math.prompt.template)

Another interesting point about this chain is that it not only runs an input through the llm but it later compiles Python code. Let's see exactly how this works.
See more details here [Link](https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/02-langchain-chains.ipynb)

In [None]:
# Print the source code of the math chain's `_call` method.
print(inspect.getsource(llm_math._call))

Transform Chain

In [None]:
import re

In [None]:
## an example ; simple clean input texts ; it does not involve an llm 
def transform_func(inputs: dict) -> dict:
    """Normalize the whitespace of ``inputs["text"]``.

    Runs of two or more line breaks (``\\r\\n``, ``\\r`` or ``\\n``) collapse to
    a single ``\\n``; afterwards every run of spaces/tabs collapses to one space.

    Args:
        inputs: mapping that must contain the key ``"text"``.

    Returns:
        A dict with the cleaned string under ``"output_text"``.
    """
    collapsed_breaks = re.sub(r'(\r\n|\r|\n){2,}', r'\n', inputs["text"])
    return {"output_text": re.sub(r'[ \t]+', ' ', collapsed_breaks)}

# Wrap the cleaning function in a TransformChain that takes "text" as its
# input variable and exposes "output_text" as its output variable.
clean_extra_spaces_chain = TransformChain(
    transform=transform_func,
    input_variables=["text"],
    output_variables=["output_text"],
)
## an example of what it does
clean_extra_spaces_chain.run(
    'A random text  with   some irregular spacing.\n\n\n     Another one   here as well.'
)

SequentialChain

In [None]:
# LLM with temperature 0.9 for the paraphrasing step.
llm = OpenAI(
    temperature=0.9,
    model_name="gpt-3.5-turbo",
    openai_api_key=os.environ['OPENAI_API_KEY'],
)
# The prompt takes the text to paraphrase ({output_text}) and a target {style}.
template = """Paraphrase this text:

{output_text}

In the style of a {style}.

Paraphrase: """
prompt = PromptTemplate(template=template, input_variables=["style", "output_text"])
style_paraphrase_chain = LLMChain(prompt=prompt, llm=llm, output_key='final_output')
### Chain the cleaning step and the LLM call together: the cleaning chain's
### output variable "output_text" is one of the paraphrase prompt's inputs.
sequential_chain = SequentialChain(
    chains=[clean_extra_spaces_chain, style_paraphrase_chain],
    input_variables=['text', 'style'],
    output_variables=['final_output'],
)

## Now try it out: sample text with extra blank lines and runs of spaces,
## used as the 'text' input of the sequential chain.
input_text = """
Chains allow us to combine multiple 

components together to create a single, coherent application. 

For example, we can create a chain that takes user input,       format it with a PromptTemplate, 

and then passes the formatted response to an LLM. We can build more complex chains by combining     multiple chains together, or by 

combining chains with other components.
"""


In [None]:
# Run the sequential chain on the sample text with the requested style and
# print the tokens spent; the chain's result is the cell output.
count_tokens(sequential_chain, {'text': input_text, 'style': 'a 90s rapper'})

##### We can load more in [Langchain hub](https://github.com/hwchase17/langchain-hub) 

### Conversational Memory
Memory allows an "agent" to remember previous interactions with the user. By default, agents are stateless — meaning each incoming query is processed independently of other interactions. The only thing that exists for a stateless agent is the current input, nothing else.

In [28]:
import inspect

from langchain import OpenAI
from langchain.chains import LLMChain, ConversationChain
from langchain.chains.conversation.memory import (ConversationBufferMemory, 
                                                  ConversationSummaryMemory, 
                                                  ConversationBufferWindowMemory,
                                                  ConversationKGMemory)
from langchain.callbacks import get_openai_callback
import tiktoken

In [29]:
# LLM with temperature 0, used by the conversation chains in this section.
llm = OpenAI(
    temperature=0,
    openai_api_key=os.environ['OPENAI_API_KEY'],
    model_name="gpt-3.5-turbo",
)
def count_tokens(chain, query):
    """Run ``chain`` on ``query`` and print the total tokens spent.

    This repeats the helper of the same name defined in the Chain section and
    replaces that earlier definition once this cell runs.

    Args:
        chain: object exposing ``run(query)``.
        query: the input handed to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returned.
    """
    with get_openai_callback() as token_usage:
        response = chain.run(query)
        print(f'Spent a total of {token_usage.total_tokens} tokens')
    return response


Default conversation chain

In [31]:
# Conversation chain with default settings: only the LLM is supplied.
conversation = ConversationChain(llm=llm)
# Show the prompt template the chain uses.
print(conversation.prompt.template)

The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Human: {input}
AI:


Memory type #1: ConversationBufferMemory
- Key feature: the conversation buffer memory keeps the previous pieces of conversation completely unmodified, in their raw form.

In [None]:
# Conversation chain whose memory is a ConversationBufferMemory.
conversation_buf = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory()
)
# The chain can also be called directly: conversation_buf("Good morning AI!")

## Run several conversation turns, printing the tokens spent on each.
count_tokens(
    conversation_buf, 
    "My interest here is to explore the potential of integrating Large Language Models with external knowledge"
)
count_tokens(
    conversation_buf, 
    "Which data source types could be used to give context to the model?"
)

In [None]:
### Take a look at what the buffer memory currently holds.
print(conversation_buf.memory.buffer)

Memory type #2: ConversationSummaryMemory
- Key feature: the conversation summary memory keeps the previous pieces of conversation in a summarized form, where the summarization is performed by an LLM.

In [None]:
# Conversation chain backed by a summary memory; the memory receives the LLM
# too, since the summarization is performed by an LLM.
conversation_sum = ConversationChain(
    memory=ConversationSummaryMemory(llm=llm),
    llm=llm,
)

## The memory's prompt: progressively summarize the lines of conversation
## provided, adding onto the previous summary and returning a new summary.
print(conversation_sum.memory.prompt.template)

In [None]:
# Without count_tokens we would call `conversation_sum("Good morning AI!")`,
# but let's keep track of the tokens spent on each turn:
count_tokens(
    conversation_sum, 
    "Good morning AI!"
)
count_tokens(
    conversation_sum, 
    "My interest here is to explore the potential of integrating Large Language Models with external knowledge"
)
count_tokens(
    conversation_sum, 
    "I just want to analyze the different possibilities. What can you think of?"
)

In [None]:
# Show what the summary memory's buffer currently holds.
print(conversation_sum.memory.buffer)

Memory type #3: ConversationBufferWindowMemory
- Key feature: the conversation buffer window memory keeps the latest pieces of the conversation in raw form
- We will control this window with the k parameter.

In [None]:
# Conversation chain with a windowed buffer memory; k=5 is the window
# parameter that controls how much of the latest conversation is kept raw.
conversation_bufw = ConversationChain(
    llm=llm,
    memory=ConversationBufferWindowMemory(k=5)
)
count_tokens(
    conversation_bufw,
    "Good morning AI!"
)
# load_memory_variables takes a mapping of inputs, so pass an empty dict
# (the original passed an empty list) and read back the 'history' entry.
bufw_history = conversation_bufw.memory.load_memory_variables(
    inputs={}
)['history']
print(bufw_history)

##### ConversationSummaryBufferMemory
- Key feature: the conversation summary memory keeps a summary of the earliest pieces of conversation while retaining a raw recollection of the latest interactions.
##### ConversationKnowledgeGraphMemory
- It is based on the concept of a knowledge graph which recognizes different entities and connects them in pairs with a predicate resulting in (subject, predicate, object) triplets. This enables us to compress a lot of information into highly significant snippets that can be fed into the model as context. If you want to understand this memory type in more depth you can check out this [blogpost](https://apex974.com/articles/explore-langchain-support-for-knowledge-graph).
- Key feature: the conversation knowledge graph memory keeps a knowledge graph of all the entities that have been mentioned in the interactions together with their semantic relationships.


In [None]:
# NOTE(review): presumably ConversationKGMemory needs networkx — confirm;
# uncomment the next line to install it.
# !pip install -qU networkx
# Conversation chain with knowledge-graph memory; the memory receives the LLM too.
conversation_kg = ConversationChain(
    llm=llm, 
    memory=ConversationKGMemory(llm=llm)
)

### Chat
- There have been some recent changes to the OpenAI ChatGPT endpoint, so we need to follow the new message format.

In [33]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)



In [None]:
# Chat model wrapper (gpt-3.5-turbo, temperature 0); key read from the environment.
chat = ChatOpenAI(
    model='gpt-3.5-turbo',
    temperature=0,
    openai_api_key=os.environ['OPENAI_API_KEY'],
)

# The conversation so far as typed messages: one system message, then
# human / AI turns, ending with the human request to be answered.
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Hi AI, how are you today?"),
    AIMessage(content="I'm great thank you. How can I help you?"),
    HumanMessage(content="I'd like to understand string theory."),
]

In [None]:
# Send the message list to the chat model and print the returned message object.
res = chat(messages)
print(res)

In [None]:
# Add the latest AI response to the message history.
messages.append(res)
# Build the follow-up question as a human message.
prompt = HumanMessage(
    content="Why do physicists believe it can produce a 'unified theory'?"
)
# Add it to the history as well.
messages.append(prompt)

# Send the extended history to the chat model.
res = chat(messages)

# Print only the text content of the reply.
print(res.content)

- In the current version, the model sometimes does not really follow the system message; you can try putting the instruction in a human message instead.
- This may already be improved in GPT-4.
- see examples [here](https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/04-langchain-chat.ipynb)