# Chat Functionality

This notebook lets you try out the chat functionality against the IMDB dataset stored in OpenSearch.

In [1]:
import sys
sys.path.append("../src/")

import yaml
import langchain_qa_chat
import utils_text
import requests
from prompt import QA_PROMPT, get_instruction

# Load the notebook configuration from config.yml (resolved relative to the
# current working directory). config['llm'] is read further down to obtain the
# values passed to the SageMaker endpoint helpers.
with open('config.yml', 'r') as file:
    config = yaml.safe_load(file)
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

In [2]:
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import LLMChain
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

### llm-1

The helper functions used in the rest of the notebook are defined below.

In [3]:
def create_qa_chain(model, verbose=False):
    """
    Create a question answering chain from langchain using an llm chosen by the user.

    Args:
        model(str): llm model type; one of 'FLAN-T5-XXL', 'Bedrock',
            'Text2Text' or 'Jurassic-Jumbo-Instruct'
        verbose(boolean): whether to have the model show its full output
    Returns:
        the chain built by langchain_qa_chat.chain_qa with QA_PROMPT
    Raises:
        AssertionError: if `model` is not one of the supported names
    """
    if model == 'FLAN-T5-XXL':
        llm = langchain_qa_chat.sagemaker_endpoint(config['llm']['t5_endpoint'])
    elif model == 'Bedrock':
        llm = langchain_qa_chat.amazon_bedrock_llm(verbose=verbose)
    elif model == "Text2Text":
        llm = langchain_qa_chat.text2text_llm()
    elif model == 'Jurassic-Jumbo-Instruct':
        llm = langchain_qa_chat.sagemaker_endpoint_ai21(config['llm']['ai21_instruct'])
    else:
        # Raise explicitly rather than `assert False`: assert statements are
        # removed when Python runs with -O, which would let execution fall
        # through to an unrelated UnboundLocalError on `llm`. The exception
        # type is kept as AssertionError so existing callers behave the same.
        raise AssertionError(
            f"Unsupported model {model!r}; expected one of 'FLAN-T5-XXL', "
            "'Bedrock', 'Text2Text', 'Jurassic-Jumbo-Instruct'"
        )
    print('Make QA chain for', model)
    return langchain_qa_chat.chain_qa(llm, verbose=verbose, prompt=QA_PROMPT)

In [4]:
def describe_movie(result):
    """
    Build a plain-text description of a movie from the fields present in `result`.

    Every field is optional; a sentence fragment is appended only for the keys
    that exist, in a fixed order.

    Args:
        result(dict): information about the particular movie. Keys read:
            'title', 'year', 'stars', 'directors', 'producers', 'plotLong',
            'location', 'rating', 'genres'. The values of 'stars',
            'directors', 'producers', 'location' and 'genres' are joined with
            ', ' and must therefore be iterables of strings.
    Returns:
        str: the context text sent to the LLM ("" when no known key is present)
    """
    context = ""
    if 'title' in result:
        context += f"The name of the movie is {result['title']}, "
    if 'year' in result:
        context += f"was shot in {result['year']}, "
    if 'stars' in result:
        context += f"has the actors/stars {', '.join(result['stars'])}, "
    if 'directors' in result:
        context += f"directed by {', '.join(result['directors'])}, "
    if 'producers' in result:
        context += f"produced by {', '.join(result['producers'])}. "
    if 'plotLong' in result:
        context += f"The plot of the movie is {result['plotLong']}. "
    if 'location' in result:
        # De-duplicate while keeping first-seen order; a plain set() would make
        # the order of the locations vary from run to run.
        locations = dict.fromkeys(result['location'])
        context += f"The movie was shot in the following locations: {', '.join(locations)}. "
    if 'rating' in result:
        context += f"It has rating of {result['rating']}. "
    # Guard on 'genres' itself: checking 'location' here raised KeyError for
    # movies with locations but no genres, and dropped the genres of movies
    # without locations.
    if 'genres' in result:
        context += f"The movie belongs to the genres {', '.join(result['genres'])}"

    return context

def get_docs(response, topk=5):
    """
    Convert the leading search hits into langchain Documents.

    Args:
        response(List(Dict)): search hits; each one must carry a '_source'
            dict that contains at least a 'title' key
        topk(int): number of hits, counted from the start of `response`, to convert
    Returns:
        List(Langchain Documents): one Document per hit, with the text from
        describe_movie as page_content and the movie title as metadata['source']
    """
    documents = []
    for hit in response[:topk]:
        movie = hit['_source']
        documents.append(
            Document(
                page_content=describe_movie(movie),
                metadata={'source': movie['title']},
            )
        )
    return documents

In [5]:
def os_results(chain, query, store="CUSTOM", k=5, os_query_type="With LLM to form query"):
    """
    Run `query` through langchain_qa_chat.search_and_answer and return the hits.

    Args:
        chain: not used by this function; kept so existing calls keep working
        query(str): question forwarded to search_and_answer
        store(str): forwarded as the first argument of search_and_answer
        k(int): forwarded as the `k` argument of search_and_answer
        os_query_type(str): forwarded as the `task` argument of search_and_answer
    Returns:
        list: the entries found under response['response']['hits']['hits']
    """
    response = langchain_qa_chat.search_and_answer(store, query, k=k, task=os_query_type)
    return list(response['response']["hits"]["hits"])

Analyze the results of the Jurassic Jumbo Instruct model. You can pass any of the other model names supported by `create_qa_chain` to test their results.

In [7]:
# Build the QA chain for the chosen model, then fetch the search hits for the query.
model = 'Jurassic-Jumbo-Instruct'
chain_1 = create_qa_chain(model)
query = "Movies shot in location New York with keyword christmas?"
# NOTE: os_results does not use its `chain` argument, so chain_1 has no
# influence on the hits returned here.
response = os_results(chain_1, query)

Make QA chain for Jurassic-Jumbo-Instruct
Search and Answer
Instruction: There are eight columns with names given in double quotes "title" for movies, "stars" for actors, "rating", "directors", "producers", "keywords", "location" and "genres" in \
    the opensearch/elasticsearch cluster. The columns "stars", "director", "keywords" and "producer" have arrays so use Terms query otherwise use term query. Use size 15 for all queries unless stated. For columns stars, directors, producers,location use `<column_name>.keyword` in the query instead of column_name.  Use `Terms` query for columns stars, directors, genres, producers and location. If you use `bool` query, check inside the terms query for columns stars, directors, genres, producers and location inside the bool.
   
    Use the following query template to answer the question below. There can be multiple Term or Terms queries inside the "must" array
    {"query":{"bool":{"must":[{"terms":{"<column>.keyword":["<entity>"] }}]}}, "sort"

In [8]:
# Convert the first 5 hits into langchain Documents; they are passed as
# `input_documents` to chain_2 in the cells below.
docs = get_docs(response, 5)

In [36]:
# Display the Documents built from the search hits.
docs

[Document(page_content='The name of the movie is Bell Book and Candle, was shot in 1958, has the actors/stars James Stewart, Kim Novak, Jack Lemmon, Ernie Kovacs, Hermione Gingold, Elsa Lanchester, Janice Rule, Philippe Clay, Bek Nelson, Howard McNear, directed by Richard Quine, produced by Julian Blaustein. The plot of the movie is A modern-day witch likes her neighbor but despises his fiancu00e9e, so she enchants him to love her instead.. The movie was shot in the following locations: New York, New York City, USA. It has rating of 6.9. The movie belongs to the genres Comedy, Fantasy, Romance', metadata={'source': 'Bell Book and Candle'}),
 Document(page_content='The name of the movie is Night Shift, was shot in 1982, has the actors/stars Henry Winkler, Michael Keaton, Shelley Long, Gina Hecht, Pat Corley, Bobby Di Cicco, Nita Talbot, Basil Hoffman, Tim Rossovich, Clint Howard, directed by Ron Howard, produced by Brian Grazer. The plot of the movie is A morgue attendant is talked into

### llm-2

Create a chatbot that uses context and input documents to answer a given question. 

In [38]:
# LLM for the chatbot: the same helper and config entry that create_qa_chain
# uses for 'Jurassic-Jumbo-Instruct'.
llm_2 = langchain_qa_chat.sagemaker_endpoint_ai21(config['llm']['ai21_instruct'])
# Prompt with three placeholders: {context}, {chat_history} and {human_input}.
# NOTE(review): presumably the "stuff" chain fills {context} with the text of
# the `input_documents` passed at call time — confirm against the langchain docs.
template = """You are a chatbot having a conversation with a human.
Given the following extracted parts of a long document and a question, create a final answer.

{context}

{chat_history}
(Use only the above information to answer the question. )
Human: {human_input}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input", "context"], 
    template=template
)
# memory_key matches the {chat_history} placeholder and input_key matches
# {human_input}, so earlier turns are replayed into the prompt on later calls.
# Re-running this cell creates a fresh, empty memory.
memory = ConversationBufferMemory(memory_key="chat_history", input_key="human_input")
chain_2 = load_qa_chain(llm_2, chain_type="stuff", memory=memory, prompt=prompt)

In [26]:
# Direct question about a single movie; `docs` is the only context supplied.
query = "what is the plot of Sid and Nancy?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6e3ce6fc40>


{'output_text': ' The plot of the movie is The relationship between Sid Vicious, bassist for British punk group Sex Pistols, and his girlfriend Nancy Spungen is portrayed.'}

In [27]:
# Follow-up that never names the movie: "it" has to be resolved from the
# chat_history kept in `memory`.
query = "who are the actors in it?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f106f65b0>


{'output_text': ' The actors in the movie are Gary Oldman, Chloe Webb, David Hayman, Debby Bishop, Andrew Schofield, Xander Berkeley, Perry Benson, Tony London, Sandy Baron, Sy Richardson.'}

In [28]:
# Ask for a location that the recorded answer below reports as unknown.
query = "which movies were shot in Paris?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f10786a00>


{'output_text': " I don't know."}

In [29]:
# Same location question, narrowed to one explicitly named movie.
query = "was the movie Sid and Nancy, shot in Paris?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f106ff850>


{'output_text': " I don't know."}

In [30]:
# Switch the conversation to a different movie.
query = "what is the plot of sleepless in seattle?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f106ffdf0>


{'output_text': " The plot of the movie is A recently widowed man's son calls a radio talk-show in an attempt to find his father a partner."}

In [31]:
# Follow-up using "it" after the topic switch; relies on the chat history again.
query = "where was it shot?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f106f9fa0>


{'output_text': ' The movie was shot in the following locations: New York, Chicago, Annapolis, Maryland, USA, Seattle, Illinois, New York City, Washington, Baltimore.'}

In [33]:
# Open-ended question that spans several of the supplied documents.
query = "which movies were shot in New York?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f106f6550>


{'output_text': ' The movie Sid and Nancy was shot in New York.'}

In [40]:
# Ask for an explicit number of matching movies.
query = "Name 4 movies that were shot in New York?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f10170280>


{'output_text': ' Bell Book and Candle, Night Shift, Sid and Nancy, and Sleepless in Seattle.'}

In [39]:
# Filter the supplied movies by actor.
query = "Which of these movies has actor Gary Oldman?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f0fc20a30>


{'output_text': ' Sid and Nancy'}

In [41]:
# Repeat of the earlier Paris question. The recorded answer below differs from
# the earlier "I don't know." — note that the execution counts show the cells
# were not run in notebook order, so the chat history differed between the runs.
query = "which movies were shot in Paris?"
chain_2({"input_documents": docs, "human_input": query}, return_only_outputs=True)

<botocore.response.StreamingBody object at 0x7f6f0fc200d0>


{'output_text': ' Sid and Nancy was shot in Paris.'}