# Based on the LangChain for LLM Application Development short course on deeplearning.ai

In [1]:
import openai
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# set api key and organization
openai.api_key = os.environ['OPENAI_API_KEY']
openai.organization = os.environ['OPENAI_ORG']

## Models, prompts and parsers

### Prompt completion
Here is a small example to translate the customer text email written in Hindi in American English

In [2]:
def get_completion(prompt, model='gpt-3.5-turbo'):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0
    )
    
    return response.choices[0].message["content"]

In [3]:
customer_email = """Mujhe ye smjh mein nhi aa rha ki isko kaise shi karun. \ 
Mene bht baar koshish kari lekin ye shi nhi ho rha h"""

style = """American English"""

prompt = f"""Translate the text that is delimited by triple backticks into a \
style that is {style}.
Text: ```{customer_email}```"""

print(prompt)

Translate the text that is delimited by triple backticks into a style that is American English.
Text: ```Mujhe ye smjh mein nhi aa rha ki isko kaise shi karun. \ 
Mene bht baar koshish kari lekin ye shi nhi ho rha h```


In [4]:
response = get_completion(prompt)

In [5]:
print(response)

"I'm not understanding how to fix this. I've tried many times but it's not working."


### Chat models and templates
Use prompt templates to reuse long and detailed prompts

In [6]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

In [7]:
# set temperature to 0 for reproducing responses
chat = ChatOpenAI(temperature=0.0, model='gpt-3.5-turbo')

In [8]:
template_string = """Translate the text \
that is delimited by triple backticks \
into a style that is {style}. \
text: ```{text}```
"""

In [9]:
# create a template
prompt_template = ChatPromptTemplate.from_template(template_string)
prompt_template.messages[0].prompt

PromptTemplate(input_variables=['style', 'text'], output_parser=None, partial_variables={}, template='Translate the text that is delimited by triple backticks into a style that is {style}. text: ```{text}```\n', template_format='f-string', validate_template=True)

In [10]:
# any and all input variables will be present
# in the prompt template
prompt_template.messages[0].prompt.input_variables

['style', 'text']

In [11]:
customer_style = """Hoch Deutsch"""

customer_email = """Mujhe ye smjh mein nhi aa rha \
ki isko kaise shi karun. \ 
Mene bht baar koshish kari lekin ye shi nhi ho rha h"""

In [12]:
customer_messages = prompt_template.format_messages(
    style=customer_style,
    text=customer_email
)

In [13]:
type(customer_messages), type(customer_messages[0])

(list, langchain.schema.messages.HumanMessage)

In [14]:
customer_messages[0]

HumanMessage(content='Translate the text that is delimited by triple backticks into a style that is Hoch Deutsch. text: ```Mujhe ye smjh mein nhi aa rha ki isko kaise shi karun. \\ \nMene bht baar koshish kari lekin ye shi nhi ho rha h```\n', additional_kwargs={}, example=False)

In [15]:
# get output from the model
customer_response = chat(customer_messages)
customer_response.content

'"Mujhe fällt es schwer zu verstehen, wie ich das richtig machen soll. Ich habe es viele Male versucht, aber es funktioniert nicht richtig."'

In [16]:
service_style = """Hindi"""
service_style_hinglish = """Hinglish"""

service_reply = """Ja, ich kann Ihnen dabei helfen. \
Bitte nennen Sie mir Ihren Namen und Ihre Kundennummer."""

In [17]:
service_messages = prompt_template.format_messages(
    style=service_style,
    text=service_reply
)

In [18]:
service_response = chat(service_messages)
service_response.content

'```जी हां, मैं आपकी मदद कर सकता हूँ। कृपया मुझे अपना नाम और ग्राहक संख्या बताएं।```'

In [19]:
service_messages_hinglish = prompt_template.format_messages(
    style=service_style_hinglish,
    text=service_reply
)

In [20]:
service_response_hinglish = chat(service_messages_hinglish)
service_response_hinglish.content

'```Haan, main aapki madad kar sakta hoon. Apna naam aur customer number mujhe bataiye, please.```'

### Parse output 
Provide formatting instructions of the output in the input prompt and then use a parser to parse the output. This is useful for downstream processing of the output.

In [21]:
user_request = """Give me the number of incoming and outgoing synapses \
of all the MSN cells"""

request_template = """For the text delimited by backticks, extract the \
following information:

cell_type: Which type of cell is mentioned?
property: Which property of the cell is requested? Is it organelles \
e.g mitochondria or vesicles, synapses, myelin, subcompartments e.g axon, dendrite, soma etc.
task: What to do with the requested cell and its property?
extra: Any other information e.g a partner cell, quantity, \
dataset version, etc.

Format the output as JSON with the following keys:
cell_type
property
task
extra

text: ```{text}```
"""

In [22]:
prompt_template = ChatPromptTemplate.from_template(request_template)
prompt_template

ChatPromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['text'], output_parser=None, partial_variables={}, template='For the text delimited by backticks, extract the following information:\n\ncell_type: Which type of cell is mentioned?\nproperty: Which property of the cell is requested? Is it organelles e.g mitochondria or vesicles, synapses, myelin, subcompartments e.g axon, dendrite, soma etc.\ntask: What to do with the requested cell and its property?\nextra: Any other information e.g a partner cell, quantity, dataset version, etc.\n\nFormat the output as JSON with the following keys:\ncell_type\nproperty\ntask\nextra\n\ntext: ```{text}```\n', template_format='f-string', validate_template=True), additional_kwargs={})])

In [23]:
messages = prompt_template.format_messages(text=user_request)
print(messages[0].content)

For the text delimited by backticks, extract the following information:

cell_type: Which type of cell is mentioned?
property: Which property of the cell is requested? Is it organelles e.g mitochondria or vesicles, synapses, myelin, subcompartments e.g axon, dendrite, soma etc.
task: What to do with the requested cell and its property?
extra: Any other information e.g a partner cell, quantity, dataset version, etc.

Format the output as JSON with the following keys:
cell_type
property
task
extra

text: ```Give me the number of incoming and outgoing synapses of all the MSN cells```



In [24]:
response = chat(messages)
print(response.content)

{
  "cell_type": "MSN",
  "property": "synapses",
  "task": "number of incoming and outgoing",
  "extra": "all"
}


In [25]:
type(response.content)

str

Though the output response looks like JSON, it is actually a string hence the different fields
cannot be accessed directly. We need to specify a parsing scheme to return it as a JSON/dict object.

In [26]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [27]:
celltype_schema = ResponseSchema(name='cell_type', \
                                 description='Which type of cell is mentioned?')
property_schema = ResponseSchema(name='property', \
                                 description='Which property of the cell is requested?')
task_schema = ResponseSchema(name='task',
                             description='What to do with the requested cell and its property?')
extra_schema = ResponseSchema(name='extra',
                              description='Any other information')

schemas = [celltype_schema, property_schema, task_schema, extra_schema]
# StructuredOutputParser only accepts keyword arguments, passing 
# as a positional argument will throw an error
output_parser = StructuredOutputParser(response_schemas=schemas)
output_parser

StructuredOutputParser(response_schemas=[ResponseSchema(name='cell_type', description='Which type of cell is mentioned?', type='string'), ResponseSchema(name='property', description='Which property of the cell is requested?', type='string'), ResponseSchema(name='task', description='What to do with the requested cell and its property?', type='string'), ResponseSchema(name='extra', description='Any other information', type='string')])

In [28]:
format_instructions = output_parser.get_format_instructions()

In [29]:
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"cell_type": string  // Which type of cell is mentioned?
	"property": string  // Which property of the cell is requested?
	"task": string  // What to do with the requested cell and its property?
	"extra": string  // Any other information
}
```


In [30]:
request_template_2 = """For the text delimited by backticks, extract the \
following information:

cell_type: Which type of cell is mentioned?
property: Which property of the cell is requested? Is it organelles \
e.g mitochondria or vesicles, synapses, myelin, subcompartments e.g axon, dendrite, soma etc.
task: What to do with the requested cell and its property?
extra: Any other information e.g a partner cell, quantity, \
dataset version, etc.

{format_instructions}

text: ```{text}```
"""

In [31]:
prompt_template_2 = ChatPromptTemplate.from_template(request_template_2)
# pass all the prompt input variables as arguments
messages = prompt_template_2.format_messages(text=user_request, \
                                             format_instructions=format_instructions)
print(messages[0].content)

For the text delimited by backticks, extract the following information:

cell_type: Which type of cell is mentioned?
property: Which property of the cell is requested? Is it organelles e.g mitochondria or vesicles, synapses, myelin, subcompartments e.g axon, dendrite, soma etc.
task: What to do with the requested cell and its property?
extra: Any other information e.g a partner cell, quantity, dataset version, etc.

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"cell_type": string  // Which type of cell is mentioned?
	"property": string  // Which property of the cell is requested?
	"task": string  // What to do with the requested cell and its property?
	"extra": string  // Any other information
}
```

text: ```Give me the number of incoming and outgoing synapses of all the MSN cells```



In [32]:
response_2 = chat(messages)
print(response_2.content)

```json
{
	"cell_type": "MSN cells",
	"property": "incoming and outgoing synapses",
	"task": "Give me the number of",
	"extra": "all"
}
```


In [33]:
type(response_2.content)  # still str but now we can use the output parser we created

str

In [34]:
output_dict = output_parser.parse(response_2.content)

In [35]:
print(output_dict)
output_dict['cell_type'], output_dict['task']

{'cell_type': 'MSN cells', 'property': 'incoming and outgoing synapses', 'task': 'Give me the number of', 'extra': 'all'}


('MSN cells', 'Give me the number of')