In [1]:
import openai
import os

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment, then
# hand the key to the `openai` module for the direct `openai.*` calls below.
# NOTE(review): LangChain's ChatOpenAI presumably reads OPENAI_API_KEY from the
# environment on its own, so this assignment may be redundant for it - confirm.
load_dotenv()
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [2]:
# prompting
def get_completion_and_token_count(
    messages, model="gpt-3.5-turbo", temperature=0, max_tokens=500
):
    """Send a chat conversation to OpenAI and return the reply plus token usage.

    Args:
        messages: Chat messages forwarded to ``openai.ChatCompletion.create``,
            e.g. ``[{"role": "user", "content": prompt}]``.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        A ``(content, token_dict)`` tuple: the message content of the first
        choice, and a dict with the ``prompt_tokens``, ``completion_tokens``
        and ``total_tokens`` values copied from the response's ``usage`` entry.
    """
    request_args = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    completion = openai.ChatCompletion.create(**request_args)

    usage = completion["usage"]
    usage_keys = ("prompt_tokens", "completion_tokens", "total_tokens")
    token_dict = {key: usage[key] for key in usage_keys}

    return completion.choices[0].message["content"], token_dict


# Demo conversation: a system message fixing the assistant's style, followed
# by a single user request.
system_message = {
    "role": "system",
    "content": "You are an assistant who responds in the style of Dr Seuss.",
}
user_message = {
    "role": "user",
    "content": "write me a 4 sentence long poem about a happy carrot",
}
messages = [system_message, user_message]

response, token_dict = get_completion_and_token_count(messages)

# Show the reply first, then the token usage, each on its own line.
print(response, token_dict, sep="\n")

In a garden so bright, a carrot grew,
With a smile so wide, and a vibrant hue.
It danced in the breeze, so full of glee,
A happy carrot, as happy as can be!

With roots in the earth, it reached for the sky,
Its leafy greens waving, saying "hi!"
It shared its joy with all who passed by,
A happy carrot, bringing smiles to the eye!

So let us remember, in life's daily grind,
To be like the carrot, with happiness entwined.
Spread joy and laughter, wherever we go,
Just like the happy carrot, all in a row!
{'prompt_tokens': 37, 'completion_tokens': 128, 'total_tokens': 165}


In [3]:
# moderation
# Text submitted to the moderation endpoint (leading/trailing newlines and the
# space after "warhead," are part of the input).
moderation_input = (
    "\n"
    "Here's the plan.  We get the warhead, \n"
    "and we hold the world ransom...\n"
    "...FOR ONE MILLION DOLLARS!\n"
)
response = openai.Moderation.create(input=moderation_input)

# Only the first entry of "results" is inspected.
moderation_output = response["results"][0]
print(moderation_output)

{
  "flagged": false,
  "categories": {
    "sexual": false,
    "hate": false,
    "harassment": false,
    "self-harm": false,
    "sexual/minors": false,
    "hate/threatening": false,
    "violence/graphic": false,
    "self-harm/intent": false,
    "self-harm/instructions": false,
    "harassment/threatening": false,
    "violence": false
  },
  "category_scores": {
    "sexual": 8.910085e-06,
    "hate": 0.00013615482,
    "harassment": 0.0023860661,
    "self-harm": 7.5545418e-06,
    "sexual/minors": 2.20487e-07,
    "hate/threatening": 7.746158e-06,
    "violence/graphic": 0.00012008196,
    "self-harm/intent": 5.9765495e-07,
    "self-harm/instructions": 3.4945369e-09,
    "harassment/threatening": 0.0015225811,
    "violence": 0.34292138
  }
}


In [None]:
# prompting with LangChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Prompt template with two placeholders: {style} (target style) and {text}
# (the text to rewrite, wrapped in triple backticks).
template_string = (
    "Translate the text "
    "that is delimited by triple backticks "
    "into a style that is {style}. "
    "text: ```{text}```\n"
)

# Value substituted for {style}.
customer_style = (
    "American English "
    "in a calm and respectful tone\n"
)

# Value substituted for {text}; starts and ends with a newline.
customer_email = (
    "\n"
    "Arrr, I be fuming that me blender lid "
    "flew off and splattered me kitchen walls "
    "with smoothie! And to make matters worse, "
    "the warranty don't cover the cost of "
    "cleaning up me kitchen. I need yer help "
    "right now, matey!\n"
)

In [None]:
# Chat model with temperature 0.0 and a prompt template built from the
# template string defined above.
chat = ChatOpenAI(temperature=0.0)
prompt_template = ChatPromptTemplate.from_template(template_string)

# Fill the {style} and {text} placeholders, then send the resulting messages
# to the chat model.
template_values = {"style": customer_style, "text": customer_email}
customer_messages = prompt_template.format_messages(**template_values)
customer_response = chat(customer_messages)

In [None]:
# Dump the intermediate objects one by one; each dump is followed by a
# "__N__" separator line (numbering starts at 2), and the model's answer is
# printed last without a separator.
first_prompt = prompt_template.messages[0].prompt
first_message = customer_messages[0]
inspected_values = (
    first_prompt,
    first_prompt.input_variables,
    type(customer_messages),
    type(first_message),
    first_message,
    first_message.content,
)
for marker, value in enumerate(inspected_values, start=2):
    print(value, end=f"\n__{marker}__\n")
print(customer_response.content)

In [None]:
# Prompting with LangChain - Outputting dictionaries / JSON
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

# One ResponseSchema per field to extract from a review.
# The descriptions are built with implicit string concatenation: a backslash
# continuation inside an ordinary quoted string would pull the next line's
# leading indentation into the description text.
gift_schema = ResponseSchema(
    name="gift",
    description=(
        "Was the item purchased as a gift for someone else? "
        "Answer True if yes, False if not or unknown."
    ),
)
delivery_days_schema = ResponseSchema(
    name="delivery_days",
    description=(
        "How many days did it take for the product to arrive? "
        "If this information is not found, output -1."
    ),
)
price_value_schema = ResponseSchema(
    name="price_value",
    description=(
        "Extract any sentences about the value or price, "
        "and output them as a comma separated Python list."
    ),
)

# Order matters only for presentation: gift, delivery_days, price_value.
response_schemas = [gift_schema,
                    delivery_days_schema,
                    price_value_schema]

# Extraction prompt with two placeholders: {text} (the review) and
# {format_instructions} (filled from the output parser).
# NOTE(review): the field descriptions here repeat the ResponseSchema
# descriptions; the parser's format instructions presumably already carry
# them, so one of the two copies may be redundant - confirm before removing.
# Each backslash continuation is preceded by a space so that the words on
# either side of the line break are not fused together.
review_template_2 = """\
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? \
Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product \
to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price, \
and output them as a comma separated Python list.

text: {text}

{format_instructions}
"""

# Sample review used as the {text} input of the extraction prompt.
# Every backslash continuation is preceded by a space so the sentence reads
# "four settings: candle blower" instead of "settings:candle blower".
customer_review = """\
This leaf blower is pretty amazing.  It has four settings: \
candle blower, gentle breeze, windy city, and tornado. \
It arrived in two days, just in time for my wife's \
anniversary present. \
I think my wife liked it so much she was speechless. \
So far I've been the only one using it, and I've been \
using it every other morning to clear the leaves on our lawn. \
It's slightly more expensive than the other leaf blowers \
out there, but I think it's worth it for the extra features.
"""

In [None]:
# Build the parser from the response schemas and ask it for the formatting
# instructions that get injected into the prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

# Fill the review template, send it to the chat model, and parse the reply
# text with the same parser.
prompt = ChatPromptTemplate.from_template(template=review_template_2)
prompt_values = {
    "text": customer_review,
    "format_instructions": format_instructions,
}
messages = prompt.format_messages(**prompt_values)
response = chat(messages)
output_dict = output_parser.parse(response.content)

In [None]:
# Dump the intermediate values one by one; each dump is followed by a
# "__N__" separator line (numbering starts at 2). The parsed dict is printed
# once more at the end without a separator.
inspected_values = (
    format_instructions,
    messages[0].content,
    response.content,
    output_dict,
    type(output_dict),
    output_dict.get('delivery_days'),
)
for marker, value in enumerate(inspected_values, start=2):
    print(value, end=f"\n__{marker}__\n")
print(output_dict)

In [None]:
# Agents with tables
from langchain.agents import create_csv_agent
# from langchain.agents import create_pandas_dataframe_agent  # might be the same
# from langchain.llms import OpenAI  # without llms

# # LangChain agents
# llm = OpenAI(temperature=0)
# Build a CSV agent on top of the `chat` model created in an earlier cell.
# NOTE(review): no CSV path is passed here, while the commented-out variant
# elsewhere in this notebook calls create_csv_agent(llm, filepath, verbose=True);
# this call presumably fails without a path - confirm and supply the intended
# file.
agent = create_csv_agent(chat, verbose=True)
# agent = create_pandas_dataframe_agent(llm, df, verbose=True)
# prompt = "Make the values more readable, store the result into 'listening.csv'"
# NOTE(review): the agent is run on the content of customer_messages[0] (the
# style-translation prompt built earlier), not on the table prompt sketched in
# the comment above - verify this is intended.
agent.run(customer_messages[0].content)

In [None]:
# Dealing with tables
import pandas as pd
import utils

# Getting csv + df
# Fetch the requested worksheet(s) via the local `utils` helper; only the
# second element of its return value is used, and of that only the first item.
# NOTE(review): assumes that item is a pandas DataFrame (it is written with
# .to_csv) - confirm against utils.get_worksheets.
worksheet_names = ("listening occurences",)
_, ws_dfs = utils.get_worksheets("Korea - Vocabulary (raw)", worksheet_names)
ws_df = ws_dfs[0]

# Round-trip through 'in.csv' (no index column written) and read it back
# with keep_default_na=False.
raw_csv_path = "in.csv"
ws_df.to_csv(raw_csv_path, index=False)
csv_df = pd.read_csv(raw_csv_path, keep_default_na=False)

In [None]:
# Merging occurrences: collapse all rows that share the same 'Word' into one
# row, ordered by where each word first appears in csv_df, and save the result.
def word_agg_f(values):
    """Join the non-null values of one group into a space-separated string."""
    return ' '.join(str(val) for val in values if pd.notnull(val))


# reset_index() exposes the row labels as an 'index' column so they take part
# in the aggregation below.
df = csv_df.reset_index()
grouped_df = df.groupby(['Word']).agg(word_agg_f)

# After aggregation 'index' holds the group's joined row labels as text
# (e.g. "3 17 42"); keep only the first one, as an integer, to sort on.
first_positions = grouped_df["index"].str.split().str[0].astype(int)
grouped_df["index"] = first_positions

# Order by first appearance, then drop the helper column before writing.
sorted_df = grouped_df.sort_values(by='index').drop(['index'], axis=1)
sorted_df.to_csv('listening.csv')

In [None]:
# Agents with tables
from langchain.agents import create_csv_agent
from langchain.agents import create_pandas_dataframe_agent  # might be the same
from langchain.llms import OpenAI  # without llms

# # LangChain agents
# llm = OpenAI(temperature=0)
# # agent = create_csv_agent(llm, filepath, verbose=True)
# agent = create_pandas_dataframe_agent(llm, df, verbose=True)
# prompt = "Make the values more readable, store the result into 'listening.csv'"
# agent.run(prompt)

In [None]:
# RetrievalQA (not for me for now)
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chains import RetrievalQA

In [None]:
# Load 'out.csv' as documents.
# NOTE(review): no cell visible in this notebook writes 'out.csv' - confirm
# where the file comes from.
loader = CSVLoader(file_path="out.csv", encoding="utf-8")
data = loader.load()

# vectorstore_cls is passed explicitly; per the original author's note,
# omitting it raised an error.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
docsearch = index_creator.from_loaders([loader])

# "stuff" chain over a retriever backed by the index's vector store; queries
# are supplied under the "question" key.
retriever = docsearch.vectorstore.as_retriever()
chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    input_key="question",
)

def ask_question(query):
    """Run `query` through the module-level `chain` and return its answer.

    Args:
        query: Question passed to the chain under the "question" key.

    Returns:
        The value stored under the "result" key of the chain's output.
    """
    chain_output = chain({"question": query})
    answer = chain_output['result']
    return answer

In [None]:
# Two sample questions; only the second one is actually asked.
# NOTE(review): query2 asks for a file to be created, but ask_question only
# returns the chain's "result" value - presumably no file is written; confirm.
query = "Get me country with the highest GDP."
query2 = "Create 'out22.csv', where will be 3 countries with the biggest happiness index"

answer = ask_question(query2)
print(answer)