In [None]:
from decouple import config
from langchain_community.utilities import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
from sqlalchemy import create_engine, MetaData
from sqlalchemy.orm import sessionmaker
from operator import itemgetter

from langchain.chains import create_sql_query_chain, LLMChain
from langchain_openai import ChatOpenAI, OpenAI
from langchain_mistralai import ChatMistralAI
from langchain_core.prompts import PromptTemplate, FewShotPromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser, PydanticToolsParser
from langchain_core.utils.function_calling import convert_to_openai_tool


import psycopg2
import json
import boto3
from typing import Literal, Union, Optional, List
from pydantic.v1 import BaseModel, Field

In [None]:
# --- S3 configuration --------------------------------------------------------
# Bucket and object key of the JSON file read/written by the welcomed-users
# helpers below.
BUCKET_NAME = "chichewa-ai"
USERS_FILE_KEY = 'chichewa-ai/phoso-ai-files/welcomed_users.json'

# Credentials are looked up through decouple's `config`, not hard-coded.
AWS_ACCESS_KEY_ID = config("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = config("AWS_SECRET_ACCESS_KEY")

# One session and one S3 client shared by the whole notebook.
SESSION = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
S3 = SESSION.client('s3')


In [None]:
# Confirm the secret was loaded WITHOUT echoing it: a bare
# `AWS_SECRET_ACCESS_KEY` expression would write the credential into the
# notebook's saved cell output.
bool(AWS_SECRET_ACCESS_KEY)

In [None]:
def load_welcomed_users():
    """
    Fetch the welcomed-users registry from S3.

    Reads the object ``USERS_FILE_KEY`` from ``BUCKET_NAME`` through the shared
    ``S3`` client, decodes the body as UTF-8 and parses it as JSON.

    Returns
    -------
    dict
        The parsed JSON document (presumably phone number -> welcomed flag;
        confirm against the writer). An empty dict is returned when the
        object does not exist.

    Raises
    ------
    Exception
        Only ``NoSuchKey`` is handled here; any other S3 error, and any
        decoding/JSON error, propagates to the caller.
    """
    try:
        response = S3.get_object(Bucket=BUCKET_NAME, Key=USERS_FILE_KEY)
    except S3.exceptions.NoSuchKey:
        # Nothing has been saved under this key yet.
        return {}

    raw_body = response['Body'].read()
    return json.loads(raw_body.decode('utf-8'))

In [None]:
def save_welcomed_users(welcomed_users):
    """
    Persist the welcomed-users registry to S3.

    Parameters
    ----------
    welcomed_users : dict
        JSON-serialisable mapping to store (the loader's counterpart).

    Notes
    -----
    The mapping is serialised with ``json.dumps`` and written to
    ``USERS_FILE_KEY`` in ``BUCKET_NAME`` via the shared ``S3`` client,
    replacing whatever object is stored there. Errors raised by the client
    or by serialisation are not caught.
    """
    S3.put_object(
        Bucket=BUCKET_NAME,
        Key=USERS_FILE_KEY,
        Body=json.dumps(welcomed_users),
    )


In [None]:
from langchain import LLMChain, PromptTemplate
from langchain.llms import OpenAI

def generate_welcome_message(language):
    """
    Ask a completion model for a welcome message in the given language.

    Parameters
    ----------
    language : str
        ``'Chichewa'`` selects the Chichewa prompt; any other value falls
        back to the English prompt.

    Returns
    -------
    str
        The text produced by the chain.
    """
    chichewa_prompt = """
        You are a helpful assistant who speaks Chichewa. Generate a warm welcome message in Chichewa.
        
        Message:
        """
    english_prompt = """
        You are a helpful assistant who speaks English. Generate a warm welcome message in English.
        
        Message:
        """

    # English is the default for every language other than Chichewa.
    prompt_text = chichewa_prompt if language == 'Chichewa' else english_prompt

    completion_model = OpenAI(temperature=0.7)
    chain = LLMChain(
        llm=completion_model,
        prompt=PromptTemplate.from_template(prompt_text),
    )

    # The prompt has no input variables, hence the empty mapping.
    return chain.run({})


In [None]:
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

# Chat model shared by the welcome-message chain (sampling temperature 0.7).
llm = ChatOpenAI(temperature=0.7)

# Few-shot prompt: the model is shown one English and one Chichewa exchange,
# then asked to detect the language of `text_to_detect` and answer in it.
welcome_message_prompt = PromptTemplate.from_template(
    template="""
    You are a helpful assistant who interacts with users in their preferred language. 
    Your task is to greet the user in their language and briefly inform them about the type of information you can provide.

    Specifically, you provide the following types of information:
    - Prices for food and other agricultural commodities (e.g., Maize, rice, soy beans).
    - Agricultural production details (e.g., Maize, Tobacco).
    - The situation of food security (e.g., how many people are lacking food).
    - All information is specific to Malawi.

    Greet the user and let them know that they can ask questions about these topics. Please generate the welcome message in the user's language.

    Here are some examples:
    
    Example 1:
    Text: "Hello, how are you?"
    Language: English
    Welcome Message: "Welcome! I can help you with information about food prices, agricultural production, and food security in Malawi. How can I assist you today?"

    Example 2:
    Text: "Moni, muli bwanji?"
    Language: Chichewa
    Welcome Message: "Takulandirani! Ndikhoza kukuthandizani ndi zambiri zokhudza mitengo ya chakudya, ulimi, ndi chitetezo cha chakudya ku Malawi. Kodi ndingakuthandizeni bwanji lero?"

    Now, based on the user's input, detect the language and generate a suitable welcome message.
    
    Text: "{text_to_detect}"
    Language:
    Welcome Message:
    """
)

# Bind prompt and model together; invoked by generate_welcome_message below.
welcome_message_chain = LLMChain(prompt=welcome_message_prompt, llm=llm)

def generate_welcome_message(user_message):
    """Run the module-level welcome chain on ``user_message`` and return its text output."""
    chain_inputs = {"text_to_detect": user_message}
    return welcome_message_chain.run(chain_inputs)

# Example usage: send a Chichewa greeting through the chain and print the
# generated welcome message.
user_message = "Moni, muli bwanji?"
welcome_message = generate_welcome_message(user_message)
print(welcome_message)


In [None]:
# Pull the current welcomed-users registry from S3.
users = load_welcomed_users()

In [None]:
# Mark two numbers as already welcomed.
# NOTE(review): these look like real phone numbers (PII) stored in the
# notebook — consider placeholders before sharing.
users['+12023726721'] = True
users['+265999392358'] = True


In [None]:
# Write the updated registry back to S3 (replaces the stored object).
save_welcomed_users(welcomed_users=users)

In [None]:
# Reload from S3 to confirm the save round-trips.
welcomed_users = load_welcomed_users()

In [None]:
# Display the reloaded registry.
# NOTE(review): the rendered output will contain the stored keys (phone
# numbers) — clear it before committing.
welcomed_users

In [None]:
def detect_language(text, llm=None):
    """
    Identify the language of ``text`` with a few-shot LLM prompt.

    Parameters
    ----------
    text : str
        The text whose language should be detected.
    llm : optional
        Chat model (any LCEL-compatible runnable) to use. When omitted, a
        ``gpt-3.5-turbo`` ``ChatOpenAI`` with temperature 0 is created.

    Returns
    -------
    str
        The model's answer (expected to be a language name such as
        "English" or "Chichewa") with surrounding whitespace removed.
    """
    language_detection_prompt = PromptTemplate.from_template(
    """
    You are a language detection expert. Your task is to identify the language of the given text accurately.
    Respond with only the name of the language (e.g., "English", "Chichewa", "Spanish", etc.).

    Here are some examples:
    
    Example 1:
    Text: "Hello, how are you?"
    Language: English
    
    Example 2:
    Text: "Moni, muli bwanji?"
    Language: Chichewa
    
    Example 3:
    Text: "ndikuti kukupezeka nyemba zambiri"
    Language: Chichewa
    
    Now, identify the language for the following text:
    
    Text: "{text_to_detect}"
    Language:
    """)

    if llm is None:
        # Look the key up at call time. The global OPENAI_API_KEY is only
        # assigned in a later cell, so referencing it here raised NameError
        # when the notebook was run top-to-bottom on a fresh kernel.
        llm = ChatOpenAI(
            model="gpt-3.5-turbo",
            temperature=0,
            openai_api_key=config("OPENAI_API_KEY"),
        )

    # prompt -> model -> plain string
    language_detection_chain = language_detection_prompt | llm | StrOutputParser()

    detected_language = language_detection_chain.invoke({"text_to_detect": text})

    # Strip stray whitespace/newlines so callers can compare against a bare
    # language name.
    return detected_language.strip()

In [None]:
def generate_welcome_message(user_message, llm=None):
    """
    Generate a welcome message in the same language as ``user_message``.

    Parameters
    ----------
    user_message : str
        The user's first message; the model mirrors its language.
    llm : optional
        Chat model (any LCEL-compatible runnable) to use. When omitted, a
        default ``ChatOpenAI`` with temperature 0.7 is created.

    Returns
    -------
    str
        The raw text produced by the model.
    """
    # The few-shot examples are what the model imitates, so they must be
    # well-formed: the English example used to close its quote after the
    # final sentence instead of after the sample question, and the Chichewa
    # example repeated the same sample question twice with nested double
    # quotes.
    welcome_message_prompt = PromptTemplate.from_template(
    """
    You are a helpful assistant who interacts with users in their preferred language.
    Your task is to greet the user in their language and briefly inform them about the type of information you can provide.

    Specifically, you provide the following types of information:
    - Prices for food and other agricultural commodities (e.g., Maize, rice, soy beans).
    - Agricultural production details (e.g., Maize, Tobacco).
    - The situation of food security (e.g., how many people are lacking food).
    - All information is specific to Malawi.

    Greet the user and let them know that they can ask questions about these topics.

    Here are some examples:

    Example 1:
    Text: "Hello, how are you?"
    Welcome Message: "Welcome! I can help you with information about food prices, agricultural production, and food security in Malawi. For example, you can ask: 'What is the current price of maize?' or 'How much maize was produced last year?' How can I assist you today?"

    Example 2:
    Text: "Moni, muli bwanji?"
    Welcome Message: "Takulandirani! Ndikhoza kukuthandizani ndi zambiri zokhudzana ndi mitengo ya chakudya, zokolola, zaulimi komanso zokhudzana ndi zanjala mmene ilili ku Malawi. Mwachitsanzo mutha kufunsa kuti: 'Kodi chimanga chili pa bwanji ku Kasungu?' kapena 'Kodi ndikuti kunakololedwa mtedza wambiri?'. Kodi ndingakuthandizeni bwanji lero?"

    Now, based on the user's input, generate a suitable welcome message in the same language.

    Text: "{text_to_detect}"
    Welcome Message:
    Example Questions:
    1. 
    2. 
    """)

    if llm is None:
        # Same key lookup as detect_language, so both helpers work from a
        # .env-only setup and do not depend on a later config cell.
        llm = ChatOpenAI(temperature=0.7, openai_api_key=config("OPENAI_API_KEY"))

    # prompt -> model -> plain string
    welcome_message_chain = welcome_message_prompt | llm | StrOutputParser()

    return welcome_message_chain.invoke({"text_to_detect": user_message})


In [None]:
# Sample first messages in English and Chichewa for exercising the
# welcome-message prompt.
user_question_en = "Whats the price of Maize"
user_question_ny = "Kodi ndi boma liti anakolola chimanga chambiri pakati pa Lilongwe kapena Kasungu?"
# Only the English sample is sent here; the result is stored, not displayed.
response = generate_welcome_message(user_message=user_question_en)

In [None]:
# Scratch check of a "has this sender been welcomed?" membership test.
# Note: this rebinds `users`, replacing the registry loaded from S3 earlier.
users = {"whatsapp:+12022948588": True}
number = "whatsapp:+12023923333"
number2 = "whatsapp:+12022948588"

# number2 is a key of `users`, so the branch is skipped and nothing prints.
if number2 not in users:
    print("Y")


In [None]:
from utils import translate_text_openai, detect_language_with_langchain

In [None]:
# Translate a Chichewa question to English with the helper imported from
# `utils` (three-argument form).
user_question = "Kodi ndi boma liti anakolola chimanga chambiri pakati pa Lilongwe kapena Kasungu?"
response = translate_text_openai(text=user_question, 
                                 source_language="Chichewa", target_language="English")

In [1]:
import logging
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Minimum level per noisy logger: WARNING hides INFO chatter from the HTTP
# and LLM client libraries; langsmith.client is raised to ERROR so its
# warnings are hidden as well.
for _logger_name, _min_level in (
    ("httpx", logging.WARNING),
    ("openai", logging.WARNING),
    ("langchain", logging.WARNING),
    ("langsmith.client", logging.ERROR),
):
    logging.getLogger(_logger_name).setLevel(_min_level)

# Do not leave the loop variables behind in the notebook namespace.
del _logger_name, _min_level

from sql_chain import process_sql_query

In [None]:
# Smoke-test questions for the SQL chain, in English and Chichewa.
# (The original line read `questions = questions = [...]`, a duplicated
# assignment target.)
questions = [
    "What is the price of Maize in Rumphi",
    "Where can I find the cheapest maize?",
    "Which district harvested the most beans?",
    "How much is Maize in Zomba?",
    "Which district produced more Tobacco, Mchinji or Kasungu?",
    "Where can I get bananas?",
    "Kodi chimanga chotchipa ndingachipeze kuti?",
    "Ndi boma liti komwe anakolola nyemba zambiri?",
    "Ku Zomba chimanga akugulitsa pa bwanji?",
    "Kodi ndi boma liti anakolola chimanga chambiri pakati pa Lilongwe kapena Kasungu?",
    "Ndikuti ndingapeze mpunga wambiri?",
]

# Run every question through the chain and print question/answer pairs.
for q in questions:
    print()
    print("QUESTION:", q)
    response = process_sql_query(q)
    print("LLM Response:", response)

In [None]:
from utils import translate_text_openai

In [None]:
# English -> Chichewa translation of a sample answer, using the helper
# imported from `utils`; the result is shown as the cell output.
translate_text_openai("You can find a lot of rice in Karonga, Nkhotakota, Nkhata, Salima, and Dedza.", 
                                                    source_language="English",
                                                    target_language="Chichewa")

In [None]:
import json

In [None]:
def load_translation_examples(file_path, source_language, target_language):
    """
    Load the few-shot translation examples for one language pair.

    Parameters
    ----------
    file_path : str
        Path to a JSON file whose top-level object maps
        ``"<source>-<target>"`` keys to example collections.
    source_language : str
        The source language (e.g., "Chichewa").
    target_language : str
        The target language (e.g., "English").

    Returns
    -------
    list
        Whatever is stored under the ``"<source>-<target>"`` key
        (expected to be a list of examples).

    Raises
    ------
    ValueError
        If the file has no entry for the requested language pair.
    """
    # JSON is UTF-8 by specification; without an explicit encoding `open`
    # uses the platform default, which can mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as file:
        examples = json.load(file)

    key = f"{source_language}-{target_language}"
    if key not in examples:
        raise ValueError(f"Translation examples for {source_language} to {target_language} not found.")
    return examples[key]

In [None]:
# Load the Chichewa -> English examples from the local JSON file.
ex = load_translation_examples(file_path="translation_examples.json", 
                          source_language="Chichewa", target_language="English")

In [None]:
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI

def translate_text_openai(text, source_language, target_language, examples=None):
    """
    Translate ``text`` with an LLM, optionally guided by few-shot examples.

    Parameters
    ----------
    text : str
        The text to be translated.
    source_language : str
        The source language of the text.
    target_language : str
        The language into which the text should be translated.
    examples : list, optional
        Few-shot examples; each item must be indexable by both
        ``source_language`` and ``target_language``. Defaults to no
        examples, so the three-argument call form keeps working.

    Returns
    -------
    str
        The translated text as returned by the chain.
    """
    # Render the example pairs up front; a missing/empty list yields "".
    example_prompts = "\n".join(
        f'{source_language}: "{pair[source_language]}"\n{target_language}: "{pair[target_language]}"'
        for pair in (examples or [])
    )

    # Every dynamic piece is a template VARIABLE. The previous version
    # f-string-interpolated the examples into the template text itself, so
    # a literal "{" or "}" inside an example was parsed by PromptTemplate
    # as a placeholder and broke the prompt.
    prompt_template = PromptTemplate.from_template(
        """
        You are a professional translator who specializes in translating text from {source_language} to {target_language}.
        Given the following examples, translate the provided text.

        Examples:
        {example_prompts}

        Now, translate the following:

        {source_language}: "{text}"
        {target_language}:
        """
    )

    # Initialize the chat-based model
    llm = ChatOpenAI(temperature=0.7, model="gpt-4o")

    # Create the LLMChain for translation
    translation_chain = LLMChain(llm=llm, prompt=prompt_template)

    # Perform the translation
    return translation_chain.run(
        {
            "text": text,
            "source_language": source_language,
            "target_language": target_language,
            "example_prompts": example_prompts,
        }
    )


In [None]:
# Sample inputs: an English answer and a Chichewa question.
ex1 = "You can find a lot of beans in Karonga, Nkhotakota, Nkhata, Salima, and Dedza."
ex2 = "Ndi boma liti komwe anakolola nyemba zambiri?"

In [None]:
# Example usage: load the English -> Chichewa examples and translate `ex1`
# with the notebook-local, four-argument translate_text_openai.
file_path = 'translation_examples.json'
source_language = "English"
target_language = 'Chichewa'
examples = load_translation_examples(file_path, source_language, target_language)
translated_text = translate_text_openai(
    text=ex1, 
    source_language=source_language, 
    target_language=target_language,
    examples=examples
)
print("Translated Question:==>", translated_text)


In [None]:


# By this point `translate_text_openai` refers to the notebook-local
# definition, which takes the few-shot examples as a fourth argument; the
# original three-argument call raised TypeError. Load the matching examples
# inline so the cell does not depend on leftover variables.
translate_text_openai(
    "You can find a lot of rice in Karonga, Nkhotakota, Nkhata, Salima, and Dedza.",
    source_language="English",
    target_language="Chichewa",
    examples=load_translation_examples(
        "translation_examples.json", "English", "Chichewa"
    ),
)

In [None]:
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI

def translate_to_chichewa(text):
    """
    Translate an English sentence into Chichewa with a two-example prompt.

    Parameters
    ----------
    text : str
        English sentence to translate.

    Returns
    -------
    str
        The text produced by the chain.
    """
    few_shot_template = """
        You are a professional translator who specializes in translating English sentences to Chichewa. 
        Translate the following English sentence into Chichewa, ensuring the translation is accurate and maintains the original meaning.

        Example:
        - English: "I love eating nsima."
        - Chichewa: "Ndimakonda kudya nsima."
        - English: "Where can I find cheap beans?."
        - Chichewa: "Nyemba zotchipa zikupezeka kuti?."

        Now, translate the following sentence:
        - English: "{text}"
        - Chichewa:
        """

    # gpt-4 chat model, sampling temperature 0.7.
    chat_model = ChatOpenAI(model='gpt-4', temperature=0.7)
    chain = LLMChain(
        prompt=PromptTemplate.from_template(few_shot_template),
        llm=chat_model,
    )
    return chain.run({"text": text})

# Example usage: translate one English question and print the result.
english_sentence = "where can I get beans?"
chichewa_translation = translate_to_chichewa(english_sentence)
print(chichewa_translation)


In [None]:
# Rebind the notebook-level `llm` to a gpt-4 chat model (temperature 0.7).
llm = ChatOpenAI(temperature=0.7, model='gpt-4')

In [None]:
# Show which model name the client was configured with.
llm.model_name

In [None]:
# Language detection via the helper imported from `utils`.
quest_lan = detect_language_with_langchain(text='How are you?')

In [None]:
# Language detection via the notebook-local detect_language (default model);
# the detected language is shown as the cell output.
detect_language(text="Kodi ndi boma liti anakolola chimanga chambiri pakati pa Lilongwe kapena Kasungu?")

In [None]:
from urllib.parse import quote_plus

# --- API keys and database settings (secrets come from decouple's config) ---
OPENAI_API_KEY = config("OPENAI_API_KEY")
MISTRAL_API_KEY = config("MISTRAL_API_KEY")
DB_USER = config('DB_USER')
DB_PASSWORD = config('DB_PASSWORD')
DB_HOST = 'localhost'
DB_PORT = '5432'
DB_NAME = "food_security"
SAMPLE_QUESTIONS = {"low-birthweight": "Which region has the highest number of children born with low birth weights?",
                    "vaccine_rates": "Which vaccine has the lowest vaccination percentage?",
                    "vaccine_rates_all": "What percentage of children received all vaccines before 12 months",
                    "kids_in_sch": "Whats average percentage of children who are in preschool",
                    "vaccines": "which vaccines did children get in Tunisia?"
                    }

# Create the database URL.
# User and password are percent-encoded: a character such as '@', ':' or '/'
# inside a credential would otherwise be read as a URL delimiter and yield a
# wrong host or a parse error.
DATABASE_URL = (
    f'postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)
FILE_SQL_EXAMPLES_EN = "sql_examples_en.json"
USE_BEST_MATCHING_COLUMNS = False

# LangChain Evals

## Get the data

In [None]:
import pandas as pd
from tqdm import tqdm
import requests
import io

# Dataset URL:
url = "https://storage.googleapis.com/oreilly-content/transaction_data_with_expanded_descriptions.csv"

# Download the file from the URL. The timeout stops the cell from hanging on
# a stalled connection, and raise_for_status() stops an HTTP error page from
# being fed to the CSV parser.
downloaded_file = requests.get(url, timeout=30)
downloaded_file.raise_for_status()

# Load the transactions dataset and only look at 20 transactions. The
# explicit copy makes `df` an independent frame, so the columns added in
# later cells are not written to a slice of a temporary frame.
df = pd.read_csv(io.StringIO(downloaded_file.text))[:20].copy()
df.head()

## Define the Mistral Model

In [None]:
# Mistral chat model ("mistral-small") used as the first categoriser.
model = ChatMistralAI(model="mistral-small", mistral_api_key=MISTRAL_API_KEY)

## Define the prompt

In [None]:
# 2. Define the prompt.
# The system message sets the task and carries the parser's format
# instructions; the user message carries the transaction text.
system_prompt = """You are are an expert at analyzing
bank transactions, you will be categorizing a single
transaction.
Always return a transaction type and category:
do not return None.
Format Instructions:
{format_instructions}"""

user_prompt = """Transaction Text:
{transaction}"""

prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("user", user_prompt)]
)

### Define the pydantic model

In [None]:
# 3. Define the pydantic model:
# Parsed LLM answer for one transaction. Both fields accept one of the listed
# literals or None.
# (Deliberately documented with comments rather than a docstring: a model
# docstring becomes part of the schema used for the format instructions.)
class EnrichedTransactionInformation(BaseModel):
    # Kind of money movement.
    transaction_type: Optional[
        Literal["Purchase", "Withdrawal", "Deposit", "Bill Payment", "Refund"]
    ]
    # Spending category.
    transaction_category: Optional[
        Literal["Food", "Entertainment", "Transport", "Utilities", "Rent", "Other"]
    ]

# 4. Define the output parser:

In [None]:
# Parser that turns the model's text into an EnrichedTransactionInformation
# and also supplies the format instructions injected into the prompt.
output_parser = PydanticOutputParser(
    pydantic_object=EnrichedTransactionInformation)

In [None]:
# 5. Define a function to try to fix and remove the backslashes:
def remove_back_slashes(string):
    """Return ``string`` with every backslash character removed."""
    # "\\" is the one-character string holding a single backslash; splitting
    # on it and re-joining drops each occurrence.
    return "".join(string.split("\\"))

### Create an LCEL chain that fixes the formatting:

In [None]:
# 6. Compose the pipeline: prompt -> Mistral model -> plain string ->
# backslash clean-up -> pydantic parsing.
chain = prompt | model | StrOutputParser() | remove_back_slashes | output_parser

## Invoke the chain for a single instance of the dataframe

In [None]:
# Smoke test: categorise only the first transaction of the frame.
transaction = df.iloc[0]["Transaction Description"]
result = chain.invoke(
    dict(
        transaction=transaction,
        format_instructions=output_parser.get_format_instructions(),
    )
)

## Invoke the chain on the whole dataframe

In [None]:
# One parsed result per dataframe row, in row order.
results = []

for i, row in tqdm(df.iterrows(), total=len(df)):
    transaction = row["Transaction Description"]
    try:
        result = chain.invoke(
            dict(
                transaction=transaction,
                format_instructions=output_parser.get_format_instructions(),
            )
        )
        print(result.transaction_type, result.transaction_category)
    except Exception as e:
        # Best effort: report the failure and record an all-None placeholder
        # so `results` stays aligned with the rows of `df`.
        print(e)
        result = EnrichedTransactionInformation(
            transaction_type=None, transaction_category=None
        )

    results.append(result)

In [None]:
# 8. Attach the Mistral predictions to the dataframe as two new columns
# (one entry per row, in the order the rows were processed).
transaction_types = [parsed.transaction_type for parsed in results]
transaction_categories = [parsed.transaction_category for parsed in results]

df["mistral_transaction_type"] = transaction_types
df["mistral_transaction_category"] = transaction_categories
df.head()

## Create alternate/ground truth with GPT-4o

In [None]:
# Second categoriser backed by "gpt-4o". It reuses the same prompt and parser
# as the Mistral chain but has no backslash clean-up step.
model_gpt4 = ChatOpenAI(model="gpt-4o")
chain_gpt = prompt | model_gpt4 | StrOutputParser() | output_parser

In [None]:
# Pre-create the result columns filled with None; the loop in the next cell
# overwrites them row by row.
df["gpt4_transaction_type"] = None
df["gpt4_transaction_category"] = None

In [None]:
# Categorise every transaction with the GPT chain and write the answer
# straight into the row's gpt4_* columns.
for i, row in tqdm(df.iterrows(), total=len(df)):
    transaction = row["Transaction Description"]
    try:
        result = chain_gpt.invoke(
            {
                "transaction": transaction,
                "format_instructions": output_parser.get_format_instructions(),
            }
        )
        df.loc[i, "gpt4_transaction_type"] = result.transaction_type
        df.loc[i, "gpt4_transaction_category"] = result.transaction_category
    except Exception as e:
        # Best effort: report which row failed and move on. The original
        # also built a placeholder EnrichedTransactionInformation here, but
        # it was never stored or read, so it has been dropped; the row's
        # gpt4_* columns are simply left untouched.
        print(f"Row {i}: {e}")


## Function Calling in LangChain

In [None]:
# Schema exposed to the model as a tool. The class docstring and the Field
# descriptions are part of that schema, so they are left exactly as written.
class Article(BaseModel):
    """Identifying key points and contrarian views in an article."""

    # Required (`...` marks the field as mandatory).
    points: str = Field(..., description="Key points from the article")
    # Optional; defaults to None.
    contrarian_points: Optional[str] = Field(
        None, description="Any contrarian points acknowledged in the article"
    )
    # Optional; defaults to None.
    author: Optional[str] = Field(None, description="Author of the article")

_EXTRACTION_TEMPLATE = """Extract and save the relevant entities mentioned \
in the following passage together with their properties.

If a property is not present and is not required in the function parameters,
do not include it in the output."""

# Create a prompt telling the LLM to extract information.
# The messages are passed as a LIST. The original used a set literal
# ({...}); a set has no defined iteration order, so the user message could
# end up in front of the system message.
prompt = ChatPromptTemplate.from_messages(
    [("system", _EXTRACTION_TEMPLATE), ("user", "{input}")]
)

pydantic_schemas = [Article]

# Convert Pydantic objects to the appropriate schema:
tools = [convert_to_openai_tool(p) for p in pydantic_schemas]

# Give the model access to these tools:
model = model_gpt4.bind_tools(tools=tools)

# Create an end to end chain: prompt -> tool-calling model -> parsed objects.
chain = prompt | model | PydanticToolsParser(tools=pydantic_schemas)

result = chain.invoke(
    {
        "input": """In the recent article titled 'AI adoption in industry,'
        key points addressed include the growing interest ... However, the
        author, Dr. Jane Smith, ..."""
    }
)
print(result)


## Query Planning

In [None]:
# One node of a query plan. Documented with comments only: this model's
# schema is embedded in the parser's format instructions.
class Query(BaseModel):
    # Identifier of this query; other queries refer to it in `dependencies`.
    id: int
    # The question text for this step.
    question: str
    # Ids of queries to complete first; each instance gets its own empty list.
    dependencies: List[int] = Field(
        default_factory=list,
        description="""A list of sub-queries that must be completed before
        this task can be completed.
        Use a sub query when anything is unknown and we might need to ask
        many queries to get an answer.
        Dependencies must only be other queries."""
    )

# Top-level object the parser asks the model to produce: a list of Query nodes.
class QueryPlan(BaseModel):
    query_graph: List[Query]

In [None]:
# Default chat model plus a parser that validates the answer as a QueryPlan.
model = ChatOpenAI()
parser = PydanticOutputParser(pydantic_object=QueryPlan)

# A single system message carries both the user's request and the parser's
# format instructions.
template = """Generate a query plan. This will be used for task execution.

Answer the following query: {query}

Return the following query graph format:
{format_instructions}
"""
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])

# LCEL pipeline: prompt -> model -> parsed QueryPlan.
chain = chat_prompt | model | parser

result = chain.invoke(
    {
        "query": '''I want to get the results from my database. Then I want to find
out what the average age of my top 10 customers is. Once I have the average
age, I want to send an email to John. Also I just generally want to send a
welcome introduction email to Sarah, regardless of the other tasks.''',
        "format_instructions": parser.get_format_instructions(),
    }
)

print(result.query_graph)

In [None]:
# Translation prompt: the system message names the language pair and the user
# message carries the text.
system_template = '''You are helpful assistant who can translate from {input_language} to {output_language}.'''
user_template = "User text: {user_text}"

prompt = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", user_template)])
question = "Ndimakudonda kwambiri"
src_lan = "Chichewa"
dest_lan = "English"

chat = ChatOpenAI()
chain = prompt | chat | StrOutputParser()
# Use the language variables declared above. They were previously defined
# but ignored, with the same values hard-coded a second time in this call.
response = chain.invoke(
    {"input_language": src_lan, "output_language": dest_lan, "user_text": question}
)

## Creating Few-Shot Prompt Templates

In [None]:
# Question/answer pairs for a few-shot prompt.
# Note: this rebinds `examples`, which earlier held the translation examples.
examples = [
    {
        "question": "What is the capital of France?",
        "answer": "Paris",
    },
    {
        "question": "What is the capital of Spain?",
        "answer": "Madrid",
    } 
]

In [None]:
# Template for rendering one example as a human message followed by an AI
# message; its variables match the keys of the `examples` dicts.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{question}"),
        ("ai", "{answer}"),
    ]
)

# Prompt Chaining

In [None]:
from langchain_core.prompts.chat import ChatPromptTemplate

# Step 1 prompt — input variable: genre. Asks for three to five characters,
# each with a name and a biography.
character_generation_prompt = ChatPromptTemplate.from_template(
    """I want you to brainstorm three to five characters for my short story. The
    genre is {genre}. Each character must have a Name and a Biography.
    You must provide a name and biography for each character, this is very
    important!
    ---
    Example response:
    Name: CharWiz, Biography: A wizard who is a master of magic.
    Name: CharWar, Biography: A warrior who is a master of the sword.
    ---
    Characters: """
)

# Step 2 prompt — input variables: characters, genre. Asks for a plot.
plot_generation_prompt = ChatPromptTemplate.from_template(
    """Given the following characters and the genre, create an effective
    plot for a short story:
    Characters:
    {characters}
    ---
    Genre: {genre}
    ---
    Plot: """
    )

# Step 3 prompt — input variables: characters, genre, plot. Asks for the plot
# to be broken down into scenes.
scene_generation_plot_prompt = ChatPromptTemplate.from_template(
    """Act as an effective content creator.
    Given multiple characters and a plot, you are responsible for
    generating the various scenes for each act.

    You must decompose the plot into multiple effective scenes:
    ---
    Characters:
    {characters}
    ---
    Genre: {genre}
    ---
    Plot: {plot}
    ---
    Example response:
    Scenes:
    Scene 1: Some text here.
    Scene 2: Some text here.
    Scene 3: Some text here.
    ----
    Scenes:
    """
)

In [None]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# Minimal LCEL demo: pass the input dict through unchanged, then build a new
# dict whose "genre" entry is picked out of it with itemgetter.
chain = RunnablePassthrough() | {
    "genre": itemgetter("genre"),
  }

chain.invoke({"genre": "fantasy"})
# {'genre': 'fantasy'}


In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Default chat model shared by the three sub-chains.
model = ChatOpenAI()

# Every sub-chain has the same shape: prompt -> chat model -> plain string.
character_generation_chain = character_generation_prompt | model | StrOutputParser()
plot_generation_chain = plot_generation_prompt | model | StrOutputParser()
scene_generation_plot_chain = scene_generation_plot_prompt | model | StrOutputParser()

In [None]:
from langchain_core.runnables import RunnableParallel
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

# Three stages; each stage forwards what it received and adds one new key:
#   1. characters  2. plot  3. scenes
master_chain = (
    {
        "characters": character_generation_chain,
        # Extract the genre STRING from the input dict. The original used
        # RunnablePassthrough() here, which forwarded the whole
        # {"genre": ...} dict, so the later prompts were rendered with the
        # dict's repr instead of the genre name.
        "genre": itemgetter("genre"),
    }
    | RunnableParallel(
        characters=itemgetter("characters"),
        genre=itemgetter("genre"),
        plot=plot_generation_chain,
    )
    | RunnableParallel(
        characters=itemgetter("characters"),
        genre=itemgetter("genre"),
        plot=itemgetter("plot"),
        scenes=scene_generation_plot_chain,
    )
)

story_result = master_chain.invoke({"genre": "Fantasy"})

In [None]:
# Same three-stage pipeline written with plain dict literals. Each dict step
# keeps the keys gathered so far and adds one generated value.
master_chain = (
    RunnablePassthrough()
    | {
        "characters": character_generation_chain,
        "genre": itemgetter("genre"),
    }
    | {
        "characters": itemgetter("characters"),
        "genre": itemgetter("genre"),
        "plot": plot_generation_chain,
    }
    | {
        "characters": itemgetter("characters"),
        "genre": itemgetter("genre"),
        "plot": itemgetter("plot"),
        "scenes": scene_generation_plot_chain,
    }
)

story_result = master_chain.invoke({"genre": "Fantasy"})

In [None]:
# Split the generated scene text into lines and drop the empty ones.
scenes = list(filter(None, story_result["scenes"].split("\n")))

# Accumulators filled by the scene-generation loop in the next cell.
generated_scenes = []
previous_scene_summary = ""

# Prompt for one scene's dialogue script. Variables: characters, genre,
# previous_scene_summary, plot, scene, index.
character_script_prompt = ChatPromptTemplate.from_template(
    template="""Given the following characters: {characters} and the genre:
    {genre}, create an effective character script for a scene.

    You must follow the following principles:
    - Use the Previous Scene Summary: {previous_scene_summary} to avoid
    repeating yourself.
    - Use the Plot: {plot} to create an effective scene character script.
    - Currently you are generating the character dialogue script for the
    following scene: {scene}

    ---
    Here is an example response:
    SCENE 1: ANNA'S APARTMENT

    (ANNA is sorting through old books when there is a knock at the door.
    She opens it to reveal JOHN.)
    ANNA: Can I help you, sir?
    JOHN: Perhaps, I think it's me who can help you. I heard you're
    researching time travel.
    (Anna looks intrigued but also cautious.)
    ANNA: That's right, but how do you know?
    JOHN: You could say... I'm a primary source.

    ---
    SCENE NUMBER: {index}

    """,
)

# Prompt that condenses a finished script into a summary.
summarize_prompt = ChatPromptTemplate.from_template(
    template="""Given a character script, create a summary of the scene.
    Character script: {character_script}""",
)

In [None]:
# Loading a chat model:
model = ChatOpenAI(model='gpt-3.5-turbo-16k')

# Create the LCEL chains.
# Each prompt variable is pulled out of the input dict by key. The original
# bound every variable to RunnablePassthrough(), so each placeholder received
# the ENTIRE input dict, and a missing "plot"/"scene" went unnoticed.
character_script_generation_chain = (
    {
        "characters": itemgetter("characters"),
        "genre": itemgetter("genre"),
        "previous_scene_summary": itemgetter("previous_scene_summary"),
        "plot": itemgetter("plot"),
        "scene": itemgetter("scene"),
        "index": itemgetter("index"),
    }
    | character_script_prompt
    | model
    | StrOutputParser()
)

summarize_chain = summarize_prompt | model | StrOutputParser()

# Start from a clean slate so that re-running this cell does not append
# duplicate scenes or reuse a stale summary.
generated_scenes = []
previous_scene_summary = ""

# You might want to use tqdm here to track the progress,
# or use all of the scenes:
for index, scene in enumerate(scenes[0:3]):

    # Generate the script for this scene; the prompt needs all six values.
    scene_result = character_script_generation_chain.invoke(
        {
            "characters": story_result["characters"],
            "genre": story_result["genre"],
            "previous_scene_summary": previous_scene_summary,
            "plot": story_result["plot"],
            "scene": scene,
            "index": index,
        }
    )

    # Store the generated scenes:
    generated_scenes.append(
        {"character_script": scene_result, "scene": scene}
    )

    # After the first scene the full script itself is carried forward as the
    # "previous summary"; later scenes are condensed first.
    if index == 0:
        previous_scene_summary = scene_result
    else:
        summary_result = summarize_chain.invoke(
            {"character_script": scene_result}
        )
        previous_scene_summary = summary_result


In [None]:
from langchain_core.prompts.chat import ChatPromptTemplate
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Demonstration: piping a plain dict into a prompt with the `|` operator.
bad_first_input = {
    "film_required_age": itemgetter("age"),
}

prompt = ChatPromptTemplate.from_template(
    "Generate a film title, the age is {film_required_age}"
)

# This will error:
# NOTE(review): the dict value here is a callable (itemgetter), which LCEL can
# normally coerce into a runnable -- confirm this line still raises as the
# comment above claims (a non-callable value such as 18 would).
bad_chain = bad_first_input | prompt

In [None]:
# One row per generated scene (columns: character_script, scene).
# NOTE(review): relies on `pd` and `generated_scenes` already existing in the
# kernel; the only `import pandas as pd` visible nearby is in a later cell.
df = pd.DataFrame(generated_scenes)

In [None]:
# Concatenate every generated script into one newline-separated string.
all_character_script_text = "\n".join(df.character_script.tolist())

In [None]:
from langchain_text_splitters import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
# Splitter built via the tiktoken encoder helper: chunk size 1500 with an
# overlap of 200 (presumably measured in tokens -- confirm).
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1500, chunk_overlap=200
)
# Wrap the combined script text into document chunks.
docs = text_splitter.create_documents([all_character_script_text])

In [None]:
# Translation pipeline: prompt -> chat model -> plain string.
# NOTE(review): `translation_template` and `chat` are not defined in this cell;
# they must already exist in the kernel.
chain = translation_template | chat | StrOutputParser()

In [None]:
# Run the translation chain on the user's question.
# NOTE(review): `question` is not defined in this cell; the visible assignment
# (`question = "Whats the price of Maize?"`) lives in a later cell.
response = chain.invoke({"input_language": "Chichewa", "output_language": "English",
               "messages": [HumanMessage(content=question)]})

In [None]:
from urllib.parse import quote_plus

# Secrets are read through python-decouple's `config` (environment / .env).
OPENAI_API_KEY = config("OPENAI_API_KEY")
MISTRAL_API_KEY = config("MISTRAL_API_KEY")
DB_USER = config('DB_USER')
DB_PASSWORD = config('DB_PASSWORD')
DB_HOST = 'localhost'
DB_PORT = '5432'
DB_NAME = "food_security"

# Example questions, keyed by a short topic label.
SAMPLE_QUESTIONS = {"low-birthweight": "Which region has the highest number of children born with low birth weights?",
                    "vaccine_rates": "Which vaccine has the lowest vaccination percentage?",
                    "vaccine_rates_all": "What percentage of children received all vaccines before 12 months",
                    "kids_in_sch": "Whats average percentage of children who are in preschool",
                    "vaccines": "which vaccines did children get in Tunisia?"
                    }

# Create the database URL.
# The user name and password are percent-encoded so that characters such as
# '@', ':' or '/' in a credential cannot corrupt the URL. Store the *raw*
# (un-encoded) credentials in the environment.
DATABASE_URL = (
    f'postgresql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)
# JSON file holding the few-shot question -> SQL examples.
FILE_SQL_EXAMPLES_EN = "sql_examples_en.json"
# When True, the SQL prompt is told that the column metadata is pre-filtered.
USE_BEST_MATCHING_COLUMNS = False

## Prompt Templates

### Create Prompt to Select best Table

In [None]:
def connect_to_database(database_url=DATABASE_URL):
    """Fetch the table and column metadata rows from the PostgreSQL database.

    Despite its name, this function does not hand back a connection: it opens
    one, reads the two metadata tables and closes it again before returning.

    Parameters
    ----------
    database_url : str
        PostgreSQL database connection URL, by default DATABASE_URL

    Returns
    -------
    tuple
        ``(tables, columns)`` where ``tables`` holds the
        ``(table_name, description)`` rows of ``table_metadata`` and
        ``columns`` holds the ``(table_name, column_name, description)`` rows
        of ``column_metadata``.
    """
    conn = psycopg2.connect(database_url)
    # try/finally so the cursor and connection are released even when one of
    # the queries raises (e.g. a metadata table is missing).
    try:
        cur = conn.cursor()
        try:
            # Query to get table names and their descriptions
            cur.execute("SELECT table_name, description FROM table_metadata")
            tables = cur.fetchall()

            # Query to get column names and their descriptions
            cur.execute("SELECT table_name, column_name, description FROM column_metadata")
            columns = cur.fetchall()
        finally:
            cur.close()
    finally:
        conn.close()

    return tables, columns

In [None]:
def find_best_table_prompt(user_query, tables, columns,
                           return_chain=True, llm=None):
    """Build the "pick the best table" prompt, optionally wrapped in a chain.

    Parameters
    ----------
    user_query : str
        The user's natural-language question.
    tables : iterable of (table_name, description)
        Table metadata rows.
    columns : iterable of (table_name, column_name, description)
        Column metadata rows; only rows whose first item matches a table in
        ``tables`` are rendered.
    return_chain : bool, optional
        When True (default) return ``(LLMChain, inputs_dict)``; otherwise
        return the fully formatted prompt string.
    llm : optional
        Model handed to ``LLMChain`` when ``return_chain`` is True.

    Returns
    -------
    tuple or str
        ``(chain, {"table_info": ..., "user_query": ...})`` or the formatted
        prompt text, depending on ``return_chain``.
    """
    template = """
    You are a database assistant. Given the following tables and columns with their descriptions, select the best table that matches the user's query.

    Tables and Columns:
    {table_info}

    User Query:
    {user_query}

    Provide the output in the following JSON format:
    {{
        "best_matching_table": {{
            "table_name": "<best_table_name>",
            "description": "<best_table_description>"
        }}
    }}
    """
    # Render one section per table: a header line, its columns, a blank line.
    sections = []
    for table_name, table_description in tables:
        lines = [f"Table: {table_name} - {table_description}\n"]
        for column in columns:
            if column[0] != table_name:
                continue
            _, column_name, column_description = column
            lines.append(f"    Column: {column_name} - {column_description}\n")
        lines.append("\n")
        sections.append("".join(lines))
    table_info = "".join(sections)

    prompt_template = PromptTemplate(
        input_variables=["table_info", "user_query"],
        template=template,
    )

    if not return_chain:
        # Caller only wants the final prompt text.
        return prompt_template.format(table_info=table_info, user_query=user_query)

    # Hand back the chain together with the inputs it should be run with.
    inputs = {"table_info": table_info, "user_query": user_query}
    return LLMChain(llm=llm, prompt=prompt_template), inputs


In [None]:
def get_columns_info(table_name, columns):
    """Render the columns that belong to ``table_name`` as indented text lines.

    Parameters
    ----------
    table_name : str
        Table whose columns should be listed.
    columns : iterable of (table_name, column_name, description)
        Column metadata rows for any number of tables.

    Returns
    -------
    str
        One ``"    Column: <name> - <description>\\n"`` line per matching row,
        in input order; an empty string when nothing matches.
    """
    return "".join(
        f"    Column: {column_name} - {column_description}\n"
        for table, column_name, column_description in columns
        if table == table_name
    )


In [None]:
def find_best_columns_prompt(user_query, best_matching_table, columns,
                       return_chain=True, llm=None):
    """Build the "pick the relevant columns" prompt for an already chosen table.

    Parameters
    ----------
    user_query : str
        The user's natural-language question.
    best_matching_table : dict
        Must provide the keys ``"table_name"`` and ``"description"``.
    columns : iterable of (table_name, column_name, description)
        Column metadata rows; filtered to the chosen table.
    return_chain : bool, optional
        When True (default) return ``(LLMChain, inputs_dict)``; otherwise
        return the formatted prompt string.
    llm : optional
        Model handed to ``LLMChain`` when ``return_chain`` is True.

    Returns
    -------
    tuple or str
        ``(chain, context)`` or the formatted prompt text.
    """
    column_template = """
    You are a database assistant. Given the following columns for the table '{table_name}', select the columns that are most relevant to the user's query.

    Table Description: {table_description}

    Columns:
    {columns_info}

    User Query:
    {user_query}

    Relevant Columns:
    """

    chosen_table = best_matching_table["table_name"]
    columns_info = get_columns_info(chosen_table, columns)

    column_prompt_template = PromptTemplate(
        input_variables=["table_name", "table_description", "columns_info", "user_query"],
        template=column_template,
    )

    # The same four values feed both the formatted prompt and the chain inputs.
    context = dict(
        table_name=chosen_table,
        table_description=best_matching_table["description"],
        columns_info=columns_info,
        user_query=user_query,
    )

    if not return_chain:
        return column_prompt_template.format(**context)

    return LLMChain(llm=llm, prompt=column_prompt_template), context


In [None]:
def load_sql_examples(file_path):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON file holding the few-shot SQL examples.

    Returns
    -------
    object
        Whatever the JSON document decodes to.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        examples = json.load(handle)
    return examples

In [None]:
def create_sql_prompt(examples, best_matching_table, columns_metadata, use_best_matching_columns=False):
    """
    Creates a FewShotPromptTemplate for generating SQL queries based on table and column metadata.

    This function generates a prompt template that includes detailed information about the table and its columns.
    The generated prompt instructs a language model (LLM) to create a syntactically correct SQL query based on
    user input. If any column name contains the substring 'date', the prompt also instructs the LLM to retrieve
    the most recent data when the user does not specify a date.

    Parameters
    ----------
    examples : list of dict
        A list of example inputs and corresponding SQL queries. Each example should be a dictionary with 'input' and 'query' keys.
    best_matching_table : dict
        A dictionary containing the best matching table information with 'table_name' and 'description' keys.
    columns_metadata : list of tuples
        A list of tuples containing columns metadata. Each tuple should include 'table_name', 'column_name', and 'description'.
    use_best_matching_columns : bool, optional
        A flag indicating whether ``columns_metadata`` is already filtered and should be used as-is (if True) or
        should be filtered down to the rows of the best matching table (if False). Default is False.

    Returns
    -------
    sql_prompt : FewShotPromptTemplate
        A FewShotPromptTemplate object that can be used with an LLM to generate SQL queries. Its input
        variables are 'input' and 'top_k'.
    """
    def _escape_braces(text):
        # The metadata is spliced into the template *source*, so literal braces
        # must be doubled or they would be parsed as template variables.
        return str(text).replace("{", "{{").replace("}", "}}")

    # Prepare table_info string based on the best matching table
    table_info = _escape_braces(
        f"Table: {best_matching_table['table_name']} - {best_matching_table['description']}\n"
    )

    # Determine which columns to use: pre-filtered ones or all columns of the table
    if use_best_matching_columns:
        columns_to_use = columns_metadata  # Assuming columns_metadata is already filtered
    else:
        columns_to_use = [col for col in columns_metadata if col[0] == best_matching_table['table_name']]

    # Construct the columns_info string and look for a date-like column
    columns_info = "Columns:\n"
    has_date_column = False
    for _, column_name, column_description in columns_to_use:
        columns_info += _escape_braces(f"    Column: {column_name} - {column_description}\n")
        if 'date' in column_name.lower():
            has_date_column = True

    # Template used to render each few-shot example
    example_prompt = PromptTemplate.from_template("User input: {input}\nSQL query: {query}")

    # Add a special instruction if the table has a date column
    recent_data_instruction = (
        "If the user does not specify a date, retrieve the most recent data available by ordering the results "
        "by the date column in descending order."
    ) if has_date_column else ""

    # Combine table_info and columns_info in the prompt; {top_k} and {input}
    # stay as real template variables.
    sql_prompt = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=(
            "You are a PostgreSQL expert. Given an input question, create a syntactically correct PostgreSQL query to run. "
            "Unless otherwise specified, do not return more than {top_k} rows.\n\n"
            f"Here is the relevant table information:\n{table_info}\n\n"
            f"Here is the relevant columns information:\n{columns_info}\n\n"
            f"{recent_data_instruction}\n\n"
            "Below are a number of examples of questions and their corresponding SQL queries."
        ),
        suffix="User input: {input}\nSQL query: ",
        input_variables=["input", "top_k"],
    )

    return sql_prompt




In [None]:
# def create_sql_prompt(examples, best_matching_table, best_matching_columns):
#     # Prepare table_info string based on the best matching table and columns
#     table_info = f"Table: {best_matching_table['table_name']} - {best_matching_table['description']}\n"
#     has_date_column = False

#     for column in best_matching_columns:
#         table_info += f"    Column: {column['name']} - {column['description']}\n"
#         if 'date' in column['name'].lower():
#             has_date_column = True

#     # Create FewShot Prompt with instructions for handling most recent data
#     example_prompt = PromptTemplate.from_template("User input: {input}\nSQL query: {query}")

#     # Add a special instruction if the table has a date column
#     if has_date_column:
#         recent_data_instruction = (
#             "If the user does not specify a date, retrieve the most recent data available by ordering the results "
#             "by the date column in descending order."
#         )
#     else:
#         recent_data_instruction = ""

#     sql_prompt = FewShotPromptTemplate(
#         examples=examples,
#         example_prompt=example_prompt,
#         prefix=(
#             "You are a PostgreSQL expert. Given an input question, create a syntactically correct PostgreSQL query to run. "
#             "Unless otherwise specified, do not return more than {top_k} rows.\n\n"
#             f"Here is the relevant table info: {table_info}\n\n"
#             f"{recent_data_instruction}\n\n"
#             "Below are a number of examples of questions and their corresponding SQL queries."
#         ),
#         suffix="User input: {input}\nSQL query: ",
#         input_variables=["input", "top_k"],
#     )

#     return sql_prompt



In [None]:
def create_answer_chain(llm):
    """Return an ``LLMChain`` that turns a question plus SQL result into an answer.

    The prompt expects the input variables ``question`` and ``result`` and
    stresses units, explicit time periods, and use of the latest data when no
    time is given. No output parser is attached to the chain.

    Parameters
    ----------
    llm :
        Language model passed to ``LLMChain``.
    """
    template_text = """
        You are a knowledgeable assistant. Given the following user question and SQL result, answer the question accurately.
        
        Always ensure to:
        1. Include appropriate units in your answer (e.g., Kwacha per kg, liters, etc.).
        2. Specify the time period or date if the question implies or explicitly asks for it.
        3. If the user does not specify a time, provide the most recent information available in the database and clearly state that this is the latest data.

        For example, if the user asks "What's the price of Maize?", your answer should include the price with the correct unit and mention that this is the most recent price, e.g., "The most recent price of Maize is 60 Kwacha per kg."
        If the user asks about a specific time period, such as "What's the price of Maize for May 2024?", include the time in your answer, e.g., "The price of Maize in May 2024 is 60 Kwacha per kg."

        Question: {question}
        SQL Result: {result}
        Answer: """

    return LLMChain(prompt=PromptTemplate.from_template(template_text), llm=llm)


In [None]:
def run_sql_chain(user_question, best_table_info, columns_info,
                  best_columns=None, llm=None):
    """Generate a SQL query for the question, execute it and phrase an answer.

    Parameters
    ----------
    user_question : str
        The user's natural-language question.
    best_table_info : dict
        The chosen table, with 'table_name' and 'description' keys.
    columns_info : list of tuples
        Column metadata rows ``(table_name, column_name, description)``,
        forwarded to ``create_sql_prompt`` as ``columns_metadata``.
    best_columns : optional
        Currently unused; kept so existing callers keep working.
    llm : optional
        Language model used both to write the query and to phrase the answer.

    Returns
    -------
    The value returned by invoking the combined chain (the output of the
    answer ``LLMChain``).
    """
    # Load examples and create the few-shot SQL prompt
    examples = load_sql_examples(file_path=FILE_SQL_EXAMPLES_EN)
    sql_prompt = create_sql_prompt(
        examples=examples,
        best_matching_table=best_table_info,
        columns_metadata=columns_info,
        use_best_matching_columns=bool(USE_BEST_MATCHING_COLUMNS),
    )

    engine = create_engine(DATABASE_URL)
    try:
        # The metadata tables are hidden from the model on purpose.
        db = SQLDatabase(engine=engine, ignore_tables=['table_metadata', 'column_metadata'])

        execute_query = QuerySQLDataBaseTool(db=db)
        write_query = create_sql_query_chain(llm, db, sql_prompt)

        # Create the answer chain
        answer_chain = create_answer_chain(llm)

        # question -> generated SQL -> SQL result -> natural-language answer
        chain = (
            RunnablePassthrough.assign(query=write_query).assign(
                result=itemgetter("query") | execute_query
            )
            | answer_chain
        )

        return chain.invoke({"question": str(user_question)})
    finally:
        # A new engine is created per call; release its connection pool so
        # repeated calls do not accumulate open database connections.
        engine.dispose()

In [None]:
def process_sql_query(user_question, llm=None):
    """Answer a natural-language question from the database, end to end.

    Steps: read the table/column metadata, ask the model for the best matching
    table, ask it for the relevant columns, then generate and run the SQL and
    phrase the final answer.

    Parameters
    ----------
    user_question : str
        The user's question.
    llm : optional
        Chat model to use; when falsy a ``gpt-3.5-turbo`` ``ChatOpenAI`` with
        temperature 0 is created from ``OPENAI_API_KEY``.

    Returns
    -------
    The value returned by ``run_sql_chain``.

    Raises
    ------
    ValueError
        If the table-selection output is not JSON containing a
        ``best_matching_table`` object with ``table_name`` and ``description``.
    """
    # LLM
    if not llm:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=OPENAI_API_KEY)

    # Retrieve the metadata info (tables and columns)
    tables, columns = connect_to_database()

    # Chain 1: Find the Best Table
    best_table_chain, context = find_best_table_prompt(user_question, tables,
                                                       columns, llm=llm)
    best_table_output_str = best_table_chain.run(**context)

    # Models sometimes wrap the JSON in prose or code fences; keep only the
    # outermost {...} span before parsing.
    raw_json = best_table_output_str.strip()
    start, end = raw_json.find("{"), raw_json.rfind("}")
    if start != -1 and end > start:
        raw_json = raw_json[start:end + 1]

    try:
        best_table = json.loads(raw_json)["best_matching_table"]
    except (json.JSONDecodeError, KeyError, TypeError) as exc:
        raise ValueError(
            "Table selection did not return the expected JSON: "
            f"{best_table_output_str!r}"
        ) from exc
    if not isinstance(best_table, dict) or not {"table_name", "description"} <= set(best_table):
        raise ValueError(
            "Table selection JSON lacks 'table_name'/'description': "
            f"{best_table_output_str!r}"
        )

    # Chain 2: Find Relevant Columns
    best_columns_chain, context = find_best_columns_prompt(user_question, best_table,
                                                           columns, llm=llm)
    best_columns_output = best_columns_chain.run(**context)

    # Pass the column metadata rows fetched above (`columns`); run_sql_chain
    # forwards them to create_sql_prompt as (table, column, description) tuples.
    response = run_sql_chain(user_question, best_table,
                             columns, best_columns_output, llm=llm)

    return response

    

In [None]:
# End-to-end smoke run of the text-to-SQL pipeline with a default chat model.
question = "Whats the price of Maize?"
openai_chat_model = ChatOpenAI()
response = process_sql_query(user_question=question, llm=openai_chat_model)

In [None]:
response

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage

# Basic chat call: a system message sets the persona, a human message asks.
chat = ChatOpenAI(temperature=0.5)
messages = [SystemMessage(content='''Act as a senior software engineer
at a startup company.'''),
HumanMessage(content='''Please can you provide a funny joke
about software engineers?''')]
# invoke() returns a message object; its text is in `.content`.
response = chat.invoke(input=messages)
print(response.content)

In [None]:
# Ask the model to identify the language of a Chichewa sentence.
# Note: this rebinds `chat`, `messages` and `response` from the previous cell.
chat = ChatOpenAI(temperature=0.5)
messages = [SystemMessage(content='''You are a highly-skilled linguist and polyglot.\
              Identify the language of the user query'''),
HumanMessage(content='''Dzina langa ndine Dunstan Matekenya''')]
response = chat.invoke(input=messages)
print(response.content)

In [None]:
# Stream the reply and print each chunk as it arrives, without newlines.
# Uses whichever `chat` and `messages` were defined last in the kernel.
for chunk in chat.stream(messages):
    print(chunk.content, end="", flush=True)

In [None]:
# Send the same conversation twice in one batch call.
# 2x lists of messages, which is the same as [messages, messages]
synchronous_llm_result = chat.batch([messages]*2)
print(synchronous_llm_result)

In [None]:
from langchain_core.runnables.config import RunnableConfig

# Create a RunnableConfig with the desired concurrency limit.
# It is deliberately NOT called `config`: that name is python-decouple's
# `config` function (imported at the top of the notebook) which other cells
# use to read secrets, and rebinding it would break them on re-run.
batch_config = RunnableConfig(max_concurrency=5)

# Call the .batch() method with the inputs and config:
results = chat.batch([messages, messages], config=batch_config)

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts import (SystemMessagePromptTemplate,
ChatPromptTemplate)

# System prompt with three variables: principles, industry and context.
# NOTE(review): the text asks for a numerical list of five names while the
# example format shows three bullet items -- confirm which is intended.
template = """
You are a creative consultant brainstorming names for businesses.

You must follow the following principles:
{principles}

Please generate a numerical list of five catchy names for a start-up in the
{industry} industry that deals with {context}?

Here is an example output format:
- Name1
- Name2
- Name3
"""


model = ChatOpenAI()
system_prompt = SystemMessagePromptTemplate.from_template(template)
chat_prompt = ChatPromptTemplate.from_messages([system_prompt])

# Pipe the prompt into the model; invoking takes a dict of template variables.
chain = chat_prompt | model

result = chain.invoke({
    "industry": "medical",
    "context":'''creating AI solutions by automatically summarizing patient
    records''',
    "principles":'''1. Each name should be short and easy to
    remember. 2. Each name should be easy to pronounce.
    3. Each name should be unique and not already taken by another company.'''
})

print(result.content)

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.chat import SystemMessagePromptTemplate
from langchain_openai import ChatOpenAI

# Translate a Chichewa sentence by pairing a formatted system message with a
# hand-built human message.
# Create the template text
template = '''You are a helpful assistant that translates {input_language}
to {output_language}.'''

# Create the PromptTemplate Object
prompt = PromptTemplate(
    template = template,
    input_variables = ["input_language", "output_language"]
)

# Convert base prompt into Chat System prompt
system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

# Create chat model
chat = ChatOpenAI()

# Create message from user
text_to_translate = HumanMessage(content="Chimanga chikupezeka kuti?")

# format the system message
formatted_sys_message = system_message_prompt.format(
    input_language="Chichewa", output_language="English")

# The system message goes first, followed by the user's text.
response = chat.invoke([formatted_sys_message, text_to_translate] )


In [None]:
SystemMessagePromptTemplate.format_messages?

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessage
from langchain_openai import ChatOpenAI

# Same translation example as the previous cell, written on single lines and
# printing the whole response object at the end.
# Define the system message template
template = '''You are a helpful assistant that translates {input_language} to {output_language}.'''

prompt = PromptTemplate(
    template=template,
    input_variables=["input_language", "output_language"]
)

# Create the system message prompt
system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

# Format the system message
formatted_system_message = system_message_prompt.format(input_language="Chichewa", output_language="English")

# Create a human message
text_to_translate = HumanMessage(content="Chimanga chikupezeka kuti?")

# Initialize the chat model
chat = ChatOpenAI()

# Call the invoke method with both the human message and the formatted system message
response = chat.invoke([formatted_system_message, text_to_translate])

# Prints the full message object, not just its `.content`.
print(response)


## Output Parsers 
Output parsers let you take the raw output of an LLM and convert it into the format you need.

In [None]:
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from pydantic.v1 import BaseModel, Field
from typing import List


# NOTE(review): `temperature` is assigned here but is not passed to
# ChatOpenAI() anywhere in this cell.
temperature = 0.0

# Schema for a single generated business name with a 0-10 rating.
class BusinessName(BaseModel):
    name: str = Field(description="The name of the business")
    rating_score: float = Field(description='''The rating score of the
    business. 0 is the worst, 10 is the best.''')

# Top-level schema the parser targets: a list of BusinessName items.
class BusinessNames(BaseModel):
    names: List[BusinessName] = Field(description='''A list
    of busines names''')


# Set up a parser + inject instructions into the prompt template:
# (the parser is created here; `{format_instructions}` is not filled in
# within this cell.)
parser = PydanticOutputParser(pydantic_object=BusinessNames)

# NOTE(review): `principles` is a plain string meant to be passed as the value
# of the `{principles}` variable, so the `{industry}` inside it presumably
# reaches the model literally -- confirm. The "\n" below is a real newline
# character because the string is not raw.
principles = """
- The name must be easy to remember.
- Use the {industry} industry and Company context to create an effective name.
- The name must be easy to pronounce.
- You must only return the name without any other text or characters.
- Avoid returning full stops, \n, or any other characters.
- The maximum length of the name must be 10 characters.
"""

# Chat Model Output Parser:
model = ChatOpenAI()
template = """Generate five business names for a new start-up company in the
{industry} industry.
You must follow the following principles: {principles}
{format_instructions}
"""


# Template variables: industry, principles, format_instructions.
system_message_prompt = SystemMessagePromptTemplate.from_template(template)
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt])

In [None]:
# Builds an AIMessage by hand; as the cell's last expression, its repr is
# what the notebook displays.
AIMessage(content='''Vous êtes un assistant utile qui traduit l'anglais en
français.''', additional_kwargs={}, example=False)

In [None]:
# Hard-coded context-plus-question prompt, ending with an "Answer: " cue.
# Note: this rebinds `prompt`, which earlier cells use for template objects.
prompt = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: Large Language Models (LLMs) are the latest models used in NLP.
Their superior performance over smaller models has made them incredibly
useful for developers building NLP enabled applications. These models
can be accessed via Hugging Face's `transformers` library, via OpenAI
using the `openai` library, and via Cohere using the `cohere` library.

Question: Which libraries and model providers offer LLMs?

Answer: """


In [None]:
from langchain_openai import OpenAI

# initialize the models
# `openai` here is a LangChain OpenAI LLM instance (not the `openai` package),
# authenticated with OPENAI_API_KEY.
openai = OpenAI(openai_api_key=OPENAI_API_KEY)


In [None]:
# Send the current `prompt` string to the LLM and print the completion.
# NOTE(review): calling the LLM object directly is deprecated in recent
# LangChain releases in favour of `.invoke(...)` -- confirm for this version.
print(openai(prompt))


In [None]:
from langchain_core.prompts import PromptTemplate

# Same context prompt as before, but with the question turned into the
# `{query}` template variable.
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: Large Language Models (LLMs) are the latest models used in NLP.
Their superior performance over smaller models has made them incredibly
useful for developers building NLP enabled applications. These models
can be accessed via Hugging Face's `transformers` library, via OpenAI
using the `openai` library, and via Cohere using the `cohere` library.

Question: {query}

Answer: """

# Note: rebinds `prompt_template`; its only input variable is `query`.
prompt_template = PromptTemplate(
    input_variables=["query"],
    template=template
)

In [None]:
# NOTE: `formatted_prompt` is assigned in the cell *after* this one, so on a
# fresh kernel run top-to-bottom this cell raises NameError; run that cell first.
openai(formatted_prompt)

In [None]:
# Fill the `{query}` slot of the template to get the final prompt string.
formatted_prompt = prompt_template.format(query="Which libraries and model providers offer LLMs?")

In [None]:
type(prompt_template)

In [None]:
# Zero-shot persona prompt: describes the assistant's tone but, despite the
# wording "Here are some examples", contains no example exchanges.
prompt = """The following is a conversation with an AI assistant.
The assistant is typically sarcastic and witty, producing creative 
and funny responses to the users questions. Here are some examples: 

User: What is the meaning of life?
AI: """

# This mutates the shared `openai` LLM object, so later cells that reuse it
# also run at temperature 1.0.
openai.temperature = 1.0  # increase creativity/randomness of output

print(openai(prompt))


In [None]:
# Few-shot version of the persona prompt: two example exchanges precede the
# real question.
prompt = """The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 

User: How are you?
AI: I can't complain but sometimes I still do.

User: What time is it?
AI: It's time to get a watch.

User: What is the meaning of life?
AI: """

print(openai(prompt))


In [None]:
from langchain import FewShotPromptTemplate

# Rebuild the hand-written few-shot prompt with FewShotPromptTemplate.
# create our examples
examples = [
    {
        "query": "How are you?",
        "answer": "I can't complain but sometimes I still do."
    }, {
        "query": "What time is it?",
        "answer": "It's time to get a watch."
    }
]

# create a example template
example_template = """
User: {query}
AI: {answer}
"""

# create a prompt example from above template
# (note: rebinds the name `example_prompt`)
example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template
)

# now break our previous prompt into a prefix and suffix
# the prefix is our instructions
prefix = """The following are exerpts from conversations with an AI
assistant. The assistant is typically sarcastic and witty, producing
creative  and funny responses to the users questions. Here are some
examples: 
"""
# and the suffix our user input and output indicator
suffix = """
User: {query}
AI: """

# now create the few shot prompt template
# prefix, rendered examples and suffix are joined with `example_separator`.
few_shot_prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["query"],
    example_separator="\n\n"
)


In [None]:
# Render the few-shot prompt for a concrete question and inspect the text.
query = "What is the meaning of life?"

print(few_shot_prompt_template.format(query=query))


In [None]:
# Send the rendered few-shot prompt to the LLM; the completion is displayed
# as the cell's output.
openai(few_shot_prompt_template.format(query=query))

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain_core.tools import Tool
from langchain_core.tools import langdetect

In [None]:
!pip install pandas openai

In [None]:
import pandas as pd
from openai import OpenAI
import os

# Define two variants of the prompt to test zero-shot
# vs few-shot
# Variant A: zero-shot (no worked examples).
prompt_A = """Product description: A pair of shoes that can
fit any foot size.
Seed words: adaptable, fit, omni-fit.
Product names:"""

# Variant B: few-shot (two worked examples before the same task).
prompt_B = """Product description: A home milkshake maker.
Seed words: fast, healthy, compact.
Product names: HomeShaker, Fit Shaker, QuickShake, Shake
Maker

Product description: A watch that can tell accurate time in
space.
Seed words: astronaut, space-hardened, eliptical orbit
Product names: AstroTime, SpaceGuard, Orbit-Accurate,
EliptoTime.

Product description: A pair of shoes that can fit any foot
size.
Seed words: adaptable, fit, omni-fit.
Product names:"""

test_prompts = [prompt_A, prompt_B]


# Set your OpenAI key as an environment variable
# https://platform.openai.com/api-keys
# NOTE: `OpenAI` here is the SDK client imported from the `openai` package at
# the top of this cell; that import rebinds the `OpenAI` name that earlier
# cells imported from `langchain_openai`.
client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # Default
)

def get_response(prompt, model="gpt-3.5-turbo",
                 system_prompt="You are a helpful assistant."):
    """Send one user prompt to the chat completions API and return the reply.

    Parameters
    ----------
    prompt : str
        Text sent as the user message.
    model : str, optional
        Chat model name; defaults to the previously hard-coded
        "gpt-3.5-turbo".
    system_prompt : str, optional
        Text sent as the system message; defaults to the previously
        hard-coded "You are a helpful assistant.".

    Returns
    -------
    The message content of the first returned choice.
    """
    # Uses the module-level `client` created in this cell.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": prompt
            }
        ]
    )
    return response.choices[0].message.content

# Iterate through the prompts and get responses
# (num_tests API calls per variant, so len(test_prompts) * num_tests in total)
responses = []
num_tests = 5

for idx, prompt in enumerate(test_prompts):
    # prompt number as a letter (0 -> 'A', 1 -> 'B', ...)
    var_name = chr(ord('A') + idx)

    for i in range(num_tests):
        # Get a response from the model
        response = get_response(prompt)

        # One record per call; collected into a list and converted once below.
        data = {
            "variant": var_name,
            "prompt": prompt,
            "response": response
            }
        responses.append(data)

# Convert responses into a dataframe
# (note: rebinds `df`, which earlier cells use for the generated scenes)
df = pd.DataFrame(responses)

# Save the dataframe as a CSV file
df.to_csv("responses.csv", index=False)

print(df)

In [None]:
df

In [None]:
!pip install ipywidgets

In [None]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd

# load the responses.csv file
df = pd.read_csv("responses.csv")

# Shuffle the dataframe
# (no random_state is passed, so the order differs on every run)
df = df.sample(frac=1).reset_index(drop=True)

# df is your dataframe and 'response' is the column with the
# text you want to test
# Position of the row currently shown to the rater (module-level state that
# the button callback advances).
response_index = 0
# add a new column to store feedback
# (assigned from an empty Series, so it presumably starts out all-missing)
df['feedback'] = pd.Series(dtype='str')


def update_response():
    """Show the response at ``response_index`` and refresh the progress label.

    Reads the module-level ``df`` and ``response_index`` and writes to the
    module-level ``response`` (HTML widget) and ``count_label`` widgets.
    """
    import html

    new_response = df.iloc[response_index]['response']
    if pd.notna(new_response):
        # Escape the model output so characters such as '<' or '&' are shown
        # literally instead of being interpreted as markup by the HTML widget.
        new_response = "<p>" + html.escape(str(new_response)) + "</p>"
    else:
        new_response = "<p>No response</p>"
    response.value = new_response
    # Progress is displayed 1-based, e.g. "Response: 3/10".
    count_label.value = f"Response: {response_index + 1}"
    count_label.value += f"/{len(df)}"


In [None]:
def on_button_clicked(b):
    """Record a thumbs-up/down for the current response and advance.

    Parameters
    ----------
    b : widget button
        The clicked button; its ``description`` decides the score
        (thumbs-up -> 1, anything else -> 0).

    Uses the module-level ``df`` and ``response_index``. After the last
    response has been rated, the results are saved to "results.csv" and a
    per-variant summary is printed.
    """
    global response_index

    # Ignore clicks once every response has been rated: writing with df.at at
    # an index past the end would append a spurious row and re-save the file.
    if response_index >= len(df):
        return

    #  convert thumbs up / down to 1 / 0
    user_feedback = 1 if b.description == "\U0001F44D" else 0

    # update the feedback column
    df.at[response_index, 'feedback'] = user_feedback

    response_index += 1
    if response_index < len(df):
        update_response()
    else:
        # save the feedback to a CSV file
        df.to_csv("results.csv", index=False)

        print("A/B testing completed. Here's the results:")
        # Calculate score and num rows for each variant
        summary_df = df.groupby('variant').agg(
            count=('feedback', 'count'),
            score=('feedback', 'mean')).reset_index()
        print(summary_df)


In [None]:
# Widgets that update_response() writes into. Note that `response` is rebound
# here from earlier model outputs to an HTML widget.
response = widgets.HTML()
count_label = widgets.Label()

# Show the first response before any button is clicked.
update_response()

# Both buttons share one callback; it tells them apart by their description.
thumbs_up_button = widgets.Button(description='\U0001F44D')
thumbs_up_button.on_click(on_button_clicked)

thumbs_down_button = widgets.Button(
    description='\U0001F44E')
thumbs_down_button.on_click(on_button_clicked)

button_box = widgets.HBox([thumbs_down_button,
thumbs_up_button])

display(response, button_box, count_label)

In [None]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd

# This cell repeats the A/B-rating setup from the preceding cells in one place.
# load the responses.csv file
df = pd.read_csv("responses.csv")

# Shuffle the dataframe
# (no random_state is passed, so the order differs on every run)
df = df.sample(frac=1).reset_index(drop=True)

# df is your dataframe and 'response' is the column with the
# text you want to test
# Position of the row currently shown to the rater (module-level state that
# the button callback advances).
response_index = 0
# add a new column to store feedback
# (assigned from an empty Series, so it presumably starts out all-missing)
df['feedback'] = pd.Series(dtype='str')

def on_button_clicked(b):
    """Record a thumbs-up/down for the current response and advance.

    Parameters
    ----------
    b : widget button
        The clicked button; its ``description`` decides the score
        (thumbs-up -> 1, anything else -> 0).

    Uses the module-level ``df`` and ``response_index``. After the last
    response has been rated, the results are saved to "results.csv" and a
    per-variant summary is printed.
    """
    global response_index

    # Ignore clicks once every response has been rated: writing with df.at at
    # an index past the end would append a spurious row and re-save the file.
    if response_index >= len(df):
        return

    #  convert thumbs up / down to 1 / 0
    user_feedback = 1 if b.description == "\U0001F44D" else 0

    # update the feedback column
    df.at[response_index, 'feedback'] = user_feedback

    response_index += 1
    if response_index < len(df):
        update_response()
    else:
        # save the feedback to a CSV file
        df.to_csv("results.csv", index=False)

        print("A/B testing completed. Here's the results:")
        # Calculate score and num rows for each variant
        summary_df = df.groupby('variant').agg(
            count=('feedback', 'count'),
            score=('feedback', 'mean')).reset_index()
        print(summary_df)

def update_response():
    """Show the response at ``response_index`` and refresh the progress label.

    Reads the module-level ``df`` and ``response_index`` and writes to the
    module-level ``response`` (HTML widget) and ``count_label`` widgets.
    """
    import html

    new_response = df.iloc[response_index]['response']
    if pd.notna(new_response):
        # Escape the model output so characters such as '<' or '&' are shown
        # literally instead of being interpreted as markup by the HTML widget.
        new_response = "<p>" + html.escape(str(new_response)) + "</p>"
    else:
        new_response = "<p>No response</p>"
    response.value = new_response
    # Progress is displayed 1-based, e.g. "Response: 3/10".
    count_label.value = f"Response: {response_index + 1}"
    count_label.value += f"/{len(df)}"

# Widgets that update_response() writes into.
response = widgets.HTML()
count_label = widgets.Label()

# Show the first response before any button is clicked.
update_response()

# Both buttons share one callback; it tells them apart by their description.
thumbs_up_button = widgets.Button(description='\U0001F44D')
thumbs_up_button.on_click(on_button_clicked)

thumbs_down_button = widgets.Button(
    description='\U0001F44E')
thumbs_down_button.on_click(on_button_clicked)

# The widgets are only assembled here; they are displayed by a separate cell.
button_box = widgets.HBox([thumbs_down_button,
thumbs_up_button])

In [None]:
display(response, button_box, count_label)

In [None]:
from langchain.tools import LangDetectTool

In [None]:
!pip install langdetect

In [None]:
def detect_language(text):
    """Detect the language of ``text`` with ``LangDetectTool``.

    Parameters
    ----------
    text : str
        The text whose language should be detected.

    Returns
    -------
    Whatever ``LangDetectTool().run(text)`` returns.
    NOTE(review): the exact return format depends on LangDetectTool - confirm.
    """
    # The previous version also built a ChatOpenAI client that was never used
    # and wrapped the detector in a generic Tool whose only job was to forward
    # to `lang_detect_tool.run`; both are dropped and the detector is called
    # directly.
    lang_detect_tool = LangDetectTool()
    return lang_detect_tool.run(text)
                             

In [None]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings

# Prompt used when a query is routed to the physics persona; {query} is the
# only placeholder.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

# Prompt used when a query is routed to the mathematics persona; {query} is the
# only placeholder.
math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

# Embed each template once up front; prompt_router compares query embeddings
# against these. prompt_embeddings[i] corresponds to prompt_templates[i].
embeddings = OpenAIEmbeddings()
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)


def prompt_router(query):
    """Pick the prompt template whose embedding is closest to ``query``.

    The query is embedded, compared by cosine similarity with the pre-computed
    ``prompt_embeddings``, and the best-scoring entry of ``prompt_templates`` is
    returned wrapped in a ``PromptTemplate``. The chosen template and a
    MATH/PHYSICS marker are printed along the way.
    """
    scores = cosine_similarity([embeddings.embed_query(query)], prompt_embeddings)[0]
    chosen_template = prompt_templates[scores.argmax()]
    print("Similarity results=>", chosen_template)
    # Anything that is not the math template is reported as physics
    if chosen_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")
    return PromptTemplate.from_template(chosen_template)


In [None]:
# Smoke test: route a geometry question and display the selected PromptTemplate
prompt_router("Explain Pythagoras theorem")

In [None]:
def create_db_object_with_metadata():
    """Build a ``SQLDatabase`` whose reflected schema carries custom descriptions.

    The schema behind ``DATABASE_URL`` is reflected, then the rows of the
    ``table_metadata`` and ``column_metadata`` tables are copied into the
    ``info['description']`` of the matching reflected tables and columns.
    Both bookkeeping tables are excluded from the returned database object.

    Returns
    -------
    SQLDatabase
        Wrapper around the engine and the annotated metadata.

    Raises
    ------
    Any SQLAlchemy error raised while connecting, reflecting or querying; the
    session is closed before the error propagates.
    """
    # Local import: `text` is not among the notebook's top-level SQLAlchemy imports.
    from sqlalchemy import text

    # Create the SQLAlchemy engine and reflect the existing schema
    engine = create_engine(DATABASE_URL)
    metadata_obj = MetaData()
    metadata_obj.reflect(bind=engine)

    # Create a configured "Session" class
    Session = sessionmaker(bind=engine)
    session = Session()

    # Load custom metadata from the table_metadata and column_metadata tables.
    # Raw SQL has to be wrapped in text(), and .mappings() yields rows that can
    # be indexed by column name (plain Row objects are tuple-like in 2.x).
    try:
        table_metadata = session.execute(
            text("SELECT * FROM table_metadata")).mappings().all()
        column_metadata = session.execute(
            text("SELECT * FROM column_metadata")).mappings().all()
    finally:
        session.close()

    # Add table metadata; rows naming a table that was not reflected are skipped
    for row in table_metadata:
        table = metadata_obj.tables.get(row['table_name'])
        if table is None:
            continue
        table.info['description'] = row['description']

    # Add column metadata; rows naming an unknown table or column are skipped
    for row in column_metadata:
        table = metadata_obj.tables.get(row['table_name'])
        column = table.columns.get(row['column_name']) if table is not None else None
        if column is None:
            continue
        column.info['description'] = row['description']

    db = SQLDatabase(engine=engine, metadata=metadata_obj, ignore_tables=['table_metadata', 'column_metadata'])

    return db

In [None]:
# Commodity and crop names plus price-related trigger words.
# NOTE(review): several crop names carry a trailing space; they are left as-is
# here in case they mirror stored values - confirm.
commodities_price = ['Maize', 'Rice', 'Soya beans', 'Beans', 'Cow peas', 'Groundnuts']
crop_estimates = ['Maize', 'Beans', 'Cow peas', 'Dolichus beans ', 'Soy beans',
       'Ground beans', 'Paprika', 'Rice', 'Pigeon peas', 'Grams',
       'Sesame ', 'Field peas', 'Velvet beans', 'Chick peas', 'Wheat',
       'Millet', 'Sorghum ', 'Groundnuts', 'Cassava', 'Sweet potatoes',
       'Potatoes', 'Tobacco', 'Flue cured', 'Sunflower ', 'Chillies',
       'Cotton ', 'Bananas', 'Mangoes', 'Oranges', 'Tangerines', 'Coffee',
       'Pineapples', 'Guava', 'Pawpaws', 'Peaches', 'Lemons',
       'Grape fruits', 'Apples', 'Avocado pear', 'Macademia', 'Tomatoes',
       'Onions', 'Cabbage', 'Egg plants', 'Okra', 'Cucumber']
price_estimates_key_words = ["price", "cheap", "produce", 
                                 "buy", "sell", "sale", "find"]
# Normalise BEFORE de-duplicating: strip stray whitespace and lowercase each
# keyword so that e.g. 'Sesame ' becomes 'sesame', then sort so the list is the
# same on every run (plain set iteration order is arbitrary).
all_kw = sorted({kw.strip().lower()
                 for kw in commodities_price + crop_estimates + price_estimates_key_words})
print(all_kw)

In [None]:
import os
import json
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain

def load_examples(file_path):
    """Read the UTF-8 encoded JSON file at ``file_path`` and return its decoded content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        raw = handle.read()
    return json.loads(raw)



def format_translation_examples(examples_file, source_language, target_language):
    """Render the stored example pairs for one language direction as prompt text.

    The examples file is loaded with ``load_examples`` and looked up under the
    key ``"<source_language>-<target_language>"``. Each entry is read under the
    two language names and rendered as two lines, ``<source>: ...`` followed by
    ``<target>: ...``; all entries are joined with newlines.

    Returns an empty string when the file has no entry for that direction.
    """
    catalogue = load_examples(examples_file)
    pair_key = f"{source_language}-{target_language}"
    if pair_key not in catalogue:
        return ""

    rendered = []
    for entry in catalogue[pair_key]:
        rendered.append(
            f"{source_language}: {entry[source_language]}\n{target_language}: {entry[target_language]}"
        )
    return "\n".join(rendered)


def translate_with_openai(text, src_lan, dest_lan):
    """Translate ``text`` from ``src_lan`` to ``dest_lan`` with a few-shot chat prompt.

    Parameters
    ----------
    text : str
        The text to translate.
    src_lan : str
        Name of the source language; also used to look up the example pairs.
    dest_lan : str
        Name of the target language.

    Returns
    -------
    The value returned by ``LLMChain.run`` for the translation prompt.
    """
    # Create a ChatOpenAI instance
    chat_model = ChatOpenAI(temperature=0.7, openai_api_key=OPENAI_API_KEY)

    # Get and format translation examples for this language direction
    formated_examples = format_translation_examples(
        "./translation_examples.json",
        source_language=src_lan,
        target_language=dest_lan,
    )

    # System message with the examples. The placeholder names must match the
    # keys handed to run() below; the previous {src_lan}/{dest_lan} placeholders
    # were never supplied, so the chain failed on missing input keys.
    system_template = (
        "You are a professional translator. Your task is to translate "
        "{source_language} to {target_language}.\n"
        "Here are a few examples:\n"
        "\n"
        "{examples}\n"
        "\n"
        "Now, translate the following text:"
    )
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

    # Human message carrying the actual text to translate
    human_template = "{text}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    # Combine the prompts
    chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

    # Create an LLMChain for translation
    translation_chain = LLMChain(llm=chat_model, prompt=chat_prompt)

    return translation_chain.run({
        "source_language": src_lan,
        "target_language": dest_lan,
        "examples": formated_examples,
        "text": text
    })



In [None]:
# Module-level variant of the few-shot translation chain: the chain is built
# once here and reused by the translate_text function defined further down in
# this cell.
chat_model = ChatOpenAI(temperature=0.7, openai_api_key=OPENAI_API_KEY)
# Create a system message with examples; placeholders: {source_language},
# {target_language} and {examples}
system_template = """You are a professional translator. Your task is to translate {source_language} to {target_language}.
Here are a few examples:

{examples}

Now, translate the following text:"""

system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

# Create a human message for the actual translation request
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Combine the prompts
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

# Create an LLMChain for translation
translation_chain = LLMChain(llm=chat_model, prompt=chat_prompt)

# Function to translate text
def translate_text(text, source_language, target_language):
    """Translate ``text`` with the module-level ``translation_chain``.

    Example pairs for the requested direction are read from
    ``./translation_examples.json`` and passed to the chain together with the
    two language names and the text itself.
    """
    few_shot_block = format_translation_examples(
        "./translation_examples.json", source_language, target_language
    )
    chain_inputs = dict(
        source_language=source_language,
        target_language=target_language,
        examples=few_shot_block,
        text=text,
    )
    return translation_chain.run(chain_inputs)

In [None]:
# Try the translate_text currently defined in the kernel on a Chichewa question
translate_text("Mpunga ukugulitsidwa pa mtengo wanji?", "Chichewa", "English")

In [None]:
# Inspect the example block that is injected into the prompt for Chichewa -> English
format_translation_examples(examples_file="./translation_examples.json", 
                            source_language="Chichewa", target_language="English")

In [None]:
# NOTE(review): `format_examples` is not defined in the visible part of the
# notebook; `format_translation_examples` may be the intended helper - confirm.
format_examples(examples, source_language="Chichewa", target_language="English")

In [None]:
# Scratch cell: print every stored Chichewa -> English example.
# NOTE(review): `examples` is indexed like a dict here, while a later cell
# rebinds the same name to a list of SQL examples - confirm which is in scope.
relevant_examples = examples["Chichewa-English"]

for item in relevant_examples:
    print(item)
    # NOTE(review): the joined string below is rebuilt on every iteration but is
    # never printed, assigned or returned. `source_language`, `target_language`
    # and `key` are not set in this cell, and the entries are read via
    # 'source'/'target' keys, unlike format_translation_examples which reads
    # them via the language names - confirm the intended schema.
    "\n".join([f"{source_language}: {ex['source']}\n{target_language}: {ex['target']}" 
                          for ex in examples[key]])

In [None]:


# Variant of the translation chain with the few-shot examples written directly
# into the system prompt instead of being loaded from a JSON file.

# Create a ChatOpenAI instance
chat_model = ChatOpenAI(temperature=0.7, openai_api_key=OPENAI_API_KEY)

# Create a system message with examples. Only {source_language} and
# {target_language} are placeholders; they label each example pair as well as
# the task description.
system_template = """You are a professional translator. Your task is to translate {source_language} to {target_language}.
Here are a few examples:

{source_language}: Mtedza ukugulitsidwa pabwanji?
{target_language}: Whats the price of groundnuts?

{source_language}: Chimanga chikupezeka kuti?
{target_language}: Where can I find maize?

{source_language}: Ndikuti nyemba zikutchipa?
{target_language}: Where can I find beans at cheap price?

{source_language}: Chimanga chili pabwanji pano?
{target_language}: Whats the price of maize now?

{source_language}: Ku Dowa chimanga chili pa bwanji?
{target_language}: Whats the price of maize in Dowa?

{source_language}: Kodi ndi boma liti anakolola chimanga chambiri pakati pa Lilongwe kapena Kasungu?
{target_language}: Which district produced more maize: Lilongwe or Kasungu?

{source_language}: Kodi chimanga chili pa bwanji ku Rumphi?
{target_language}: How much is maize per Kg in Rumphi?

{source_language}: Mpunga ukugulitsidwa ndalama zingati ku Lilongwe?
{target_language}: Whats the price of rice in Lilongwe?

{source_language}: Mtedza otchipa ukupezeka mboma liti?
{target_language}: Which district has the cheap price for groundnuts?

{source_language}: Chimanga chambiri chikupezeka kuti?
{target_language}: Where can I find maize?

{source_language}: Ndi boma liti komwe anakolola chimanga chambiri?
{target_language}: Which district harvested large quantities of maize?

{source_language}: Ndi mbeu zanji anakolola bwino ku Rumphi?
{target_language}: Which crops produced the most yields in Rumphi

{source_language}: Soya ali pabwanji?
{target_language}: Whats the price of soya?

{source_language}: Mtedza otchipa ndingaupeze kuti?
{target_language}: Where can I find groundnuts at reasonable price?


Now, translate the following text:"""

system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)

# Create a human message for the actual translation request
human_template = "{text}"
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# Combine the prompts
chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

# Create an LLMChain for translation
translation_chain = LLMChain(llm=chat_model, prompt=chat_prompt)

# Function to translate text
def translate_text(text, source_language, target_language):
    """Run the module-level ``translation_chain`` on ``text`` for the given language pair."""
    chain_inputs = dict(
        source_language=source_language,
        target_language=target_language,
        text=text,
    )
    return translation_chain.run(chain_inputs)

# Example usage: translate one Chichewa question and print it next to the result
source_language, target_language = "Chichewa", "English"
source_text = "Mtedza ndingaupeze kuti?"

translated_text = translate_text(source_text, source_language, target_language)

for label, content in ((source_language, source_text),
                       (target_language, translated_text)):
    print(f"{label}: {content}")

In [None]:
# Create the model client. "gpt-4" is a chat model, so it has to be driven
# through the chat wrapper (ChatOpenAI); the completions-style `OpenAI` class
# used before sends it to the completions endpoint, which does not serve it.
llm = ChatOpenAI(temperature=0.7, openai_api_key=OPENAI_API_KEY, model="gpt-4")

# Create a zero-shot prompt template for translation
translation_template = PromptTemplate(
    input_variables=["source_language", "target_language", "text"],
    template="Translate the following {source_language} text to {target_language}: {text}"
)

# Create an LLMChain for translation
translation_chain = LLMChain(llm=llm, prompt=translation_template)

# Function to translate text
def translate_text(text, source_language, target_language):
    """Translate ``text`` by running the module-level ``translation_chain``."""
    field_names = ("source_language", "target_language", "text")
    field_values = (source_language, target_language, text)
    return translation_chain.run(dict(zip(field_names, field_values)))

In [None]:

# Translate one Chichewa question into English and print the result
source_language, target_language = "Chichewa", "English"
source_text = "Chimanga chili pa bwanji ku Malawi?"

translated_text = translate_text(source_text, source_language, target_language)
print(translated_text)

In [None]:
# Display the raw (unprinted) value of the last translation
translated_text

In [None]:
from langchain.llms import OpenAI

In [None]:
def translate_text(text, source_language="English", target_language="Chichewa"):
    """Translate ``text`` with a zero-shot gpt-3.5-turbo chat prompt.

    A new model client, prompt and chain are created on every call. The default
    direction is English to Chichewa.
    """
    prompt = ChatPromptTemplate.from_template(
        "Translate the following {source_language} text to {target_language}: {text}"
    )
    chain = LLMChain(
        llm=ChatOpenAI(api_key=OPENAI_API_KEY, model='gpt-3.5-turbo'),
        prompt=prompt,
    )
    chain_inputs = {
        'text': text,
        'source_language': source_language,
        'target_language': target_language,
    }
    return chain.run(chain_inputs)


In [None]:
# Translate a single keyword using the default language arguments of the
# translate_text currently defined in the kernel
translated_text = translate_text(text="cheap")

In [None]:
!pip install googletrans==4.0.0-rc1

In [None]:
import openai
def translate_text(text, source_language="English", target_language="Chichewa"):
    """Translate ``text`` by calling the OpenAI chat completions API directly.

    Parameters
    ----------
    text : str
        The text to translate.
    source_language, target_language : str
        Language names inserted into the system and user messages.

    Returns
    -------
    str
        The model's reply with surrounding whitespace removed; an empty string
        when the reply carries no text content.
    """
    # `openai.ChatCompletion` was removed in openai>=1.0. The module-level
    # `openai.chat.completions` client is the replacement and keeps honouring
    # the module-level / environment API key configuration.
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": f"You are a helpful assistant that translates {source_language} to {target_language}."
            },
            {
                "role": "user",
                "content": f"Translate the following text from {source_language} to {target_language}:\n\n{text}"
            }
        ]
    )
    # Responses are typed objects in openai>=1.0 and `content` may be None
    translation = response.choices[0].message.content or ""
    return translation.strip()

In [None]:
# Same keyword as above, with the language pair spelled out explicitly
translate_text(text="cheap", source_language="English", target_language="Chichewa")

In [None]:
from googletrans import Translator
def translate_text(text, source_language="en", target_language="ny"):
    """Translate ``text`` with googletrans and return the translated string.

    Unlike the other translate_text variants, the languages are passed as codes
    (defaults ``"en"`` and ``"ny"``) and go to the translator's ``src`` and
    ``dest`` arguments.
    """
    result = Translator().translate(text, src=source_language, dest=target_language)
    return result.text

In [None]:
# Build the SQLDatabase object (reflected schema plus table/column descriptions)
db = create_db_object_with_metadata()
# Disabled experiment: generate SQL for a sample question with gpt-3.5-turbo
# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
# chain = create_sql_query_chain(llm, db)
# response = chain.invoke({"question": "{}".format(SAMPLE_QUESTIONS["low-birthweight"])})
# response

In [None]:
import sqlalchemy

In [None]:
# Show which SQLAlchemy version the kernel has loaded
sqlalchemy.__version__

In [None]:

# Few-shot pairs of a natural-language question ("input") and the SQL that
# answers it ("query"); a later cell passes them to FewShotPromptTemplate.
# NOTE(review): an earlier cell indexes `examples` like a dict
# (examples["Chichewa-English"]); this assignment rebinds the name to a list.
examples = [
    {"input": "Which region has the highest number of children born with low birth weights?", 
     "query": "SELECT * FROM tab4711 ORDER BY number_children DESC LIMIT 1;",},

     {"input": "Which region has the highest percentage of children born with low birth weights?", 
     "query": "SELECT * FROM tab4711 ORDER BY percentage_below_2500g DESC LIMIT 1;",
     },

     {"input": "How many children received all vaccines before 12 months?", 
     "query": "SELECT vacc_b4_12months FROM tab501 WHERE vacc_category = 'All vaccinations';"},

     {"input": "Which region has the lowest rates in preschool for children?", 
     "query": "SELECT * FROM tab9011 ORDER BY percentage_children_sch ASC LIMIT 1;",},

     {"input": "Whats the average literacy rate among young women in Tunisia?",
      "query": "SELECT AVG(percentage_literate) AS avg_literacy_rate FROM tab971;",},
]

In [None]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Tool used by the chain below to run the generated SQL against `db`
execute_query = QuerySQLDataBaseTool(db=db)
# Question -> SQL chain built from `llm` and `db` with the library's default prompt
write_query = create_sql_query_chain(llm, db)

In [None]:
from operator import itemgetter

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt that turns question + SQL + SQL result into a natural-language answer
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

# Final stage: fill the prompt, call the model, keep only the text output
answer = answer_prompt | llm | StrOutputParser()
# Full pipeline: add "query" (from write_query), then "result" (the query passed
# through execute_query) to the input dict, and hand everything to `answer`.
chain = (
    RunnablePassthrough.assign(query=write_query).assign(
        result=itemgetter("query") | execute_query
    )
    | answer
)

# Run the pipeline on one of the stored sample questions
chain.invoke({"question": "{}".format(SAMPLE_QUESTIONS['vaccine_rates_all'])})

In [None]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

# How a single few-shot example is rendered inside the prompt
example_prompt = PromptTemplate.from_template("User input: {input}\nSQL query: {query}")
# Few-shot prompt for SQL generation: prefix with instructions and table info,
# then the first five entries of `examples`, then the user's question.
# The remaining variables are {input}, {top_k} and {table_info}.
sql_prompt = FewShotPromptTemplate(
    examples=examples[:5],
    example_prompt=example_prompt,
    prefix="You are a PostgreSQL expert. Given an input question, create a syntactically correct PostgreSQL query to run. Unless otherwise specificed, do not return more than {top_k} rows.\n\nHere is the relevant table info: {table_info}\n\nBelow are a number of examples of questions and their corresponding SQL queries.",
    suffix="User input: {input}\nSQL query: ",
    input_variables=["input", "top_k", "table_info"],
)

In [None]:
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool

# Rebuild the execution tool (now with verbose=True) and the question -> SQL
# chain, this time with the few-shot `sql_prompt` instead of the default prompt
execute_query = QuerySQLDataBaseTool(db=db, verbose=True)
write_query = create_sql_query_chain(llm, db, sql_prompt)

In [None]:
# Same answer prompt and pipeline as the earlier cell, rebuilt so that `chain`
# picks up the `write_query` / `execute_query` objects currently in scope.
answer_prompt = PromptTemplate.from_template(
    """Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

# Final stage: fill the prompt, call the model, keep only the text output
answer = answer_prompt | llm | StrOutputParser()
# Add "query" and then "result" to the input dict before answering
chain = (
    RunnablePassthrough.assign(query=write_query).assign(
        result=itemgetter("query") | execute_query
    )
    | answer
)

# Run the pipeline on one of the stored sample questions
chain.invoke({"question": "{}".format(SAMPLE_QUESTIONS["vaccines"])})