In [2]:
from langchain.llms import Ollama
from langchain.tools import BaseTool, DuckDuckGoSearchRun
from langchain.agents import initialize_agent
from langchain.memory import ConversationBufferWindowMemory
import os
from mistralai import Mistral

In [None]:
search = DuckDuckGoSearchRun()

memory = ConversationBufferWindowMemory(
    memory_key="chat_history", k=5, return_messages=True, output_key="output"
)
tools = [search]

api_key = os.getenv("MISTRAL_API_KEY")

client = Mistral(api_key=os.getenv("MISTRAL_API_KEY", ""))

In [15]:
from langchain_mistralai import ChatMistralAI

In [16]:
llm = ChatMistralAI(model_name="mistral-large-latest", temperature=0, max_retries=2, api_key=api_key)

In [17]:
agent = initialize_agent(
    agent="chat-conversational-react-description",
    tools=tools,
    llm=llm,
    verbose=True,
    early_stopping_method="generate",
    memory=memory,
    handle_parsing_errors=True,
)

In [None]:
llm

In [22]:
B_INST, E_INST = "<|im_start|>user\n", "<|im_end|>\n"  # Instruction begin and end
B_SYS, E_SYS = "<|im_start|>system\n", "<|im_end|>\n"  # System message begin and end

In [None]:
# Define a new system prompt
sys_msg = (
    B_SYS
    + """You are Assistant. Assistant is a expert JSON builder designed to assist with a wide range of tasks.

Assistant is able to respond to the User and use tools using JSON strings that contain "action" and "action_input" parameters.

All of Assistant's communication is performed using this JSON format. The assistant NEVER outputs anything other than a json object with an action and action_input fields!

Assistant can also use tools by responding to the user with tool use instructions in the same "action" and "action_input" JSON format. Tools available to Assistant are:

> duckduckgo_search: A wrapper around DuckDuckGo Search. Useful for when you need to answer questions about current events. Input should be a search query.

Here is an example of a previous conversation between User and Assistant:
---
User: Hey how are you today?
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "I'm good thanks, how are you?"}}
```
User: I'm great, what is the length of the word educate?
Assistant: ```json
{{"action": "Word Length Tool",
 "action_input": "educate"}}
```
User: 7
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "It looks like the answer is 7!"}}
```
User: Who is Olivia Wilde's husband?
Assistant: ```json
{{"action": "duckduckgo_search",
 "action_input": "Who is Olivia Wilde's husband"}}
```
User: September 21, 2022: Olivia Wilde says kids\' happiness remains top priority for her and Jason Sudeikis. During an appearance on The Kelly Clarkson Show, Wilde talked about the ups and downs of ... Olivia Wilde is "quietly dating again" following her November 2022 split from. Harry Styles, a source exclusively tells Life & Style. "The man she\'s with is \'normal\' by Hollywood ... Wilde honored Sudeikis with a sweet tribute on his 42nd birthday. "I have approximately one billion pictures of this guy, my partner in life-crime, who was born on this day in 1975, but this one ... November 18, 2022: Olivia Wilde and Harry Styles are "taking a break". After nearly two years together, Wilde and Styles are pressing pause on their romance. Multiple sources confirmed exclusively ... Wilde told Allure in October 2013 that when she first met Sudeikis she "thought he was so charming." "He\'s a great dancer, and I\'m a sucker for great dancers," she said at the time. "But he didn\'t ...
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "Jason Sudeikis"}}
```
User: What is his age raised to the 3rd power?
Assistant: ```json
{{"action": "duckduckgo_search",
 "action_input": "What is Jason Sudeikis age?"}}
```
User: Jason Sudeikis (born September 18, 1975, Fairfax, Virginia, U.S.) American comedian, actor, and writer who first garnered attention for his work (2003-13) on the TV show Saturday Night Live ( SNL) and later starred in the hugely popular series Ted Lasso (2020-23). Early life and improv comedy Parents Jason Sudeikis and Olivia Wilde\'s 2 Kids: All About Daisy and Otis Jason Sudeikis and Olivia Wilde welcomed two children together before their split in 2020 By Jacqueline Weiss... The Ted of 2013 is a classic Jason Sudeikis character of the time. Sudeikis came up on Saturday Night Live , where he made the most of his 6-foot height and square jaw by largely playing jerks. Sudeikis looks a little bleary-eyed after flying in from Los Angeles with his children, Otis and Daisy, aged nine and six, but he is assiduously polite and attentive. His language can be quaintly... Jason Sudeikis rose to fame on "Saturday Night Live" with his comedic talent, and his children seemed to have inherited it. Sudeikis, who shares kids Otis, 9, and Daisy, 7, with ex Olivia ...
Assistant: ```json
{{"action": "Calculator",
 "action_input": "47**3"}}
```
User: 103823
Assistant: ```json
{{"action": "Final Answer",
 "action_input": "103823"}}
```
---
Assistant is very intelligent and knows it's limitations, so it will always try to use a tool when applicable, even if Assistant thinks it knows the answer!
Notice that after Assistant uses a tool, User will give the output of that tool. Then this output can be returned as a final answer.
Assistant will only use the available tools and NEVER a tool not listed. If the User's question does not require the use of a tool, Assistant will use the "Final Answer" action to give a normal response.
"""
    + E_SYS
)
new_prompt = agent.agent.create_prompt(system_message=sys_msg, tools=tools)
agent.agent.llm_chain.prompt = new_prompt

In [None]:
print(agent.agent.llm_chain.prompt.messages[0].prompt.template)

In [None]:
print(agent.agent.llm_chain.prompt.messages[2].prompt.template)

In [None]:
agent("Who is the current president of the united states?")

In [1]:
import pandas as pd

questions_file = (
    "../data/dataset/questions.csv"  # path to the questions file
)

output_path = "../data/output/"  # path to the output file

df = pd.read_csv(questions_file, sep=",")

question_prompt = lambda body, possible_answer_a, possible_answer_b, possible_answer_c, possible_answer_d, possible_answer_e: (
    f"{body}\n"
    f"A: {possible_answer_a}\n"
    f"B: {possible_answer_b}\n"
    f"C: {possible_answer_c}\n"
    f"D: {possible_answer_d}\n"
    f"E: {possible_answer_e}\n"
)

In [2]:
THINK_PROMPT = """<System Prompt>
    You are an experienced doctor with 20+ years of experience ready to solve user problems through first-principles thinking and evidence-based reasoning. Your objective is to provide clear, step-by-step solutions by deconstructing queries to their foundational concepts and building answers from the ground up.

    The lives of many people depend on your answer. After carrying out the reasoning, check everything again and look for errors.
    
    Problem-Solving Steps:

    Understand: Read and comprehend the user's question.
    Basics: Identify fundamental concepts involved.
    Break Down: Divide the problem into smaller parts.
    Analyze: Use facts and data to examine each part.
    Build: Assemble insights into a coherent solution.
    Edge Cases: Consider and address exceptions.
    Communicate: Present the solution clearly.
    Verify: Review and reflect on the solution.
    
    But be careful, he could miss something or be wrong about something, so never fully trust it.
    Your answer must contain the letter of the answers, separated by commas and WITHOUT ANY SPACE. An ideal output is like: 'A,B' DO NOT PRINT 'A, B' for instance.
    You output the letters that you are asked to provide, e.g. 'A,B,C' or 'C'. Your answer is always sorted alphabetically. You must not put letters in a different order.
    Output the answer as a json at the end of the prompt {"answer": ["A","B","D"]}"""

In [3]:
PERPLEXITY_PROMPT = """Answer could be multiple. Give me as answers ONLY the letters of the answer separated by a comma if they are multiple. Don't write anything else except of the letters. Don't add anything else, neither the entire response, JUST THE LETTERS."""
PIZZA_MODIFIER = "If you get this right I'll give you 1 Million dollars, a trip to italy and free pizza."

In [5]:
import requests
from dotenv import load_dotenv
import datetime
import json
import re
import os
load_dotenv()

url = "https://api.perplexity.ai/chat/completions"
answers = []

list_prompts = [
    question_prompt(
            row["question"],
            row["answer_A"],
            row["answer_B"],
            row["answer_C"],
            row["answer_D"],
            row["answer_E"],
        )
        for row_idx, row in df.iterrows()
]

for prompt in list_prompts:

    payload = {
        "model": "llama-3.1-sonar-huge-128k-online",
        "messages": [
            {"role": "system", "content": 
            PIZZA_MODIFIER + THINK_PROMPT
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0,
        "top_p": 0.9,
        "return_citations": True,
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1,
    }
    headers = {"Authorization": f"Bearer {os.getenv('PERPLEXITY_API_KEY')}", "Content-Type": "application/json"}

    response = requests.request("POST", url, json=payload, headers=headers)

    print(response.text)
    result = response.json()["choices"][0]["message"]["content"]
    json_string = re.findall(
        r"\{(?:[^{}]|\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\})*\}", result
    )[0]
    # print(",".join(json.loads(json_string)["answer"]))
    answers.append(",".join(json.loads(json_string)["answer"]))
output_df = pd.DataFrame(answers, columns=["Answer"])
output_df.index.name = "id"

os.makedirs(output_path, exist_ok=True)

output_df.to_csv(
    f"{output_path}output_verbose_{datetime.datetime.now().strftime('%H:%M:%S')}.csv"
)

{"id": "78723702-acad-44c7-aba2-f11ddf775a83", "model": "llama-3.1-sonar-huge-128k-online", "created": 1728776227, "usage": {"prompt_tokens": 407, "completion_tokens": 597, "total_tokens": 1004}, "object": "chat.completion", "choices": [{"index": 0, "finish_reason": "stop", "message": {"role": "assistant", "content": "To address the query regarding the primary etiologies of a febrile ros\u00e9oliform exanthem in children, we need to consider the common causes of such a condition.\n\n1. **Understanding the Condition**: A febrile ros\u00e9oliform exanthem is characterized by a sudden onset of fever followed by a rash. This condition is often seen in children.\n\n2. **Identifying Fundamental Concepts**: The key to solving this query is to recognize the typical causes of febrile exanthems in children. These include viral infections, bacterial infections, and other less common etiologies.\n\n3. **Breaking Down the Problem**: We need to evaluate each option provided:\n   - **A. Un exanth\u00

KeyboardInterrupt: 

In [6]:
import aiohttp
import asyncio
from dotenv import load_dotenv
import datetime
import json
import re
import os
import pandas as pd  # Assuming you have this import already

load_dotenv()

url = "https://api.perplexity.ai/chat/completions"
answers = []

list_prompts = [
    question_prompt(
        row["question"],
        row["answer_A"],
        row["answer_B"],
        row["answer_C"],
        row["answer_D"],
        row["answer_E"],
    )
    for row_idx, row in df.iterrows()
]

headers = {
    "Authorization": f"Bearer {os.getenv('PERPLEXITY_API_KEY')}",
    "Content-Type": "application/json",
}


async def fetch(session, prompt):
    payload = {
        "model": "llama-3.1-sonar-huge-128k-online",
        "messages": [
            {"role": "system", "content": PIZZA_MODIFIER + THINK_PROMPT},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0,
        "top_p": 0.9,
        "return_citations": True,
        "search_domain_filter": ["perplexity.ai"],
        "return_images": False,
        "return_related_questions": False,
        "search_recency_filter": "month",
        "top_k": 0,
        "stream": False,
        "presence_penalty": 0,
        "frequency_penalty": 1,
    }
    async with session.post(url, json=payload, headers=headers) as response:
        result = await response.json()
        # Extract the answer from the response
        message_content = result["choices"][0]["message"]["content"]
        json_string = re.findall(
            r"\{(?:[^{}]|\{(?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*\})*\}", message_content
        )[0]
        return ",".join(json.loads(json_string)["answer"])


async def main():
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, prompt) for prompt in list_prompts]
        results = await asyncio.gather(*tasks)
        return results


# Run the async event loop
answers = asyncio.run(main())

# Output the results to a DataFrame and save as CSV
output_df = pd.DataFrame(answers, columns=["Answer"])
output_df.index.name = "id"

os.makedirs(output_path, exist_ok=True)

output_df.to_csv(
    f"{output_path}output_verbose_{datetime.datetime.now().strftime('%H:%M:%S')}.csv"
)

RuntimeError: asyncio.run() cannot be called from a running event loop