In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

import chromadb
from langchain.embeddings import OpenAIEmbeddings
import os

from langchain.agents import tool
import search_functions
from tqdm import tqdm

from langchain.tools.render import format_tool_to_openai_function

from langchain.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_to_openai_function_messages
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

In [2]:
# The OpenAI clients created in this notebook pick up the API key from the
# OPENAI_API_KEY environment variable. Do not hardcode a key here: set it in the
# shell (or a .env loader) before starting the kernel. Failing early gives a
# clearer error than an authentication failure deep inside an agent run, and
# avoids overwriting a real key that is already present in the environment.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in your environment before running this notebook."
    )

# Chat model shared by all tools and the agent; temperature 0 is requested for every call.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [3]:
# Embedding model and Chroma client used by the tool functions further down.
embeddings = OpenAIEmbeddings()
client = chromadb.Client()

# Print the collections before and after wiping them so the reset is visible
# in the cell output.
print(client.list_collections())

# Drop every existing collection so each run starts from an empty store.
for stale_collection in client.list_collections():
    client.delete_collection(stale_collection.name)

print(client.list_collections())

# "embedded_chunks" receives the paper chunks written by `search`;
# "context_library" receives the summaries written by `gather_evidence`.
embedded_chunks_collection = client.get_or_create_collection("embedded_chunks")
context_library_collection = client.get_or_create_collection("context_library")

[]
[]


In [29]:
@tool
def search(keywords: str, start_year=1000, end_year=2023) -> str:
    """Search for papers to increase the paper count. Input should be a string of keywords. Use
        this format: [keyword search], [start year]-[end year]. You may include years as the last
        word in the query, e.g. 'machine learning 2020' or 'quantum computing ai 2010-2020'. The
        current year is 2023."""
    # Maintainer notes (kept out of the docstring, which LangChain sends to the
    # model as the tool description):
    #   * `start_year` / `end_year` are accepted but not used below; only
    #     `keywords` is forwarded to the arXiv search helper.
    #   * Returns a status sentence with the number of articles whose chunks
    #     were actually embedded and stored, not the number of links found.

    links = search_functions.get_arxiv_articles(keywords=keywords)
    print(links)

    added_count = 0
    for link in tqdm(links, desc="Processing articles"):
        try:
            # Get chunks from the arXiv PDF.
            # assumes element 0 of the helper's result is the list of chunk texts — TODO confirm
            chunks = search_functions.request_arxiv_pdf_chunks(link)
            chunk_texts = list(chunks[0])
            if not chunk_texts:
                # Nothing to embed or store, so this article does not count as added.
                continue

            # Embed all chunks of the article in one call.
            embedded_chunks = embeddings.embed_documents(chunk_texts)

            # Store the whole article in a single upsert; the "<link>_<index>"
            # ids keep each chunk unique and make re-runs overwrite, not duplicate.
            embedded_chunks_collection.upsert(
                ids=[f"{link}_{i}" for i in range(len(chunk_texts))],
                documents=chunk_texts,
                embeddings=embedded_chunks,
            )
            added_count += 1
        except Exception as e:
            # Best effort: one bad article must not abort the whole search.
            print(f"Error processing {link}: {e}")

    return f"Added {added_count} new articles to the paper count."

def _parse_relevance_score(text: str) -> int:
    """Return the integer that follows the first '<SCORE:>' marker in `text`, or 0 if there is none."""
    try:
        return int(text.split('<SCORE:>')[1].strip())
    except (IndexError, ValueError):
        # Marker missing (e.g. a "Not applicable" reply) or not followed by a bare integer.
        return 0


@tool
def gather_evidence(query: str):
    """Give a specific question to get evidence for it. This will increase evidence and relevant
        paper counts."""
    # Maintainer notes (kept out of the docstring, which LangChain sends to the
    # model as the tool description):
    #   * Retrieves up to `M_results` stored chunks closest to `query`, asks the
    #     LLM to summarise and score each one, and adds every summary scoring at
    #     least `relevance_score_threshold` to the context library.
    #   * Returns the best (score, summary, chunk_id) tuple, or the string
    #     "No queries found" when the vector search returns nothing.

    M_results = 5
    summary_length = "max 750 characters"
    relevance_score_threshold = 9

    query_vector = embeddings.embed_query(query)

    # TODO: this is a plain nearest-neighbour lookup; MMR re-ranking is not implemented yet.
    results = embedded_chunks_collection.query(
        query_embeddings=query_vector,
        n_results=M_results,
    )

    # Chroma returns one inner list per query embedding. Only one query is sent,
    # so the hits live at index 0; the outer list's length is NOT the hit count.
    chunk_ids = results['ids'][0] if results['ids'] else []
    chunk_texts = results['documents'][0] if results['documents'] else []
    if not chunk_ids:
        return "No queries found"

    prompt = ChatPromptTemplate.from_template(
        """Summarize the text below to help answer the given question. Do not directly answer the question,
        instead summarize the Text so that it gives evidence to answer the question. Reply 'Not applicable' if
        text is irrelevant. Use {summary_length}. At the end of your response, provide an integer score from
        1-10 on a newline indicating direct relevance to question. Do not explain your score.
        (
            Directly Relevant Example:
            Relevant Information Summary: "The text..." (Summarizes the Text chunk in direct relation to the question, highlighting the ways it answers the question)
            <SCORE:> 10
            
            Very Useful Example:
            Relevant Information Summary: "The text..." (Summarizes the Text chunk in direct relation to the question, highlighting the ways it answers the question)
            <SCORE:> 8

            Somewhat Relevant Example:
            Relevant Information Summary: "The text..." (Summarizes the Text chunk in direct relation to the question, pointing out similarities to the question)
            <SCORE:> 6

            Not Relevant Example:
            Relevant Information Summary: "Not applicable."
        )
        
        Text: {chunk}
        Excerpt from: {citation}
        Question: {question}
        Relevant Information Summary:"""
    )

    chain = prompt | llm

    # Summarise and score every retrieved chunk.
    combined_list = []
    for chunk_id, chunk_text in zip(chunk_ids, chunk_texts):
        response = chain.invoke({
            "summary_length": summary_length,
            "chunk": chunk_text,
            "citation": chunk_id,
            "question": query
        })
        score = _parse_relevance_score(response.content)
        combined_list.append((score, response.content, chunk_id))

    # Best evidence first.
    combined_list.sort(key=lambda item: item[0], reverse=True)

    # Keep only strong evidence; the chunk id doubles as the library id so the
    # citation can be recovered from it later.
    for score, summary, chunk_id in combined_list:
        if score >= relevance_score_threshold:
            context_library_collection.add(
                ids=chunk_id,
                documents=summary,
                metadatas={"score": score}
            )

    return combined_list[0]

@tool
def answer_question(question: str) -> str:
    """Ask a model to propose an answer using evidence from papers. The input is the question to
    be answered. The tool may fail, indicating that better or different evidence should be
    found."""
    # Maintainer notes (kept out of the docstring, which LangChain sends to the
    # model as the tool description):
    #   * Builds the context from every summary in the context library, each
    #     followed by its citation, and asks the LLM for a cited answer.
    #   * Returns the text of the model's reply.

    answer_length = "750 words maximum"

    # The separate "what does the model already know" step is disabled, so the
    # answer prompt receives an empty background section.
    latent_knowledge = ""

    # Fetch the library once instead of re-querying it for every field of every entry.
    library = context_library_collection.get()

    context = ""
    for evidence_id, summary in zip(library['ids'], library['documents']):
        # Ids look like "<paper link>_<chunk index>"; drop the index part,
        # whatever its number of digits, to recover the citation.
        citation = evidence_id.rsplit('_', 1)[0]
        context += f"{summary}\nCitation: {citation}\n\n"

    prompt = ChatPromptTemplate.from_template(
        """Write an answer {answer_length} for the question below based on the provided context. If
        the context provides insufficient information, reply 'I cannot answer'. For each part of
        your answer, indicate which sources most support it via valid citation markers at the end
        of sentences, like (Example 2012). Answer in an unbiased, comprehensive, and scholarly
        tone. If the question is subjective, provide an opinionated answer in the concluding 1-2
        sentences.
        {context}
        Extra background information: {latent_knowledge}
        Question: {question}
        Answer:"""
    )

    ask_llm = prompt | llm

    answer = ask_llm.invoke({
            "answer_length": answer_length,
            "question": question,
            "context": context,
            "latent_knowledge": latent_knowledge,
    })

    # Return the reply text, matching the declared `str` return type, rather
    # than the message object itself.
    return answer.content

In [30]:
# Tools the agent is allowed to call.
tools = [search, gather_evidence, answer_question]

# Bind the OpenAI function description of every tool to the chat model so each
# request advertises them.
llm_with_tools = llm.bind(
    functions=list(map(format_tool_to_openai_function, tools))
)

In [31]:
# Name shared by the prompt's history placeholder and the agent input that fills it.
MEMORY_KEY = "chat_history"

prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant."),
    MessagesPlaceholder(variable_name=MEMORY_KEY),
    # Task instructions, sent as a human message (the role is written out explicitly).
    (
        "human",
        """Answer question: {question}. Search for papers, gather evidence, and answer. If you do not,
            have enough evidence, you can search for more papers (preferred) or gather more evidence
            with a different phrase. You may rephrase or break-up the question in those steps. Once
            you have five or more pieces of evidence from multiple sources, or you have tried many
            times, call answer_question tool. You may reject the answer and try again if it is
            incomplete.""",
    ),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Conversation memory; starts empty whenever this cell is run.
chat_history = []

# Pipeline: pick the prompt variables out of the executor's input mapping,
# render the prompt, call the tool-aware model, then parse its reply into an
# agent action or a final answer.
agent = (
    {
        "question": lambda inputs: inputs["question"],
        "agent_scratchpad": lambda inputs: format_to_openai_function_messages(
            inputs["intermediate_steps"]
        ),
        MEMORY_KEY: lambda inputs: inputs[MEMORY_KEY],
    }
    | prompt
    | llm_with_tools
    | OpenAIFunctionsAgentOutputParser()
)

agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [32]:
# Run the agent once on a single question, passing the history collected so far.
question = "How can we ensure AI systems make ethical decisions?"
result = agent_executor.invoke(
    {
        "question": question,
        "chat_history": chat_history,
    }
)

# Record both sides of the exchange so a follow-up call can see it.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=result["output"]),
]
#agent_executor.invoke({"question": "are these real words?", "chat_history": chat_history})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search` with `{'keywords': 'AI systems ethical decisions'}`


[0m[]


Processing articles: 0it [00:00, ?it/s]

[36;1m[1;3mAdded 0 new articles to the paper count.[0m




[32;1m[1;3m
Invoking: `gather_evidence` with `{'query': 'ethical decision making in AI systems'}`


[0m[33;1m[1;3m(8, 'The text includes references to various papers and reports related to ethical principles and decision making in AI systems. These include topics such as ethical design, human-agent collectives, personalized persuasion, explainable agency, computational models of ethical reasoning, and voting-based systems for ethical decision making. The text also mentions the challenges and roadblocks to the adoption of self-driving vehicles. Overall, the text provides evidence of research and discussions on ethical decision making in AI systems. \n\n<SCORE:> 8', 'http://arxiv.org/abs/1812.02953v1_10')[0m[32;1m[1;3m
Invoking: `gather_evidence` with `{'query': 'ethical AI decision making frameworks'}`


[0m

Insert of existing embedding ID: http://arxiv.org/abs/2306.01774v1_3
Add of existing embedding ID: http://arxiv.org/abs/2306.01774v1_3


[33;1m[1;3m(9, "The text discusses various ethical AI decision-making frameworks, including the Ethics Guidelines developed by the European Commission's High-level expert group on artificial intelligence (AI HLEG), the responsible research and innovation (RRI) framework, the principles of responsible AI proposed by Crowley et al., and the framework presented by Hallensleben et al. These frameworks emphasize principles such as respect for human autonomy, prevention of harm, fairness, explicability, adaptability, responsibility, and transparency. They also highlight the importance of considering cultural diversity and representation in the development of AI systems to ensure inclusivity, fairness, and trustworthiness. The frameworks provide guidance on incorporating values into algorithmic decision-making, measuring the fulfillment of values, and monitoring ethically relevant system characteristics. \n<SCORE:> 9", 'http://arxiv.org/abs/2306.01774v1_3')[0m[32;1m[1;3m
Invoking: `gathe

In [36]:
# Show the "output" field of the agent run result as this cell's value.
result['output']

"Ensuring that AI systems make ethical decisions requires the implementation of various frameworks, principles, and methodologies. The European Commission's High-level expert group on AI has identified four ethical principles that can guide AI decision-making: respect for human autonomy, prevention of harm, fairness, and explicability. These principles emphasize the importance of ensuring that AI systems respect human values, do not cause harm, treat individuals fairly, and are transparent and explainable in their decision-making processes (European Commission 2019).\n\nIn addition to these principles, there are seven key requirements for achieving ethical AI, as mentioned in the text. These requirements include accountability, data governance, transparency, diversity, non-discrimination, societal and environmental well-being, and robustness. These requirements highlight the need for AI systems to be accountable for their actions, to handle data responsibly, to be transparent in their 