## AutoGen Tutorial
Notebook written by John Adeojo
Founder and Chief Data Scientist at [Data-centric Solutions](https://www.data-centric-solutions.com/)


In [1]:
import autogen
import yaml
import openai 
import os

# Machine-specific locations. Each one can be overridden with an environment
# variable so the notebook can run on another machine without editing this
# cell; the defaults are the original hard-coded paths.
script_dir = os.environ.get(
    "AUTOGEN_TUTORIAL_KEYS_DIR",
    "C:/Users/johna/OneDrive/Documents/api_keys/",
)
# Later cells build file names with `index_path + "retrieved_context.json"`,
# so make sure the value always ends with a path separator.
index_path = os.path.join(
    os.environ.get(
        "AUTOGEN_TUTORIAL_INDEX_DIR",
        "G:/My Drive/Data-Centric Solutions/07. Blog Posts/AutoGen/autogen_tutorial/indexes/",
    ),
    "",
)
configurations_path = os.environ.get(
    "AUTOGEN_TUTORIAL_CONFIG_DIR",
    "G:/My Drive/Data-Centric Solutions/07. Blog Posts/AutoGen/autogen_tutorial/",
)

# Load the model configurations from configurations.json (looked up in
# configurations_path), filtered on the "gpt-3.5-turbo-16k" model name.
config_list = autogen.config_list_from_json(
    env_or_file="configurations.json",
    file_location=configurations_path,
    filter_dict={
        "model": ["gpt-3.5-turbo-16k"],
    },
)

def get_apikey(script_dir=script_dir):
    """
    Load the OpenAI API key from a YAML file.

    Opens "apikeys.yml" inside `script_dir`, parses it with yaml.safe_load and
    looks up the value stored under the "openai" -> "api_key" keys.

    Parameters:
    script_dir: Directory that contains "apikeys.yml". Defaults to the
        module-level `script_dir`.

    Returns:
    The value found at ['openai']['api_key'] in the parsed file.
    """
    keys_file = os.path.join(script_dir, "apikeys.yml")

    with open(keys_file, 'r') as handle:
        secrets = yaml.safe_load(handle)

    return secrets['openai']['api_key']

# Read the key from "apikeys.yml" in the default script_dir and set it as the
# module-level key on the openai package.
openai.api_key = get_apikey()

In [2]:
# Tool 1: Performs a query-based search for Wikipedia pages
from typing import Any, List
import wikipedia
from llama_index import download_loader, VectorStoreIndex, ServiceContext
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import get_default_text_splitter
import openai
from pydantic import BaseModel
from llama_index.program import OpenAIPydanticProgram
from utils import get_apikey
from typing import Callable, Dict, Optional, Union, List, Tuple, Any
from llama_hub.wikipedia.base import WikipediaReader
from llama_index import StorageContext
from llama_index import load_index_from_storage
import json
from llama_index.llms.openai import OpenAI

def load_index(filepath: str):
    """
    Load a previously persisted index from disk.

    Parameters:
    filepath (str): Directory the index was persisted to.

    Returns:
    The index object produced by load_index_from_storage.
    """
    # Rebuild the storage context from the directory the caller asked for.
    # (The argument used to be ignored in favour of the global index_path.)
    storage_context = StorageContext.from_defaults(persist_dir=filepath)
    # Load the index out of that storage context.
    return load_index_from_storage(storage_context)

def read_json_file(file_path: str) -> dict:
    """
    Parse a JSON file and return its content.

    Parameters:
    file_path (str): Path of the JSON file to read.

    Returns:
    The object decoded by json.load.
    """
    with open(file_path, 'r') as json_file:
        return json.load(json_file)

def create_wikidocs(wikipage_requests):
    """
    Download the requested Wikipedia pages as documents.

    Parameters:
    wikipage_requests: Page titles handed to the reader's load_data(pages=...).

    Returns:
    Whatever the "WikipediaReader" loader returns from load_data.
    """
    print(f"Preparing to Download:{wikipage_requests}")
    # Fetch the reader class by name, then instantiate it and load the pages.
    reader_cls = download_loader("WikipediaReader")
    wiki_documents = reader_cls().load_data(pages=wikipage_requests)
    print("Finished downloading pages")
    return wiki_documents

def index_wikipedia_pages(wikipage_requests):
    """
    Download the given Wikipedia pages, index them and persist the index.

    Parameters:
    wikipage_requests: Page titles passed on to create_wikidocs.

    Returns:
    The VectorStoreIndex built from the downloaded documents. The index is
    also persisted to the module-level index_path.
    """
    print(f"Preparing to index Wikipages: {wikipage_requests}")
    wiki_docs = create_wikidocs(wikipage_requests)

    # Node parser built on the default text splitter with
    # chunk_size=150 and chunk_overlap=45.
    node_parser = SimpleNodeParser.from_defaults(
        text_splitter=get_default_text_splitter(chunk_size=150, chunk_overlap=45)
    )
    context = ServiceContext.from_defaults(node_parser=node_parser)

    wiki_index = VectorStoreIndex.from_documents(
        wiki_docs, service_context=context, show_progress=False
    )
    wiki_index.storage_context.persist(index_path)
    print(f"{wikipage_requests} have been indexed.")
    return wiki_index

def search_and_index_wikipedia(
        query: str, lang: str = "en", results_limit: int = 5, **load_kwargs: Any
    ) -> str:
    """
    Search Wikipedia for `query` and index the pages that were found.

    Parameters:
    query (str): Search string passed to wikipedia.search.
    lang (str): Language passed to wikipedia.set_lang. Defaults to "en".
    results_limit (int): Maximum number of search results to index.
    **load_kwargs: Accepted for compatibility; not used by this function.

    Returns:
    str: A confirmation message listing the indexed page titles. The message
    is also printed. (Previously the function returned the result of print(),
    i.e. None, so a caller using the return value received no information.)
    """
    wikipedia.set_lang(lang)
    wikipage_requests = wikipedia.search(query, results=results_limit)
    index_wikipedia_pages(wikipage_requests)
    message = f"Finished indexing: {wikipage_requests}"
    print(message)
    return message

def query_vector_db(
    search_string: str,
    file_path = index_path,
    n_results: int = 10
) -> dict:
    """
    Query the persisted index and save the retrieved text chunks.

    Parameters:
    search_string (str): Query run against the index.
    file_path: Directory the index is loaded from. Defaults to index_path.
    n_results (int): Passed as similarity_top_k to the query engine.

    Returns:
    dict: {"text": [...]} holding the text of every source node of the query
    response. The same dictionary is written as JSON to
    index_path + "retrieved_context.json", which is the file
    generate_response reads.
    """
    # Pass the caller's directory through instead of always using the global.
    index = load_index(filepath=file_path)
    query_engine = index.as_query_engine(
        response_mode="compact", verbose=True, similarity_top_k=n_results
    )
    nodes = query_engine.query(search_string).source_nodes
    retrieved_context = {"text": [node.node.text for node in nodes]}

    # Use a dedicated local so the file_path parameter is not shadowed.
    context_path = index_path + "retrieved_context.json"
    with open(context_path, 'w') as f:
        json.dump(retrieved_context, f)

    return retrieved_context

def generate_response(task:str):
    """
    Ask the chat model to answer `task` using the saved retrieved context.

    Sets openai.api_key from get_apikey(), reads the JSON stored at
    index_path + "retrieved_context.json" and sends a three-message
    conversation (system, user, assistant) to the "gpt-3.5-turbo-16k" model.

    Parameters:
    task (str): The task or question to answer.

    Returns:
    The content of the first choice's message in the completion response.
    """
    openai.api_key = get_apikey()
    context = read_json_file(index_path + "retrieved_context.json")

    system_msg = {
        "role": "system",
        "content": "You are a helpful assistant, you think step by step to help you respond",
    }
    user_msg = {"role": "user", "content": task}
    # The retrieved context is passed in a message with the "assistant" role.
    context_msg = {
        "role": "assistant",
        "content": f"Use the information `{context}` to help you respond to the `{task}`",
    }

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[system_msg, user_msg, context_msg],
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

In [3]:
# Configuration handed to the LLM-backed agent: the JSON-schema descriptions
# of the three callable tools, the model configuration list and the request
# timeout.
llm_config = {
    "functions": [
        {
            "name": "search_and_index_wikipedia",
            "description": "Use this to search for and index relevant wikipedia pages.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "A search string for relevant Wikipedia pages.",
                    }
                },
                "required": ["query"],
            },
        },

        {
            "name": "query_vector_db",
            "description": "Useful for retrieving relevant context from the Wikipedia index.",
            "parameters": {
                "type": "object",
                "properties": {
                    "search_string": {
                        "type": "string",
                        "description": "A query to find relevant context from the Wikipedia index.",
                    }
                },
                "required": ["search_string"],
            },
        },

        {
            "name": "generate_response",
            "description": "Useful to generate a response based on additional context provided by wikipedia search.",
            "parameters": {
                "type": "object",
                "properties": {
                    "task": {
                        "type": "string",
                        # The schema key must be "description" (it was
                        # mistakenly written as "task"), matching the other
                        # two function schemas.
                        "description": "Your understanding of the task.",
                    }
                },
                "required": ["task"],
            },
        },
    ],
    "config_list": config_list,
    "request_timeout": 120,
    # "seed":43
}

In [5]:
import autogen 
# Proxy agent that starts the chat. It never asks for human input
# (human_input_mode="NEVER") and auto-replies at most 5 times in a row.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    # A message counts as a termination message when its content is non-empty
    # and, ignoring trailing whitespace, ends with "TERMINATE".
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=5,
    system_message= '''Your job is to use agents to complete the task.
    You will give a final response once you are satisfied with the answer''',
    # llm_config=llm_config,
    
)

# Assistant agent configured with llm_config, i.e. with the three function
# schemas declared there.
assistant = autogen.AssistantAgent(
    name="assistant",
    system_message='''Your job is to come up with search queries to find relevant Wikipedia pages''',
    llm_config=llm_config,
    human_input_mode="NEVER"
)

# index_creator_agent = autogen.AssistantAgent(
#     name="index_creator_agent",
#     system_message='''Your job is to find and index relevant Wikipedia pages to help you respond to the task.
#     You should use the search_and_index_wikipedia tool for this.
#     Reply `TERMINATE` in the end when everything is done.''',
#     llm_config=llm_config,
#     human_input_mode="NEVER"
# )

# retrieve_agent = autogen.AssistantAgent(
#     name="retrieve_agent",
#     system_message='''You are an information retrieval agent. Your job is to retrieve relevant information from the index 
#     created by wikisearch_agent using the query_vector_db tool.
#     Reply `TERMINATE` in the end when everything is done.''',
#     llm_config=llm_config,
#     human_input_mode="NEVER"

# )

# response_agent = autogen.AssistantAgent(
#     name="response_agent",
#     system_message='''Your job is to generate a final response to the task based using the generate_response tool.
#     Reply `TERMINATE` in the end when everything is done.''',
#     llm_config=llm_config,
#     human_input_mode="NEVER"

# )

# Register the Python callables on user_proxy under the same names that are
# declared in llm_config["functions"].
user_proxy.register_function(
    function_map={
        "search_and_index_wikipedia": search_and_index_wikipedia,
        "query_vector_db":query_vector_db,
        "generate_response":generate_response
    }
)


# Start the conversation: user_proxy sends the question below to assistant.
user_proxy.initiate_chat(
    assistant,
    message="""Which was the first bank to default in the 2023 US banking Crsis?""",
)

# groupchat = autogen.GroupChat(agents=[user_proxy, index_creator_agent, retrieve_agent, response_agent], messages=[], max_round=20)
# manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
# user_proxy.initiate_chat(manager, message="What is the population of Paris?")


[33muser_proxy[0m (to assistant):

Which was the first bank to default in the 2023 US banking Crsis?

--------------------------------------------------------------------------------
[33massistant[0m (to user_proxy):

[32m***** Suggested function Call: search_and_index_wikipedia *****[0m
Arguments: 
{
  "query": "2023 US banking crisis"
}
[32m***************************************************************[0m

--------------------------------------------------------------------------------
[35m
>>>>>>>> EXECUTING FUNCTION search_and_index_wikipedia...[0m
Preparing to index Wikipages: ['2023 United States banking crisis', 'List of banking crises', 'Venezuelan banking crisis of 1994', 'Ghana banking crisis', 'Bank run']
Preparing to Download:['2023 United States banking crisis', 'List of banking crises', 'Venezuelan banking crisis of 1994', 'Ghana banking crisis', 'Bank run']
[33muser_proxy[0m (to assistant):

[32m***** Response from calling function "search_and_index_wikiped