## AutoGen Tutorial
Notebook written by John Adeojo
Founder and Chief Data Scientist at [Data-centric Solutions](https://www.data-centric-solutions.com/)


In [21]:
import autogen
import yaml
import openai 
import os

# Local directories used by the notebook. Each one can be overridden with an
# environment variable so the notebook also runs on machines other than the
# author's; when a variable is unset the original location is used unchanged.
# os.path.join(path, "") guarantees a trailing separator, which later code
# relies on when it builds file names by string concatenation.
script_dir = os.path.join(
    os.environ.get(
        "AUTOGEN_TUTORIAL_API_KEYS_DIR",
        "C:/Users/johna/OneDrive/Documents/api_keys/",
    ),
    "",
)
index_path = os.path.join(
    os.environ.get(
        "AUTOGEN_TUTORIAL_INDEX_DIR",
        "G:/My Drive/Data-Centric Solutions/07. Blog Posts/AutoGen/autogen_tutorial/indexes/",
    ),
    "",
)
configurations_path = os.path.join(
    os.environ.get(
        "AUTOGEN_TUTORIAL_CONFIG_DIR",
        "G:/My Drive/Data-Centric Solutions/07. Blog Posts/AutoGen/autogen_tutorial/",
    ),
    "",
)

# Model configurations are read from "configurations.json" inside
# configurations_path, filtered on the "model" field to the two models below.
config_list = autogen.config_list_from_json(
    env_or_file="configurations.json",
    file_location=configurations_path,
    filter_dict={
        "model": ["gpt-4", "gpt-3.5-turbo-16k"],
    },
)

def get_apikey(script_dir=script_dir):
    """Read the OpenAI API key from ``apikeys.yml`` inside ``script_dir``.

    Args:
        script_dir: Directory containing ``apikeys.yml``. Defaults to the
            module-level ``script_dir`` as it was when this function was
            defined.

    Returns:
        The value stored under ``openai`` -> ``api_key`` in the YAML file.
    """
    key_file = os.path.join(script_dir, "apikeys.yml")

    with open(key_file, 'r') as handle:
        secrets = yaml.safe_load(handle)
        return secrets['openai']['api_key']

# Hand the key returned by get_apikey() to the openai module (module-level side effect).
openai.api_key = get_apikey()

In [22]:
# Tool 1: Does a query based search for Wikipages
from typing import Any, List
import wikipedia
from llama_index import download_loader, VectorStoreIndex, ServiceContext
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import get_default_text_splitter
import openai
from pydantic import BaseModel
from llama_index.program import OpenAIPydanticProgram
from utils import get_apikey
from typing import Callable, Dict, Optional, Union, List, Tuple, Any
from llama_hub.wikipedia.base import WikipediaReader
from llama_index import StorageContext
from llama_index import load_index_from_storage
import json
from llama_index.llms.openai import OpenAI

def load_index(filepath: str):
    """Load a previously persisted index from ``filepath``.

    Args:
        filepath: Directory the index was persisted to.

    Returns:
        The object returned by ``load_index_from_storage`` for that directory.
    """
    # Rebuild the storage context from the directory the caller asked for.
    # (Previously the argument was ignored and the global ``index_path`` was
    # always used.)
    storage_context = StorageContext.from_defaults(persist_dir=filepath)
    return load_index_from_storage(storage_context)

def read_json_file(file_path: str) -> dict:
    """Parse the JSON document stored at ``file_path`` and return it."""
    with open(file_path, 'r') as handle:
        return json.load(handle)

def create_wikidocs(wikipage_requests):
    """Download the requested Wikipedia pages as documents.

    Args:
        wikipage_requests: Page titles passed to the reader as ``pages``.

    Returns:
        Whatever the reader's ``load_data`` returns for those pages.
    """
    print(f"Preparing to Download:{wikipage_requests}")
    # Fetch the reader class through download_loader and use it straight away.
    reader_cls = download_loader("WikipediaReader")
    wiki_docs = reader_cls().load_data(pages=wikipage_requests)
    print("Finished downloading pages")
    return wiki_docs

def index_wikipedia_pages(wikipage_requests, chunk_size: int = 150, chunk_overlap: int = 45):
    """Download the given Wikipedia pages, index them and persist the index.

    Args:
        wikipage_requests: Page titles to download and index.
        chunk_size: ``chunk_size`` passed to the text splitter (default 150,
            the value that used to be hard-coded).
        chunk_overlap: ``chunk_overlap`` passed to the text splitter
            (default 45, the value that used to be hard-coded).

    Returns:
        The index built by ``VectorStoreIndex.from_documents``. Its storage
        context is also persisted to the module-level ``index_path``.
    """
    print(f"Preparing to index Wikipages: {wikipage_requests}")
    documents = create_wikidocs(wikipage_requests)
    text_splits = get_default_text_splitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    parser = SimpleNodeParser.from_defaults(text_splitter=text_splits)
    service_context = ServiceContext.from_defaults(node_parser=parser)
    index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=False)
    # Persist so the index can be reloaded later from index_path.
    index.storage_context.persist(index_path)
    print(f"{wikipage_requests} have been indexed.")
    return index

def search_and_index_wikipedia(
        query: str, lang: str = "en", results_limit: int = 3, **load_kwargs: Any
    ):
    """Search Wikipedia for ``query`` and index the pages that were found.

    Args:
        query: Search string passed to ``wikipedia.search``.
        lang: Language code passed to ``wikipedia.set_lang``.
        results_limit: Maximum number of results requested from the search.
        **load_kwargs: Accepted for backward compatibility; not used.

    Returns:
        A status message listing the page titles that were indexed, or a
        message saying nothing was found when the search returns no titles.
    """
    wikipedia.set_lang(lang)
    wikipage_requests = wikipedia.search(query, results=results_limit)
    print (f"I have found on Wikipedia: {wikipage_requests}")
    if not wikipage_requests:
        # Nothing to download: skip indexing so index_wikipedia_pages does not
        # persist over whatever is already stored for an empty page list.
        return f"No Wikipedia pages found for query: {query}"
    index_wikipedia_pages(wikipage_requests)
    # Report the titles themselves; the message previously interpolated
    # type(wikipage_requests), which told the caller only "<class 'list'>".
    return f"Finished indexing: {wikipage_requests}"


def query_wiki_index(search_string: str, file_path = index_path,
    n_results: int = 20
): 
    """Query the persisted index and return the text of the retrieved nodes.

    Args:
        search_string: Query passed to the index's query engine.
        file_path: Directory the index is loaded from and where
            ``retrieved_context.json`` is written. Defaults to the
            module-level ``index_path``.
        n_results: Passed to the query engine as ``similarity_top_k``.

    Returns:
        A dict ``{"text": [...]}`` holding the text of each source node of the
        query response. The same dict is also dumped to
        ``retrieved_context.json`` inside ``file_path``.
    """
    # Honour the caller's directory; previously the argument was ignored and
    # the global index_path was used for both loading and writing.
    index = load_index(filepath=file_path)
    query_engine = index.as_query_engine(
        response_mode="compact", verbose=True, similarity_top_k=n_results
    )
    nodes = query_engine.query(search_string).source_nodes
    retrieved_context = {"text": [node.node.text for node in nodes]}

    # os.path.join copes with directories given with or without a trailing separator.
    output_path = os.path.join(file_path, "retrieved_context.json")
    with open(output_path, 'w') as f:
        json.dump(retrieved_context, f)

    return retrieved_context

In [23]:
# Configuration passed to the agents as llm_config. The "functions" entry holds
# one schema per callable tool; both tools take a single required string
# argument, so the schemas are generated from a compact table of
# (function name, function description, argument name, argument description).
llm_config = {
    "functions": [
        {
            "name": tool_name,
            "description": tool_description,
            "parameters": {
                "type": "object",
                "properties": {
                    arg_name: {
                        "type": "string",
                        "description": arg_description,
                    }
                },
                "required": [arg_name],
            },
        }
        for tool_name, tool_description, arg_name, arg_description in (
            (
                "search_and_index_wikipedia",
                "Indexes Wikipedia pages based on a specified query to build a knowledge base for future reference. Use before query_wiki_index.",
                "query",
                "The search query for identifying relevant Wikipedia pages to index.",
            ),
            (
                "query_wiki_index",
                "Queries the indexed Wikipedia knowledge base to retrieve pertinent information",
                "search_string",
                "The query to search the indexed Wikipedia knowledge base for relevant information.",
            ),
        )
    ],
    "config_list": config_list,
    "request_timeout": 120,
    "seed":43
}

In [24]:
import autogen 
# Proxy agent that starts the chat and has the Python tool functions
# registered on it further down.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    # A message counts as a termination message when its "content" is non-empty
    # and, ignoring trailing whitespace, ends with "TERMINATE".
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
    human_input_mode="NEVER",
    max_consecutive_auto_reply=5,
    # system_message= '''Use the tools available to respond to queries.''',
    # llm_config=llm_config,
    
)

# Assistant agent: its system message tells it to call
# `search_and_index_wikipedia` first, then `query_wiki_index`, and to finish
# with TERMINATE. It receives llm_config, which carries the function schemas.
assistant = autogen.AssistantAgent(
    name="assistant",
    system_message='''Complete the task. Begin by utilising the `search_and_index_wikipedia` function to 
    index the relevant Wikipedia Knowledge Base, followed by using the `query_wiki_index` 
    function to retrieve pertinent information to aid in task completion. 
    Conclude with TERMINATE once the answer has been delivered.
    ''',
    llm_config=llm_config,
    # human_input_mode="NEVER"
)

# Moderator agent: its system message asks it to guide the assistant's function
# queries. It is given the same llm_config as the assistant, so it is also
# configured with the function schemas.
moderator = autogen.AssistantAgent(
    name="moderator",
    system_message='''Your role is to guide the `assistant` by suggesting what information may be 
    necessary to effectively address the query.
    Ensure the assistant is making accurate and relevant queries 
    to each function to retrieve the needed information for task completion.
    Conclude with TERMINATE once the answer has been delivered.
    ''',
    llm_config=llm_config,
    # human_input_mode="NEVER"
)

# Map the function names declared in llm_config["functions"] to the Python
# callables defined in this notebook, registered on user_proxy.
user_proxy.register_function(
    function_map={
        "search_and_index_wikipedia": search_and_index_wikipedia,
        "query_wiki_index":query_wiki_index,
        # "generate_response":generate_response
    }
)

# Earlier two-agent variant, kept commented out:
# user_proxy.initiate_chat(
#     assistant,
#     message="""When did croatia adopt the euro?""",
# )

# Group chat over the three agents, starting with an empty message list and
# max_round=20. The manager is also given llm_config.
groupchat = autogen.GroupChat(agents=[user_proxy, moderator, assistant], messages=[], max_round=20)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
# NOTE(review): running this cell starts the conversation immediately —
# presumably issuing model requests via config_list; confirm before re-running.
user_proxy.initiate_chat(manager, message="When did SVB default?")


[33muser_proxy[0m (to chat_manager):

When did SVB default?

--------------------------------------------------------------------------------
[33mmoderator[0m (to chat_manager):

In order to answer this question accurately, the assistant would need more information about SVB. SVB could stand for Silicon Valley Bank, Swedish Volleyball, or any number of entities. Once we know which SVB is referred to, we can search for relevant facts in a trusted data source such as Wikipedia. Unfortunately, current knowledge does not have data on SVB default. Would you provide more details?

--------------------------------------------------------------------------------
[33muser_proxy[0m (to chat_manager):



--------------------------------------------------------------------------------
[33mmoderator[0m (to chat_manager):

Apologies for the confusion earlier. Let's move ahead and find the exact detail you are looking for. To find specific events like a default of SVB (assuming it refers to S