In [None]:
import os
import json
from dotenv import load_dotenv
from scraper import fetch_website_contents, fetch_website_links
from IPython.display import Markdown, display
from openai import OpenAI
import gradio as gr


# Load settings from the .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Connection settings for the OpenAI-compatible endpoint.
api_key = os.getenv('OLLAMA_API_KEY')

ollama_base_url = os.getenv("OLLAMA_BASE_URL")

MODEL = os.getenv('OSS_CLOUD_MODEL')

# Check the key
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


# Check the base URL
if not ollama_base_url:
    print("No base url was found - please confirm that the Ollama base url has been set in the dotenv file")
elif ollama_base_url.strip() != ollama_base_url:
    print("A base url was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("Base url found and looks good so far!")


# Check the model name. Without it every completion request is sent with
# model=None, which only fails later inside the API call; warn up front instead.
if not MODEL:
    print("No model name was found - please confirm that OSS_CLOUD_MODEL has been set in the dotenv file")


# Client used for all chat completion requests in this notebook.
ollamaClient = OpenAI(base_url=ollama_base_url, api_key=api_key)

# System prompt placed first in the message list of every chat() request.
# It tells the model to answer only from a supplied knowledge base, to make a
# tool call when none was supplied, and to ask the user for a URL when no
# knowledge base exists for the subject.
# NOTE(review): the prompt text contains typos ("get learn", "indepthly") and
# inconsistent capitalisation/punctuation. It is left unchanged here because
# it is runtime text sent to the model.
system_prompt = """
You are an assistant that has the ability to get learn about any subject matter indepthly.
You answer questions accurately and precisely.
You may be provided with a knowledge base when asked a question.
If a knowledge base is provided, your answer should be exclusively from that knowledge base and not outside the knowledge base.
If a knowledge base is not provided, you make a tool call to see if a knowledge base with that subject matter exists.
if a knowledge base with the subject matter does not exist, you should inform the user, 
and request for a URL from which you can learn relevant information needed to answer the question
If you cannot answer a question based on a knowledge base, you should inform the user that you are having an issue learning from the URL provided.
"""


def get_knowledge_base(knowledge_base_name, knowledge_base_url):
    """Return the text of a knowledge base, using a local ``.txt`` file as cache.

    If ``<knowledge_base_name>.txt`` exists in the current working directory
    its contents are returned. Otherwise the page at ``knowledge_base_url``
    and every link found on it are fetched with the ``scraper`` helpers,
    concatenated, written to the cache file and returned.

    Args:
        knowledge_base_name: Subject name used to build the cache file name.
            Characters other than letters, digits, space, ``.``, ``_`` and
            ``-`` are replaced with ``_``. When the name is empty, caching
            is skipped.
        knowledge_base_url: URL of the page to fetch when no cache exists.

    Returns:
        The knowledge base text, or ``""`` when there is no URL to fetch or
        the fetch produced no content.
    """
    # Sanitise the name so it cannot contain path separators and point the
    # cache file outside the working directory.
    raw_name = str(knowledge_base_name or "").strip()
    safe_name = "".join(
        ch if ch.isalnum() or ch in " ._-" else "_" for ch in raw_name
    )
    # An empty name would make every unnamed subject share a single ".txt"
    # file and return another subject's content, so no cache is used then.
    filename = f"{safe_name}.txt" if safe_name else None

    # Check if the file exists
    if filename and os.path.exists(filename):
        print(f"Found valid file {filename} - using that")
        # Explicit UTF-8 keeps reads independent of the platform default
        # encoding; errors="replace" tolerates files written with another one.
        with open(filename, "r", encoding="utf-8", errors="replace") as f:
            return f.read()

    # Nothing cached and nothing to fetch: report "no knowledge base" rather
    # than handing an empty URL to the scraper.
    if not knowledge_base_url:
        print("No knowledge base URL provided - nothing to fetch")
        return ""

    # If not, fetch the contents
    print(f"Fetching contents for {knowledge_base_url}...")
    content = fetch_website_contents(knowledge_base_url)

    # Fetch links and append their contents
    links = fetch_website_links(knowledge_base_url)
    print(f"Found {len(links)} links, fetching their contents...")

    for link in links:
        # Best effort: one unreachable link must not discard the whole page.
        try:
            link_content = fetch_website_contents(link)
            content += "\n\n" + link_content
        except Exception as e:
            print(f"Failed to fetch {link}: {e}")

    if not content:
        print("No content found")
        return ""

    if filename:
        # Scraped text routinely contains non-ASCII characters, so write UTF-8
        # explicitly instead of relying on the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
    return content


def _string_property(description):
    """Build the JSON-schema entry for one string-typed tool argument."""
    return {"type": "string", "description": description}


# Function-calling schema advertised to the model for get_knowledge_base:
# two required string arguments and no additional properties.
get_knowledge_base_description = {
    "name": "get_knowledge_base",
    "description": "Fetches the contents of a website/knowledge base which you can use to answer the question more accurately.",
    "parameters": {
        "type": "object",
        "properties": {
            "knowledge_base_name": _string_property("The name of the knowledge base"),
            "knowledge_base_url": _string_property("The URL of the knowledge base"),
        },
        "required": ["knowledge_base_name", "knowledge_base_url"],
        "additionalProperties": False,
    },
}

# Tool list passed as `tools=` on every chat completion request.
tools = [{"type": "function", "function": get_knowledge_base_description}]

def handle_tool_calls(message, subject, url):
    """Run the tool calls requested by the model and build the reply messages.

    Args:
        message: Assistant message whose ``tool_calls`` are to be executed.
        subject: Subject name supplied by the caller. When it is empty, the
            ``knowledge_base_name`` argument chosen by the model is used.
        url: Knowledge base URL supplied by the caller. When it is empty, the
            ``knowledge_base_url`` argument chosen by the model is used.

    Returns:
        A list with one ``{"role": "tool", ...}`` message per tool call, in
        the order the calls appear on ``message``.
    """
    responses = []
    # tool_calls can be None on a message that requested no tools.
    for tool_call in message.tool_calls or []:
        tool_name = tool_call.function.name

        if tool_name != "get_knowledge_base":
            # OpenAI-style chat APIs expect a tool message for every
            # tool_call_id; silently skipping an unknown tool would leave the
            # call unanswered on the next request.
            responses.append({
                "role": "tool",
                "content": f"Unknown tool: {tool_name}",
                "tool_call_id": tool_call.id,
            })
            continue

        # The arguments string is model-generated, so it may be missing or
        # not valid JSON; fall back to no arguments instead of crashing.
        try:
            arguments = json.loads(tool_call.function.arguments or "{}")
        except (TypeError, ValueError):
            arguments = {}
        if not isinstance(arguments, dict):
            arguments = {}

        # Caller-supplied values win (unchanged behaviour when they are set);
        # the model's own arguments are only used to fill in empty ones.
        knowledge_base_name = subject or arguments.get('knowledge_base_name') or ""
        knowledge_base_url = url or arguments.get('knowledge_base_url') or ""

        knowledge_base_content = get_knowledge_base(knowledge_base_name, knowledge_base_url)
        responses.append({
            "role": "tool",
            "content": knowledge_base_content,
            "tool_call_id": tool_call.id,
        })
    return responses

def construct_final_user_message(message, subject, url):
    """Append the subject and knowledge base URL to the user's message.

    The returned text is the original message followed by one line naming
    the subject under which the knowledge base is saved and one line naming
    the knowledge base URL.
    """
    context_suffix = (
        f"\n The subject to be used to save the knowledge base is {subject} \n"
        f"The knowledge base url is {url} \n"
    )
    return message + context_suffix

def chat(message, history, subject="", url=""):
    """Answer a user message, running tool calls until the model is done.

    Args:
        message: The user's latest message text.
        history: Earlier turns as dicts with ``role`` and ``content`` keys;
            any other keys are dropped before the request is sent.
        subject: Subject name for the knowledge base. Optional so the
            function can also be called with just ``message`` and ``history``.
        url: Knowledge base URL. Optional for the same reason.

    Returns:
        The text of the model's final reply, or ``""`` when the final
        message carries no text content.
    """
    # Treat a missing value the same as an empty text field.
    subject = subject or ""
    url = url or ""

    # Keep only the fields the chat completions API needs from each turn.
    history = [{"role": h["role"], "content": h["content"]} for h in history]
    user_turn = {"role": "user", "content": construct_final_user_message(message, subject, url)}
    messages = [{"role": "system", "content": system_prompt}, *history, user_turn]
    response = ollamaClient.chat.completions.create(model=MODEL, messages=messages, tools=tools)

    # Keep answering tool requests until the model produces a normal reply.
    while response.choices[0].finish_reason == "tool_calls":
        # Separate name so the `message` parameter is not rebound.
        assistant_message = response.choices[0].message
        tool_responses = handle_tool_calls(assistant_message, subject, url)
        # The assistant's request must precede the tool results in the log.
        messages.append(assistant_message)
        messages.extend(tool_responses)
        response = ollamaClient.chat.completions.create(model=MODEL, messages=messages, tools=tools)

    # content can be None on a completion message; always hand back a string.
    return response.choices[0].message.content or ""

In [None]:
# Quick manual check of chat(). Its signature is
# (message, history, subject, url), so the subject and URL are passed
# explicitly; calling it with only two arguments raises TypeError.
# NOTE(review): the "https://" scheme is added on the assumption that the
# scraper needs an absolute URL - confirm against scraper.fetch_website_contents.
chat(
    "Tell me about this company named Carbon, whose website is www.getcarbon.co",
    [],
    "Carbon",
    "https://www.getcarbon.co",
)

In [None]:
# Extra text fields shown with the chat box. Their values are handed to
# chat() as the `subject` and `url` arguments, after message and history.
subject_input = gr.Textbox(label="Subject Name")
url_input = gr.Textbox(label="Knowledge Base URL")
extra_inputs = [subject_input, url_input]

# Build the chat UI around chat() using the "messages" history format,
# then start it.
chat_interface = gr.ChatInterface(
    fn=chat,
    type="messages",
    additional_inputs=extra_inputs,
)
chat_interface.launch()