# Agentic AI for Beginners
# Simple chatbot to answer queries from a website

## 1. Importing Required Libraries

In [69]:
from openai import OpenAI                # Imports the OpenAI API client to access GPT-4 or GPT-3.5
import os                                # For setting environment variables
import requests                          # To fetch data from webpages
from bs4 import BeautifulSoup            # To parse HTML content and extract clean text/links
from google.colab import userdata        # Secure way to store/retrieve sensitive info in Google Colab

## 2. Loading the OpenAI API Key

In [70]:
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")  # Fetches API key from Colab secrets. Get your openai keys from https://platform.openai.com/api-keys
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])          # Creates the OpenAI client


## 3. Get All Child Links from a Base URL
### In the below example i am scarpping https://www.sc.com/sg/ and all its child pages. Make the necessary changes as per your required website

In [71]:
from urllib.parse import urljoin, urlparse  # Helps combine and parse URLs

def get_child_links(base_url, max_links=10):
    base = urlparse(base_url)               # Extract domain parts from URL
    domain = base.netloc                    # E.g., "www.sc.com"
    try:
        response = requests.get(base_url, timeout=10)     # Download HTML content
        soup = BeautifulSoup(response.text, "html.parser")  # Parse HTML using BeautifulSoup
        links = set()                         # Use set to avoid duplicates

        for a in soup.find_all("a", href=True):            # Loop through all <a href="">
            href = a['href']                               # Get the link URL
            full_url = urljoin(base_url, href)             # Convert relative to full URL
            parsed = urlparse(full_url)

            # Filter: Only include links from the same domain and inside "/sg/". Make changes based on parent page you are scrapping
            if parsed.netloc == domain and parsed.path.startswith("/sg/"):
                links.add(full_url)

            if len(links) >= max_links:
                break
        return list(links)
        print(links)
    except Exception as e:
        return []


## 4. Read and Extract Text from URLs ( Parent and Child Links )

In [72]:
def read_from_url(url, show_links=False):
    try:
        all_pages = [url] + get_child_links(url, max_links=10)  # We have limited the child pages to be crawled to max 10. You can change it based on your requirements. In this example the agent will only respond from these 10 child pages it crawled

        if show_links:
            print("🔗 Indexed the following pages:")
            for page in all_pages:
                print(page)

        all_text = ""

        for page in all_pages:
            try:
                res = requests.get(page, timeout=10)
                soup = BeautifulSoup(res.text, "html.parser")
                for tag in soup(["script", "style", "noscript"]):
                    tag.decompose()
                text = soup.get_text(separator="\n").strip()
                all_text += f"\n\n--- Content from {page} ---\n{text}"
            except Exception as inner_e:
                all_text += f"\n\n[Error reading {page}]: {inner_e}"

        return all_text[:12000]  # Change the number of words you want to extract from each page.
    except Exception as e:
        return f"Error reading base URL: {e}"



In [73]:
# This is a test code. Run this separately to inspect what pages were crawled
# read_from_url("https://www.sc.com/sg", show_links=True)


🔗 Indexed the following pages:
https://www.sc.com/sg
https://www.sc.com/sg/
https://www.sc.com/sg/save/current-accounts/wealth-saver/?intcid=web_listing-sc_com_top_nav-homepg1-staticmedia_others-sng-homepage_new-wealth-saver-acquisition-sc_com_organic-sg-en
https://www.sc.com/sg/search/
https://www.sc.com/sg/save/savings-accounts/esaver/?intcid=web_listing-sc_com_top_nav-homepg1-staticmedia_others-sng-homepage_new-esaver-acquisition-sc_com_organic-sg-en
https://www.sc.com/sg/bank-with-us/mobile-banking-services/standard-chartered-mobile/
https://www.sc.com/sg/priority/
https://www.sc.com/sg/promotions/referral-signup/
https://www.sc.com/sg/international-banking/
https://www.sc.com/sg/business/
https://www.sc.com/sg/save/current-accounts/bonussaver/?intcid=web_listing-sc_com_top_nav-homepg1-staticmedia_others-sng-homepage_new-bsaver-acquisition-sc_com_organic-sg-en


'\n\n--- Content from https://www.sc.com/sg ---\nStandard Chartered Singapore\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\n                            Personal\r\n                        \n\n\n\n\n\n\n\r\n                            Priority\r\n                        \n\n\n\n\n\n\n\r\n                            International\r\n                        \n\n\n\n\n\n\n\r\n                            Business\r\n                        \n\n\n\n\n\n\n\r\n                            Private\r\n                        \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\n                                Referral Sign Up\r\n                            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\r\n                                Search\r\n                            \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\

## 5. Defining Tools for the Agent

In [81]:
tools = [
    {
        "type": "function",  # Tells the agent this is a callable tool
        "function": {
            "name": "read_standard_chartered_pages",  # Tool name used by the agent. Change it as per your requirmeents
            "description": (
                "Reads the Standard Chartered Singapore website starting from the home page (https://www.sc.com/sg), "
                "and automatically includes key internal pages under /sg/. Returns all useful text from those pages."
            ),
            "parameters": {
                "type": "object",
                "properties": {},
                "required": []
            }
        }
    }
]

# This map links the tool name to the actual Python function
tool_map = {
    "read_standard_chartered_pages": lambda: read_from_url("https://www.sc.com/sg", show_links=False)
}

## 6. Define the Agent Function
### The function takes a question from the user, lets GPT decide if it needs to call a tool, and then answers using real website content.

In [82]:
def agent_chat(user_question):
    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful assistant. You must use the provided `read_standard_chartered_pages` tool "
                "to read and answer questions based only on the content of https://www.sc.com/sg and its child pages.\n"
                "You are not allowed to make up any information. If something is not found, say so."
            )
        },
        {
            "role": "user",
            "content": user_question
        }
    ]

    response = client.chat.completions.create(
        model="gpt-4",                        # You can change this to any other gpt or LLM as required. This has a implication as each LLM has a restiction of number of tokens/words it can take input and process
        messages=messages,
        tools=tools,
        tool_choice="auto",                   # LLM decides when to use the tool
        temperature=0.2                       # Low randomness = more factual
    )

    # If the LLM wants to use a tool
    if response.choices[0].message.tool_calls:
        tool_call = response.choices[0].message.tool_calls[0]
        tool_name = tool_call.function.name
        tool_response = tool_map[tool_name]()  # Call the tool using tool_map

        # Add the first response (with the tool call) and the tool response to the messages list
        messages.append(response.choices[0].message)
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "name": tool_name,
            "content": tool_response
        })

        # Now provide tool response back to the LLM
        second_response = client.chat.completions.create(
            model="gpt-4",
            messages=messages
        )
        return second_response.choices[0].message.content
    else:
        return response.choices[0].message.content

## 7. Try Asking a Question
### In this example we have crawled only 10 pages. So the agent will answer only if the question is from content from these 10 pages

In [83]:
agent_chat("What are bonus saver account  benefit?")

'Based on the information from the Standard Chartered Singapore website, the benefits of a Bonus$aver Account include:\n\n1. An interest rate of up to 8.05% p.a.\n\n2. The choice for account holders to have control over how their money grows.\n\nIn addition, there is a promotion for a bonus of 50 units of SPDR STI ETF (SGX:ES3) valued at S$198 when you sign up for a Bonus$aver Account and a Bonus$aver World Mastercard Credit Card. This offer is valid from 1st June to 30 June 2025, and terms and conditions apply.'