In [3]:
import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Function to scrape a webpage
def scrape_website(url):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    text = ' '.join([p.text for p in soup.find_all('p')])  # Get text from all paragraphs
    return text

# Scrape the data
url = "https://www.melbourne.vic.gov.au/portable-advertising-board-permits"  # Replace with the URL of your choice
raw_text = scrape_website(url)

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# Split the text into chunks
chunks = text_splitter.split_text(raw_text)

print(f"Number of chunks: {len(chunks)}")

Number of chunks: 11


In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Initialize the embedding model
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Vector store from chunks
vector_db = Chroma.from_texts(
    texts=chunks,
    embedding=embeddings,
    persist_directory="./chroma_db"
)

vector_db.persist()

print("Vector database created and persisted.")

  return forward_call(*args, **kwargs)


Vector database created and persisted.


  vector_db.persist()


In [6]:
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

# Initialize the LLM (make sure Ollama is running with a model like 'llama2')
llm = Ollama(model="llama3.2")

# Create a retriever from the vector store
retriever = vector_db.as_retriever()

# Create a prompt template for the LLM
prompt_template = """
Use the following context to answer the question. If you don't know the answer, say you don't know.
Context: {context}
Question: {question}
"""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# Set up the RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # 'stuff' puts all retrieved docs in one context
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

# user query
query = "I just opened a cafe in the City of Melbourne, and I want to put a sandwich board on the footpath. What are the rules and do I need a permit?"
response = qa_chain.invoke({"query": query})

print("\n--- Model Response ---")
print(response['result'])


  return forward_call(*args, **kwargs)



--- Model Response ---
To put up a portable advertising board (sandwich board) on the footpath outside your cafe in the City of Melbourne, you will need a permit. 

You must hold public liability insurance with a minimum coverage of $20 million for a single claim. The policy must cover any location within the municipality and must list City of Melbourne as an interested party.

You also need to upload a current letter on official letterhead from your cafe's property owner or body corporate, stating they have no objection to you placing a portable advertising board outside their property.

There is a non-refundable application fee of $248 per application, and a permit fee of $754 per permit. Permits are valid for 12 months from the date of issue.

Additionally, you should review the permit conditions to ensure that your location is not in a prohibited area (such as parks or gardens) and that you will be responsible for keeping the streets clean and minimizing disturbance from late-nigh

### Chatbot Functionality

In [None]:
import requests
from bs4 import BeautifulSoup
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
import os

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

# Data Scraping Function
def scrape_website(url):
    """Scrapes text from all <p> tags on a given URL."""
    print(f"Scraping data from: {url}")
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status() 
        soup = BeautifulSoup(response.content, 'html.parser')
        text = ' '.join([p.text for p in soup.find_all('p')])
        if not text.strip():
            print("Warning: No text found in <p> tags. Scrape may not be successful.")
        return text
    except requests.exceptions.RequestException as e:
        print(f"Error during web scraping: {e}")
        return ""


# Calling Data Scrape Function 
url = "https://www.melbourne.vic.gov.au/portable-advertising-board-permits"
raw_text = scrape_website(url)

if not raw_text:
    print("Failed to get data from the URL. Exiting.")
    exit()

# Text splitter Initialization
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# Split the text into chunks
chunks = text_splitter.split_text(raw_text)
print(f"Number of chunks: {len(chunks)}")

# Embedding Model
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Create a vector store from the chunks
persist_directory = "./chroma_db"
if os.path.exists(persist_directory):
    print("Vector database already exists. Loading it from disk.")
    vector_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    print("Vector database not found. Creating and persisting it.")
    vector_db = Chroma.from_texts(
        texts=chunks,
        embedding=embeddings,
        persist_directory=persist_directory
    )
    vector_db.persist()

# --- One-time Setup: LLM and RAG Chain Configuration ---

# Initialize the LLM (make sure Ollama is running with a model like 'llama3')
# Note: Ollama must be running and the specified model must be available.
try:
    llm = Ollama(model="llama3.2")
    _ = llm.invoke("Hello, are you ready?")
    print("LLM initialized successfully.")
except Exception as e:
    print(f"Error initializing LLM: {e}")
    print("Please ensure Ollama is running and the 'llama3.2' model is installed.")
    exit()

# Create a retriever from the vector store
retriever = vector_db.as_retriever()

# Create a prompt template for the LLM
prompt_template = """
Use the following context to answer the question. If you don't know the answer, say you don't know.
Context: {context}
Question: {question}
"""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

# Set up the RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

# --- Chatbot --- 
print("\n--- Chatbot is Ready! ---")
print("I'm ready to answer questions about portable advertising board permits in Melbourne.")
print("Type 'exit' to end the conversation.")
print("-" * 25)

while True:
    user_query = input("\nYour question: ")

    # Check for an exit command
    if user_query.lower() == 'exit':
        print("Goodbye!")
        break

    # Get the response from the RAG chain
    try:
        response = qa_chain.invoke({"query": user_query})

        # Display the response
        print("\n--- Model Response ---")
        print(response['result'])

        # Display the source documents
        print("\n--- Source Documents ---")
        for i, doc in enumerate(response['source_documents']):
            print(f"Document {i+1}:\n{doc.page_content}\n")
            
    except Exception as e:
        print(f"An error occurred: {e}")
        print("Please check your LLM connection and try again.")
        continue


Scraping data from: https://www.melbourne.vic.gov.au/portable-advertising-board-permits
Number of chunks: 11
Vector database already exists. Loading it from disk.


  vector_db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)


LLM initialized successfully.

--- Chatbot is Ready! ---
I'm ready to answer questions about portable advertising board permits in Melbourne.
Type 'exit' to end the conversation.
-------------------------



Your question:  I just opened a cafe in the City of Melbourne, and I want to put a sandwich board on the footpath. What are the rules and do I need a permit?


  return forward_call(*args, **kwargs)



--- Model Response ---
To place a portable advertising board (sandwich board) on the footpath of your cafe in the City of Melbourne, you will need to obtain a permit.

Here are the key requirements:

1. You must hold public liability insurance with a minimum coverage of $20 million for a single claim.
2. The policy must cover any location within the municipality and list City of Melbourne as an interested party.
3. You need to upload a current letter on official letterhead from the property owner or body corporate (if your cafe is in a multi-tenanted property) stating they have no objection to placing a portable advertising board outside the property.
4. There is a non-refundable application fee of $248 per application and a permit fee of $754 per permit.

Before applying, you should review the permit conditions and ensure that your sandwich board will not be displayed in any prohibited areas, such as parks and gardens or designated areas with high pedestrian volumes.

It's also worth


Your question:  How to apply for You must hold public liability insurance with a minimum coverage of $20 million for a single claim.


  return forward_call(*args, **kwargs)



--- Model Response ---
To apply for the permit, you need to upload a public liability insurance certificate of currency with a minimum coverage of $20 million for a single claim. The policy must cover any location within the municipality and must list City of Melbourne as an interested party.

--- Source Documents ---
Document 1:
boards to be displayed across the municipality on a given day. To be eligible for this permit your agency must hold public liability insurance with a minimum coverage of $20 million. See supporting documents below for more information. Permit holders are responsible for ensuring: Please review the permit conditions: Real estate pointer board permit conditions Real estate pointer board permit conditions You need to upload a public liability insurance certificate of currency with a minimum coverage of $20 million for a single claim. The policy must cover any location within the municipality and must list City of Melbourne as an interested party. Read more about