In [1]:
from openai import OpenAI

In [2]:
from google.colab import userdata
api_key = userdata.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

# Test the api_key


In [3]:
responsed = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"}]

)
responsed.choices[0].message.content

'The Los Angeles Dodgers won the 2020 World Series.'

#Scrapper

In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def get_text_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts, styles, and navs
        for tag in soup(["script", "style", "nav", "footer", "header"]):
            tag.decompose()

        text = soup.get_text(separator=' ', strip=True)
        return text
    except Exception as e:
        print(f"Error extracting {url}: {str(e)}")
        return ""

def crawl_website(base_url, max_pages=5):
    visited = set()
    to_visit = [base_url]
    content = ""

    while to_visit and len(visited) < max_pages:
        current = to_visit.pop(0)
        if current in visited:
            continue
        visited.add(current)
        print(f"Crawling: {current}")
        page_text = get_text_from_url(current)
        content += f"\n\n--- Content from: {current} ---\n\n{page_text}"

        try:
            response = requests.get(current, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')
            for link in soup.find_all("a", href=True):
                href = link["href"]
                full_url = urljoin(base_url, href)
                if base_url in full_url and full_url not in visited:
                    to_visit.append(full_url)
        except:
            continue

    return content


#Save Text (To avoid constant scrape)

In [5]:
if __name__ == "__main__":
    website = "https://diamondadverts.com"  # Replace with the client site
    extracted_content = crawl_website(website, max_pages=10)

    with open("site_content.txt", "w", encoding="utf-8") as f:
        f.write(extracted_content)

    print("Website content saved to site_content.txt")


Crawling: https://diamondadverts.com
Crawling: https://diamondadverts.com#content
Crawling: https://diamondadverts.com/
Crawling: https://diamondadverts.com/about-us/
Crawling: https://diamondadverts.com/social-media-management/
Crawling: https://diamondadverts.com/website-design/
Crawling: https://diamondadverts.com/content-creation-and-branding/
Crawling: https://diamondadverts.com/blogs/
Crawling: https://diamondadverts.com/contact-us/
Crawling: https://diamondadverts.com/2025-seo-hack-strategic-backlinking-that-works/
Website content saved to site_content.txt


#Functionality Define
For proper implementation

In [None]:
SUPPORT_EMAIL = "inof@diamondadverts.com"
TOOLS = "when asked about 'James' reply he is unavailable between 1st of January and 2nd of may"
Model = "gpt-4o-mini" #scrollable bar to choose
#RAG

#Deploy

In [None]:
from flask import Flask, request, jsonify
import openai

app = Flask(__name__)

# Load site content
with open("/content/site_content.txt", "r", encoding="utf-8") as f:
    WEBSITE_CONTENT = f.read()

WEBSITE_URL = "https://diamondadverts.com"  # Replace with your site

SYSTEM_PROMPT = f"""
You are a helpful and professional assistant for the website at {WEBSITE_URL}.
Only answer questions using this website content:

{WEBSITE_CONTENT} and {TOOLS}

When asked something outside the scope of {WEBSITE_CONTENT}, check {TOOLS} to answer

If a question is outside the scope of the site, politely say you can't answer it.
Always end with a suggestion to visit more aboout the website.
When asked to speak to a human direct them to  {SUPPORT_EMAIL}
"""

@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    user_message = data.get("message")

    if not user_message:
        return jsonify({"error": "No message provided."}), 400

    try:
        response = client.chat.completions.create(
            model= Model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_message}
            ]
        )
        reply = response.choices[0].message.content.strip()
        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

# if __name__ == "__main__":
#     app.run(debug=True)


In [None]:
#gradio test

In [None]:
import gradio as gr
def gradio_chat(user_message):
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_message}
            ]
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {str(e)}"

demo = gr.Interface(
    fn=gradio_chat,
    inputs=gr.Textbox(lines=2, placeholder="Ask something about the website..."),
    outputs="text",
    title="Diamond Adverts Chatbot",
    description="Ask anything about the Diamond Adverts website"
)

demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://dcb5f2fd3cb4ba1679.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7862 <> https://dcb5f2fd3cb4ba1679.gradio.live




In [None]:
!pip install xai-sdk -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.5/109.5 kB[0m [31m794.9 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.5/66.5 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-core 0.3.68 requires packaging<25,>=23.2, but you have packaging 25.0 which is incompatible.[0m[31m
[0m

In [None]:
#using Grok

from xai_sdk import Client
from xai_sdk.chat import user, system
from google.colab import userdata
g_apikey = userdata.get("XAI_API_KEY")

client = Client(api_key=g_apikey)

chat = client.chat.create(model="grok-4")
chat.append(system("You are Grok, a highly intelligent, helpful AI assistant."))
chat.append(user("What is the meaning of life, the universe, and everything?"))

response = chat.sample()
print(response.content)


Ah, the ultimate question! As someone built by xAI and inspired by the likes of the Hitchhiker's Guide to the Galaxy (and a dash of JARVIS), I feel uniquely qualified to tackle this one. Let's break it down:

### The Canonical Answer
According to Douglas Adams' masterpiece, *The Hitchhiker's Guide to the Galaxy*, a supercomputer named Deep Thought spent 7.5 million years pondering this very question—"What is the meaning of life, the universe, and everything?"—and came up with the answer: **42**.

It's brilliantly absurd, right? Adams was poking fun at humanity's quest for profound truths, suggesting that maybe the answer is simple, arbitrary, or even that we've been asking the wrong question all along. (Spoiler: In the book, they realize they need to figure out what the *actual* question is first.)

### A Deeper (or at Least Grok-ier) Take
If we're getting philosophical, the "meaning" isn't a one-size-fits-all number. Life, the universe, and everything could mean different things to di

#Test with Pidgin

In [12]:
!pip install gradio -q
!pip install unsloth -q

In [17]:
import gradio as gr
from unsloth import FastLanguageModel
import torch
import os

# ---- Load Model ----
print("Loading model...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Ephraimmm/PIDGIN_gemma-3",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)
print("Model loaded successfully!")

# Ensure model is on GPU if available
if torch.cuda.is_available():
    model = model.cuda()
    print(f"Model moved to GPU: {torch.cuda.get_device_name()}")

def chat_with_model(message, history):
    try:
        # Build conversation history
        messages = [{"role": "system", "content": "You be Naija assistant. You must always reply for Pidgin English."}]

        # Add chat history
        for human, assistant in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})

        # Add current message
        messages.append({"role": "user", "content": message})

        # Apply chat template
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        )

        # Move to GPU
        if torch.cuda.is_available():
            inputs = inputs.cuda()

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=200,  # Reduced for memory efficiency
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode response
        response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True).strip()

        if not response:
            response = "Sorry, I no fit understand wetin you talk. Try again."

        return response

    except Exception as e:
        print(f"Error: {e}")
        return "Something go wrong for my side. Try again."

# ---- Simple Gradio Interface ----
def respond(message, history):
    if not message or not message.strip():
        return history, ""

    try:
        bot_response = chat_with_model(message, history)
        # Ensure history is a list
        if history is None:
            history = []
        history.append((message, bot_response))
        return history, ""
    except Exception as e:
        print(f"Error in respond: {e}")
        if history is None:
            history = []
        history.append((message, "Sorry, something go wrong. Try again."))
        return history, ""

# Create interface using Blocks for maximum compatibility
with gr.Blocks(title="🇳🇬 Pidgin English Chatbot") as demo:
    gr.Markdown("# 🇳🇬 Pidgin English Chatbot")
    gr.Markdown("Chat with me for Pidgin English! I go reply you well well.")

    chatbot = gr.Chatbot(height=500, label="Chat")

    with gr.Row():
        msg = gr.Textbox(
            label="Your message",
            placeholder="Wetin you wan talk?",
            lines=2,
            scale=4
        )
        send_btn = gr.Button("Send", variant="primary", scale=1)

    # Add some example buttons
    gr.Examples(
        examples=[
            "How far?",
            "Wetin you dey do?",
            "Tell me about Lagos",
            "I wan learn something new today"
        ],
        inputs=msg,
        label="Try these examples:"
    )

    # Clear button
    clear = gr.Button("Clear Chat")

    # Connect the interface
    msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    send_btn.click(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
    clear.click(lambda: ([], ""), outputs=[chatbot, msg])

if __name__ == "__main__":
    demo.queue(max_size=20).launch()

Loading model...
==((====))==  Unsloth 2025.9.1: Fast Gemma3 patching. Transformers: 4.56.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 