In [1]:
pip install gradio transformers torch


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.


In [7]:
import os
# Remove any PYTORCH_CUDA_ALLOC_CONF override from this process's environment.
# pop() with a None default makes this a no-op when the variable is not set.
os.environ.pop("PYTORCH_CUDA_ALLOC_CONF", None)


In [1]:
import torch
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not using.
# NOTE(review): presumably meant to free GPU memory before the model is (re)loaded
# below; tensors that are still referenced are not freed by this call.
torch.cuda.empty_cache()


In [None]:
# WORKING =====================================================================================================

In [2]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the "cyai.v1" checkpoint, then move its weights onto the GPU
model = AutoModelForCausalLM.from_pretrained("cyai.v1")
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained("cyai.v1")

# When the tokenizer defines no pad token, reuse EOS for padding and
# bring the embedding matrix in line with the tokenizer's length
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

# Chat function
def chat_with_model(message, history=None):
    """Generate one reply and append the exchange to ``history``.

    Args:
        message: Text submitted by the user.
        history: List of ``(user_message, bot_reply)`` tuples holding the
            conversation so far. It is extended in place; when omitted, a
            fresh list is created for this call.

    Returns:
        ``(history, history)`` - the same list twice, one value for each of
        the two Gradio outputs (chat display and state) wired to this handler.
    """
    # A mutable default would be shared by every call that omits `history`.
    if history is None:
        history = []

    # Blank submissions produce no turn instead of a reply to nothing.
    if not message or not message.strip():
        return history, history

    # Instruction first, then earlier turns in order, then the new message once.
    prompt = "You are a cybersecurity assistant. Answer briefly and clearly.\n"
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAI: {bot_msg}\n"
    prompt += f"User: {message}\nAI:"

    # NOTE(review): if the tokenizer truncates on the right, an over-long
    # prompt loses its newest message - confirm tokenizer.truncation_side.
    # Follow the model's device rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.pad_token_id
    )

    # The generated sequence starts with the prompt tokens; decode only what
    # follows so an "AI:" inside the reply cannot cut the answer short.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Drop any follow-up "User:" turn the model invents after its own answer.
    response = response.split("\nUser:")[0].strip()

    history.append((message, response))
    return history, history  # Full history for both the chatbot and the state

# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask CyberSecBot", placeholder="Type your message here...")
    clear = gr.Button("Clear")

    history = gr.State([])

    # Answer the message, then empty the textbox for the next question.
    msg.submit(chat_with_model, [msg, history], [chatbot, history]).then(
        lambda: "", None, msg
    )
    # Reset the stored history together with the display; clearing only the
    # chatbot would bring the old conversation back on the next message.
    clear.click(lambda: ([], []), None, [chatbot, history])

demo.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  chatbot = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [None]:
# =========================================================================================================================

In [1]:
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the "cyai.v1" checkpoint, then move its weights onto the GPU
model = AutoModelForCausalLM.from_pretrained("cyai.v1")
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained("cyai.v1")

# When the tokenizer defines no pad token, reuse EOS for padding and
# bring the embedding matrix in line with the tokenizer's length
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

# Chat function (with OpenAI-style messages)
def chat_with_model(message, history):
    """Generate one reply and append the exchange to ``history``.

    Args:
        message: Text submitted by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts holding the
            conversation so far; it is extended in place. Entries whose role
            is neither "user" nor "assistant" are left out of the prompt.

    Returns:
        ``(history, history)`` - the same list twice, one value for each of
        the two Gradio outputs (chat display and state) wired to this handler.
    """
    # Blank submissions produce no turn instead of a reply to nothing.
    if not message or not message.strip():
        return history, history

    speaker = {"user": "User", "assistant": "AI"}
    prompt = "".join(
        f"{speaker[turn['role']]}: {turn['content']}\n"
        for turn in history
        if turn["role"] in speaker
    )
    prompt += f"User: {message}\nAI:"

    # NOTE(review): if the tokenizer truncates on the right, an over-long
    # prompt loses its newest message - confirm tokenizer.truncation_side.
    # Follow the model's device rather than assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.pad_token_id
    )

    # The generated sequence starts with the prompt tokens; decode only what
    # follows so an "AI:" inside the reply cannot cut the answer short.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Drop any follow-up "User:" turn the model invents after its own answer.
    response = response.split("\nUser:")[0].strip()

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="CyberSecBot", type="messages")
    msg = gr.Textbox(label="Ask CyberSecBot", placeholder="Type your question here...")
    clear = gr.Button("Clear")
    history = gr.State([])

    # Answer the message, then empty the textbox for the next question.
    msg.submit(chat_with_model, [msg, history], [chatbot, history]).then(
        lambda: "", None, msg
    )
    # Two outputs need two values: an empty list for the display and one for the state.
    clear.click(lambda: ([], []), None, [chatbot, history])

demo.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [None]:
# =========================================================================================================================

In [12]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the "cyai.v1" checkpoint in float16, then move its weights onto the GPU
model = AutoModelForCausalLM.from_pretrained("cyai.v1", torch_dtype=torch.float16)
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained("cyai.v1")

# When the tokenizer defines no pad token, reuse EOS for padding and
# bring the embedding matrix in line with the tokenizer's length
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

# Chat function (optimized)
def chat_with_model(message, history):
    """Generate one short reply and append the exchange to ``history``.

    Only the last 10 history entries (5 user/assistant exchanges) are placed
    in the prompt, and the tokenized prompt is capped at 512 tokens.

    Args:
        message: Text submitted by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts holding the
            conversation so far; it is extended in place. Entries whose role
            is neither "user" nor "assistant" are left out of the prompt.

    Returns:
        ``(history, history)`` - the same list twice, one value for each of
        the two Gradio outputs (chat display and state) wired to this handler.
    """
    # Blank submissions produce no turn instead of a reply to nothing.
    if not message or not message.strip():
        return history, history

    # Slicing already returns the whole list when it has 10 entries or fewer.
    trimmed_history = history[-10:]
    speaker = {"user": "User", "assistant": "AI"}
    prompt = "You are a highly skilled cybersecurity AI assistant.\n"
    prompt += "".join(
        f"{speaker[turn['role']]}: {turn['content']}\n"
        for turn in trimmed_history
        if turn["role"] in speaker
    )
    prompt += f"User: {message}\nAI:"

    # Tokenize and generate
    # NOTE(review): if the tokenizer truncates on the right, a prompt over
    # 512 tokens loses its newest message - confirm tokenizer.truncation_side.
    # Follow the model's device rather than assuming "cuda".
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=30,  # Smaller response
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id
        )

    # The generated sequence starts with the prompt tokens; decode only what
    # follows so an "AI:" inside the reply cannot cut the answer short.
    prompt_len = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    # Drop any follow-up "User:" turn the model invents after its own answer.
    response = response.split("\nUser:")[0].strip()

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="CyberSecBot", type="messages")
    msg = gr.Textbox(label="Ask CyberSecBot", placeholder="Type your question here...")
    clear = gr.Button("Clear")
    history = gr.State([])

    # Answer the message, then empty the textbox for the next question.
    msg.submit(chat_with_model, [msg, history], [chatbot, history]).then(
        lambda: "", None, msg
    )
    # Two outputs need two values: an empty list for the display and one for the state.
    clear.click(lambda: ([], []), None, [chatbot, history])

demo.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OSError: The paging file is too small for this operation to complete. (os error 1455)

In [None]:
# =========================================================================================================================

In [26]:
import torch
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not using.
# NOTE(review): presumably meant to free GPU memory before the model is (re)loaded
# below; tensors that are still referenced are not freed by this call.
torch.cuda.empty_cache()


In [2]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the "cyai.v1" checkpoint in float16, then move its weights onto the GPU
model = AutoModelForCausalLM.from_pretrained("cyai.v1", torch_dtype=torch.float16)
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained("cyai.v1")

# When the tokenizer defines no pad token, reuse EOS for padding and
# bring the embedding matrix in line with the tokenizer's length
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

# Chat function with exception handling
def chat_with_model(message, history):
    """Generate one reply and append the exchange to ``history``.

    Only the last 10 history entries are placed in the prompt, and the
    tokenized prompt is capped at 512 tokens. If prompt building or
    generation raises, the error text is recorded as the assistant's turn so
    it shows up in the chat window.

    Args:
        message: Text submitted by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts holding the
            conversation so far; it is extended in place. Entries whose role
            is neither "user" nor "assistant" are left out of the prompt.

    Returns:
        ``(history, history)`` - the same list twice, one value for each of
        the two Gradio outputs (chat display and state) wired to this handler.
    """
    # Blank submissions produce no turn instead of a reply to nothing.
    if not message or not message.strip():
        return history, history

    try:
        # Slicing already returns the whole list when it has 10 entries or fewer.
        trimmed_history = history[-10:]
        speaker = {"user": "User", "assistant": "AI"}
        prompt = "You are a highly skilled cybersecurity AI assistant.\n"
        prompt += "".join(
            f"{speaker[turn['role']]}: {turn['content']}\n"
            for turn in trimmed_history
            if turn["role"] in speaker
        )
        prompt += f"User: {message}\nAI:"

        # Tokenize input
        # NOTE(review): if the tokenizer truncates on the right, a prompt over
        # 512 tokens loses its newest message - confirm tokenizer.truncation_side.
        # Follow the model's device rather than assuming "cuda".
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=512
        ).to(model.device)

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id
            )

        # The generated sequence starts with the prompt tokens; decode only
        # what follows so an "AI:" inside the reply cannot cut the answer short.
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        # Drop any follow-up "User:" turn the model invents after its own answer.
        response = response.split("\nUser:")[0].strip()

    except Exception as e:
        # Return the error message to the UI for debugging
        response = f"⚠️ Error: {str(e)}"

    # Record the question on the error path too, so the transcript shows
    # which message the error belongs to.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="CyberSecBot", type="messages")
    msg = gr.Textbox(label="Ask CyberSecBot", placeholder="Type your question here...")
    clear = gr.Button("Clear")
    history = gr.State([])

    # Answer the message, then empty the textbox for the next question.
    msg.submit(chat_with_model, [msg, history], [chatbot, history]).then(
        lambda: "", None, msg
    )
    # Two outputs need two values: an empty list for the display and one for the state.
    clear.click(lambda: ([], []), None, [chatbot, history])

demo.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [1]:
import torch
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not using.
# NOTE(review): presumably meant to free GPU memory before the model is (re)loaded
# below; tensors that are still referenced are not freed by this call.
torch.cuda.empty_cache()


In [2]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the "cyai.v1" checkpoint in float16, then move its weights onto the GPU
model = AutoModelForCausalLM.from_pretrained("cyai.v1", torch_dtype=torch.float16)
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained("cyai.v1")

# When the tokenizer defines no pad token, reuse EOS for padding and
# bring the embedding matrix in line with the tokenizer's length
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

# Chat function with exception handling
def chat_with_model(message, history):
    """Generate one reply and append the exchange to ``history``.

    Only the last 10 history entries are placed in the prompt, and the
    tokenized prompt is capped at 512 tokens. If prompt building or
    generation raises, the error text is recorded as the assistant's turn so
    it shows up in the chat window.

    Args:
        message: Text submitted by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts holding the
            conversation so far; it is extended in place. Entries whose role
            is neither "user" nor "assistant" are left out of the prompt.

    Returns:
        ``(history, history)`` - the same list twice, one value for each of
        the two Gradio outputs (chat display and state) wired to this handler.
    """
    # Blank submissions produce no turn instead of a reply to nothing.
    if not message or not message.strip():
        return history, history

    try:
        # Slicing already returns the whole list when it has 10 entries or fewer.
        trimmed_history = history[-10:]
        speaker = {"user": "User", "assistant": "AI"}
        prompt = "You are a highly skilled cybersecurity AI assistant.\n"
        prompt += "".join(
            f"{speaker[turn['role']]}: {turn['content']}\n"
            for turn in trimmed_history
            if turn["role"] in speaker
        )
        prompt += f"User: {message}\nAI:"

        # Tokenize input
        # NOTE(review): if the tokenizer truncates on the right, a prompt over
        # 512 tokens loses its newest message - confirm tokenizer.truncation_side.
        # Follow the model's device rather than assuming "cuda".
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=512
        ).to(model.device)

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.pad_token_id
            )

        # The generated sequence starts with the prompt tokens; decode only
        # what follows so an "AI:" inside the reply cannot cut the answer short.
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        # Drop any follow-up "User:" turn the model invents after its own answer.
        response = response.split("\nUser:")[0].strip()

    except Exception as e:
        # Return the error message to the UI for debugging
        response = f"⚠️ Error: {str(e)}"

    # Record the question on the error path too, so the transcript shows
    # which message the error belongs to.
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})
    return history, history

# Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="CyberSecBot", type="messages")
    msg = gr.Textbox(label="Ask CyberSecBot", placeholder="Type your question here...")
    clear = gr.Button("Clear")
    history = gr.State([])

    # Answer the message, then empty the textbox for the next question.
    msg.submit(chat_with_model, [msg, history], [chatbot, history]).then(
        lambda: "", None, msg
    )
    # Two outputs need two values: an empty list for the display and one for the state.
    clear.click(lambda: ([], []), None, [chatbot, history])

demo.launch()


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


