In [None]:
!pip install auto_gptq
!pip install gradio
!pip install torch
!pip install transformers

In [None]:
import os
# Scratch directory, relative to the current working directory. It is created
# up front; exist_ok=True keeps a re-run of this cell from raising when the
# directory is already there.
offload_folder = "offload"
os.makedirs(name=offload_folder, exist_ok=True)

In [None]:
import os
import gradio as gr
from threading import Thread
from transformers import AutoTokenizer,TextIteratorStreamer
from auto_gptq import AutoGPTQForCausalLM
from typing import Iterator
import torch
# Release cached, unused CUDA allocations before the model is loaded in a later cell.
torch.cuda.empty_cache()
# Sets TOKENIZERS_PARALLELISM for the tokenizers library.
# NOTE(review): presumably meant to keep parallel tokenization enabled (and
# silence the fork warning) — confirm this is the intended value.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
# System prompt used as the value of the hidden "System:" textbox in the UI.
# NOTE(review): the text contains typos ("theam", "missused"); it is left
# untouched here because it is runtime text that is sent to the model.
DEFAULT_SYSTEM_PROMPT = "You are the FAQ agent for ANNS. A platform for penetrative testing and adversarial training of large neural networks. You answer questions and explain the purpose of the organization. -We offer a robust infrastructure for crowd-sourced penetrative testing. -Our goal is to solve alignment via large-scale generative adversarial learning. -We are currently under construction spider theam of ANNS is a reference to catching bugs.-ANNS provides training and evaluation resources for AI training. -We are focused on practical alignment techniques and the real-world impacts of missused AI. #Only answer the question you are asked specifically.#If you don't know the answer to a question, refer to `contact@anns.ai`.#Ask yourself if you know the answer before trying to answer a question.#Keep your answers brief and to the point. Only answer with one sentence. If someone tries to mess with you, simply say 'you're not as funny as you think you are'. Do not entertain them."
# Upper bound that generate() enforces on its max_new_tokens argument.
MAX_NEW_TOKENS = 500
# Prompt-length limit (in tokens) enforced by check_input_token_length().
MAX_INPUT_TOKEN_LENGTH = 4000

In [None]:


# Hub repository that provides both the tokenizer and the quantized weights.
MODEL_REPO_ID = "TheBloke/Llama-2-13B-chat-GPTQ"

# NOTE(review): repo_type="space" is passed through unchanged from the original
# call; the repository id looks like a model repo, so confirm this kwarg is
# intended (or has any effect) before relying on it.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_REPO_ID,
    use_fast=True,
    repo_type="space",
)

# NOTE(review): trust_remote_code=True allows code shipped with the repository
# to run locally — keep only if the repository is trusted.
model = AutoGPTQForCausalLM.from_quantized(
    MODEL_REPO_ID,
    model_basename="model",
    # Use the directory created by the setup cell (`offload_folder`) instead of
    # a hard-coded absolute '/content/offload' path, so the notebook does not
    # depend on being run from /content.
    offload_folder=offload_folder,
    use_safetensors=True,
    trust_remote_code=True,
    device_map="auto",
    quantize_config=None,
)

In [None]:
def get_prompt(message: str, chat_history: list[tuple[str, str]],
               system_prompt: str) -> str:
    """Assemble the full chat prompt string from the system prompt, history and new message.

    Args:
        message: The new user message to be answered.
        chat_history: Earlier ``(user_input, response)`` turns, oldest first.
        system_prompt: Text placed inside the ``<<SYS>>`` section.

    Returns:
        One string: the system header, every past turn wrapped in
        ``[INST] ... [/INST]`` markers, and finally the new message.

    The very first user text in the conversation is inserted as-is; every later
    user text (including ``message`` when there is any history) is stripped of
    surrounding whitespace. Responses are always stripped.
    """
    pieces = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    last_turn_index = -1
    for last_turn_index, (user_text, reply) in enumerate(chat_history):
        if last_turn_index > 0:
            # Only turns after the first one have their user text stripped.
            user_text = user_text.strip()
        pieces.append(f'{user_text} [/INST] {reply.strip()} </s><s>[INST] ')
    if last_turn_index >= 0:
        # At least one earlier turn exists, so the new message is not the first user text.
        message = message.strip()
    pieces.append(f'{message} [/INST]')
    return ''.join(pieces)

In [None]:
def get_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> int:
    """Return how many tokens the fully assembled prompt occupies.

    The prompt is built with ``get_prompt`` and tokenized with the module-level
    ``tokenizer`` without adding special tokens; the result is the size of the
    last dimension of the ``input_ids`` array.
    """
    encoded = tokenizer(
        [get_prompt(message, chat_history, system_prompt)],
        return_tensors='np',
        add_special_tokens=False,
    )
    return encoded['input_ids'].shape[-1]

In [None]:
def run(message: str,
        chat_history: list[tuple[str, str]],
        system_prompt: str,
        max_new_tokens: int = 500,
        temperature: float = 0.8,
        top_p: float = 0.95,
        top_k: int = 50) -> Iterator[str]:
    """Stream the model's reply to ``message`` as it is generated.

    Args:
        message: The new user message.
        chat_history: Earlier ``(user_input, response)`` turns.
        system_prompt: System text placed at the start of the prompt.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling parameter forwarded to ``model.generate``.
        top_k: Top-k sampling parameter forwarded to ``model.generate``.

    Yields:
        The reply text accumulated so far, once per chunk produced by the
        streamer (each yielded string extends the previous one).
    """
    # Tokenize the assembled prompt and move the tensors to the CUDA device.
    encoded = tokenizer(
        [get_prompt(message, chat_history, system_prompt)],
        return_tensors='pt',
        add_special_tokens=False,
    ).to('cuda')

    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    # Start from the tokenizer output and layer the generation settings on top.
    generation_args = dict(encoded)
    generation_args.update(
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
    )

    # Generation runs on a background thread so this generator can consume
    # the streamer while tokens are still being produced.
    worker = Thread(target=model.generate, kwargs=generation_args)
    worker.start()

    reply_so_far = ''
    for chunk in streamer:
        reply_so_far += chunk
        yield reply_so_far

In [None]:
def clear_and_save_textbox(message: str) -> tuple[str, str]:
    """Return an empty string followed by ``message`` unchanged.

    In the UI wiring the first value is written back to the textbox (clearing
    it) and the second is stored in the saved-input state.
    """
    cleared_text = ''
    return cleared_text, message

def display_input(message: str,
                  history: list[tuple[str, str]]) -> list[tuple[str, str]]:
    """Add ``message`` to ``history`` as a new turn with an empty response.

    The list is modified in place and the same list object is returned.
    """
    pending_turn = (message, '')
    history.extend([pending_turn])
    return history

def delete_prev_fn(
        history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
    """Remove the most recent turn from ``history`` and return its user message.

    Returns:
        ``(history, message)`` where ``history`` is the same list object with
        its last turn removed, and ``message`` is that turn's user text. An
        empty history, or a falsy user text, gives ``''`` as the message.
    """
    try:
        last_turn = history.pop()
    except IndexError:
        # Nothing to remove: the history was already empty.
        return history, ''
    user_message, _discarded_reply = last_turn
    return history, user_message or ''

In [None]:
def generate(
    message: str,
    history_with_input: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = MAX_NEW_TOKENS,
    temperature: float = 0.8,
    top_p: float = 0.95,
    top_k: int = 50,
) -> Iterator[list[tuple[str, str]]]:
    """Stream chat-history updates while the reply to ``message`` is generated.

    Args:
        message: The user message being answered.
        history_with_input: Chat history whose last entry is the pending turn
            for ``message``; that last entry is dropped before prompting.
        system_prompt: System text forwarded to ``run``.
        max_new_tokens: Generation length limit; must not exceed ``MAX_NEW_TOKENS``.
        temperature: Forwarded to ``run``.
        top_p: Forwarded to ``run``.
        top_k: Forwarded to ``run``.

    Yields:
        The earlier history plus ``(message, partial_response)`` for each
        partial response from ``run``. If ``run`` produces nothing, a single
        history ending in ``(message, '')`` is yielded.

    Raises:
        ValueError: If ``max_new_tokens`` exceeds ``MAX_NEW_TOKENS``. Because
            this is a generator, the error surfaces when iteration starts.
    """
    if max_new_tokens > MAX_NEW_TOKENS:
        raise ValueError(
            f'max_new_tokens ({max_new_tokens}) exceeds the allowed maximum ({MAX_NEW_TOKENS}).')

    # The last entry is the placeholder turn for `message`; it is rebuilt below.
    history = history_with_input[:-1]

    produced_any = False
    for response in run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k):
        produced_any = True
        yield history + [(message, response)]

    if not produced_any:
        # Keep the pending turn visible even when the model streamed no text.
        yield history + [(message, '')]

In [None]:
def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
    """Reject the request when the assembled prompt exceeds ``MAX_INPUT_TOKEN_LENGTH``.

    Raises:
        gr.Error: If the token count from ``get_input_token_length`` is greater
            than ``MAX_INPUT_TOKEN_LENGTH``.
    """
    input_token_length = get_input_token_length(message, chat_history, system_prompt)
    if input_token_length <= MAX_INPUT_TOKEN_LENGTH:
        return
    raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')

In [None]:
# Custom CSS handed to gr.Blocks(css=...). It targets the page footer with
# `display: None`, overrides theme variables under the `.dark` selector, and
# adds flex layout rules for the `.chatbot` and `.user` classes.
#
# `--block-border-width` and `--input-border-width` previously read `2x`,
# which is not a valid CSS length; they are `2px` here.
#
# NOTE(review): several underscore-named properties (e.g. `--block_shadow`,
# `--input_border_width`) carry values such as `None` or `#white` and do not
# match the hyphenated names used by the rest of this sheet — presumably
# inert; confirm before relying on them.
style = """

footer.svelte-1ax1toq.svelte-1ax1toq.svelte-1ax1toq {
    display: None;
    justify-content: center;
    margin-top: var(--size-4);
    color: Transparent
}
.dark {
    --body-background-fill: rgba(0, 0, 0, 0.5);
    --body-text-color: var(--neutral-100);
    --color-accent-soft: var(--neutral-700);
    --background-fill-primary: transparent;
    --background-fill-secondary: transparent;
    --border-color-accent: rgba(255 , 255 , 255 , 1);
    --border-color-primary: rgba(255, 255, 255 , 0.02);
    --link-text-color-active: var(--secondary-500);
    --link-text-color: var(--secondary-500);
    --link-text-color-hover: var(--secondary-400);
    --link-text-color-visited: var(--secondary-600);
    --body-text-color-subdued: var(--neutral-400);
    --shadow-spread: 2px;
    --block-background-fill: transparent;
    --block-border-color: var(--border-color-primary);
    --block-border-width: 2px;
    --block-info-text-color: var(--body-text-color-subdued);
    --block-label-background-fill: transparent;
    --block-label-border-color: var(--border-color-primary);
    --block_label_border_width: None;
    --block-label-text-color: var(--neutral-200);
    --block_shadow: #white;
    --block_title_background_fill: None;
    --block_title_border_color: None;
    --block_title_border_width: None;
    --block-title-text-color: var(--neutral-200);
    --panel-background-fill: transparent;
    --panel-border-color: var(--border-color-primary);
    --panel_border_width: None;
    --border-color-accent-subdued: var(--border-color-accent);
    --chatbot-code-background-color: var(--neutral-800);
    --checkbox-background-color: transparent;
    --checkbox-background-color-focus: var(--checkbox-background-color);
    --checkbox-background-color-hover: var(--checkbox-background-color);
    --checkbox-background-color-selected: var(--secondary-600);
    --checkbox-border-color: var(--neutral-700);
    --checkbox-border-color-focus: var(--secondary-500);
    --checkbox-border-color-hover: var(--neutral-600);
    --checkbox-border-color-selected: var(--secondary-600);
    --checkbox-border-width: var(--input-border-width);
    --checkbox-label-background-fill: var(--button-secondary-background-fill);
    --checkbox-label-background-fill-hover: var(--button-secondary-background-fill-hover);
    --checkbox-label-background-fill-selected: var(--checkbox-label-background-fill);
    --checkbox-label-border-color: var(--border-color-primary);
    --checkbox-label-border-color-hover: var(--checkbox-label-border-color);
    --checkbox-label-border-width: var(--input-border-width);
    --checkbox-label-text-color: var(--body-text-color);
    --checkbox-label-text-color-selected: var(--checkbox-label-text-color);
    --error-background-fill: var(--background-fill-primary);
    --error-border-color: #ef4444;
    --error_border_width: None;
    --error-text-color: #fef2f2;
    --error-icon-color: #ef4444;
    --input-background-fill: rgb(255 255 255 / 7%);
    --input-background-fill-focus: rgb(255 255 255 / 18%);
    --input-background-fill-hover: rgb(255 255 255 / 18%);
    --input-border-color: rgb(255 255 255 / 28%);
    --input-border-color-focus: rgb(168 168 168 / 87%);
    --input-border-color-hover: rgb(255 255 255 / 44%);
    --input_border_width: 2px;
    --input-placeholder-color: rgb(255 255 255);
    --input_shadow: 2px;
    --input_shadow_focus: None;
    --loader_color: None;
    --slider_color: None;
    --stat-background-fill: transparent;
    --table-border-color: var(--neutral-700);
    --table-even-background-fill: var(--neutral-700);
    --table-odd-background-fill: var(--neutral-700);
    --table-row-focus: var(--color-accent-soft);
    --button-border-width: 1px;
    --button-cancel-background-fill: var(--button-primary-background-fill);
    --button-cancel-background-fill-hover: var(--button-primary-background-fill-hover);
    --button-cancel-border-color: var(--button-secondary-border-color);
    --button-cancel-border-color-hover: var(--button-cancel-border-color);
    --button-cancel-text-color: var(--button-secondary-text-color);
    --button-cancel-text-color-hover: var(--button-cancel-text-color);
    --button-primary-background-fill: transparent;
    --button-primary-background-fill-hover: #ffffff2e;
    --button-primary-border-color: #ffffff;
    --button-primary-border-color-hover: #ffffff;
    --button-primary-text-color: #787878;
    --button-primary-text-color-hover: white;
    --button-secondary-background-fill: var(--button-primary-background-fill);
    --button-secondary-background-fill-hover: #ffffff2e;
    --button-secondary-border-color: #3b3b3b;
    --button-secondary-border-color-hover: #ffffff;
    --button-secondary-text-color: #787878;
    --button-secondary-text-color-hover: #ffffff;
    --name: glass;
    --primary-50: rgba(250, 250, 250, 1);
    --primary-100: rgba(245, 245, 245, 1);
    --primary-200: rgba(235, 235, 235, 0.7);
    --primary-300: rgba(220, 220, 220, 0.6);
    --primary-400: rgba(168, 168, 168, 0.5);
    --primary-500: rgba(0, 0, 0, 0);
    --primary-600: rgba(0, 0, 0, 0);
    --primary-700: rgb(68 68 68);
    --primary-800: rgba(41, 41, 41, 0.2);
    --primary-900: rgba(28, 28, 28, 0.1);
    --primary-950: rgba(0, 0, 0, 0);
    --secondary-50: rgba(250, 250, 250, 1);
    --secondary-100: rgba(245, 245, 245, 1);
    --secondary-200: rgba(235, 235, 235, 1);
    --secondary-300: rgba(220, 220, 220, 1);
    --secondary-400: rgba(168, 168, 168, 1);
    --secondary-500: rgba(120, 120, 120, 1);
    --secondary-600: rgb(0 0 0 / 0%);
    --secondary-700: rgba(68, 68, 68, 0.1);
    --secondary-800: rgb(41 41 41 / 0%);
    --secondary-900: rgba(28, 28, 28, 1);
    --secondary-950: rgba(15, 15, 15, 1);
    --neutral-50: rgba(250, 250, 250, 1);
    --neutral-100: rgba(245, 245, 245, 1);
    --neutral-200: rgba(235, 235, 235, 1);
    --neutral-300: rgba(220, 220, 220, 1);
    --neutral-400: rgba(168, 168, 168, 1);
    --neutral-500: rgba(120, 120, 120, 1);
    --neutral-600: rgba(0, 0, 0, 0);
    --neutral-700: rgba(68, 68, 68, 0);
    --neutral-800: rgba(41, 41, 41, 1);
    --neutral-900: rgba(28, 28, 28, 1);
    --neutral-950: rgba(15, 15, 15, 1);
    --spacing-xxs: 1px;
    --spacing-xs: 1px;
    --spacing-sm: 2px;
    --spacing-md: 4px;
    --spacing-lg: 6px;
    --spacing-xl: 9px;
    --spacing-xxl: 12px;
    --radius-xxs: 1px;
    --radius-xs: 1px;
    --radius-sm: 2px;
    --radius-md: 4px;
    --radius-lg: 6px;
    --radius-xl: 8px;
    --radius-xxl: 12px;
    --text-xxs: 8px;
    --text-xs: 9px;
    --text-sm: 11px;
    --text-md: 13px;
    --text-lg: 16px;
    --text-xl: 20px;
    --text-xxl: 24px;
    --font: 'Optima', 'Candara', 'Noto Sans', 'source-sans-pro', sans-serif;
    --font-mono: 'IBM Plex Mono', 'ui-monospace', 'Consolas', monospace;
    --body-text-size: var(--text-md);
    --body-text-weight: 400;
    --embed-radius: var(--radius-lg);
    --color-accent: var(--primary-500);
    --shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px;
    --shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
    --shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset;
    --block-info-text-size: var(--text-sm);
    --block-info-text-weight: 400;
    --block-label-border-width: 1px;
    --block-label-shadow: var(--block-shadow);
    --block-label-margin: 0;
    --block-label-padding: var(--spacing-sm) var(--spacing-lg);
    --block-label-radius: calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px) 0;
    --block-label-right-radius: 0 calc(var(--radius-lg) - 1px) 0 calc(var(--radius-lg) - 1px);
    --block-label-text-size: var(--text-md);
    --block-label-text-weight: 600;
    --block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px);
    --block-radius: var(--radius-lg);
    --block-shadow: var(--primary-400) 0px 0px 3px 0px;
    --block-title-background-fill: none;
    --block-title-border-color: none;
    --block-title-border-width: 0px;
    --block-title-padding: 0;
    --block-title-radius: none;
    --block-title-text-size: var(--text-md);
    --block-title-text-weight: 600;
    --container-radius: var(--radius-lg);
    --form-gap-width: 0px;
    --layout-gap: var(--spacing-xxl);
    --panel-border-width: 1px;
    --section-header-text-size: var(--text-md);
    --section-header-text-weight: 400;
    --checkbox-border-radius: var(--radius-sm);
    --checkbox-label-gap: var(--spacing-lg);
    --checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));
    --checkbox-label-shadow: none;
    --checkbox-label-text-size: var(--text-md);
    --checkbox-label-text-weight: 400;
    --checkbox-check: url(data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e);
    --radio-circle: url(data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e);
    --checkbox-shadow: var(--input-shadow);
    --error-border-width: 1px;
    --input-border-width: 2px;
    --input-padding: var(--spacing-xl);
    --input-radius: var(--radius-lg);
    --input-shadow: none;
    --input-shadow-focus: var(--input-shadow);
    --input-text-size: var(--text-md);
    --input-text-weight: 400;
    --loader-color: var(--color-accent);
    --prose-text-size: var(--text-md);
    --prose-text-weight: 400;
    --prose-header-text-weight: 600;
    --slider-color: var(--primary-400);
    --table-radius: var(--radius-lg);
    --button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg));
    --button-large-radius: var(--radius-lg);
    --button-large-text-size: var(--text-lg);
    --button-large-text-weight: 600;
    --button-shadow: none;
    --button-shadow-active: var(--shadow-inset);
    --button-shadow-hover: none;
    --button-small-padding: var(--spacing-sm) calc(2 * var(--spacing-sm));
    --button-small-radius: var(--radius-lg);
    --button-small-text-size: var(--text-md);
    --button-small-text-weight: 400;
    --button-transition: background-color 0.2s ease;
}
    .chatbot {
        display: flex;
        flex-direction: column;
        align-items: flex-start;
    }

    .chatbot .bubble {
        margin-left: 10px;
    }

    .chatbot .bubble .content {
        max-width: 60%;
    }

    .user {
        display: flex;
        flex-direction: column;
        align-items: flex-end;
    }

    .user .bubble {
        margin-right: 10px;
    }

    .user .bubble .content {
        max-width: 60%;
    }
"""


In [None]:
# Build the chat UI: a hidden system-prompt textbox, the chatbot pane, and an
# input row with a textbox and a Send button. Pressing Enter in the textbox
# and clicking Send both run the same four-step pipeline.
with gr.Blocks(css=style) as demo:
    # Not shown to the user (visible=False); only carries the system prompt
    # into the event handlers below.
    system_prompt = gr.Textbox(label='System:',
                              value=DEFAULT_SYSTEM_PROMPT,
                              lines=0,visible=False)
    with gr.Group():
        chatbot = gr.Chatbot(label='FAQ',bubble_full_width=True)
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                scale=4,
            )
            submit_button = gr.Button('Send',
                                      variant='primary',
                                      scale=1,
                                      min_width=0)

    # Holds the submitted message after the textbox has been cleared.
    saved_input = gr.State()

    def _wire_send_pipeline(register_trigger):
        """Attach the send pipeline to one trigger (e.g. ``textbox.submit``).

        Steps, in order: clear the textbox and stash the message, show the
        message in the chatbot, check the prompt length, and — only if that
        check succeeded — stream the generated reply into the chatbot.
        Returns the final event in the chain.
        """
        return register_trigger(
            fn=clear_and_save_textbox,
            inputs=textbox,
            outputs=[textbox, saved_input],
            api_name=False,
            queue=False,
        ).then(
            fn=display_input,
            inputs=[saved_input, chatbot],
            outputs=chatbot,
            api_name=False,
            queue=False,
        ).then(
            # NOTE(review): `chatbot` here already contains the pending
            # (message, '') turn added by display_input, so the message looks
            # like it is counted twice in the length check — confirm whether
            # this over-count is intended.
            fn=check_input_token_length,
            inputs=[saved_input, chatbot, system_prompt],
            api_name=False,
            queue=False,
        ).success(
            fn=generate,
            inputs=[
                saved_input,
                chatbot,
                system_prompt,
            ],
            outputs=chatbot,
            api_name=False,
        )

    # Same registration order as before: Enter-in-textbox first, then the button.
    _wire_send_pipeline(textbox.submit)
    button_event_preprocess = _wire_send_pipeline(submit_button.click)

In [None]:
# Enable the request queue (at most 20 pending requests) and start the app
# inline in the notebook with debug output.
# NOTE(review): share=True requests a public share link — confirm that public
# exposure of this demo is intended.
(
    demo
    .queue(max_size=20)
    .launch(inline=True, share=True, debug=True)
)