<a href="https://colab.research.google.com/github/jackma-00/peft-of-a-llm/blob/main/gradio_ui.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
%%capture
# Install the UI and model-loading dependencies; %%capture hides pip's output
# for this cell. huggingface-hub is pinned to 0.25.2, while gradio and unsloth
# are installed at whatever version pip resolves at run time.
# NOTE(review): consider pinning gradio and unsloth too so re-runs are reproducible.
!pip install gradio huggingface-hub==0.25.2
!pip install unsloth


In [8]:

!pip install -U polygon-api-client

Collecting polygon-api-client
  Downloading polygon_api_client-1.14.2-py3-none-any.whl.metadata (889 bytes)
Downloading polygon_api_client-1.14.2-py3-none-any.whl (40 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.6/40.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: polygon-api-client
Successfully installed polygon-api-client-1.14.2


In [9]:
import json
import os
from getpass import getpass

from polygon import RESTClient

# Read the Polygon API key from the POLYGON_API_KEY environment variable so the
# secret never lives in the notebook; when it is unset, prompt for it
# interactively (getpass does not echo the input).
# NOTE(review): a key was previously committed in this cell and should be
# revoked and rotated.
client = RESTClient(
    api_key=os.environ.get("POLYGON_API_KEY") or getpass("Polygon API key: ")
)

# List of tickers to track
tickers = ["SPY", "DIA", "QQQ", "IWM", "VXX"]

# Attributes copied from each previous-close aggregate, in output order
quote_fields = ("ticker", "open", "high", "low", "close", "volume", "timestamp", "vwap")

# Collect data in a structured format
data = []

for ticker in tickers:
    quote = client.get_previous_close_agg(ticker=ticker)

    # An empty result would make quote[0] raise IndexError; report and move on
    # so one missing ticker does not abort the whole cell.
    if not quote:
        print(f"No previous-close data returned for {ticker}; skipping")
        continue

    # Only the first result is used
    previous_close = quote[0]
    data.append({field: getattr(previous_close, field) for field in quote_fields})

# Convert the data to a JSON string for LLM analysis
formatted_data = json.dumps(data, indent=4)

In [None]:
import gradio as gr
import torch
from threading import Thread
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, TextStreamer, TextIteratorStreamer


# --- Model selection ---------------------------------------------------------
# Fine-tuned model hosted on the Hugging Face Hub
model_name = "jackma-00/lora_model_1b"

# --- Loading options ---------------------------------------------------------
max_seq_length = 2048
dtype = None
load_in_4bit = True

# --- Generation settings -----------------------------------------------------
max_tokens = 1024
temperature = 1.5
top_p = 0.95

# System prompt: the consultant persona followed by the JSON dump of the
# previous-close aggregates prepared earlier in the notebook.
system_message = (
    "You are a financial consultant. Answer your client's questions using "
    "yesterday's closing aggregates for the following key tickers: "
    f"{formatted_data}"
)

# --- Model and tokenizer -----------------------------------------------------
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name, max_seq_length=max_seq_length, dtype=dtype, load_in_4bit=load_in_4bit,
)

# Put the model into Unsloth's inference mode
FastLanguageModel.for_inference(model)

# Streamer that exposes generated text as an iterator, omitting the prompt and
# special tokens; reading from it waits at most 10 seconds for new text.
text_streamer = TextIteratorStreamer(
    tokenizer,
    timeout=10.0,
    skip_prompt=True,
    skip_special_tokens=True,
)


# Define inference function
def respond(message, history):
    """Stream the assistant's reply to ``message`` for the chat UI.

    The conversation sent to the model is the system prompt, every past turn
    from ``history``, and finally ``message``. Generation runs in a background
    thread while this generator forwards the text as it is produced.

    Args:
        message: The user's latest message.
        history: Previous turns; each item is indexed as ``[user, assistant]``
            and an empty side of a turn is skipped.

    Yields:
        The reply accumulated so far, growing with each generated chunk.
    """
    # Add system message
    messages = [{"role": "system", "content": system_message}]

    # Include chat history
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    # Lastly append user's message
    messages.append({"role": "user", "content": message})

    # Tokenize the input
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    # Use a fresh streamer for every request. A single streamer shared by all
    # calls lets text from an overlapping or interrupted generation leak into
    # another reply, because every call would read from the same queue.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True
    )

    # Generate arguments
    # NOTE(review): temperature/top_p only matter when sampling is enabled;
    # confirm the model's generation config sets do_sample, or pass it here.
    generate_kwargs = dict(
        input_ids=inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        use_cache=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Run generation in the background so text can be yielded as it arrives
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    # Predict
    partial_message = ""
    for new_token in streamer:
        # Chunks that are exactly '<' are dropped.
        # NOTE(review): presumably this hides a stray marker fragment; confirm.
        if new_token != '<':
            partial_message += new_token
            yield partial_message

    # The streamer is exhausted, so generation is finishing; wait for the
    # worker so no thread is left running after the reply completes.
    worker.join()


# Define Gradio UI
# The interface is bound to `demo` instead of `gr`: reusing `gr` would replace
# the imported gradio module alias and break any later `gr.` call.
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me financial guidance", container=False, scale=7),
    title="Your financial consultant",
    description="Ask anything regarding finance, I will respond you based on the latest closing of major key tickers",
    theme="soft",
    examples=["Should I invest in gold today?", "How can I allocate additional 10k to my portfolio?", "What could be a good diversification strategy?"],
)

# debug=True keeps the cell running so errors and logs stay visible in Colab
if __name__ == "__main__":
    demo.launch(debug=True)


==((====))==  Unsloth 2024.11.11: Fast Llama patching. Transformers:4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://6729a59d1789dfdd9d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
