# vLLM online

In [None]:
# First, spin up the vLLM server by running the shell commands below (startup takes a while).

# export CUDA_VISIBLE_DEVICES='0,1'
# python -m vllm.entrypoints.openai.api_server --model meta-llama/Meta-Llama-3-70B-Instruct --tensor-parallel-size=2 --disable-log-requests

In [None]:
from openai import OpenAI
import threading
import queue

In [None]:
# The OpenAI client is pointed at the local vLLM API server rather than at
# OpenAI itself; "EMPTY" is passed as the API key.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

def llm_chat(messages, model="meta-llama/Meta-Llama-3-70B-Instruct"):
    """Send one chat-completion request and return the reply text.

    Args:
        messages: Chat messages, forwarded unchanged to the completions call.
        model: Model name forwarded to the completions call.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = client.chat.completions.create(messages=messages, model=model)
    first_choice = completion.choices[0]
    return first_choice.message.content

def load_text_from(file_path):
    """Read a text file and return its contents as a list of lines.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's lines, without their line terminators.
    """
    with open(file_path, 'r') as handle:
        return handle.read().splitlines()

In [None]:
# Ticker symbol that gets interpolated into the prompts.
ticker = "aapl"

# Load the raw text line by line, dropping "<SEP>" marker lines and empty lines.
raw_data = [
    line
    for line in load_text_from("/data/kai/forecasting/summary/aapl_2022-08-19_raw.txt")
    if line not in ("<SEP>", "")
]

In [None]:
# System prompt for the first pass: filter and summarize one piece of raw
# text for the current ticker.
SUMMARY_PROMPT = f"""
You are a helpful assistant that filters and summarizes stock news specifically for the company with ticker symbol {ticker}. 

Your task is to:
1. Filter out irrelevant information.
2. Provide a concise summary that includes key numbers, growth trends, and the overall market outlook.
3. Mention major stock movements, significant economic indicators, and any notable company-specific news.
4. Avoid making up any information.

If there is no relevant information, the website is blocked, or there is an error message, return nothing.
"""

# System prompt for the second pass: label a summary "<NONE>" or "<TRUE>".
# Contains no placeholders, so a plain string literal is sufficient.
IGNORE_PROMPT = """
Return "<NONE>" if there is an error in the summarization or if the information is irrelevant. Otherwise, return "<TRUE>".
"""

# System prompt for merging several summaries into one (no placeholders).
COMBINE_PROMPT = """
Combine the summaries into one by following the guidelines:

1. Provide a concise summary that includes key numbers, growth trends, and the overall market outlook.
2. Mention major stock movements, significant economic indicators, and any notable company-specific news.
"""

# System prompt for producing the final summary for the current ticker.
FINAL_PROMPT = f"""
Given the following stock summaries related to the company with ticker symbol {ticker},

1. Combine them into one comprehensive summary
2. Provide key numbers, growth trends, and the overall market outlook.
3. Mention major stock movements, significant economic indicators, and any notable company-specific news.
4. Avoid making up any information.
"""

def message_template(prompt, content):
    """Build a two-message chat: ``prompt`` as system, ``content`` as user.

    Args:
        prompt: Text placed in the system message.
        content: Text placed in the user message.

    Returns:
        A list holding the system message dict followed by the user message dict.
    """
    system_message = {"role": "system", "content": prompt}
    user_message = {"role": "user", "content": content}
    return [system_message, user_message]

def call_llm_chat(prompt, messages, thread_id, result_queue):
    """Run one chat request and push ``(thread_id, result)`` onto a queue.

    Args:
        prompt: System prompt for the request.
        messages: User content for the request.
        thread_id: Identifier queued alongside the result so the caller can
            match it to its input.
        result_queue: Queue that receives the ``(thread_id, result)`` tuple.

    Note:
        If the request raises, the exception's text is queued in place of a
        response, so the caller cannot tell an error from model output.
    """
    try:
        reply = llm_chat(message_template(prompt, messages))
    except Exception as exc:
        reply = str(exc)
    result_queue.put((thread_id, reply))

In [None]:
def batch_call_llm_chat(prompt, data):
    """Send one chat request per item of ``data``, each on its own thread.

    Args:
        prompt: System prompt shared by every request.
        data: Sequence of user contents; one thread is started per item.

    Returns:
        A list as long as ``data`` in which position ``i`` holds whatever was
        queued for item ``i``. A position with no queued result stays ``None``.
    """
    results = queue.Queue()
    workers = [
        threading.Thread(target=call_llm_chat, args=(prompt, item, position, results))
        for position, item in enumerate(data)
    ]
    for worker in workers:
        worker.start()

    # Block until every worker has finished before draining the queue.
    for worker in workers:
        worker.join()

    responses = [None] * len(data)
    while True:
        try:
            position, reply = results.get_nowait()
        except queue.Empty:
            break
        responses[position] = reply

    return responses

In [None]:
# Pass 1: summarize every raw text.
summaries = batch_call_llm_chat(SUMMARY_PROMPT, raw_data)
# Pass 2: have the model label each summary; "<NONE>" marks one to discard.
result = batch_call_llm_chat(IGNORE_PROMPT, summaries)

# Keep the summaries whose label is anything other than exactly "<NONE>".
valid_idxs = [position for position, label in enumerate(result) if label != "<NONE>"]
valid_summaries = [summaries[position] for position in valid_idxs]

In [None]:
# Display the summaries that passed the "<NONE>" filter.
valid_summaries

In [None]:
from transformers import AutoTokenizer

# The tokenizer is used only to measure how many tokens the joined summaries take.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

token_length = len(tokenizer.encode('\n'.join(valid_summaries)))
max_token_length = 4096

if token_length > max_token_length:
    print("splitting summaries...")
    # Estimate how many summaries fit in one chunk from the average length.
    # Clamp to at least 1: a very long average would otherwise give 0, and
    # range() rejects a step of 0.
    avg_token_per_summary = token_length / len(valid_summaries)
    summaries_per_chunk = max(1, int(max_token_length / avg_token_per_summary))

    # Each chunk is joined into one string, because the chat message content
    # sent to the model must be text rather than a list of summaries.
    summary_chunks = [
        '\n'.join(valid_summaries[i: i + summaries_per_chunk])
        for i in range(0, len(valid_summaries), summaries_per_chunk)
    ]

    # One combined summary per chunk. These strings, not the raw chunks, are
    # the input to the final prompt, so the join below receives only strings.
    combined_summary = batch_call_llm_chat(COMBINE_PROMPT, summary_chunks)
    valid_summaries_combined = combined_summary
else:
    valid_summaries_combined = valid_summaries

final_summary = llm_chat(message_template(FINAL_PROMPT, '\n'.join(valid_summaries_combined)))

In [None]:
# Print the final summary text returned by the model.
print(final_summary)

# With vLLM offline

In [None]:
from vllm import LLM, SamplingParams

In [None]:
# Sample prompts used to exercise offline generation.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Sampling settings passed to the generate call: temperature 0.8, top_p 0.95,
# and max_tokens=8000.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=8000)

In [None]:
# Create the offline vLLM engine for the 8B instruct model.
# NOTE(review): gpu_memory_utilization=0.9 is presumably the fraction of GPU
# memory vLLM may use — confirm against the vLLM docs.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct", gpu_memory_utilization=0.9)

In [None]:
from transformers import AutoTokenizer
# Load the tokenizer for the same model id that is served offline.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Wrap each prompt as a single-turn conversation: a list holding one
# {"role": "user", "content": ...} message.
messages_dicts = [[{"role": "user", 'content': p}] for p in prompts]
# Apply the tokenizer's chat template without tokenizing (tokenize=False) and
# with the generation prompt requested (add_generation_prompt=True).
formatted_message = tokenizer.apply_chat_template(messages_dicts, tokenize=False, add_generation_prompt=True)

In [None]:
# Generate completions for the chat-formatted prompts.
outputs = llm.generate(formatted_message, sampling_params)

In [None]:
# Display the text of the first output entry of each result.
[o.outputs[0].text for o in outputs]

In [None]:
import pandas as pd
import os
from tqdm import tqdm
from IPython.display import clear_output
from src.data_utils import download_raw_texts_from_urls, save_text_to, load_text_from    

In [None]:
# Directory holding the per-ticker CSV files, plus a listing of its contents.
directory_path = '/data/kai/forecasting/raw_urls'
file_names = os.listdir(directory_path)

ticker = "aapl"
# Load the ticker's CSV with its row order reversed.
df = pd.read_csv(f"{directory_path}/{ticker}_text.csv")[::-1]

data_dir = "/data/kai/forecasting/summary"

# Walk the unique timestamps and stop at the first one that has no
# final-summary file yet; idx and date_str keep that position afterwards.
# NOTE(review): if every timestamp already has a final summary, the loop ends
# without breaking and date_str stays at the last timestamp — confirm that is
# intended.
for idx, date_str in enumerate(df["timestamp"].unique()):
    if not os.path.exists(f"{data_dir}/{ticker}_{date_str}_final_summary.txt"):
        break

In [None]:
# Raw-text file for the selected ticker and date.
raw_path = f"{data_dir}/{ticker}_{date_str}_raw.txt"

# Load it and drop empty entries and "<SEP>" marker entries.
raw_texts = [
    text
    for text in load_text_from(raw_path)
    if text != "" and text != "<SEP>"
]

In [None]:
# Opening instruction naming the ticker whose news is being summarized.
summary_prompt = f"You are a helpful assistant that filters and summarizes stock news specifically for company with ticker symbol {ticker}."

# Closing instruction for summarization; it has no placeholders, so a plain
# string literal is used.
summary_ending_prompt = "Filter out irrelevant information and provide a concise summary including key numbers, growth trends, and the overall market outlook. Ensure to mention major stock movements, significant economic indicators, and any notable company-specific news. Do not make up false information. "
# Instruction asking the model to omit extraneous phrases and commentary.
filter_prompt = "Keep the query the same, but please avoid any extraneous phrases or commentary such as 'Here is the filtered text' or 'I hope this helps.'"

# Instruction for merging several summaries, prefixed with the opening instruction.
combine_prompt = summary_prompt + " Combine the following summaries while preserving as much information as you can: "

In [None]:
# Display a slice of the loaded raw texts (indices 5 through 9).
raw_texts[5:10]

In [None]:
# Alternative prompt sets are kept commented out; the active one repeats a
# single short question ten times.
# prompts = [summary_prompt + text for text in raw_texts[5:8]]
prompts = ["what is your name in 10 words or less?"] * 10
# prompts = ["what is your name?"]
# Sampling settings for this run, with max_tokens=100.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=100)

outputs = llm.generate(prompts, sampling_params)

# Print the text of the first output entry of each result.
for output in outputs:
    prompt = output.prompt  # read but not used in the printed line below
    generated_text = output.outputs[0].text
    print(f"Generated text: {generated_text!r}")