## Make sure you are using a GPU runtime

Install dependencies

In [None]:
!pip install torch transformers openai pandas sentencepiece accelerate

Collecting openai
  Downloading openai-1.3.7-py3-none-any.whl (221 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.4/221.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.25.2-py3-none-any.whl (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━

Imports

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, LlamaTokenizer, LlamaForCausalLM
import json
import openai
import pandas as pd

Connect to drive

Note: Before running this, add the VIP folder as a shortcut to your drive

*Borrowed this from 4644 lol*

In [None]:
from google.colab import drive
drive.mount('/content/drive')

%load_ext autoreload
%autoreload 2

import os
#change to desired path
os.chdir("./drive/MyDrive/VIP/router")
%pwd

MessageError: ignored

Hyperparameters (from BERT Fine Tune)

In [None]:
# Token-length budget shared across the notebook (value taken from the BERT
# fine-tune, per the heading above). It is handed to GPT4 as its `max_tokens`
# cap, and Router's constructor takes a `max_len` that it uses as the
# tokenizer's padding/truncation length.
MAX_LEN = 256

## Set up models

FinMA-7B and FinMA-7B-Full

In [None]:
class _FinMAModel():
    """Shared loader/generator for the FinMA causal language models.

    Subclasses only set ``MODEL_ID`` to the Hugging Face checkpoint to load.
    """

    # Hugging Face repository id of the checkpoint; overridden by subclasses.
    MODEL_ID = None

    def __init__(self, max_new_tokens=256):
        """Load the tokenizer and model for ``MODEL_ID``.

        Args:
            max_new_tokens: Maximum number of tokens to generate per query.
                Defaults to 256.
        """
        self.max_new_tokens = max_new_tokens
        self.tokenizer = LlamaTokenizer.from_pretrained(self.MODEL_ID)
        self.model = LlamaForCausalLM.from_pretrained(self.MODEL_ID, device_map='auto')

    def query(self, prompt):
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text to feed to the model.

        Returns:
            The decoded output sequence as a string with special tokens
            removed (the first, and only, sequence returned by ``generate``).
        """
        # The tokenizer builds its tensors on the CPU; move them to wherever
        # the model lives so generate() does not hit a device mismatch.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        # Pass an explicit generation budget: without one, generate() falls
        # back to its default length limit, which is shorter than the prompts
        # this notebook sends.
        output = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)
        return self.tokenizer.decode(output[0], skip_special_tokens=True)


class FinMA7B(_FinMAModel):
    """FinMA model loaded from the ``ChanceFocus/finma-7b-nlp`` checkpoint."""

    MODEL_ID = 'ChanceFocus/finma-7b-nlp'


class FinMA7BFull(_FinMAModel):
    """FinMA model loaded from the ``ChanceFocus/finma-7b-full`` checkpoint."""

    MODEL_ID = 'ChanceFocus/finma-7b-full'


GPT-4

In [None]:
class GPT4():
    """Thin wrapper around OpenAI's ``gpt-4`` chat model exposing ``query(prompt)``.

    Works with both the openai>=1.0 client interface and the legacy
    (<1.0) module-level ``ChatCompletion`` interface.
    """

    def __init__(self, api_key, max_len):
        """Store credentials and the response length cap.

        Args:
            api_key: OpenAI API key. If empty/None, the ``OPENAI_API_KEY``
                environment variable is used instead.
            max_len: Maximum number of tokens requested per completion.
        """
        import os  # local import keeps this cell self-contained

        # An empty key can never authenticate, so fall back to the environment.
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        openai.api_key = self.api_key
        self.max_len = max_len
        # Created lazily on first query (openai>=1.0 only).
        self._client = None

    def query(self, prompt):
        """Send ``prompt`` as a single user message and return the reply text.

        Args:
            prompt: User message content.

        Returns:
            The content of the first choice's message.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]

        if hasattr(openai, "OpenAI"):
            # openai>=1.0: ChatCompletion was removed in favour of a client object.
            if self._client is None:
                self._client = openai.OpenAI(api_key=self.api_key)
            response = self._client.chat.completions.create(
                model='gpt-4',
                messages=messages,
                max_tokens=self.max_len
            )
            return response.choices[0].message.content

        # Legacy openai<1.0 interface.
        response = openai.ChatCompletion.create(
            model='gpt-4',
            messages=messages,
            max_tokens=self.max_len
        )
        return response['choices'][0]['message']['content']

Create model dictionary

In [None]:
import os

# Registry of model wrappers, each exposing `.query(prompt)`. The keys are the
# names used to look a model up after routing, so they need to match the
# values in the 'Model' column of routes.csv.
models = {
    'FinMA-7B': FinMA7B(),
    'FinMA-7B-Full': FinMA7BFull(),
    # Read the key from the environment instead of hard-coding it in the notebook.
    'GPT-4': GPT4(os.environ.get('OPENAI_API_KEY', ''), MAX_LEN)
}

## Setup Router

Define router

In [None]:
class Router():
    """BERT sequence classifier that maps a prompt to a label name."""

    def __init__(self, model_path, tokenizer_path, label_dict_path, max_len=256):
        """Load the classifier, its tokenizer and the label mapping.

        Args:
            model_path: Path or id passed to
                ``BertForSequenceClassification.from_pretrained``.
            tokenizer_path: Path or id passed to ``BertTokenizer.from_pretrained``.
            label_dict_path: Path to a JSON file mapping label name -> class index.
            max_len: Length prompts are padded/truncated to when tokenized.
                Defaults to 256.
        """
        self.max_len = max_len
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = BertForSequenceClassification.from_pretrained(model_path)
        self.model = self.model.to(self.device)
        self.tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
        with open(label_dict_path, 'r') as file:
            self.label_dict = json.load(file)

    def predict(self, prompt):
        """Classify ``prompt`` and return the matching label name.

        Args:
            prompt: Text to classify.

        Returns:
            The key of ``label_dict`` whose value equals the arg-max class index.

        Raises:
            ValueError: If the predicted index has no entry in ``label_dict``.
        """
        self.model.eval()
        encoding = self.tokenizer.encode_plus(
            prompt,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            return_attention_mask=True,
            truncation=True,
            return_tensors='pt'
        )
        # Inputs must live on the same device as the model.
        input_ids = encoding['input_ids'].to(self.device)
        attention_mask = encoding['attention_mask'].to(self.device)
        with torch.no_grad():
            outputs = self.model(input_ids, attention_mask=attention_mask)
        prediction_index = torch.argmax(outputs.logits, dim=-1).item()

        # label_dict is stored as label -> index, so search it in reverse.
        for label, index in self.label_dict.items():
            if index == prediction_index:
                return label

        # Fail loudly instead of implicitly returning None to the caller.
        raise ValueError(
            f"Predicted class index {prediction_index} has no entry in the label dictionary"
        )


Create router

In [None]:
# Router's constructor takes the tokenizer length as its fourth argument; use
# the notebook-wide MAX_LEN so prompts are padded/truncated to that length.
router = Router('./model', './model', 'label_dict.json', MAX_LEN)

Load routes

In [None]:
# Routing table: after indexing, rows are keyed by the 'Model' column and each
# remaining column is looked up by label name when a prompt is routed.
routes = pd.read_csv('routes.csv')
routes = routes.set_index('Model')

Create prompt routing

In [None]:
def query(prompt):
    """Route ``prompt`` to the top-scoring model and return ``(model_name, response)``.

    The router's predicted label (with the 'ChanceFocus/flare-' prefix removed)
    selects a column of ``routes``; the index entry with the largest value in
    that column names the model in ``models`` that answers the prompt.
    """
    predicted_label = router.predict(prompt)
    task_column = predicted_label.replace('ChanceFocus/flare-', '')
    routed_model = routes[task_column].idxmax()
    answer = models[routed_model].query(prompt)
    return routed_model, answer

Test usage

In [None]:
# Smoke test: route three sample prompts and print which model answered each
# one along with its response.
test_prompts = [
    "Analyze the sentiment of this statement extracted from a financial news article. Provide your answer as either negative, positive, or neutral. Text: `` We have analyzed Kaupthing Bank Sweden and found a business which fits well into Alandsbanken , '' said Alandsbanken 's chief executive Peter Wiklof in a statement . Answer:",
    "Look for indications that the price of gold is increasing. In the news headline, can you identify a FutureNews pertaining to gold? Your response should be Yes or No. Text: dec. gold up $2.50 at $1,053.10/oz on globex Answer:",
    "In the list of tokens, identify 'Person', 'Location', and 'Organisation' and label each accordingly. If the entity spans multiple tokens, use the prefix B-PER, B-LOC, or B-ORG for the first token, and I-PER, I-LOC, or I-ORG for the subsequent tokens of that entity. The beginning of each separate entity should always be labeled with a B-PER, B-LOC, or B-ORG prefix. If the token does not fit into any of the three named categories, or is not a named entity, label it as 'O'. Each line should contain one token and its corresponding label, separated by a colon. Do not combine tokens on your own. The format for each line should be: 'token:label'. Text: You idiot ! Answer:",
]
for test_prompt in test_prompts:
    model, response = query(test_prompt)
    print(f"{model} - {response}")