# In[1]:
import torch
from transformers import BertTokenizer, BertModel
import numpy as np
import pandas as pd
from darts.models import NLinearModel
from darts import TimeSeries
import glob
import os
from tqdm import tqdm
import sys
import os
import pandas as pd
import numpy as np
import wandb
import time
from loguru import logger
from utils import open_record_directory, open_result_directory, rmse, nmae
from finance_multimodal import getLLMTIMERMSE
from transformers import set_seed
from tqdm import tqdm
import glob
import re
import json

# (notebook cell output) from .autonotebook import tqdm as notebook_tqdm


# In[2]:
def bert_model_inference(summaries, batch_size=2):
    """Embed text summaries with the pooled output of ``bert-base-uncased``.

    The summaries are tokenized together (padded to the longest entry in the
    list, with truncation enabled), pushed through the model in mini-batches
    under ``torch.no_grad()``, and the per-batch ``pooler_output`` arrays are
    stacked row-wise.

    Args:
        summaries: Sequence of strings to embed.
        batch_size: Number of summaries per forward pass. Defaults to 2,
            the value that used to be hard-coded.

    Returns:
        A NumPy array with one row per summary. For ``bert-base-uncased``
        the row width is 768; an empty input yields an array of shape
        ``(0, 768)`` without loading the model.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Side effects:
        Sets the process-wide float32 matmul precision to ``'high'`` and
        prints the selected device (and GPU count when more than one).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Nothing to embed: return early instead of failing on `summaries[0]`
    # (and instead of paying for a model load). `len()` is used rather than
    # truthiness so array-like inputs do not raise on the check itself.
    if len(summaries) == 0:
        # 768 is the hidden size of bert-base-uncased, i.e. the pooled width.
        return np.empty((0, 768), dtype=np.float32)

    # Set float32 matmul precision to utilize Tensor Cores
    torch.set_float32_matmul_precision('high')  # 'medium' trades precision for speed

    # Load pre-trained model and tokenizer
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    # Use DataParallel to wrap the model if multiple GPUs are available
    if torch.cuda.device_count() > 1:
        print("Using {} GPUs".format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)

    # Move model to the available GPU(s)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    model = model.to(device)
    print(type(summaries[0]))

    # Tokenize all summaries at once so every batch shares one padded length
    inputs = tokenizer(summaries, padding=True, truncation=True, return_tensors="pt")
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Run inference batch by batch, keeping only the pooled outputs on the CPU
    pooled_outputs = []
    model.eval()
    with torch.no_grad():
        for start in range(0, len(input_ids), batch_size):
            stop = start + batch_size
            input_ids_batch = input_ids[start:stop].to(device)
            attention_mask_batch = attention_mask[start:stop].to(device)
            outputs = model(input_ids_batch, attention_mask=attention_mask_batch)
            pooled_outputs.append(outputs.pooler_output.cpu().numpy())

    return np.vstack(pooled_outputs)  # Shape: (num_samples, 768)


def nlinear_darts(train_input, train_output, test_input, historcial_window_size,train_embedding=None, test_embedding=None):
    """Fit a Darts ``NLinearModel`` and forecast once per test window.

    Args:
        train_input: Values used to build the training target series.
        train_output: Values used to build the series that is passed to
            ``fit`` as ``future_covariates``.
        test_input: Indexable collection of test windows. Windows are
            accumulated: the forecast for window ``i`` is made from the
            flattened concatenation of windows ``0..i``.
        historcial_window_size: Used as the model's input chunk length,
            its output chunk length, and the forecast horizon ``n``.
        train_embedding: Optional values used as past covariates for
            training. When ``None``, no past covariates are used at all.
        test_embedding: Values used as past covariates for prediction.
            Must be provided whenever ``train_embedding`` is provided.

    Returns:
        A list with one string per test window; each string holds the
        forecast values rounded to two decimals and joined by spaces.

    Side effects:
        Prints every input series, every prediction, and the final list.
    """
    # Convert to TimeSeries object required by Darts
    train_series = TimeSeries.from_values(train_input)
    train_output_series = TimeSeries.from_values(train_output)
    if train_embedding is not None:
        train_past_covariates = TimeSeries.from_values(train_embedding)
        test_past_covariates = TimeSeries.from_values(test_embedding)
    else:
        train_past_covariates = None
        test_past_covariates = None

    # Fall back to the CPU when CUDA is unavailable instead of failing at
    # trainer set-up; with a GPU present this is the original configuration.
    accelerator = "gpu" if torch.cuda.is_available() else "cpu"

    # Define and train the NLinearModel model
    model_NLinearModel = NLinearModel(
        input_chunk_length=historcial_window_size,
        output_chunk_length=historcial_window_size,
        n_epochs=100,
        pl_trainer_kwargs={"accelerator": accelerator, "devices": 1},
    )
    # NOTE(review): the training outputs are supplied as future covariates
    # here, but predict() below is not given any future covariates --
    # confirm this is the intended set-up.
    model_NLinearModel.fit(train_series, past_covariates=train_past_covariates, future_covariates=train_output_series)

    pred_value = []
    test = np.array([])
    # Make predictions
    # Integer indexing is kept (rather than direct iteration) so containers
    # whose iteration differs from positional access behave as before.
    for i in range(len(test_input)):
        # np.append flattens, so `test` stays a 1-D running history.
        test = np.append(test, test_input[i])
        test_series = TimeSeries.from_values(test)
        print("input: ", test_series)
        forecast = model_NLinearModel.predict(
            n=historcial_window_size,
            series=test_series,
            past_covariates=test_past_covariates,
        )
        predictions = forecast.all_values().flatten().tolist()
        str_res = ' '.join(str(round(num, 2)) for num in predictions)
        print("Prediction: " + str_res)
        pred_value.append(str_res)
    print(pred_value)

    return pred_value