In [None]:
import os

from utils_paragraph import paraphrase_nlp, paraphrasing_predict_llm, paraphrase_initial, \
    paraphrase_seq2lan, recover_lan2seq, paraphrasing_predict_llama

os.environ['OMP_NUM_THREADS'] = '4'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openai

with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

openai.api_key = config['OPENAI_API_KEY']
openai.api_base = config['OPENAI_API_BASE']


from data1.serialize import SerializerSettings
from sklearn import metrics
from models.darts import get_arima_predictions_data
from models.llmtime import get_llmtime_predictions_data
from data1.small_context import get_datasets, get_memorization_datasets, get_dataset
from models.validation_likelihood_tuning import get_autotuned_predictions_data
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# load_ext autoreload
# autoreload 2


In [19]:
def plot_preds(train, test, pred_dict, model_name, ds_name, show_samples=False):
    pred = pred_dict['median']
    pred = pd.Series(pred, index=test.index)
    plt.figure(figsize=(8, 6), dpi=100)
    plt.plot(train)
    plt.plot(test, label='Truth', color='black')
    plt.plot(pred, label=model_name, color='purple')
    # shade 90% confidence interval
    samples = pred_dict['samples']
    lower = np.quantile(samples, 0.05, axis=0)
    upper = np.quantile(samples, 0.95, axis=0)
    plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
    if show_samples:
        samples = pred_dict['samples']
        # convert df to numpy array
        samples = samples.values if isinstance(samples, pd.DataFrame) else samples
        for i in range(min(10, samples.shape[0])):
            plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
    plt.legend(loc='upper left')
    if 'NLL/D' in pred_dict:
        nll = pred_dict['NLL/D']
        if nll is not None:
            plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes,
                     bbox=dict(facecolor='white', alpha=0.5))
    plt.savefig(f'{ds_name}{model_name}givenname1.pdf', format='pdf')


def plot_preds2(train, test, pred_dict, model_name, ds_name, show_samples=False):
    pred = pred_dict['median']
    pred = pd.Series(pred, index=test.index)
    plt.figure(figsize=(8, 6), dpi=100)
    # plt.plot(train)
    plt.plot(test, label='Truth', color='black')
    plt.plot(pred, label=model_name, color='purple')
    # shade 90% confidence interval
    samples = pred_dict['samples']
    lower = np.quantile(samples, 0.05, axis=0)
    upper = np.quantile(samples, 0.95, axis=0)
    plt.fill_between(pred.index, lower, upper, alpha=0.3, color='purple')
    if show_samples:
        samples = pred_dict['samples']
        # convert df to numpy array
        samples = samples.values if isinstance(samples, pd.DataFrame) else samples
        for i in range(min(10, samples.shape[0])):
            plt.plot(pred.index, samples[i], color='purple', alpha=0.3, linewidth=1)
    plt.legend(loc='upper left')
    if 'NLL/D' in pred_dict:
        nll = pred_dict['NLL/D']
        if nll is not None:
            plt.text(0.03, 0.85, f'NLL/D: {nll:.2f}', transform=plt.gca().transAxes,
                     bbox=dict(facecolor='white', alpha=0.5))
    plt.savefig(f'{ds_name}{model_name}givenname2.pdf', format='pdf')

In [20]:
gpt4_hypers = dict(
    alpha=0.3,
    basic=True,
    temp=1.0,
    top_p=0.8,
    settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

gpt3_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)

promptcast_hypers = dict(
    temp=0.7,
    settings=SerializerSettings(base=10, prec=0, signed=True,
                                time_sep=', ',
                                bit_sep='',
                                plus_sign='',
                                minus_sign='-',
                                half_bin_correction=False,
                                decimal_point=''))

arima_hypers = dict(p=[12, 30], d=[1, 2], q=[0])

model_predict_fns = {
    'gpt-3.5-turbo-1106': get_llmtime_predictions_data,
    # 'gpt-4-0125-preview': get_llmtime_predictions_data,
    # 'llama2-13b-chat': get_llmtime_predictions_data,
}

model_names = list(model_predict_fns.keys())
print("Model_names:", model_names)

# Initial out dict

datasets_list = [
    'AirPassengersDataset',
    # 'AusBeerDataset',
    # 'GasRateCO2Dataset',
    'MonthlyMilkDataset',
    'SunspotsDataset',
    'WineDataset',
    # 'WoolyDataset',
    # 'HeartRateDataset',

    # 'IstanbulTraffic',
    # 'TSMCStock',
    # 'TurkeyPower',
    # 'ETTh1Dataset',
    # 'ETTm2Dataset',
]

Model_names: ['gpt-3.5-turbo-1106']


In [22]:
out = {}
datasets = get_datasets()
num_samples = 1

for model in model_names:  # GPT-4 takes a about a minute to run
    print("Model name: ", model)
    steps = 500  # predict steps
    for dataset_name in datasets_list:
        mse_amount = 0.0
        mae_amount = 0.0
        mape_amount = 0.0
        rsquare_amount = 0.0
        print("dataset_name: ", dataset_name)
        for i in range(num_samples):
            print("Round: ", i+1)
            desp = paraphrase_initial(dataset_name)
            data = datasets[dataset_name]
            train, test = data
            Train_lan = paraphrase_seq2lan(train, desp)
            Test_lan = paraphrase_seq2lan(test, desp)
            seq_test = recover_lan2seq(Test_lan)
            seq_pred = paraphrasing_predict_llm(desp, Train_lan, steps, model)

            print("test len:", test.shape)
            print("Seq_pred:", seq_pred.shape)
            if seq_pred.shape >= test.shape:
                seq_pred = seq_pred[:len(test)]
            else:
                print("Not enough sequences for prediction")
                break
            mse = mean_squared_error(test, seq_pred)
            mae = mean_absolute_error(test, seq_pred)
            mape = metrics.mean_absolute_percentage_error(test, seq_pred)*100
            r2 = r2_score(test, seq_pred)

            mse_amount += mse
            mae_amount += mae
            mape_amount += mape
            rsquare_amount += r2

        mse_mean = mse_amount/num_samples
        mae_mean = mae_amount/num_samples
        mape_mean = mape_amount/num_samples
        r2_mean = rsquare_amount/num_samples
    
        # print and plot values
        print("\n")
        print(f'MSE: {mse_mean}, MAE: {mae_mean}, MAPE: {mape_mean}, R²: {r2_mean}')
        print("\n")
    print('-------------------------New Model')

Model name:  gpt-3.5-turbo-1106
dataset_name:  AirPassengersDataset
Round:  1
Test_lan: 495.0,496.0,497.0,502.0,506.0,511.0,512.0,507.0,511.0,514.0,516.0,518.0,519.0,520.0,521.0,520.0,521.0,522.0,520.0,515.0,514.0,511.0,507.0,505.0,503.0,501.0,502.0,500.0,498.0,497.0,496.0,495.0,496.0,495.0,497.0,495.0,496.0,497.0,499.0,501.0,503.0,505.0,503.0,501.0,503.0,501.0,502.0,503.0,502.0,501.0,503.0,505.0
test len: (29,)
Seq_pred: (27,)
Not enough sequences for prediction


MSE: 0.0, MAE: 0.0, MAPE: 0.0, R²: 0.0


dataset_name:  MonthlyMilkDataset
Round:  1
Test_lan: 706.0,724.0,668.0,629.0,675.0,694.0,673.0,751.0,771.0,835.0,807.0,763.0,719.0,680.0,682.0,648.0,692.0,708.0,665.0,767.0,787.0,851.0,822.0,778.0,734.0
test len: (34,)
Seq_pred: (14,)
Not enough sequences for prediction


MSE: 0.0, MAE: 0.0, MAPE: 0.0, R²: 0.0


dataset_name:  SunspotsDataset
Round:  1
Test_lan: 57.0, 70.0, 77.0, 64.4, 50.0, 65.8, 69.3, 66.1, 57.0, 68.4, 68.4, 60.3, 58.3, 47.0, 33.9, 50.0, 21.2, 14.4, 33.0, 85.4, 106

In [25]:
data = datasets["SunspotsDataset"]
train, test = data
test

Month
1937-01-01    132.5
1937-05-01    116.7
1937-09-01    100.7
1938-01-01     98.4
1938-05-01    127.4
              ...  
1982-05-01     82.2
1982-09-01    118.8
1983-01-01     84.3
1983-05-01     99.2
1983-09-01     50.3
Freq: 4MS, Name: Sunspots, Length: 141, dtype: float64