## Experiments (baseline)
This notebook is used for conduct the baseline prediction with the pre-trained LLMs. Adapted from [Large Language Models Are Zero Shot Time Series Forecasters
](https://github.com/ngruver/llmtime)

We predict both univariate and multivariate time series (with each prediction focusing on a single sequence). Additionally, we offer the option to apply Moving Average (MA) for smoothing the input time series.

In [None]:
from darts import TimeSeries, concatenate
from darts.utils.callbacks import TFMProgressBar
from darts.models import NBEATSModel
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller
from darts.metrics import mape, r2_score
from darts.datasets import EnergyDataset
from darts import concatenate


: 

In [None]:
import time

from statsmodels.datasets import co2
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
from statsmodels.tsa.seasonal import STL
import numpy as np
import copy
from sklearn.preprocessing import StandardScaler

import matplotlib.font_manager as fm
import seaborn as sns

In [2]:
import os
os.environ['OMP_NUM_THREADS'] = '4'

from data1.serialize import SerializerSettings
from data1.small_context import get_datasets,get_memorization_datasets

In [None]:
from models.utils import grid_iter
from models.validation_likelihood_tuning import get_autotuned_predictions_data
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from models import llmtime
from models.llmtime import get_llmtime_predictions_data

In [4]:
import pickle

In [None]:
import json
import openai

with open(r'config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)
    
openai.api_key = config['OPENAI_API_KEY']
openai.api_base = config['OPENAI_API_BASE']

### Dataset Preparation

In [None]:
def dataset_obtain(testfrac=0.2):
    '''
    获得 numpy 结构数据集
    '''
    datasets_path = {
    'us_births': 'datasets/monash/us_births.pkl',  # daily
    'saugeenday': 'datasets/monash/saugeenday.pkl'  # daily
    }
    datas = []
    for name, data_path in datasets_path.items():
        with open(data_path, 'rb') as file:
            # 加载数据
            data = pickle.load(file)
        series = pd.Series(np.concatenate((data[0][0][0], data[0][0][1])))
        splitpoint = int(len(series)*(1-testfrac))
        train = series.iloc[:splitpoint]  # Only test the last couples of samples
        test = series.iloc[splitpoint:]
        datas.append((train,test))
    return dict(zip(datasets_path.keys(),datas))

In [None]:
# Retrieve the seasonal period for a given dataset.
def period_get(name):
    '''
    Returns the known or manually estimated seasonal period for a specified dataset.

    Args:
        name (str): The name of the dataset.

    Returns:
        period (int): The seasonal period length. Default is 365 (e.g., for daily yearly data).
    '''
    period = 365  # Default period (e.g., daily data with yearly seasonality)
    
    if name == "HeartRateDataset":
        period = 144
    if name == "GasRateCO2Dataset":
        period = 39
    if name == "AirPassengersDataset":
        period = 12
    if name == "AusBeerDataset":
        period = 4
    if name == "MonthlyMilkDataset":
        period = 12
    if name == "SunspotsDataset":
        period = 12
    if name == "WineDataset":
        period = 12
    if name == "WoolyDataset":
        period = 4
    if name == "IstanbulTraffic":
        period = 24
    if name == "TurkeyPower":
        period = 7    # Weekly cycle; manually estimated, not from official documentation
    if name == "us_birth":
        period = 7    # Weekly cycle; manually estimated, not from official documentation
    if name == "saugeenday":
        period = 7    # Weekly cycle; manually estimated, not from official documentation

    return period

## Univariate Timeseries

### Data-preprocessing

In [None]:
def preprocessing_data(train, test):
    train_reshaped = train.values.reshape(-1, 1)
    test_reshaped = test.values.reshape(-1, 1)
    scaler = StandardScaler()
    scaler.fit(train_reshaped)
    standardized_train = scaler.transform(train_reshaped)
    train = pd.Series(standardized_train.flatten(), index=train.index)
    standardized_test = scaler.transform(test_reshaped)
    test = pd.Series(standardized_test.flatten(), index=test.index)    
    return train, test

In [None]:
datasets = get_datasets()  
datasets_tmp = get_memorization_datasets()
datasets.update(datasets_tmp)
datasets_tmp = dataset_obtain()
datasets.update(datasets_tmp)

for name in datasets.keys():
    train, test = datasets[name]
    train, test = preprocessing_data(train, test)
    dataset = (train, test)
    datasets[name] = dataset


### Forecasting

In [None]:
gpt4_hypers = dict(
    alpha=0.3,
    basic=True,
    temp=1.0,
    top_p=0.8,
    settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='', minus_sign='-')
)

gpt3_hypers = dict(
    temp=0.7,
    alpha=0.95,
    beta=0.3,
    basic=False,
    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)
)

promptcast_hypers = dict(
    temp=0.7,
    settings=SerializerSettings(base=10, prec=0, signed=True,
                                time_sep=', ',
                                bit_sep='',
                                plus_sign='',
                                minus_sign='-',
                                half_bin_correction=False,
                                decimal_point='')
)


gemini_hypers = {
    'temp': 0.2,  # recommended: [0.2, 0.4]
    'alpha': 0.95,
    'beta': 0.3,
    'basic': [False],
    'settings': [SerializerSettings(base=10, prec=3, signed=True,half_bin_correction=True)],  # recommended: prec=3
}

In [None]:
model_hypers = {
    'LLMTime GPT-3.5': {'model': 'gpt-3.5-turbo-instruct', **gpt3_hypers},
    # 'LLMTime GPT-4': {'model': 'gpt-4', **gpt4_hypers},
    # 'LLMTime GPT-3': {'model': 'text-davinci-003', **gpt3_hypers},
    # 'PromptCast GPT-3': {'model': 'text-davinci-003', **promptcast_hypers},
    # 'ARIMA': arima_hypers,
    'gemini-1.0-pro': {'model': 'gemini-1.0-pro', **gemini_hypers},
}

model_predict_fns = {
    'LLMTime GPT-3.5': get_llmtime_predictions_data,
    # 'LLMTime GPT-4': get_llmtime_predictions_data,
    # 'PromptCast GPT-3': get_promptcast_predictions_data,
    # 'ARIMA': get_arima_predictions_data,
    'gemini-1.0-pro': get_llmtime_predictions_data, 
}

In [32]:
model_names = list(model_predict_fns.keys())

### Datasets(Darts and Monash_Not commonly used datasets)

In [33]:
# Selected datasets
# name_list = ['GasRateCO2Dataset', 'WineDataset', 'SunspotsDataset', 'AusBeerDataset']

# only for partial test 
name_list = ['AirPassengersDataset', 'AusBeerDataset', 'GasRateCO2Dataset', 'MonthlyMilkDataset', 'SunspotsDataset', 'WineDataset', 'WoolyDataset', 'HeartRateDataset', 'IstanbulTraffic', 'TSMCStock', 'TurkeyPower', 'us_births', 'saugeenday']

In [None]:
def MA_process(name, train, test, test_length=96):
    period = period_get(name)
    data_tmp = pd.concat([train, test])
    out = data_tmp.rolling(window=period).mean().dropna()
    train_tmp = out[:-test_length]
    test_tmp = out[-test_length:]
    return train_tmp, test_tmp

def metrics_computation(test, median_pred):
    mse = mean_squared_error(test, median_pred)
    mae = mean_absolute_error(test, median_pred)
    mape = mean_absolute_percentage_error(test, median_pred) * 100
    r2 = r2_score(test, median_pred)        
    return mse, mae, mape, r2

def MA_recover(data_pred, median_pred, record_tmp, record_list_tmp, period=8, whether_MA=False):
    data_pred_tmp = copy.deepcopy(data_pred)
    median_pred_tmp = copy.deepcopy(median_pred)
    if whether_MA is True:
        for index in range(data_pred.shape[1]):
            if index >= period:
                data_pred.iloc[:, index] = period * (data_pred_tmp.iloc[:, index]-data_pred_tmp.iloc[:, index-1])+data_pred.iloc[:, index-period]
                median_pred.iloc[index] = period * (median_pred_tmp.iloc[index]-median_pred_tmp.iloc[index-1])+median_pred.iloc[index-period]
            else:
                if index == 0:
                    data_pred.iloc[:, index] = period * (data_pred_tmp.iloc[:, index]-record_tmp)+record_list_tmp.iloc[index]
                    median_pred.iloc[index] = period * (median_pred_tmp.iloc[index]-record_tmp)+record_list_tmp.iloc[index] 
                else:
                    data_pred.iloc[:, index] = period * (data_pred_tmp.iloc[:, index]-data_pred_tmp.iloc[:, index-1])+record_list_tmp.iloc[index]
                    median_pred.iloc[index] = period * (median_pred_tmp.iloc[index]-median_pred_tmp.iloc[index-1])+record_list_tmp.iloc[index]
    return data_pred, median_pred
 

In [None]:
def forecasting_w_LLMs(datasets, name_list, model_names, whether_MA=False, pred_period=8, whether_multi=False):
    
    for name, dataset in datasets.items():
        if name not in name_list:  
            continue
    
        data = datasets[name]
        train, test = data  

        if name in ['us_births', 'saugeenday']:  
            train = train.iloc[-3080:]  
            test = test.iloc[:96]  
        
        test_original = copy.deepcopy(test)
        record_list_tmp = train[-pred_period:]
        
        if whether_MA:
            train, test = MA_process(name=name, train=train, test=test, test_length=len(test))
        record_tmp = train.iloc[-1]
    
        for model in model_names: 
            print(model)
            if model == 'gemini-1.0-pro':
                whether_blank = False
            else:
                whether_blank = True
            model_hypers[model].update({'dataset_name': name})  
            hypers = list(grid_iter(model_hypers[model]))  
            num_samples = 10  
            
            print(type(train))
            pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False, whether_blanket=whether_blank, whether_multi=whether_multi) 
            data_pred = pred_dict['samples']  
            median_pred = pred_dict['median'] 
            
            data_pred, median_pred = MA_recover(data_pred=data_pred, median_pred=median_pred, record_tmp=record_tmp, record_list_tmp=record_list_tmp, period=pred_period, whether_MA=whether_MA)
                            
            pred_dict['samples'] = data_pred  
            pred_dict['median'] = median_pred
    
            mse, mae, mape, r2 = metrics_computation(test_original, median_pred)


In [None]:
whether_MA = False
forecasting_w_LLMs(datasets, name_list, model_names, whether_MA=whether_MA)


#### with MA

In [None]:
whether_MA = True
forecasting_w_LLMs(datasets, name_list, model_names, whether_MA=whether_MA)

## Multi-variate datasets

In [82]:
def get_datasets_multi(n=-1, testfrac=0.2):
    datasets = [
        'electricity',
        'ETTh1',  
        'ETTh2',
        'ETTm1',
        'ETTm2', 
        'exchange_rate',
        'national_illness',
        'traffic',
        'weather'
    ]
    datas = []
    for i, dsname in enumerate(datasets):
        path = f'datasets/multi_variate_datasets/{dsname}.csv'
        series = pd.read_csv(path)
        splitpoint = int(series.shape[0]*(1-testfrac))
        train = series.iloc[:splitpoint]  # Only test the last couples of samples
        test = series.iloc[splitpoint:]
        datas.append((train,test))
        if i+1==n:
            break
    return dict(zip(datasets,datas))

In [83]:
datasets_tmp = get_datasets_multi()

#### Predict only 1 sequence each time

In [None]:
pred_dict_list = []

for name, dataset in datasets_tmp.items():
    def metrics_computation(test, median_pred):
        mse = mean_squared_error(test, median_pred)
        mae = mean_absolute_error(test, median_pred)
        mape = mean_absolute_percentage_error(test, median_pred) * 100
        r2 = r2_score(test, median_pred)        
        return mse, mae, mape, r2
        
    train, test = dataset 
    train = train[len(train)-1024:]['OT']  # We here cutoff 1024 numbers for the training set (in terms of context length)
    test = test[:96]['OT']  # next 96 numbers
    
    for model in model_names: 
        model_hypers[model].update({'dataset_name': name})  # for promptcast, dict 添加元素
        hypers = list(grid_iter(model_hypers[model]))  # 简单理解为传参
        num_samples = 10  # 表示重复的预测的次数, 原本需要为10, 此处暂时选择 2 来进行测试
        pred_dict = get_autotuned_predictions_data(train, test, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False, whether_blanket=False) 
        pred_dict_list.append(pred_dict)

Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'electricity'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

943, 951, 970, 1018, 1075, 1171, 1250, 1226, 1231, 1307, 1274, 1313, 1410, 1450, 1501, 1421, 1306, 1284, 1204, 1151, 1121, 1086, 1063, 1037, 1016, 996, 1020, 961, 1030, 1123, 1186, 1198, 1320, 1337, 1376, 1384, 1369, 1408, 1459, 1385, 1334, 1301, 1141, 1167, 1109, 1025, 1049, 1009, 1007, 975, 1003, 936, 1046, 1158, 1180, 1237, 1266, 1307, 1305, 1361, 1457, 1528, 1488, 1324, 1259, 1261, 1181, 1154, 1136, 1101, 1048, 1055, 1023, 993, 970, 975, 1059, 1163, 1204, 1256, 1329, 1353, 1377, 1542, 1575, 1661, 1772, 1610, 1512, 1364, 1242, 1182, 1147, 1130, 1105, 1069, 1051, 1046, 1049, 1038, 1078, 1131, 1151, 1191, 1203, 1156, 1150, 1179, 1215, 1242, 1216, 1249, 1239, 1176, 1124, 1119, 1077, 1035, 1045, 1029, 1023, 1015, 982, 998, 936, 986, 964, 1031, 1034, 1010, 1024, 1068, 1036, 1080, 1050, 1021, 987, 999, 951, 973, 920, 937, 887, 923, 874, 915, 898, 913, 734, 1013, 952, 937, 992, 988, 944, 1005, 957, 990, 953, 950, 970, 945, 912, 950, 884, 909, 912, 899, 887, 890, 890, 972, 985, 1095, 1112, 



100%|██████████| 1/1 [00:17<00:00, 17.81s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'ETTh1'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

2096, 2257, 2257, 2290, 2128, 2225, 1838, 2032, 2000, 2160, 2451, 2709, 2354, 3612, 3902, 3451, 3612, 3160, 2935, 2451, 2419, 2225, 2193, 2193, 2225, 2290, 2386, 2290, 2257, 2160, 2225, 2354, 2451, 2419, 2806, 3580, 3419, 4257, 4451, 3612, 3580, 3386, 3032, 2870, 2870, 2741, 2354, 2515, 2483, 2322, 2741, 2290, 2386, 2612, 2483, 2290, 2322, 2935, 2999, 3419, 3483, 3612, 3773, 2967, 2838, 3032, 2612, 2709, 2612, 2128, 1677, 1677, 1645, 1677, 2000, 1838, 1902, 1354, 1419, 1193, 1419, 1096, 1354, 2064, 1322, 1773, 1935, 1612, 1677, 2128, 2290, 2128, 2290, 2193, 2128, 1935, 2064, 1773, 2096, 2419, 2451, 2290, 2257, 2193, 2451, 2225, 2386, 2709, 2515, 3225, 3419, 2870, 2741, 2773, 2386, 2160, 2096, 2225, 2193, 2032, 1967, 2290, 2225, 1967, 1870, 1967, 1773, 1902, 2032, 2160, 1870, 2225, 2451, 2483, 2515, 2515, 2515, 2709, 2644, 2612, 2354, 1967, 1806, 1709, 1515, 1483, 1354, 1193, 1193, 1000, 870, 1064, 1000, 1000, 1096, 1128, 1290, 1419, 1290, 1322, 1386, 1580, 1515, 1515, 1709, 1870, 1741,



100%|██████████| 1/1 [00:19<00:00, 19.64s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'ETTh2'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

1368, 1246, 1124, 1003, 911, 820, 759, 698, 668, 1094, 1581, 2098, 2494, 2737, 3072, 3072, 2951, 2707, 2464, 2220, 1977, 1855, 1824, 1824, 1824, 1763, 1611, 1490, 1368, 1307, 1307, 1246, 1185, 1581, 2007, 2494, 2981, 3316, 3681, 3681, 3651, 3377, 3011, 2707, 2494, 2281, 2159, 2098, 2098, 2098, 2098, 2037, 2007, 1977, 1916, 1885, 1855, 1916, 2220, 2555, 2798, 3072, 3285, 3346, 3255, 3103, 2981, 2859, 2798, 2738, 2677, 2646, 2555, 2494, 2433, 2342, 2220, 2159, 2037, 1977, 1916, 1977, 2129, 2342, 2585, 2707, 2737, 2737, 2677, 2494, 2311, 2129, 1946, 1794, 1672, 1611, 1490, 1368, 1216, 1094, 1003, 911, 820, 759, 729, 1033, 1611, 2129, 2524, 2768, 3042, 2951, 2677, 2433, 2159, 1916, 1733, 1611, 1490, 1368, 1276, 1185, 1185, 1185, 1094, 1033, 972, 911, 850, 1094, 1550, 2159, 2677, 3011, 3255, 3255, 3164, 2920, 2646, 2342, 2190, 2068, 1977, 1916, 1855, 1703, 1581, 1459, 1429, 1307, 1216, 1185, 1216, 1398, 1703, 2098, 2433, 2555, 2585, 2585, 2524, 2403, 2342, 2311, 2281, 2281, 2281, 2281, 2250



100%|██████████| 1/1 [00:21<00:00, 21.63s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'ETTm1'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

5185, 4998, 4873, 4998, 5122, 5060, 5060, 5060, 4935, 4685, 4685, 4248, 3998, 3810, 4248, 3936, 4185, 4373, 4436, 4436, 4436, 4373, 3748, 3061, 3123, 3748, 3623, 3186, 3248, 3810, 3936, 4248, 3186, 3623, 4123, 4061, 4436, 4436, 4436, 4498, 4873, 5122, 5498, 5247, 5122, 5309, 5622, 5560, 5435, 4685, 4436, 4248, 4498, 4623, 4248, 4498, 4498, 4436, 4310, 4623, 4873, 4747, 5060, 4873, 4747, 4747, 4810, 4998, 5060, 5247, 5435, 4747, 4873, 4685, 4810, 4685, 4685, 4623, 4810, 4747, 4747, 4747, 4623, 4623, 4373, 4498, 4373, 4436, 4185, 4248, 4185, 4436, 4436, 4623, 4436, 4623, 4498, 3623, 4373, 4373, 4248, 3998, 3810, 4123, 3936, 3998, 3685, 3248, 2873, 2936, 3310, 3936, 3936, 2748, 2436, 2248, 2061, 1999, 1999, 1624, 1499, 1374, 1374, 1436, 1811, 2936, 3061, 3373, 3310, 3873, 2936, 2248, 2311, 2373, 2436, 2248, 2373, 2561, 2436, 2311, 2248, 1936, 2186, 2311, 2373, 2624, 2499, 2624, 2686, 2999, 2561, 2436, 2811, 2624, 2686, 2624, 2124, 2499, 2624, 2811, 2561, 2624, 2561, 2311, 2124, 1936, 2061



100%|██████████| 1/1 [00:19<00:00, 19.11s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'ETTm2'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

3794, 3794, 3794, 3794, 3794, 3794, 3794, 3722, 3722, 3722, 3651, 3579, 3507, 3292, 3292, 3149, 3149, 3149, 3006, 3006, 2934, 3006, 3006, 2934, 2934, 2934, 2862, 2862, 2862, 2862, 2862, 2862, 2862, 2862, 2934, 3006, 3149, 3364, 3507, 3794, 4009, 4295, 4511, 4654, 4654, 4797, 4797, 4797, 4940, 4940, 4940, 4940, 4940, 4940, 4869, 4869, 5012, 5084, 5156, 5299, 5370, 5442, 5514, 5514, 5514, 5514, 5514, 5514, 5442, 5370, 5227, 5084, 4940, 4797, 4654, 4511, 4367, 4367, 4224, 4081, 3937, 3937, 3794, 3794, 3651, 3651, 3507, 3507, 3507, 3507, 3507, 3507, 3507, 3507, 3579, 3579, 3579, 3579, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3651, 3579, 3579, 3579, 3579, 3507, 3507, 3507, 3507, 3507, 3436, 3436, 3293, 3293, 3293, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3149, 3221, 3221, 3364, 3364, 3507, 3507, 3507, 3507, 3507, 3579, 3579, 3579, 3579, 3579, 3507, 3507, 3507, 3436, 3364, 3364, 3364, 3221, 3221, 3221, 3149, 3077



100%|██████████| 1/1 [00:18<00:00, 18.63s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'exchange_rate'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

968, 985, 994, 981, 979, 981, 981, 983, 981, 990, 981, 983, 981, 981, 993, 1004, 1003, 1020, 1001, 1002, 1002, 1005, 1005, 1009, 1018, 1011, 1012, 1012, 1031, 1021, 1038, 1032, 1035, 1033, 1033, 1025, 1018, 996, 1005, 984, 984, 984, 988, 981, 992, 983, 993, 992, 992, 1014, 1014, 1011, 1004, 1016, 1017, 1017, 1022, 1015, 1021, 997, 992, 992, 992, 1005, 990, 998, 978, 972, 971, 971, 976, 996, 991, 989, 980, 981, 980, 975, 972, 975, 998, 991, 990, 990, 997, 988, 988, 972, 970, 974, 974, 969, 960, 965, 964, 966, 966, 966, 970, 983, 987, 993, 990, 990, 990, 1003, 1002, 1006, 1003, 1003, 1008, 1008, 1015, 1014, 1011, 1016, 1010, 1010, 1010, 1011, 1010, 985, 976, 975, 975, 971, 978, 970, 967, 967, 965, 964, 959, 966, 973, 968, 941, 936, 936, 936, 941, 950, 946, 959, 951, 953, 953, 955, 964, 963, 960, 951, 952, 952, 959, 952, 948, 939, 958, 958, 958, 956, 952, 950, 940, 952, 951, 951, 957, 960, 967, 957, 959, 960, 958, 957, 968, 980, 978, 969, 969, 970, 965, 965, 961, 969, 962, 962, 962, 969, 



100%|██████████| 1/1 [00:18<00:00, 18.82s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'national_illness'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

991, 980, 1225, 1211, 1254, 1255, 1155, 1236, 1223, 987, 1308, 1314, 1276, 1186, 955, 1080, 1337, 1318, 1224, 1268, 1214, 1213, 1187, 1199, 1160, 1124, 1138, 1107, 1104, 1136, 1092, 1101, 1050, 1017, 980, 914, 851, 879, 851, 859, 850, 746, 855, 820, 825, 767, 822, 821, 822, 849, 848, 918, 931, 992, 1344, 1353, 1333, 1380, 1375, 1382, 1366, 1377, 1047, 1344, 1262, 1281, 967, 1070, 1277, 1345, 1237, 1280, 1265, 1222, 1278, 1310, 1244, 1249, 1263, 1266, 1255, 1273, 1216, 1212, 1205, 1160, 1113, 1068, 982, 992, 999, 933, 937, 846, 945, 931, 930, 949, 952, 923, 1000, 1006, 910, 1094, 1095, 1075, 1323, 1303, 1330, 1356, 1345, 1349, 1328, 1307, 1055, 1381, 1342, 1316, 1042, 1092, 1214, 1246, 1258, 1241, 1280, 1294, 1174, 1278, 1242, 1267, 1264, 1260, 1213, 1235, 1259, 1190, 1198, 1177, 1124, 1066, 969, 959, 961, 947, 931, 841, 928, 925, 912, 892, 910, 911, 959, 999, 974, 918, 1024, 1029, 1023, 1271, 1273, 1297, 1275, 1330, 1300, 1318, 1006, 1324, 1295, 1232, 964, 1023, 1238, 1226, 1169, 1301,



100%|██████████| 1/1 [00:20<00:00, 20.47s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'traffic'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

190, 283, 495, 948, 1206, 1578, 1903, 2207, 2475, 2480, 2248, 2176, 2186, 2197, 2016, 1815, 1505, 1340, 1103, 737, 479, 221, 216, 273, 453, 556, 840, 1727, 2186, 2099, 2222, 2661, 2712, 2743, 2702, 3249, 3357, 3305, 2784, 2331, 1732, 1495, 1046, 706, 428, 293, 237, 309, 443, 974, 1629, 2078, 2336, 2258, 2068, 2599, 2851, 2790, 3362, 3388, 3305, 3476, 2862, 2558, 2186, 1640, 1191, 727, 531, 340, 283, 345, 562, 1088, 1531, 2119, 2449, 2140, 2258, 2743, 2810, 2970, 3264, 3408, 3372, 3543, 3094, 2630, 2047, 1727, 1227, 768, 556, 330, 221, 293, 520, 1072, 1542, 2109, 2377, 2227, 2227, 2573, 2996, 2975, 3671, 3733, 3424, 3548, 3078, 2614, 2145, 1815, 1227, 959, 665, 350, 319, 366, 531, 1057, 1521, 2129, 2377, 2413, 2573, 3063, 3264, 3099, 2697, 3759, 3599, 3445, 2903, 2552, 2078, 1722, 1444, 1005, 758, 438, 319, 309, 293, 453, 541, 1077, 1562, 1897, 2305, 2738, 2831, 2748, 2748, 2748, 2692, 2454, 2207, 1903, 1650, 1521, 1376, 1010, 737, 371, 257, 226, 170, 257, 479, 773, 1108, 1438, 1810, 20



100%|██████████| 1/1 [00:20<00:00, 20.05s/it][A[A


Sampling with best hyper... defaultdict(<class 'dict'>, {'model': 'gpt-4', 'alpha': 0.3, 'basic': True, 'temp': 1.0, 'top_p': 0.8, 'settings': SerializerSettings(base=10, prec=3, signed=True, fixed_length=False, max_val=10000000.0, time_sep=', ', bit_sep='', plus_sign='', minus_sign='-', half_bin_correction=True, decimal_point='', missing_str=' Nan'), 'dataset_name': 'weather'}) 
 with NLL inf




  0%|          | 0/1 [00:00<?, ?it/s][A[A

987, 986, 984, 985, 987, 987, 987, 986, 996, 1001, 996, 986, 990, 1008, 1015, 1006, 1010, 1007, 1005, 999, 1004, 1032, 1017, 1008, 1010, 1012, 1014, 1023, 1019, 1016, 1028, 1025, 1034, 1033, 1039, 1036, 1034, 1044, 1055, 1064, 1064, 1074, 1075, 1070, 1072, 1067, 1059, 1053, 1057, 1054, 1059, 1080, 1086, 1080, 1077, 1085, 1075, 1078, 1066, 1068, 1073, 1072, 1079, 1082, 1081, 1074, 1072, 1075, 1074, 1075, 1092, 1080, 1078, 1081, 1089, 1099, 1097, 1101, 1099, 1094, 1092, 1086, 1087, 1093, 1084, 1080, 1092, 1099, 1093, 1107, 1103, 1113, 1114, 1108, 1116, 1120, 1124, 1113, 1115, 1120, 1120, 1114, 1095, 1086, 1045, 1027, 1034, 1037, 1035, 1025, 1018, 1020, 1018, 1013, 1013, 1011, 1001, 1001, 1005, 1006, 1007, 1005, 1005, 1000, 998, 999, 996, 996, 996, 997, 1006, 1006, 1006, 1007, 1007, 1005, 1006, 1002, 1001, 998, 993, 988, 989, 990, 990, 993, 996, 997, 997, 993, 990, 991, 993, 992, 992, 994, 1001, 998, 998, 999, 1008, 1016, 1004, 1001, 1000, 999, 1000, 1003, 1005, 1005, 1005, 1005, 1007, 10



100%|██████████| 1/1 [00:19<00:00, 19.21s/it][A[A
