# Import

In [1]:
import os, re, pandas as pd
import numpy as np

# Extract result

## Patterns

In [2]:
# Regex fragment for a non-negative number such as "12" or "0.123".
# The decimal point is escaped so that it only matches a literal "." (a bare
# `.` accepts any character, e.g. "1x5"), and the fractional part is optional
# so that integer values are still captured.
_NUMBER = r'\d+(?:\.\d+)?'

# Each pattern is anchored on the trailing newline of the log line it parses.
# "mse:<number>, mae:<number>" -> groups `mse`, `mae`
result_pattern = re.compile(rf'mse:(?P<mse>{_NUMBER}), mae:(?P<mae>{_NUMBER})\n')
# "Allocated <number> MB, Max allocated <number> MB" -> groups `allocated`, `max_allocated`
memory_pattern = re.compile(rf'Allocated (?P<allocated>{_NUMBER}) MB, Max allocated (?P<max_allocated>{_NUMBER}) MB\n')
# Shorter memory format without the "Max allocated" part -> group `allocated`
memory_pattern2 = re.compile(rf'Allocated (?P<allocated>{_NUMBER}) MB\n')
# "Time per epoch: <number> sec." -> group `time`
# (the character after "sec" is intentionally left as a wildcard, as before)
time_pattern = re.compile(rf'Time per epoch: (?P<time>{_NUMBER}) sec.\n')

## Utils

In [4]:
def find_and_add(
    rows, root, lines, verbose=False
):
    """Extract the metrics of one run from its log lines and append them to `rows`.

    The run is identified from `root`, whose last three path components are
    read as ``<dataset>/<model_folder>/<iteration>``. ``model_folder`` is an
    underscore-separated name such as ``CALF_sl_96_pl_24_id_ori_p_10``: the
    first token is the model name and the remaining tokens are read as
    alternating key/value pairs (``sl`` -> seq_len, ``pl`` -> pred_len,
    ``p`` -> percent). A ``zeroshot`` key sets percent to 0. Keys that are
    not recognised are ignored.

    The log is scanned from the last line backwards and, for each metric,
    the match closest to the end of the log is used.

    Args:
        rows: dict mapping column name to list; mutated in place. It must
            contain the keys 'dataset', 'model', 'iteration', 'mae', 'mse',
            'seq_len', 'pred_len', 'percent', 'memory' and 'time'.
        root: directory of the run. Backslash and forward-slash separators
            are both accepted.
        lines: lines of the run's log file (each including its newline).
        verbose: when True, print every value as it is parsed.

    Returns:
        None. Nothing is appended when the dataset is 'Financial_Aid' or when
        no 'test scaled -- ' result line could be parsed. Otherwise exactly
        one value is appended to every column; 'memory' and 'time' are None
        when the corresponding line is absent.

    Raises:
        ValueError: if the last path component, or an ``sl``/``pl``/``p``
            value, is not an integer.
        IndexError: if `root` has fewer than three path components.
    """
    parameters = {
        'seq_len': 96,
        'pred_len': 24,
        'percent': 100,
    }

    # Split on either separator so the same code works for paths produced on
    # Windows and on POSIX systems; a trailing separator is ignored.
    splits = re.split(r'[\\/]', root.rstrip('\\/'))
    iter_no = int(splits[-1])
    dataset = splits[-3]

    if dataset == 'Financial_Aid':
        return
    model_folder = splits[-2]
    tokens = model_folder.split('_')
    model = tokens[0]

    # tokens[1::2] are the keys, tokens[2::2] the values at the same index.
    # 'zeroshot' is a flag without a value, so it is checked before any
    # value is looked up.
    parameter_names = {'pl': 'pred_len', 'sl': 'seq_len', 'p': 'percent'}
    values = tokens[2::2]
    for i, item in enumerate(tokens[1::2]):
        if item == 'zeroshot':
            parameters['percent'] = 0
        elif item in parameter_names:
            parameters[parameter_names[item]] = int(values[i])

    mae = mse = allocated = max_allocated = time_spent = None
    result_found = memory_found = time_found = False

    for line in reversed(lines):
        # Each `not ..._found` guard keeps the first hit of the backwards
        # scan, so an older entry can never overwrite a more recent one.
        if not result_found and line.startswith('test scaled -- '):
            match = result_pattern.search(line)
            if match is None:
                # Malformed result line: keep looking further up the log.
                if verbose:print(f'Could not parse result line: {line!r}')
                continue
            results = match.groupdict()
            mae = float(results['mae'])
            mse = float(results['mse'])
            if verbose:print(f'MAE {mae}, MSE {mse}')
            result_found = True

        elif not memory_found and line.startswith('Memory usage: '):
            # Try the long format first, then the one without "Max allocated".
            match = memory_pattern.search(line) or memory_pattern2.search(line)
            if match is None:
                if verbose:print(f'Could not parse memory line: {line!r}')
                continue
            memory = match.groupdict()
            allocated = float(memory['allocated'])
            if 'max_allocated' in memory:
                max_allocated = float(memory['max_allocated'])
            else:
                max_allocated = None
            if verbose:
                print(f'Allocated {allocated} MB, Max allocated {max_allocated} MB.')
            memory_found = True

        elif not time_found and line.startswith('Time per epoch: '):
            match = time_pattern.search(line)
            if match is None:
                if verbose:print(f'Could not parse time line: {line!r}')
                continue
            time_spent = float(match.groupdict()['time'])
            if verbose:print(f'Time spent {time_spent} sec.')
            time_found = True

        # stop searching once the result, memory and time are all found
        if result_found and memory_found and time_found:
            break

    if result_found:
        rows['dataset'].append(dataset)
        rows['model'].append(model)
        rows['iteration'].append(iter_no)
        rows['mae'].append(mae)
        rows['mse'].append(mse)
        rows['seq_len'].append(parameters['seq_len'])
        rows['pred_len'].append(parameters['pred_len'])
        rows['percent'].append(parameters['percent'])

        # Memory and time can be absent (e.g. zeroshot runs); each one is
        # recorded independently so that one missing value does not discard
        # the other.
        rows['memory'].append(allocated if memory_found else None)
        rows['time'].append(time_spent if time_found else None)

## Extraction

In [5]:
# Column-oriented accumulator: one (initially empty) list per output column.
# find_and_add appends one value to every list for each run it can parse.
rows = {
    col: list() for col in (
        'dataset', 'model', 'iteration', 'mae',
        'mse', 'seq_len', 'pred_len', 'percent',
        'memory', 'time',
    )
}

# Visit every directory below ./results and parse each results.txt found there.
for root, dirs, files in os.walk("./results"):
    # Path components of the current directory (handy for ad-hoc debug prints).
    path = root.split(os.sep)
    for file in files:
        if file != 'results.txt':
            continue

        log_path = os.path.join(root, file)
        with open(log_path, 'r') as f:
            lines = list(f)
            # The directory path itself encodes dataset, model and iteration.
            find_and_add(rows, root, lines)

./results\Apple\CALF_sl_96_pl_24_id_ori\1   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori\2   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori\3   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_p_10\1   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_p_10\2   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_p_10\3   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_zeroshot\1   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_zeroshot\2   results.txt
./results\Apple\CALF_sl_96_pl_24_id_ori_zeroshot\3   results.txt
./results\Apple\CALF_sl_96_pl_48_id_ori\1   results.txt
./results\Apple\CALF_sl_96_pl_48_id_ori\2   results.txt
./results\Apple\CALF_sl_96_pl_48_id_ori\3   results.txt
./results\Apple\CALF_sl_96_pl_96_id_ori\1   results.txt
./results\Apple\CALF_sl_96_pl_96_id_ori\2   results.txt
./results\Apple\CALF_sl_96_pl_96_id_ori\3   results.txt
./results\Apple\DLinear_sl_96_pl_24\1   results.txt
./results\Apple\DLinear_sl_96_pl_24\2   results.txt
./results\Appl

## Dump

In [6]:
# Turn the collected columns into a table (one row per parsed run) and save
# it as CSV inside the results directory, without the index column.
rows_df = pd.DataFrame.from_dict(rows)
rows_df.to_csv(path_or_buf='results/results.csv', index=False)

# Data

## Load

In [7]:
# rows_df = pd.read_csv('results/results.csv')

metrics = ['mae', 'mse', 'memory', 'time']
# Everything that is neither a metric nor the iteration number identifies a
# configuration (dataset, model, seq_len, pred_len, percent).
groupby_columns = [col for col in rows_df.columns if col not in ['iteration']+metrics]

# Average every metric over the repeated iterations of each configuration.
# Missing measurements (e.g. memory/time of runs that did not log them) are
# kept as NaN instead of being replaced by 0: `mean()` skips NaN, so a
# missing value neither drags the average towards zero nor shows up later as
# an artificial "best" (smallest) time or memory. Casting to float first
# makes columns that contain only None numeric as well.
rows_df = (
    rows_df
    .astype({metric: 'float64' for metric in metrics})
    .groupby(groupby_columns)[metrics]
    .mean()
    .reset_index()
)

## Datasets and models

In [24]:
# Models in the column order used by the result tables.
models = [
    'DLinear', 'PatchTST', 'TimesNet', 
    'TimeMixer', 'iTransformer', 'TimeLLM', 'CALF', 'OFA'
]
# Subset of `models` whose cells get the `llm_color` background.
llm_models = ['TimeLLM', 'CALF', 'OFA']
# Dataset folder names in the row order used by the result tables.
datasets = [
    'SPX500', 'Apple', 'MSFT', 'Crude_Oil', 
    'Gold','Natural_Gas','Exchange_Rate_Report', 
    'Financial_Aid_State'
]

# LaTeX cell prefixes. Each starts with the column separator " & ";
# `best_color` and `second_best_color` open a brace that `end_bracket` closes.
best_color = r' & \cellcolor[HTML]{2B7BBA}\textbf{'
second_best_color = r' & \cellcolor[HTML]{89BEDC}\underline{' 
llm_color = r' & \cellcolor[HTML]{A9A9A9}' 
end_bracket = r'} '

# Dataset folder name -> label printed in the first column of a table row.
datasets_map = {
    # Raw string: "\&" is the LaTeX escape for "&", not a Python escape sequence.
    'SPX500': r'S\&P 500',
    'Apple': 'Apple',
    'MSFT': 'Microsoft',
    'Crude_Oil': 'Crude Oil',
    'Gold': 'Gold',
    'Natural_Gas': 'Natural Gas',
    'Exchange_Rate_Report': 'Exchange',
    'Financial_Aid_State': 'Financial Aid'
}

# Utils

In [26]:
def _argmin_pair(values):
    """Return the indices of the smallest and second-smallest non-NaN value.

    An index is None when no such value exists (empty or all-NaN input, or
    only one non-NaN value). Ties are resolved in favour of the lower index.
    """
    # Float copy: NaN can be assigned to it and the caller's data stays untouched.
    arr = np.array(values, dtype=float)
    if np.isnan(arr).all():
        return None, None

    best = int(np.nanargmin(arr))
    # Mask the winner so that the next search returns the runner-up.
    arr[best] = np.nan
    if np.isnan(arr).all():
        return best, None
    return best, int(np.nanargmin(arr))


def get_mins(mses, maes):
    """Locate the best and second-best (smallest) entries of two score lists.

    NaN entries are ignored. The inputs are not modified.

    Args:
        mses: sequence of numbers (may contain NaN).
        maes: sequence of numbers (may contain NaN).

    Returns:
        Tuple ``(min_mae, min_mse, min_mae_2nd, min_mse_2nd)`` of positions.
        Note that the MAE index comes first although `mses` is the first
        parameter. A position is None when the corresponding value does not
        exist, so comparing it with a loop index is simply False.
    """
    min_mae, min_mae_2nd = _argmin_pair(maes)
    min_mse, min_mse_2nd = _argmin_pair(mses)

    return min_mae, min_mse, min_mae_2nd, min_mse_2nd

def print_rows(maes, mses, model_names=None):
    """Print the LaTeX cells of one table row: an MSE and an MAE cell per model.

    For every position the MSE cell is printed before the MAE cell. The
    smallest value of each metric is wrapped with `best_color`, the second
    smallest with `second_best_color`; any other cell belonging to a model
    listed in `llm_models` gets `llm_color`; all remaining cells are plain.
    The row is terminated with ``\\\\ \\hline`` followed by an empty line.

    Args:
        maes: MAE values, one per model (NaN for a missing result).
        mses: MSE values, aligned with `maes`.
        model_names: model name for each position of `maes`/`mses`. Defaults
            to the module-level `models` list, which is only correct when the
            values cover all models in that order; pass the matching names
            when printing a subset. Positions without a name are treated as
            non-LLM models.

    Returns:
        None; the row is written to standard output.
    """
    if model_names is None:
        model_names = models

    min_mae, min_mse, min_mae_2nd, min_mse_2nd = get_mins(mses=mses, maes=maes)

    def is_llm(position):
        # Guard the lookup so extra values cannot raise an IndexError.
        return position < len(model_names) and model_names[position] in llm_models

    cells = []
    for i in range(len(maes)):
        mse_text = f'{mses[i]:.2f}'
        mae_text = f'{maes[i]:.2f}'

        # The spacing of every variant is reproduced exactly as emitted before.
        if i == min_mse:
            cells.append(best_color + mse_text + ' ' + end_bracket)
        elif i == min_mse_2nd:
            cells.append(second_best_color + mse_text + end_bracket)
        elif is_llm(i):
            cells.append(llm_color + mse_text)
        else:
            cells.append(' & ' + mse_text + ' ')

        if i == min_mae:
            cells.append(best_color + mae_text + end_bracket)
        elif i == min_mae_2nd:
            cells.append(second_best_color + mae_text + end_bracket)
        elif is_llm(i):
            cells.append(llm_color + mae_text)
        else:
            cells.append(' & ' + mae_text)

    print(''.join(cells), end='')
    print(' \\\\ \\hline\n')
            
    # for i in range(len(models)):
    #     if i == min_mse:
    #         print(r' & \textcolor{red}{\textbf{' + f'{mses[i]:.2f} '+r'}}', end='')
    #     elif i == min_mse_2nd:
    #         print(r' & \textcolor{blue}{\underline{' + f'{mses[i]:.2f} '+r'}}', end='')
    #     else:
    #         print(f' & {mses[i]:.2f} ', end='')
            
    #     if i == min_mae:
    #         print(r' & \textcolor{red}{\textbf{' + f'{maes[i]:.2f} '+r'}}', end='')
    #     elif i == min_mae_2nd:
    #         print(r' & \textcolor{blue}{\underline{' + f'{maes[i]:.2f} '+r'}}', end='')
    #     else:
    #         print(f' & {maes[i]:.2f} ', end='')

# RQ1

In [9]:
# RQ1 subset: runs with pred_len 1 or 24 that used 100 percent of the data.
df = rows_df.loc[rows_df['pred_len'].isin([1, 24]) & rows_df['percent'].eq(100)]

In [19]:
# Emit one LaTeX table row per dataset: the dataset label followed by the
# MSE/MAE cells of every model, in the order of `models`.
for dataset in datasets:
    print(f'{datasets_map[dataset]} ', end='')
    dataset_scores = df[df['dataset'] == dataset]

    maes, mses = [], []
    for model in models:
        result = dataset_scores.loc[dataset_scores['model'] == model, ['mse', 'mae']].values
        # A model without a result for this dataset contributes NaN placeholders;
        # otherwise the first matching row is used.
        mse, mae = (result[0][0], result[0][1]) if len(result) else (np.nan, np.nan)
        maes.append(mae)
        mses.append(mse)

    print_rows(maes=maes, mses=mses)

S&P 500  & 1.10  & 0.81 & \cellcolor[HTML]{89BEDC}\underline{0.99}  & 0.79 & 1.18  & 0.84 & 1.00  & \cellcolor[HTML]{89BEDC}\underline{0.75}  & 1.27  & 0.87 & \cellcolor[HTML]{A9A9A9}1.10 & \cellcolor[HTML]{A9A9A9}0.81 & \cellcolor[HTML]{A9A9A9}1.16 & \cellcolor[HTML]{A9A9A9}0.84 & \cellcolor[HTML]{2B7BBA}\textbf{0.79 }  & \cellcolor[HTML]{2B7BBA}\textbf{0.68}  \\ \hline

Apple  & \cellcolor[HTML]{2B7BBA}\textbf{1.58 }  & \cellcolor[HTML]{2B7BBA}\textbf{0.96}  & 2.03  & 1.06 & 1.75  & \cellcolor[HTML]{89BEDC}\underline{1.01}  & 1.91  & 1.06 & 1.94  & 1.10 & \cellcolor[HTML]{89BEDC}\underline{1.74}  & \cellcolor[HTML]{A9A9A9}1.02 & \cellcolor[HTML]{A9A9A9}1.79 & \cellcolor[HTML]{A9A9A9}1.04 & \cellcolor[HTML]{A9A9A9}1.75 & \cellcolor[HTML]{A9A9A9}1.02 \\ \hline

Microsoft  & 1.89  & 0.96 & \cellcolor[HTML]{2B7BBA}\textbf{1.55 }  & \cellcolor[HTML]{89BEDC}\underline{0.87}  & 1.60  & 0.92 & \cellcolor[HTML]{89BEDC}\underline{1.55}  & \cellcolor[HTML]{2B7BBA}\textbf{0.86}  & 1.63  & 0.91 &

# RQ2

In [20]:
# RQ2 subset: runs with pred_len 1 or 24 that used 10 percent of the data.
pred_len_selected = rows_df['pred_len'].isin([1, 24])
df = rows_df[pred_len_selected & (rows_df['percent'] == 10)]

In [25]:
# One LaTeX table row per dataset, built from the current `df` subset.
for dataset in datasets:
    print(f'{datasets_map[dataset]} ', end='')
    maes, mses = [], []
    for model in models:
        selected = (df['dataset'] == dataset) & (df['model'] == model)
        result = df.loc[selected, ['mse', 'mae']].to_numpy()

        if len(result) > 0:
            # Columns were selected as ['mse', 'mae']; take the first matching row.
            mse, mae = result[0]
        else:
            # No result for this dataset/model pair: keep the column, mark it NaN.
            mse = mae = np.nan

        maes.append(mae)
        mses.append(mse)

    print_rows(maes=maes, mses=mses)

S\&P 500  & 2.08  & 1.14 & \cellcolor[HTML]{89BEDC}\underline{2.03}  & \cellcolor[HTML]{2B7BBA}\textbf{1.14}  & 2.43  & 1.18 & 2.49  & 1.25 & 2.19  & 1.19 & \cellcolor[HTML]{2B7BBA}\textbf{1.94 }  & \cellcolor[HTML]{89BEDC}\underline{1.14}  & \cellcolor[HTML]{A9A9A9}2.37 & \cellcolor[HTML]{A9A9A9}1.19 & \cellcolor[HTML]{A9A9A9}3.07 & \cellcolor[HTML]{A9A9A9}1.40 \\ \hline

Apple  & 2.78  & 1.30 & \cellcolor[HTML]{89BEDC}\underline{2.36}  & \cellcolor[HTML]{2B7BBA}\textbf{1.21}  & 3.22  & 1.41 & 3.44  & 1.46 & 3.05  & 1.39 & \cellcolor[HTML]{A9A9A9}3.00 & \cellcolor[HTML]{A9A9A9}1.36 & \cellcolor[HTML]{2B7BBA}\textbf{2.33 }  & \cellcolor[HTML]{89BEDC}\underline{1.21}  & \cellcolor[HTML]{A9A9A9}2.80 & \cellcolor[HTML]{A9A9A9}1.29 \\ \hline

Microsoft  & 2.51  & 1.10 & \cellcolor[HTML]{2B7BBA}\textbf{2.13 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.06}  & 3.55  & 1.43 & 2.85  & 1.18 & 2.49  & 1.15 & \cellcolor[HTML]{89BEDC}\underline{2.40}  & \cellcolor[HTML]{89BEDC}\underline{1.10}  & \cellco

# RQ3
Zero-shot evaluation (runs with `percent == 0`).

In [27]:
# RQ3 subset: runs with pred_len 1 or 24 recorded with percent 0 (zeroshot).
df = rows_df.loc[rows_df['pred_len'].isin([1, 24]) & rows_df['percent'].eq(0)]

In [29]:
# One LaTeX table row per dataset, restricted to the last three entries of `models`.
for dataset in datasets:
    print(f'{datasets_map[dataset]} ', end='')
    maes, mses = [], []
    for model in models[-3:]:
        matching = df[(df['dataset'] == dataset) & (df['model'] == model)]

        if matching.empty:
            # Missing combination: NaN keeps the columns aligned.
            maes.append(np.nan)
            mses.append(np.nan)
        else:
            first_row = matching.iloc[0]
            maes.append(first_row['mae'])
            mses.append(first_row['mse'])

    print_rows(maes=maes, mses=mses)

S\&P 500  & 5.04  & 1.91 & \cellcolor[HTML]{89BEDC}\underline{3.98}  & \cellcolor[HTML]{2B7BBA}\textbf{1.74}  & \cellcolor[HTML]{2B7BBA}\textbf{3.89 }  & \cellcolor[HTML]{89BEDC}\underline{1.76}  \\ \hline

Apple  & 4.17  & 1.61 & \cellcolor[HTML]{89BEDC}\underline{3.36}  & \cellcolor[HTML]{89BEDC}\underline{1.44}  & \cellcolor[HTML]{2B7BBA}\textbf{3.05 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.36}  \\ \hline

Microsoft  & 5.13  & 1.81 & \cellcolor[HTML]{89BEDC}\underline{4.12}  & \cellcolor[HTML]{89BEDC}\underline{1.62}  & \cellcolor[HTML]{2B7BBA}\textbf{3.96 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.59}  \\ \hline

Crude Oil  & 3.05  & 1.39 & \cellcolor[HTML]{89BEDC}\underline{2.21}  & \cellcolor[HTML]{89BEDC}\underline{1.18}  & \cellcolor[HTML]{2B7BBA}\textbf{1.89 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.08}  \\ \hline

Gold  & 6.15  & 1.95 & \cellcolor[HTML]{89BEDC}\underline{5.12}  & \cellcolor[HTML]{2B7BBA}\textbf{1.76}  & \cellcolor[HTML]{2B7BBA}\textbf{5.00 }  & \cellcolor[HTML]{89BEDC

# RQ4

Forecasting at prediction lengths 48 and 96.

In [30]:
# One LaTeX table row per (dataset, horizon) pair; the last entry of
# `datasets` is left out.
for dataset in datasets[:-1]:
    for horizon in [48, 96]:
        # Runs at this prediction length that used 100 percent of the data.
        at_horizon = rows_df['pred_len'] == horizon
        full_data = rows_df['percent'] == 100
        df = rows_df[at_horizon & full_data]

        print(f'{datasets_map[dataset]} & {horizon}', end='')
        maes, mses = [], []
        for model in models:
            result = df.loc[
                (df['dataset'] == dataset) & (df['model'] == model),
                ['mse', 'mae'],
            ].values

            if len(result) != 0:
                mse, mae = result[0][0], result[0][1]
                maes.append(mae)
                mses.append(mse)
            else:
                # No run for this model: NaN placeholders keep the columns aligned.
                maes.append(np.nan)
                mses.append(np.nan)

        print_rows(maes=maes, mses=mses)

S\&P 500 & 48 & 2.24  & 1.15 & 2.05  & 1.08 & 2.30  & 1.13 & \cellcolor[HTML]{2B7BBA}\textbf{1.51 }  & \cellcolor[HTML]{2B7BBA}\textbf{0.93}  & 2.62  & 1.26 & \cellcolor[HTML]{A9A9A9}2.10 & \cellcolor[HTML]{A9A9A9}1.11 & \cellcolor[HTML]{A9A9A9}1.98 & \cellcolor[HTML]{A9A9A9}1.08 & \cellcolor[HTML]{89BEDC}\underline{1.63}  & \cellcolor[HTML]{89BEDC}\underline{0.94}  \\ \hline

S\&P 500 & 96 & 9.84  & 2.20 & 5.31  & \cellcolor[HTML]{89BEDC}\underline{1.65}  & 6.17  & 1.78 & 5.72  & 1.71 & 6.54  & 1.87 & \cellcolor[HTML]{89BEDC}\underline{5.25}  & \cellcolor[HTML]{A9A9A9}1.67 & \cellcolor[HTML]{A9A9A9}5.93 & \cellcolor[HTML]{A9A9A9}1.73 & \cellcolor[HTML]{2B7BBA}\textbf{4.11 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.43}  \\ \hline

Apple & 48 & \cellcolor[HTML]{89BEDC}\underline{3.24}  & \cellcolor[HTML]{89BEDC}\underline{1.40}  & 3.99  & 1.51 & 4.24  & 1.58 & 3.63  & 1.46 & 3.82  & 1.55 & \cellcolor[HTML]{2B7BBA}\textbf{3.07 }  & \cellcolor[HTML]{2B7BBA}\textbf{1.37}  & \cellcolor[HTML]{A9

# RQ5

Execution time and memory

In [32]:
# RQ5 subset: runs with pred_len 1 or 24 that used 100 percent of the data.
uses_all_data = rows_df['percent'] == 100
df = rows_df[rows_df['pred_len'].isin([1, 24]) & uses_all_data]

In [34]:
def _latex_cell(text, index, best, runner_up):
    """Wrap an already formatted value in the LaTeX markup for its rank.

    The best and second-best positions get their highlight colours, other
    positions of models in `llm_models` get `llm_color`, the rest stay plain.
    """
    if index == best:
        return best_color + text + ' ' + end_bracket
    if index == runner_up:
        return second_best_color + text + ' ' + end_bracket
    if models[index] in llm_models:
        return llm_color + text
    return ' & ' + text + ' '


# One LaTeX table row per dataset: a time cell and a memory cell per model.
for dataset in datasets:
    print(f'{datasets_map[dataset]} ', end='')
    times, memories = [], []
    for model in models:
        result = df.loc[
            (df['dataset'] == dataset) & (df['model'] == model),
            ['time', 'memory'],
        ].values

        if len(result) == 0:
            # No run for this model: NaN placeholders keep the columns aligned.
            time_spent, memory = np.nan, np.nan
        else:
            time_spent, memory = result[0][0], result[0][1]

        times.append(time_spent)
        memories.append(memory)

    # get_mins returns the `maes` indices first, so memory (passed as `maes`)
    # comes before time (passed as `mses`) in the unpacking.
    min_memory, min_time, min_memory_2nd, min_time_2nd = get_mins(mses=times, maes=memories)

    row_cells = ''.join(
        _latex_cell(f'{times[i]:.1f}', i, min_time, min_time_2nd)
        + _latex_cell(f'{memories[i]:.0f}', i, min_memory, min_memory_2nd)
        for i in range(len(models))
    )
    print(row_cells, end='')

    # Row terminator followed by an empty line.
    print(' \\\\ \\hline\n')

S\&P 500  & \cellcolor[HTML]{2B7BBA}\textbf{2.0 }  & \cellcolor[HTML]{2B7BBA}\textbf{16 }  & 3.4  & 19  & 19.5  & 170  & 4.3  & 18  & \cellcolor[HTML]{89BEDC}\underline{2.8 }  & \cellcolor[HTML]{89BEDC}\underline{18 }  & \cellcolor[HTML]{A9A9A9}37.5 & \cellcolor[HTML]{A9A9A9}1127 & \cellcolor[HTML]{A9A9A9}12.1 & \cellcolor[HTML]{A9A9A9}1473 & \cellcolor[HTML]{A9A9A9}5.9 & \cellcolor[HTML]{A9A9A9}349 \\ \hline

Apple  & \cellcolor[HTML]{2B7BBA}\textbf{2.2 }  & \cellcolor[HTML]{2B7BBA}\textbf{16 }  & 4.1  & 19  & 24.0  & 167  & 4.7  & 18  & \cellcolor[HTML]{89BEDC}\underline{3.3 }  & \cellcolor[HTML]{89BEDC}\underline{18 }  & \cellcolor[HTML]{A9A9A9}53.5 & \cellcolor[HTML]{A9A9A9}1127 & \cellcolor[HTML]{A9A9A9}13.1 & \cellcolor[HTML]{A9A9A9}1470 & \cellcolor[HTML]{A9A9A9}5.4 & \cellcolor[HTML]{A9A9A9}349 \\ \hline

Microsoft  & \cellcolor[HTML]{2B7BBA}\textbf{2.6 }  & \cellcolor[HTML]{2B7BBA}\textbf{16 }  & 3.3  & 19  & 17.4  & 170  & 5.0  & 18  & \cellcolor[HTML]{89BEDC}\underline{3.3 }