In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
def parse_results(filename:str) -> pd.DataFrame:
    """Parse a "former" results text file into a pandas DataFrame.

    The file is read line by line. After stripping surrounding whitespace,
    each non-empty line is treated as one of two kinds:

    * a *metrics* line, recognised by starting with ``mse``. It must hold at
      least seven comma-separated ``<name>:<number>`` pairs, which are read
      **by position** as mse, mae, rmse, mape, mspe, rse and R2.
    * a *settings* line (anything else), split on ``_``. Field 0 is stored as
      the model name; fields 1, 3, 5, 6 and 7 must look like
      ``<tag>-<value>`` and supply lookback, future, pos, val and temp.

    Args:
        filename: Path of the results text file.

    Returns:
        A DataFrame with one column per parsed field. ``lookback`` and
        ``future`` are ints, metrics are floats rounded to 8 decimals, and
        ``pos``/``val``/``temp`` are kept as the strings found in the file.
        Columns are filled independently, so the file must contain the same
        number of settings and metrics lines.

    Raises:
        IndexError: If a line has fewer fields than the layout requires.
        ValueError: If a numeric field cannot be converted, or if the
            columns end up with different lengths.
    """
    # Order matters: metric values are matched to these names by position.
    metric_names = ('mse', 'mae', 'rmse', 'mape', 'mspe', 'rse', 'R2')

    data = defaultdict(list)
    with open(filename) as file:
        for raw_line in file:
            # Strip first so that whitespace-only / CRLF lines are skipped and
            # the trailing newline can never leak into the last field (temp).
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith('mse'):
                result = line.replace(" ", "").split(',')
                for position, name in enumerate(metric_names):
                    value = float(result[position].split(':')[1])
                    data[name].append(round(value, 8))
            else:
                attributes = line.split(sep='_')
                data['model'].append(attributes[0])
                data['lookback'].append(int(attributes[1].split('-')[1]))
                data['future'].append(int(attributes[3].split('-')[1]))
                data['pos'].append(attributes[5].split('-')[1])
                data['val'].append(attributes[6].split('-')[1])
                # Anything after the first space in the last field is dropped.
                data['temp'].append(attributes[7].split('-')[1].split(' ')[0])

    return pd.DataFrame.from_dict(data)

def parse_lstms(filename:str) -> pd.DataFrame:
    """Parse an LSTM results text file into a pandas DataFrame.

    The file is read line by line. After stripping surrounding whitespace,
    each non-empty line is treated as one of two kinds:

    * a *metrics* line, recognised by starting with ``mse``. It must hold at
      least five comma-separated ``<name>:<number>`` pairs, which are read
      **by position** as mse, mae, rmse, mape and R2.
    * a *settings* line (anything else), split on ``_``. Fields 2 and 3 must
      look like ``<tag>-<int>`` and supply lookback and future. No other
      field of the settings line is stored.

    Args:
        filename: Path of the results text file.

    Returns:
        A DataFrame with int columns ``lookback`` and ``future`` and float
        metric columns rounded to 8 decimals. Columns are filled
        independently, so the file must contain the same number of settings
        and metrics lines.

    Raises:
        IndexError: If a line has fewer fields than the layout requires.
        ValueError: If a numeric field cannot be converted, or if the
            columns end up with different lengths.
    """
    # Order matters: metric values are matched to these names by position.
    metric_names = ('mse', 'mae', 'rmse', 'mape', 'R2')

    data = defaultdict(list)
    with open(filename) as file:
        for raw_line in file:
            # Strip first so whitespace-only / CRLF lines are skipped instead
            # of being mistaken for a settings line.
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith('mse'):
                result = line.replace(" ", "").split(',')
                for position, name in enumerate(metric_names):
                    value = float(result[position].split(':')[1])
                    data[name].append(round(value, 8))
            else:
                attributes = line.split(sep='_')
                data['lookback'].append(int(attributes[2].split('-')[1]))
                data['future'].append(int(attributes[3].split('-')[1]))

    return pd.DataFrame.from_dict(data)

In [3]:
# Parse the raw result logs into DataFrames: two "former" files via
# parse_results and two LSTM files via parse_lstms. The file names indicate
# one original-data and one reduced-data run for each.
formers_org = parse_results('result-former-all-combinations.txt')
formers_red = parse_results('reduced_data_result-former-all-combinations.txt')
lstms_org = parse_lstms('result-lstm-original-data.txt')
lstms_red = parse_lstms('result-lstm-reduced-data.txt')

In [11]:
# Show (n_rows, n_columns) of the frame parsed from the original-data file.
formers_org.shape

(1460, 13)

In [12]:
# Show (n_rows, n_columns) of the frame parsed from the reduced-data file.
formers_red.shape

(1435, 13)

In [None]:
# formers_org.model.unique()
# formers_org.sort_values(by=['lookback','future']).head(20)
# formers_org.sort_values(by=['model'], ascending=False).head(15)

In [None]:
# Same (n_rows, n_columns) check as the earlier `formers_org.shape` cell;
# this cell has no recorded execution count.
formers_org.shape

In [19]:
def merge_dfs(formers, lstms):
    """Left-join LSTM metrics onto the "former" results and order the columns.

    Rows are matched on ``lookback`` and ``future``. Metric columns present
    in both inputs get the suffix ``_T`` (from ``formers``) or ``_L`` (from
    ``lstms``). Every row of ``formers`` is kept; rows without a match in
    ``lstms`` get NaN in the ``_L`` columns.

    Args:
        formers: DataFrame with the columns produced by ``parse_results``.
        lstms: DataFrame with the columns produced by ``parse_lstms``.

    Returns:
        The merged DataFrame restricted to, and ordered as: identifying
        columns, then ``_T``/``_L`` metric pairs, then ``mspe`` and ``rse``.

    Raises:
        KeyError: If any expected column is missing after the merge.
    """
    id_columns = ['model', 'lookback', 'future', 'pos', 'val', 'temp']
    paired_metrics = [
        'mse_T', 'mse_L',
        'mae_T', 'mae_L',
        'rmse_T', 'rmse_L',
        'mape_T', 'mape_L',
        'R2_T', 'R2_L',
    ]
    # These two metrics exist only in `formers`, so they carry no suffix.
    unpaired_metrics = ['mspe', 'rse']

    merged = pd.merge(
        formers,
        lstms,
        how="left",
        on=["lookback", "future"],
        suffixes=('_T', '_L'),
    )
    return merged[id_columns + paired_metrics + unpaired_metrics]

# Merge each dataset's "former" and LSTM results and tag the rows with the
# dataset they come from.
df_org = merge_dfs(formers_org, lstms_org).assign(data_type='original')
df_red = merge_dfs(formers_red, lstms_red).assign(data_type='reduced')

In [24]:
# Stack both datasets into one frame with a fresh 0..n-1 index.
df_merged = pd.concat(
    objs=[df_org, df_red],
    ignore_index=True,
    sort=False,
)
df_merged.head()

Unnamed: 0,model,lookback,future,pos,val,temp,mse_T,mse_L,mae_T,mae_L,rmse_T,rmse_L,mape_T,mape_L,R2_T,R2_L,mspe,rse,data_type
0,Transformer,6,1,False,False,False,0.002091,0.008213,0.032362,0.071466,0.04573,0.090624,0.012369,0.029562,0.991758,0.967568,0.000276,0.090785,original
1,Transformer,6,12,False,False,False,0.008831,0.013306,0.071367,0.086602,0.093975,0.115353,0.029184,0.035288,0.964754,0.946917,0.001467,0.187563,original
2,Transformer,6,24,False,False,False,0.011372,0.108465,0.079201,0.290589,0.106638,0.32934,0.033029,0.128159,0.953927,0.562542,0.002017,0.214054,original
3,Transformer,6,96,False,False,False,0.034951,0.546846,0.141772,0.68089,0.186953,0.739491,0.059572,0.307729,0.843872,-1.378255,0.006445,0.389026,original
4,Transformer,6,192,False,False,False,0.064917,0.738369,0.201917,0.790637,0.254789,0.859284,0.086723,0.359409,0.676272,-2.498568,0.012703,0.55305,original


In [26]:
# Write the combined frame to results.csv without the row index.
df_merged.to_csv('results.csv',index=False)

In [25]:
# Print (n_rows, n_columns) for the original, reduced and combined frames,
# one per line.
print(*(frame.shape for frame in (df_org, df_red, df_merged)), sep='\n')

(1460, 19)
(1435, 19)
(2895, 19)


In [14]:
# Write each per-dataset frame to its own CSV file.
# NOTE(review): unlike the results.csv export, these calls keep the default
# index=True, so the row index is written as an unnamed first column —
# confirm that is intended.
df_org.to_csv('./result_org.csv')
df_red.to_csv('./result_red.csv')

In [None]:
# # lookback = [6 12 24 48 72 96 120 144 168 192 336 504 672 720]
# fig, axes = plt.subplots(1, 2, figsize=(15, 5))
# lookback = 72
# pos = "True"
# temp = "True"
# val = "True"
# df1 = df[(df['lookback'] == lookback) & (df['pos'] == pos) & (df['temp'] == temp) & (df['val'] == val) & (df['future'] <=96)]
# df2 = df[(df['lookback'] == lookback) & (df['pos'] == pos) & (df['temp'] == temp) & (df['val'] == val) & (df['future'] >96)]
# df1.plot(ax=axes[0], x='future', y=['mse_T', 'mse_L'])
# df2.plot(ax=axes[1], x='future', y=['mse_T', 'mse_L'])

In [None]:
# df[(df['lookback'] == lookback) & (df['pos'] == pos) & (df['temp'] == temp) & (df['val'] == val)]

In [18]:
# Look up a single configuration in the original-data results.
model, lookback, future = 'Transformer', 6, 1
# pos / val / temp are compared against the string "True", not the boolean.
pos = val = temp = "True"

df_org.loc[
    (df_org['model'] == model)
    & (df_org['lookback'] == lookback)
    & (df_org['future'] == future)
    & (df_org['pos'] == pos)
    & (df_org['val'] == val)
    & (df_org['temp'] == temp)
]

Unnamed: 0,model,lookback,future,pos,val,temp,mse_T,mse_L,mae_T,mae_L,rmse_T,rmse_L,mape_T,mape_L,R2_T,R2_L,mspe,rse
641,Transformer,6,1,True,True,True,0.00221,0.008213,0.033092,0.071466,0.047012,0.090624,0.012612,0.029562,0.99129,0.967568,0.000288,0.093329
