In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
from ts_modelling.data_producer import DataProducer
from ts_modelling.simple_experiment import SimpleExp
from argparse import Namespace
import yaml
import torch
%matplotlib inline

with open('args.yaml', 'r') as file:
    args_dict = yaml.safe_load(file)
args = Namespace(**args_dict)

## first experiment setup:
We want to grid test a bunch of training/testing setups. For different flavours of data:
- odd sine 
- even sine 
- trends
- combinations of the above
- "joker": other trends + other frequencies


In [2]:
import time
exp_time_start = time.time()

length = 364*24

odd_sine_producer = DataProducer(length=length, n_vars=1, path=args.root_path, noise_amp=0.5)
even_sine_producer = DataProducer(length=length, n_vars=1, path=args.root_path, noise_amp=0.5)
trends_producer = DataProducer(length=length, n_vars=1, path=args.root_path, noise_amp=0.5)

odd_sine_producer.add_sine([(3/(24*7), 1), (5/(24*7), 1)])
even_sine_producer.add_sine([(2/(24*7), 1), (4/(24*7), 1)])
trends_producer.add_trend(5, 15/length)

combination_producer = DataProducer(length=length, n_vars=1, path=args.root_path, noise_amp=0.5)
combination_producer.add_sine([(3/(24*7), 1), (5/(24*7), 1), (2/(24*7), 1), (4/(24*7), 1)])
combination_producer.add_trend(5, 15/length)
combination_producer.generate_csv('combination.csv')

joker_producer = DataProducer(length=length, n_vars=1, path=args.root_path, noise_amp=0.5)
joker_producer.add_sine([(1/(24), 1), (1/(24*5), 1), (1/(24*20), 1), (1/(24*180), 1)])
joker_producer.add_trend(7, 30/length)
joker_producer.generate_csv('joker.csv')

data_dict = {
    'odd_sine': odd_sine_producer,
    'even_sine': even_sine_producer,
    'trends': trends_producer
}

for data in data_dict:
    data_dict[data].generate_csv(data_path=data+'.csv')

pretrain_epoch_list = [1, 5, 10]
finetune_epoch_list = [1, 5, 10]
train_head_epochs = 5

total_number_of_experiments = len(pretrain_epoch_list)*len(finetune_epoch_list)*len(data_dict)*len(data_dict)
counter = 0
for pretrain_epochs in pretrain_epoch_list:
    for finetune_epochs in finetune_epoch_list:
        for pretrain_data_name in data_dict:
            args.pretrain_data = {pretrain_data_name+'.csv': pretrain_epochs}
            
            for finetune_data_name in data_dict:
                counter += 1
                print(f'Experiment number: {counter}/{total_number_of_experiments}')
                args.train_head_data = {finetune_data_name+'.csv': train_head_epochs}
                args.finetune_data = {finetune_data_name+'.csv': finetune_epochs}
                
                exp = SimpleExp(args)
                model_name = f'PTST_pt-{pretrain_data_name}-{pretrain_epochs}_ft-{finetune_data_name}-{finetune_epochs}'
                exp.change_model_name(model_name)
                
                print()                
                exp.pretrain_model()
                
                print()                
                exp.train_predict_head()
                
                print()
                exp.finetune_model()
                
                for test_data_name in data_dict:
                    print()
                    exp.test(test_data_name+'.csv')

                
                print()                
                exp.test('combination.csv')

                print() 
                exp.test('joker.csv')
                    
                print(10*'-')

                exp.plot_preds(show=False)

total_time = time.time()-exp_time_start
minutes = total_time // 60
seconds = total_time % 60

print(f'Total experiment time: {int(minutes)} minutes {seconds} seconds')

Use CPU
self_supervised head
Trainable parameters:        1476
Total parameters:            1477
Training on data: odd_sine.csv
train 2090
val 242
test 577
Total training time: 0 minutes 0.7890269756317139 seconds
Trainable parameters:       17376
Total parameters:           18697
Training on data: odd_sine.csv
train 2090
val 242
test 577
Total training time: 0 minutes 0.5637621879577637 seconds
Trainable parameters:       18697
Total parameters:           18697
Training on data: odd_sine.csv
train 2090
val 242
test 577
Total training time: 0 minutes 0.6203978061676025 seconds
test 577
Use CPU
supervised head
Trainable parameters:        1476
Total parameters:            1477
Training on data: odd_sine.csv
train 2090
val 242
test 577
Total training time: 0 minutes 0.5353710651397705 seconds
Trainable parameters:       17376
Total parameters:           18697
Training on data: odd_sine.csv
train 2090
val 242
test 577
Total training time: 0 minutes 0.271435022354126 seconds
Trainable para

In [5]:
import pandas as pd
df = pd.read_csv('./test_results/metrics.csv')
df

Unnamed: 0,model_name,pretrain_data,train_head_data,finetune_data,test_data,mae,mse,rmse,mape,mspe,RSE
0,PTST_pt-odd_sine_ft-odd_sine,odd_sine-1,odd_sine-1,odd_sine-1,odd_sine,0.88685,1.185746,1.08892,6.070765,3294.414,1.067823
1,PTST_pt-odd_sine_ft-odd_sine,odd_sine-1,odd_sine-1,odd_sine-1,even_sine,0.920424,1.283475,1.132905,3.226424,198.36586,1.124267
2,PTST_pt-odd_sine_ft-even_sine,odd_sine-1,even_sine-1,even_sine-1,odd_sine,0.905887,1.24731,1.11683,6.064373,3495.892,1.095192
3,PTST_pt-odd_sine_ft-even_sine,odd_sine-1,even_sine-1,even_sine-1,even_sine,0.854129,1.121322,1.058925,2.937047,170.28098,1.050851
4,PTST_pt-even_sine_ft-odd_sine,even_sine-1,odd_sine-1,odd_sine-1,odd_sine,0.864889,1.126819,1.061517,5.947396,3240.2332,1.040951
5,PTST_pt-even_sine_ft-odd_sine,even_sine-1,odd_sine-1,odd_sine-1,even_sine,0.918734,1.281816,1.132173,3.208324,198.39885,1.12354
6,PTST_pt-even_sine_ft-even_sine,even_sine-1,even_sine-1,even_sine-1,odd_sine,0.906754,1.249866,1.117974,6.344046,3850.8723,1.096314
7,PTST_pt-even_sine_ft-even_sine,even_sine-1,even_sine-1,even_sine-1,even_sine,0.851091,1.11335,1.055154,3.019632,183.3237,1.047109


In [6]:
df.groupby('model_name')['mse'].mean().sort_values()

Use CPU
supervised head
Trainable parameters:      106508
Total parameters:          106509
Training on data: even_sine.csv
train 5606
val 686
test 1561
Validation loss decreased (inf --> 0.789066).  Saving model ...
Updating learning rate to 2.8083122651248297e-05
Total training time: 0 minutes 4.100135087966919 seconds
Trainable parameters:      712896
Total parameters:          817857
Training on data: even_sine.csv
train 5606
val 686
test 1561
Validation loss decreased (inf --> 0.032377).  Saving model ...
Updating learning rate to 2.8083122651248297e-05
Total training time: 0 minutes 2.50844407081604 seconds
Trainable parameters:      817857
Total parameters:          817857
Training on data: even_sine.csv
train 5606
val 686
test 1561
Validation loss decreased (inf --> 0.030351).  Saving model ...
Updating learning rate to 2.8083122651248297e-05
Total training time: 0 minutes 4.934921979904175 seconds


In [8]:
import time
exp_time_start = time.time()

data_list = ['odd_sine', 'even_sine', 'trends']

train_epoch_list = [1, 3, 5, 10]

for train_epochs in train_epoch_list:
    for train_data in data_list:
        exp = SimpleExp(args)
        model_name = f'PTST_train_only-{train_data}-{train_epochs}'
        exp.change_model_name(model_name)
        
        print()
        print(f'Training: {model_name} on data: {train_data} for {train_epochs} epochs')
        exp.train()

        for test_data_name in data_list:
            print()
            print(f'Testing {model_name} on data: {test_data_name}')
            args.test_data = test_data_name+'.csv'
            exp.test()


        args.test_data = 'combination.csv'
        print()
        print(f'Testing {model_name} on data: combination.csv')
        exp.test()

        args.test_data = 'joker.csv'
        print()
        print(f'Testing {model_name} on data: joker.csv')
        exp.test()

        print(10*'-')

        exp.plot_preds(show=False)

total_time = time.time()-exp_time_start
minutes = total_time // 60
seconds = total_time % 60

print(f'Total experiment time: {int(minutes)} minutes {seconds} seconds')

test 1561
