# setup

In [None]:
import torch
if 'google.colab' in str(get_ipython()):
  from google.colab import drive
  drive.mount('/content/drive')
  mydrive='/content/drive/MyDrive/volrt/'
  source_path=mydrive+"git/Crossformer"
  tables = ['volvN.csv', 'volvT.csv', 'volvA.csv', 'volvG.csv', ]
  batch_size=int(torch.cuda.get_device_properties(0).total_memory/1e9+.5)*64
  import os,sys
  if not os.path.exists(source_path):
    %cd $mydrive/git
    !git clone https://github.com/jerronl/Crossformer.git
    %cd $source_path
  else:
    %cd $source_path
    !git reset --hard HEAD
    !git pull origin master
  !pip install einops #scikit-learn
  sys.path.append( source_path)
else:
  tables = ['volvA.csv']
  mydrive= 'E:/mydoc/git/trade/analyics/'
  batch_size=32
%cd $mydrive


import argparse
import torch

from cross_exp.exp_crossformer import Exp_crossformer
from utils.tools import string_split


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` turns every non-empty string (including "False") into True,
    so boolean options need an explicit converter.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser(description="CrossFormer")

# --- data ---
parser.add_argument("--data", type=str, default="vols", help="data")
parser.add_argument(
    "--root_path", type=str, default=mydrive, help="root path of the data file"
)
# nargs="+" keeps the value a list of file names; type=list would split a
# single command-line string into individual characters.
parser.add_argument(
    "--data_path", type=str, nargs="+", default=tables, help="data file(s)"
)
parser.add_argument(
    "--data_split",
    type=str,
    default="0.7,0.1,0.2",
    help="train/val/test split, can be ratio or number",
)
parser.add_argument(
    "--checkpoints",
    type=str,
    default="./checkpoints/",
    help="location to store model checkpoints",
)

# --- sequence geometry ---
parser.add_argument("--in_len", type=int, default=20, help="input MTS length (T)")
# Raw string: in a normal literal "\t" is a tab, which garbled the help text.
parser.add_argument("--out_len", type=int, default=1, help=r"output MTS length (\tau)")
parser.add_argument("--seg_len", type=int, default=5, help="segment length (L_seg)")
parser.add_argument(
    "--win_size", type=int, default=2, help="window size for segment merge"
)
parser.add_argument(
    "--factor",
    type=int,
    default=10,
    help="num of routers in Cross-Dimension Stage of TSA (c)",
)

# --- model size ---
parser.add_argument(
    "--d_model", type=int, default=256, help="dimension of hidden states (d_model)"
)
parser.add_argument(
    "--d_ff", type=int, default=512, help="dimension of MLP in transformer"
)
parser.add_argument("--n_heads", type=int, default=4, help="num of heads")
parser.add_argument("--e_layers", type=int, default=3, help="num of encoder layers (N)")
parser.add_argument("--dropout", type=float, default=0.2, help="dropout")

parser.add_argument(
    "--baseline",
    action="store_true",
    help="whether to use mean of past series as baseline for prediction",
    default=False,
)

# --- optimisation ---
parser.add_argument(
    "--num_workers", type=int, default=0, help="data loader num workers"
)
parser.add_argument(
    "--batch_size", type=int, default=32, help="batch size of train input data"
)
parser.add_argument("--train_epochs", type=int, default=20, help="train epochs")
parser.add_argument("--patience", type=int, default=3, help="early stopping patience")
parser.add_argument(
    "--learning_rate", type=float, default=1e-4, help="optimizer initial learning rate"
)
parser.add_argument("--lradj", type=str, default="type1", help="adjust learning rate")
parser.add_argument("--itr", type=int, default=5, help="experiments times")

parser.add_argument(
    "--save_pred",
    action="store_true",
    help="whether to save the predicted future MTS",
    default=False,
)

# --- runtime ---
parser.add_argument("--use_gpu", type=_str2bool, default=True, help="use gpu")
parser.add_argument("--resume", type=_str2bool, default=True, help="resume")
parser.add_argument(
    "--cutday",
    type=str,
    default=None,
    help="cut day passed to the experiment, e.g. '#2024-02-01'",
)
parser.add_argument("--gpu", type=int, default=0, help="gpu")
parser.add_argument(
    "--use_multi_gpu", action="store_true", help="use multiple gpus", default=False
)
parser.add_argument(
    "--devices", type=str, default="0,1,2,3", help="device ids of multiple gpus"
)

# The notebook has no command line: parse an empty argv so every option takes
# its default, then let update_args() apply the per-dataset overrides.
args = parser.parse_args(args=[])

# Keep the GPU flag only when CUDA is actually usable on this machine.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

if args.use_gpu and args.use_multi_gpu:
    # Strip spaces from the id list ("0, 1" -> "0,1"), expose the ids as ints
    # and make the first one the primary device.
    args.devices = args.devices.replace(" ", "")
    device_ids = args.devices.split(",")
    args.device_ids = list(map(int, device_ids))
    args.gpu = args.device_ids[0]
    print(args.gpu)


def update_args(itr):
    """Apply the per-dataset overrides to ``args`` and build the run name.

    Every key/value in ``data_parser[args.data]`` (when that entry exists) is
    copied onto the global ``args``, and a string ``args.data_split`` is
    converted with ``string_split``. The resulting arguments are printed.

    Args:
        itr: value placed in the ``itr`` field of the setting string.

    Returns:
        The setting string built from the model hyper-parameters and ``itr``.
    """
    overrides = data_parser[args.data] if args.data in data_parser else {}
    for key, value in overrides.items():
        setattr(args, key, value)

    if isinstance(args.data_split, str):
        args.data_split = string_split(args.data_split)

    print("Args in experiment:")
    print(args)

    # Field order must match the placeholders in the template below.
    fields = (
        args.in_len,
        args.out_len,
        args.seg_len,
        args.win_size,
        args.factor,
        args.d_model,
        args.n_heads,
        args.e_layers,
        itr,
    )
    return "Crossformer_il{}_ol{}_sl{}_win{}_fa{}_dm{}_nh{}_el{}_itr{}".format(*fields)
from cross_exp.exp_crossformer import Exp_crossformer
import seaborn as sns, numpy as np,math
import matplotlib.pyplot as plt


def regplot(cols,figsize=(16, 16)):
  """Draw predicted-vs-true regression panels, one per plotted target.

  Reads the notebook globals ``dep_var`` (target names; ``'_'`` marks a
  column to skip), ``results`` (one ``(preds, trues)`` pair per entry of
  ``tables``) and ``tables`` (used as legend labels).

  Args:
    cols: maximum number of panels per row.
    figsize: figure size forwarded to ``plt.subplots``.
  """
  cnt=len(dep_var)-dep_var.count('_')
  if cnt == 0:
    return  # nothing to plot; also avoids dividing by zero below
  figs = max(1, min(cnt, cols))
  # squeeze=False always returns a 2-D array of axes, so the same indexing
  # works for a single panel, a single row and a single column. The previous
  # `axes if figs == 1` shortcut handed an ndarray to seaborn when cols == 1
  # and more than one panel was needed.
  _, axes = plt.subplots(math.ceil(cnt / figs), figs, figsize=figsize, squeeze=False)
  panels = axes.ravel()
  j=0

  for i, name in enumerate(dep_var):
    if name=='_':
      continue
    axs = panels[j]
    j=j+1
    # Infinite sentinels: any real value replaces them, whatever its size.
    left,right = np.inf,-np.inf
    # Targets other than dtm*/pmc* have their x-range clamped to [-2.5, 2.5].
    clamp = name[:3] not in ["dtm", "pmc"]
    for ii in range(len(tables)):
      preds, trues = results[ii]
      sns.regplot(
          ax=axs,
          x=trues[:, i],
          y=preds[:, i],
          scatter_kws={"color": f"C{ii}", "alpha": 0.3},
          line_kws={"color": f"C{ii}", "alpha": 0.3},
          label=tables[ii],
      )
      mask = ~np.isnan(trues[:, i])
      if not np.any(mask):
        continue  # all-NaN column: np.min/np.max would raise on empty input
      lo = np.min(trues[:, i][mask])
      hi = np.max(trues[:, i][mask])
      if clamp:
        lo, hi = max(lo, -2.5), min(hi, 2.5)
      left, right = min(left, lo), max(right, hi)
    axs.set_title(name)
    # Limits stay infinite when no table had valid data for this target.
    if np.isfinite(left) and np.isfinite(right):
      axs.set_xlim(left=left, right=right)
    axs.legend()

  # Hide grid cells left over when cnt is not a multiple of the row width.
  for spare in panels[j:]:
    spare.set_visible(False)

  plt.show()

Mounted at /content/drive
/content/drive/MyDrive/volrt/git/Crossformer


# train

In [None]:
from cross_exp.exp_crossformer import Exp_crossformer
# Training overrides that update_args() copies onto `args` for the "vols" data.
data_parser = {
    "vols": dict(
        patience=40,
        train_epochs=500,
        learning_rate=2e-3,
        data_split=[0.8,0.1,0.1],
        batch_size=batch_size*2//5,
        e_layers=5,
        d_model=512,
        lradj='type2',
    ),
}


def _evaluate(exp, setting, data):
    """Test `exp` on the last table only and print the output shapes."""
    preds, trues = exp.test(setting, data, True, data_path=[tables[-1]], inverse=True)
    print(preds.shape, trues.shape)
    return preds, trues


# The full sweep over args.itr iterations is executed three times.
for sweep in range(3):
    for run in range(args.itr):
        # setting record of experiments
        setting = update_args(run)
        exp = Exp_crossformer(args)  # set experiments

        print(f">>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>")
        exp.train(setting, "vols")
        print(f">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
        preds, trues = _evaluate(exp, setting, 'vols')

        # Same experiment object, second training/evaluation pass on "prcs".
        exp.train(setting, "prcs")
        preds, trues = _evaluate(exp, setting, 'prcs')


In [None]:
# Release the Colab VM once the work above has finished. The module only
# exists on Colab, so a local "Run All" skips this step instead of raising.
try:
    from google.colab import runtime
except ImportError:
    runtime = None

if runtime is not None:
    runtime.unassign()

# test

In [None]:

# Evaluation overrides that update_args() copies onto `args`.
data_parser = {"vols": dict(batch_size=batch_size, cutday='#2024-02-01')}

setting=update_args(0)
exp = Exp_crossformer(args)
print(f">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
# level0, slope0, curve0, level1, ... curve3 (twelve targets in total).
dep_var=[f"{kind}{idx}" for idx in range(4) for kind in ("level", "slope", "curve")]
# One (preds, trues) pair per table, in the order of `tables`.
results = [
    exp.test(setting, 'vols', True, data_path=[table], inverse=True)
    for table in tables
]
regplot(3)

In [None]:

# Evaluation overrides that update_args() copies onto `args`.
data_parser = {
    "vols": {
        'batch_size':batch_size,
        'cutday':'#2023-09-01',
    },
    }
# Only the four level* targets are plotted; '_' makes regplot skip a column.
# The list does not change between iterations, so build it once.
dep_var=['level0', '_', '_', 'level1', '_', '_', 'level2', '_', '_', 'level3', '_', '_']
# Evaluate every iteration. Follow args.itr (the count the training loop
# uses, default 5) instead of a literal 5 so the two cannot drift apart.
for i in range(args.itr):
  setting=update_args(i)
  exp = Exp_crossformer(args)
  print(f">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
  results = []
  for table in tables:
      results.append(exp.test(setting, 'vols', True, data_path=[table], inverse=True))
  regplot(4,(20,5))

In [None]:

# Evaluation overrides that update_args() copies onto `args`.
data_parser = {"vols": dict(batch_size=batch_size, cutday='#2024-02-01')}

setting=update_args(2)
exp = Exp_crossformer(args)
print(f">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
dep_var=[
    'level0', 'slope0', 'curve0',
    'level1', 'slope1', 'curve1',
    'level2', 'slope2', 'curve2',
    'level3', 'slope3', 'curve3',
]
# One (preds, trues) pair per table, this time from the 'prcs' test.
results = [
    exp.test(setting, 'prcs', True, data_path=[table], inverse=True)
    for table in tables
]
regplot(2)
# Per table: joint density of the first true column against the index of the
# largest value among the last 22 prediction columns. Colours start at C1.
for i,(preds, trues) in enumerate(results):
  preds=preds[:,-22:].argmax(1)
  sns.jointplot(x=trues[:,0] , y=preds,kind="kde",fill=True,rug=True, color=f'C{i+1}')

In [None]:
# Release the Colab VM once the evaluation cells have finished. The module
# only exists on Colab, so a local "Run All" skips this step instead of raising.
try:
    from google.colab import runtime
except ImportError:
    runtime = None

if runtime is not None:
    runtime.unassign()