In [16]:
class trainer:
    """Single-batch train/eval wrapper around the ``Dual`` model.

    The constructor builds the model, an AdamW optimizer, a cosine-annealing
    learning-rate scheduler and the ``KDLoss`` criterion.  The scheduler is only
    created here; this class never steps it, so the caller is expected to call
    ``self.scheduler.step()`` (typically once per epoch).

    Args:
        scaler: Stored on ``self.scaler``; not used by this class itself.
        channel, num_nodes, seq_len, pred_len, dropout_n, d_llm, e_layer, head:
            Forwarded unchanged to ``Dual``.
        lrate: Learning rate for AdamW.
        wdecay: Weight decay for AdamW.
        feature_w, fcst_w, recon_w, att_w: Loss-term weights forwarded to
            ``KDLoss``.
        device: Forwarded to ``Dual`` and stored on ``self.device``.
        epochs: Planned number of epochs; the cosine schedule length is
            ``min(epochs, 100)``.
    """

    def __init__(
        self,
        scaler,
        channel,
        num_nodes,
        seq_len,
        pred_len,
        dropout_n,
        d_llm,
        e_layer,
        head,
        lrate,
        wdecay,
        feature_w,
        fcst_w,
        recon_w,
        att_w,
        device,
        epochs
    ):
        self.model = Dual(
            device=device, channel=channel, num_nodes=num_nodes, seq_len=seq_len, pred_len=pred_len,
            dropout_n=dropout_n, d_llm=d_llm, e_layer=e_layer, head=head
        )

        self.optimizer = optim.AdamW(self.model.parameters(), lr=lrate, weight_decay=wdecay)
        # `verbose=` is deprecated on PyTorch LR schedulers, so it is no longer
        # passed; the initial learning rate is reported explicitly below instead.
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=min(epochs, 100), eta_min=1e-8
        )
        for group, lr in enumerate(self.scheduler.get_last_lr()):
            print("Adjusting learning rate of group {} to {:.4e}.".format(group, lr))

        self.MSE = MSE
        self.MAE = MAE
        self.clip = 5  # max gradient norm used in train(); None disables clipping
        self.scaler = scaler
        self.device = device

        self.feature_loss = 'smooth_l1'
        self.fcst_loss = 'smooth_l1'
        self.recon_loss = 'smooth_l1'
        self.att_loss = 'smooth_l1'
        # Use the weights supplied by the caller. Previously these four values
        # were overwritten with hard-coded constants, so the constructor
        # arguments had no effect.
        self.fcst_w = fcst_w
        self.recon_w = recon_w
        self.feature_w = feature_w
        self.att_w = att_w
        self.criterion = KDLoss(
            self.feature_loss, self.fcst_loss, self.recon_loss, self.att_loss,
            self.feature_w, self.fcst_w, self.recon_w, self.att_w
        )

        # print("The number of trainable parameters: {}".format(self.model.count_trainable_params()))
        print("The number of parameters: {}".format(self.model.param_num()))
        print(self.model)

    def train(self, x, y, emb):
        """Run one optimisation step on a single batch.

        Args:
            x: Model input, passed as the first argument of ``self.model``.
            y: Target passed to the criterion and the metrics.
            emb: Second argument of ``self.model``.

        Returns:
            ``(loss, mse, mae)`` as Python floats.  MSE/MAE are computed on the
            third model output (``ts_out``) from the forward pass made before
            the optimizer step.
        """
        self.model.train()
        self.optimizer.zero_grad()
        ts_enc, prompt_enc, ts_out, prompt_out, ts_att, prompt_att = self.model(x, emb)
        loss = self.criterion(ts_enc, prompt_enc, ts_out, prompt_out, ts_att, prompt_att, y)
        loss.backward()
        if self.clip is not None:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
        self.optimizer.step()
        # Metrics are for reporting only; do not extend the autograd graph.
        with torch.no_grad():
            mse = self.MSE(ts_out, y)
            mae = self.MAE(ts_out, y)
        return loss.item(), mse.item(), mae.item()

    def eval(self, x, y, emb):
        """Evaluate a single batch without gradient tracking.

        Takes the same arguments as :meth:`train` and returns the same
        ``(loss, mse, mae)`` tuple of Python floats; no parameters are updated.
        """
        self.model.eval()
        with torch.no_grad():
            ts_enc, prompt_enc, ts_out, prompt_out, ts_att, prompt_att = self.model(x, emb)
            loss = self.criterion(ts_enc, prompt_enc, ts_out, prompt_out, ts_att, prompt_att, y)
            mse = self.MSE(ts_out, y)
            mae = self.MAE(ts_out, y)
        return loss.item(), mse.item(), mae.item()
        

In [22]:
def parse_args(argv=None):
    """Parse the training configuration from the command line.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default) keeps
            the original behaviour of parsing ``sys.argv[1:]``.  Inside a
            notebook kernel ``sys.argv`` carries the kernel's own arguments, so
            pass ``[]`` (all defaults) or an explicit list there.

    Returns:
        argparse.Namespace with one attribute per option defined below.  The
        default of ``--save`` embeds the time at which this function is called.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--device", type=str, default="cuda:6", help="")
    parser.add_argument("--data_path", type=str, default="ETTh1", help="data path")
    parser.add_argument("--channel", type=int, default=512, help="number of features")
    parser.add_argument("--num_nodes", type=int, default=7, help="number of nodes")
    parser.add_argument("--seq_len", type=int, default=96, help="seq_len")
    parser.add_argument("--pred_len", type=int, default=96, help="out_len")
    parser.add_argument("--batch_size", type=int, default=32, help="batch size")
    parser.add_argument("--lrate", type=float, default=1e-4, help="learning rate")
    parser.add_argument("--dropout_n", type=float, default=0.2, help="dropout rate of neural network layers")
    parser.add_argument("--d_llm", type=int, default=768, help="hidden dimensions")
    parser.add_argument("--e_layer", type=int, default=1, help="layers of transformer encoder")
    parser.add_argument("--head", type=int, default=8, help="heads of attention")
    parser.add_argument("--model_name", type=str, default="gpt2", help="llm")
    parser.add_argument("--weight_decay", type=float, default=1e-3, help="weight decay rate")
    parser.add_argument("--feature_w", type=float, default=0.01, help="weight of feature kd loss")
    parser.add_argument("--fcst_w", type=float, default=1, help="weight of forecast loss")
    parser.add_argument("--recon_w", type=float, default=0.5, help="weight of reconstruction loss")
    parser.add_argument("--att_w", type=float, default=0.01, help="weight of attention kd loss")
    parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
    parser.add_argument("--epochs", type=int, default=100, help="")
    parser.add_argument('--seed', type=int, default=2036, help='random seed')
    parser.add_argument(
        "--es_patience",
        type=int,
        default=50,
        help="quit if no improvement after this many iterations",
    )
    parser.add_argument(
        "--save",
        type=str,
        default="./logs/" + str(time.strftime("%Y-%m-%d-%H:%M:%S")) + "-",
        help="save path",
    )
    # argv=None makes argparse fall back to sys.argv[1:], as before.
    return parser.parse_args(argv)
    

In [5]:
def load_data(args):
    """Create the train/val/test data loaders for ``args.data_path``.

    The dataset class is picked by name (the four ETT variants have dedicated
    classes, anything else falls back to ``Dataset_Custom``).  Every split is
    built with ``scale=True`` and ``size=[seq_len, 0, pred_len]``.

    Args:
        args: Object exposing ``data_path``, ``seq_len``, ``pred_len``,
            ``batch_size`` and ``num_workers``.

    Returns:
        ``(train_loader, val_loader, test_loader, scaler)`` where ``scaler`` is
        the ``scaler`` attribute of the training split.
    """
    fallback_cls = Dataset_Custom
    ett_classes = {
        'ETTh1': Dataset_ETT_hour,
        'ETTh2': Dataset_ETT_hour,
        'ETTm1': Dataset_ETT_minute,
        'ETTm2': Dataset_ETT_minute,
    }
    dataset_cls = ett_classes.get(args.data_path, fallback_cls)

    def build_split(flag):
        # A fresh `size` list per split, exactly as three separate calls would create.
        return dataset_cls(
            flag=flag,
            scale=True,
            size=[args.seq_len, 0, args.pred_len],
            data_path=args.data_path,
        )

    train_set = build_split('train')
    val_set = build_split('val')
    test_set = build_split('test')

    scaler = train_set.scaler

    # Train and validation share the same loader settings; test is served one
    # sample at a time and keeps the final (possibly partial) batch.
    batched_kwargs = dict(
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=True,
        num_workers=args.num_workers,
    )
    train_loader = DataLoader(train_set, **batched_kwargs)
    val_loader = DataLoader(val_set, **batched_kwargs)
    test_loader = DataLoader(
        test_set,
        batch_size=1,
        shuffle=False,
        drop_last=False,
        num_workers=args.num_workers,
    )

    return train_loader, val_loader, test_loader, scaler


In [18]:
import torch
from torch import optim
import numpy as np
import argparse
import time
import os
import random
from torch.utils.data import DataLoader

In [7]:
# from data_provider.data_loader_emb import Dataset_ETT_hour #, Dataset_ETT_minute, Dataset_Custom
# from model.TimeKD import Dual
# from utils.kd_loss import KDLoss
# from utils.metrics import MSE, MAE, metric
# import faulthandler
# faulthandler.enable()
# torch.cuda.empty_cache()
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:150"

In [33]:
import argparse
# args = parse_args()

### test h5 for 3D matrix



In [1]:
import h5py
import torch
import pandas as pd
from torch.utils.data import DataLoader

In [5]:

class Amex_Dataset:
    """Map-style dataset over per-sample row ranges of a long-format series frame.

    Each row of ``uidxs`` is ``(i1, i2, idx)``: rows ``i1..i2`` (inclusive) of
    ``df_series`` form one sample and ``idx`` identifies it (it is used to look
    up the label row in ``df_y`` and the ``{idx}.h5`` embedding file).

    Args:
        df_series: DataFrame with the series rows.  The first column is skipped
            and the ``'S_2'`` column is returned separately as ``time_ref``;
            all remaining columns form the feature matrix.
        uidxs: Indexable collection of ``(i1, i2, idx)`` triples.
        df_y: Optional label frame indexed by ``idx``.  When ``None`` the
            dataset yields neither labels nor embeddings.
        label_name: Column of ``df_y`` holding the label.
        id_name: Stored on the instance; not used by this class itself.
        emb_path: Directory containing one ``{idx}.h5`` file per sample with a
            ``'stacked_embeddings'`` dataset.  Only read when ``df_y`` is given.
        max_len: Number of time steps every sample is zero-padded to in
            ``collate_fn``.
    """

    # def __init__(self,df_series,df_feature,uidxs,df_y=None):
    def __init__(self, df_series, uidxs, df_y=None, label_name='target', id_name='customer_ID',
                 emb_path="/export/home2/zongqi001/004_TimeKD/TimeKD/amex_emb/train/", max_len=13):
        self.df_series = df_series
        # self.df_feature = df_feature
        self.df_y = df_y
        self.uidxs = uidxs
        self.label_name = label_name
        self.id_name = id_name
        self.emb_path = emb_path
        self.max_len = max_len

    def __len__(self):
        """Number of samples (rows of ``uidxs``)."""
        return len(self.uidxs)

    def _load_embedding(self, idx):
        """Read the ``'stacked_embeddings'`` dataset of ``{idx}.h5`` as a tensor."""
        file_path = os.path.join(self.emb_path, f"{idx}.h5")
        with h5py.File(file_path, 'r') as hf:
            emb_data = hf['stacked_embeddings'][:]
        return torch.from_numpy(emb_data)

    def __getitem__(self, index):
        """Return one sample as a dict.

        Keys ``'SERIES'``, ``'time_ref'`` and ``'idx'`` are always present;
        ``'LABEL'`` and ``'emb_tensor'`` are added only when ``df_y`` was given.
        """
        i1, i2, idx = self.uidxs[index]
        window = self.df_series.iloc[i1:i2+1, 1:]
        series = window.drop(['S_2'], axis=1).values
        time_ref = window['S_2']
        # series = self.df_series.iloc[i1:i2+1,1:].drop(['year_month','S_2'],axis=1).values

        if len(series.shape) == 1:
            series = series.reshape((-1,)+series.shape[-1:])

        if self.df_y is None:
            # Unlabelled (inference) data: no label lookup and no embedding I/O.
            # The embedding file used to be opened here as well although it was
            # never returned in this mode.
            return {
                    'SERIES': series,
                    'time_ref': time_ref,
                    'idx': idx,
                    }

        emb_tensor = self._load_embedding(idx)
        label = self.df_y.loc[idx, [self.label_name]].values
        return {
                'SERIES': series,
                'LABEL': label,
                'time_ref': time_ref,
                'idx': idx,
                'emb_tensor': emb_tensor,
                }

    @staticmethod
    def _collate_time_refs(time_refs):
        """Stack per-sample time references into one numpy array.

        Equal-length references give the same 2-D array as ``np.array`` on the
        list.  References of different lengths are stored in a 1-D object array
        (one array per sample), because recent NumPy versions refuse to build an
        array from ragged sequences implicitly.
        """
        if len({len(ref) for ref in time_refs}) <= 1:
            return np.array(time_refs)
        ragged = np.empty(len(time_refs), dtype=object)
        for pos, ref in enumerate(time_refs):
            ragged[pos] = np.asarray(ref)
        return ragged

    def collate_fn(self, batch):
        """
        Padding to same size.

        Every sample's series is copied into the first rows of a zero tensor of
        ``max_len`` time steps and ``batch_mask`` marks the rows that hold real
        data.  ``batch_y`` stays all-zero and ``batch_emb_tensor`` is ``None``
        when the dataset has no labels.

        Returns:
            dict with keys ``batch_series``, ``batch_mask``, ``batch_y``,
            ``batch_time_ref``, ``batch_idx`` and ``batch_emb_tensor``.
        """
        batch_size = len(batch)
        n_features = batch[0]['SERIES'].shape[1]
        batch_series = torch.zeros((batch_size, self.max_len, n_features))
        batch_mask = torch.zeros((batch_size, self.max_len))
        batch_y = torch.zeros(batch_size)
        batch_time_ref = self._collate_time_refs([sample['time_ref'] for sample in batch])
        batch_idx = np.array([sample['idx'] for sample in batch])
        has_labels = self.df_y is not None

        for i, item in enumerate(batch):
            v = item['SERIES']
            batch_series[i, :v.shape[0], :] = torch.tensor(v).float()
            batch_mask[i, :v.shape[0]] = 1.0
            if has_labels:
                batch_y[i] = torch.tensor(item['LABEL'].astype(np.float32)).float()

        # Stack the embeddings once per batch (this used to be redone for every item).
        batch_emb_tensor = None
        if has_labels:
            batch_emb_tensor = torch.stack([sample['emb_tensor'] for sample in batch], dim=0)

        return {'batch_series':batch_series
                ,'batch_mask':batch_mask
                ,'batch_y':batch_y
                ,'batch_time_ref':batch_time_ref
                ,'batch_idx':batch_idx
                ,'batch_emb_tensor':batch_emb_tensor
                }

In [2]:
input_path = '/export/home2/zongqi001/000_data/amex/13month_0.1pct'


In [3]:
train_series     = pd.read_feather(f'{input_path}/df_nn_series_train.feather')
train_series_idx = pd.read_feather(f'{input_path}/df_nn_series_idx_train.feather').values

In [6]:
# Labels for the training split, then the labelled dataset and its loader.
train_y = pd.read_csv('{}/train_labels.csv'.format(input_path))
train_dataset = Amex_Dataset(df_series=train_series, uidxs=train_series_idx, df_y=train_y)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    drop_last=False,
    collate_fn=train_dataset.collate_fn,  # pads every sample to a common length
    num_workers=4,
)

In [7]:
train_y.shape

(218, 2)

In [8]:
train_y.head(2)

Unnamed: 0,customer_ID,target
0,000f8675ede66cc6affd4c048db11a00246d7ee623f453...,0
1,00def60d36bbb3f6a51dcf0e8a999ab2c383813ec7e8ca...,1


In [15]:
train_series_idx[2]

array([26, 38,  2])

In [6]:
sampling = None
sampling = '1pct'

s = f'data_{sampling}' if sampling else f'data'
print(s)

data_1pct


In [23]:
from sklearn.model_selection import KFold, StratifiedKFold,GroupKFold
# Two stratified folds on the target column; only the first fold's
# (train, validation) index arrays are used below.
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
fold1, fold2 = list(skf.split(train_y, train_y['target']))
trn_index, val_index = fold1

In [24]:
trn_index

array([  0,   3,   5,   6,   7,   9,  12,  13,  15,  17,  19,  22,  23,
        29,  34,  39,  40,  41,  42,  45,  46,  48,  49,  50,  51,  52,
        53,  55,  58,  60,  61,  62,  65,  66,  68,  69,  70,  72,  74,
        77,  81,  86,  90,  92,  96,  97,  99, 100, 102, 105, 107, 110,
       114, 117, 120, 121, 122, 123, 124, 126, 130, 131, 133, 136, 137,
       138, 139, 142, 144, 152, 156, 157, 158, 160, 161, 162, 163, 165,
       166, 169, 170, 172, 174, 175, 176, 178, 179, 183, 185, 186, 189,
       192, 193, 194, 196, 199, 200, 201, 202, 203, 204, 205, 207, 208,
       209, 210, 214, 216, 217])

In [25]:
val_index

array([  1,   2,   4,   8,  10,  11,  14,  16,  18,  20,  21,  24,  25,
        26,  27,  28,  30,  31,  32,  33,  35,  36,  37,  38,  43,  44,
        47,  54,  56,  57,  59,  63,  64,  67,  71,  73,  75,  76,  78,
        79,  80,  82,  83,  84,  85,  87,  88,  89,  91,  93,  94,  95,
        98, 101, 103, 104, 106, 108, 109, 111, 112, 113, 115, 116, 118,
       119, 125, 127, 128, 129, 132, 134, 135, 140, 141, 143, 145, 146,
       147, 148, 149, 150, 151, 153, 154, 155, 159, 164, 167, 168, 171,
       173, 177, 180, 181, 182, 184, 187, 188, 190, 191, 195, 197, 198,
       206, 211, 212, 213, 215])

In [11]:
from tqdm import tqdm
import os
import numpy as np

# for iter, data in enumerate(tqdm(train_dataloader)):
#     print(iter, len(data['batch_series']))

In [9]:
test_series     = pd.read_feather(f'{input_path}/df_nn_series_test.feather')
test_series_idx = pd.read_feather(f'{input_path}/df_nn_series_idx_test.feather').values
test_y = pd.read_csv(f'{input_path}/test_labels.csv')['target']

# The test dataset is built without labels, so its batches carry an all-zero
# `batch_y` and `batch_emb_tensor=None`.
test_dataset = Amex_Dataset(test_series,test_series_idx)
# Evaluation must be deterministic and stay aligned with `test_y`, which is in
# file order: do not shuffle the test loader.
test_dataloader = DataLoader(test_dataset,batch_size=4,shuffle=False, drop_last=False, collate_fn=test_dataset.collate_fn,num_workers=4)

In [10]:
test_y.head(2)

0    0
1    0
Name: target, dtype: int64

In [13]:
test_series_idx[1]

array([13, 25,  1])

In [13]:
for iter1, data in enumerate(tqdm(test_dataloader)):
    print(iter1, len(data['batch_series']))

100%|██████████| 19/19 [00:00<00:00, 21.99it/s]

0 4
1 4
2 4
3 4
4 4
5 4
6 4
7 4
8 4
9 4
10 4
11 4
12 4
13 4
14 4
15 4
16 4
17 4
18 1





In [14]:
from utils.tools import StandardScaler
from model.TimeKD import Dual
from utils.kd_loss import KDLoss
from utils.metrics import MSE, MAE, metric



In [19]:
# Build the trainer for the Amex data on CPU.
# - seq_len=13 and num_nodes=220 correspond to the (batch, 13, 220) shape of
#   `batch_series` displayed further down in this notebook.
# - d_llm=768 corresponds to the last dimension of the stored embeddings
#   (a single sample file has shape (220, 768), shown further down).
# - NOTE(review): `scaler=StandardScaler` passes the imported name itself, not
#   an instance created/fitted here — confirm this is intended.
engine = trainer(
    scaler=StandardScaler,
    channel=512,
    num_nodes=220,
    seq_len=13,
    pred_len=1,
    dropout_n=0.2,
    d_llm=768,
    e_layer=1,
    head=8,
    lrate=1e-4,
    wdecay=1e-3,
    feature_w=0.01,
    fcst_w=1,
    recon_w=0.5,
    att_w=0.01,
    device="cpu",
    epochs=100
    )

  from .autonotebook import tqdm as notebook_tqdm


Adjusting learning rate of group 0 to 1.0000e-04.
The number of parameters: 8809260
Dual(
  (normalize_layers): Normalize()
  (length_to_feature): Linear(in_features=13, out_features=512, bias=True)
  (token_to_feature): Linear(in_features=768, out_features=512, bias=True)
  (ts_encoder): Encoder(
    (attn_layers): ModuleList(
      (0): EncoderLayer(
        (attention): AttentionLayer(
          (inner_attention): FullAttention(
            (dropout): Dropout(p=0.2, inplace=False)
          )
          (query_projection): Linear(in_features=512, out_features=512, bias=True)
          (key_projection): Linear(in_features=512, out_features=512, bias=True)
          (value_projection): Linear(in_features=512, out_features=512, bias=True)
          (out_projection): Linear(in_features=512, out_features=512, bias=True)
        )
        (conv1): Conv1d(512, 3072, kernel_size=(1,), stride=(1,))
        (conv2): Conv1d(3072, 512, kernel_size=(1,), stride=(1,))
        (norm1): LayerNorm((5

In [None]:
# for iter1, data_last in enumerate(tqdm(train_dataloader)):
#     pass

100%|██████████| 55/55 [00:01<00:00, 47.49it/s]


In [48]:
for iter1, data_last in enumerate(tqdm(test_dataloader)):
    pass

100%|██████████| 19/19 [00:00<00:00, 22.59it/s]


In [21]:
data['batch_series'].shape

torch.Size([2, 13, 220])

In [22]:
# NOTE(review): `data` is whatever batch the most recently executed loader loop
# left behind; it must come from the labelled train loader, otherwise
# `batch_emb_tensor` is None and the conversion below fails.
y = data['batch_y']
x = data['batch_series']
emb_tensor = data['batch_emb_tensor']
device = 'cpu'
# torch.as_tensor converts tensors or arrays to float32 on `device` in one
# step, replacing the legacy `torch.Tensor(...)` constructor.
trainx = torch.as_tensor(x, dtype=torch.float32, device=device)
trainy = torch.as_tensor(y, dtype=torch.float32, device=device)
emb = torch.as_tensor(emb_tensor, dtype=torch.float32, device=device)


In [23]:
metrics = engine.train(trainx, trainy, emb)

In [24]:
metrics

(0.12315793335437775, 0.2463158667087555, 0.47641435265541077)

In [33]:
aaa = next(iter(test_dataloader))
aaa['batch_series'].shape

torch.Size([4, 13, 220])

In [35]:
for item in aaa:
    print(item)

batch_series
batch_mask
batch_y
batch_time_ref
batch_idx
batch_emb_tensor


In [42]:
# Restore the best checkpoint of the Amex run into the CPU model.
# `map_location` remaps the stored tensors onto the CPU while loading.
# NOTE(review): `strict=False` means missing or unexpected keys do not raise;
# the returned report (displayed below this cell) is the only indication of a
# mismatch, so check it.
# NOTE(review): `torch.load` unpickles the file — only load checkpoints from a
# trusted source (consider `weights_only=True` if the installed PyTorch
# supports it).
path = '/export/home2/zongqi001/004_TimeKD/TimeKD/logs/Amex/1_512_1_0.0001_0.2_42_0.01/best_model.pth'
engine.model.load_state_dict(torch.load(path, map_location=torch.device('cpu')), strict=False)

<All keys matched successfully>

In [47]:
data = aaa

x = data['batch_series']

testx = torch.Tensor(x).to(device).float()

print('testx shape:', testx.shape)

with torch.no_grad():
    preds = engine.model(testx, None)
print('preds len:', preds[2].shape)
print('preds:', preds)

testx shape: torch.Size([4, 13, 220])
preds len: torch.Size([4])
preds: (tensor([[[ 0.6451, -0.0119, -0.9287,  ..., -0.2492,  0.7468, -0.9597],
         [ 0.6600,  0.2520, -0.7379,  ...,  0.0831,  0.3457, -0.6377],
         [ 1.6661, -0.3234, -0.7198,  ...,  1.0208,  0.3167,  0.0920],
         ...,
         [ 1.6447,  0.2347, -0.8127,  ...,  0.1912,  0.0680, -0.5167],
         [ 1.4680,  0.5246, -0.4868,  ...,  0.2213,  0.1872,  0.2498],
         [ 1.4589, -0.3334,  0.4941,  ...,  0.5517,  0.3376, -0.2136]],

        [[ 0.4480,  0.4861, -0.2290,  ..., -1.9047,  0.5211,  0.2954],
         [ 0.6513, -0.6885,  0.6580,  ...,  0.0853, -0.3880,  0.2479],
         [ 2.8270,  0.2287, -1.4148,  ...,  0.9289, -0.4948, -0.1103],
         ...,
         [ 0.8056,  0.3727, -0.4812,  ...,  0.4020,  0.3636,  0.3260],
         [ 1.7412,  0.3133,  0.4320,  ...,  0.5747,  0.4761, -0.4457],
         [ 1.8720, -0.1302, -0.9207,  ...,  0.2930,  0.5951, -0.2599]],

        [[ 0.5499, -0.1574, -0.6122,  ..., 

In [54]:
testx.shape[0]

1

In [51]:
# data = aaa
data = data_last

x = data['batch_series']

testx = torch.Tensor(x).to(device).float()

print('testx shape:', testx.shape)

with torch.no_grad():
    preds = engine.model(testx, None)
print('preds len:', preds[2].shape)
print('preds:', preds)

testx shape: torch.Size([1, 13, 220])
preds len: torch.Size([])
preds: (tensor([[[ 0.0096, -1.4032,  1.0565,  ..., -0.2951, -0.7156, -0.4563],
         [-0.1042,  0.0216,  0.9074,  ..., -1.5325,  0.0642, -1.1931],
         [ 0.0074, -1.0180, -0.1519,  ..., -1.1090,  0.3688, -0.7764],
         ...,
         [ 0.1152, -0.1989,  0.3614,  ..., -0.8293,  0.0494, -0.7065],
         [ 0.7021, -0.8486, -0.1324,  ..., -0.6438, -0.6686, -0.5303],
         [-0.0576, -0.7737, -0.8084,  ..., -2.5327,  1.8250, -0.4494]]]), None, tensor(0.9931), None, None, None)


In [57]:
# The model's third output is 0-dimensional for a single-sample batch and 1-D
# otherwise (see the shapes printed by the inference cells in this notebook).
# reshape(-1) gives a 1-D tensor in both cases, without a batch-size branch or
# rebuilding the tensor from a Python list.
pred_y = preds[2].reshape(-1)

pred_y

tensor([0.9931])

In [11]:
file_path = '/export/home2/zongqi001/004_TimeKD/TimeKD/ETTm1/24/train_batch/batch.h5'

with h5py.File(file_path, 'r') as hf:
    data = hf['stacked_embeddings'][:]
    tensor = torch.from_numpy(data)

tensor.shape

torch.Size([34441, 7, 768])

In [18]:
file_path = '/export/home2/zongqi001/004_TimeKD/TimeKD/amex_emb/train/0.h5'

with h5py.File(file_path, 'r') as hf:
    data = hf['stacked_embeddings'][:]
    tensor = torch.from_numpy(data)

tensor.shape

torch.Size([220, 768])

In [20]:
embeddings_stack = [tensor.squeeze(0).detach(), tensor.squeeze(0).detach()]
stacked_embeddings = torch.stack(embeddings_stack, dim=0)
stacked_embeddings.shape

torch.Size([2, 220, 768])

In [3]:
embeddings_stack = []
embeddings = tensor
embeddings_stack.append(tensor.squeeze(0).detach())
embeddings_stack.append(tensor.squeeze(0).detach())

stacked_embeddings = torch.stack(embeddings_stack, dim=0)
stacked_embeddings.shape

torch.Size([2, 7, 768])

In [None]:
# file_path = '/home/zongqi/004_TimeKD/TimeKD/test_h5.h5'
# with h5py.File(file_path, 'w') as hf:
#     hf.create_dataset('embeddings', data=stacked_embeddings.detach().cpu().numpy())

In [None]:
# with h5py.File(file_path, 'r') as hf:
#     data = hf['embeddings'][:]
#     tensor = torch.from_numpy(data)

# data.shape

(2, 7, 768)

In [11]:
temp = stacked_embeddings

temp_list = []
temp_list.append(temp)
temp_list.append(temp)

stacked_temp = torch.cat(temp_list, dim=0)
stacked_temp.shape

torch.Size([4, 7, 768])

In [15]:
file_path = '/home/zongqi/004_TimeKD/TimeKD/ETTm1/24/train_batch/batch.h5'

with h5py.File(file_path, 'r') as hf:
    data = hf['stacked_embeddings'][:]
    tensor = torch.from_numpy(data)

tensor.shape

torch.Size([22, 7, 768])