In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import os
import argparse

import pandas as pd
from torch.autograd import Variable
import importlib.util
from torch.utils.data import DataLoader
from datasets import Dataset
module_path = r"D:/timeseries/package/state-spaces-simple/src/models/sequence/ss/standalone/s4.py"

spec = importlib.util.spec_from_file_location("S4", module_path)
S4 = importlib.util.module_from_spec(spec)
spec.loader.exec_module(S4)
from torch.optim import AdamW

CUDA extension for cauchy multiplication not found. Install by going to extensions/cauchy/ and running `python setup.py install`. This should speed up end-to-end training by 10-50%
Falling back on slow Cauchy kernel. Install at least one of pykeops or the CUDA extension for efficiency.


In [2]:
data=pd.read_csv('D:/timeseries/data/data_3.csv')


In [3]:
data=data[:2000]

In [4]:
def normalize(data):
    mean_list=[]
    std_list=[]
    for i in data.columns:
   
        mean=data[i].mean()
       
        std=data[i].std()
        data[i]=(data[i]-mean)/std
        # for j in range(len(data[i])):
          # if data[i][j]!=0:
          #   first=data[i][j]
          #   data[i]=data[i]/first+5
          #   break

        
        mean_list.append(mean)
        std_list.append(std)   
    # return data,first
    return data,mean_list,std_list

def get_mask(data):
    """
    data should in the form of pd.df
    gen a tenor with 0 and 1 to represent missing data
    """
    mask = ~data.isnan().values
 
    mask_tensor = torch.tensor(mask, dtype=torch.float32)
    
    mask_tensor= mask_tensor.transpose(0,1)
    return mask_tensor

def mape(A,F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
   
    if maskf_sub[i]!=0:
      sum+=abs(A[i] - F[i]) / abs(A[i])
      length+=1
  if length>0:
    return 100/length*sum
  
  return 0
def smape(A, F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
    if maskf_sub[i]!=0:
      sum+=2 * abs(F[i] - A[i]) / (abs(A[i]) + abs(F[i]))
      length+=1
  if length>0:

    return 100/length * sum
  return 0

In [29]:

def create_inputs(data, context_length, prediction_length):
    num_days, num_products = data.shape
    num_samples = num_days - context_length - prediction_length + 1
   

    samples = torch.zeros((num_samples, context_length,num_products))

    for i in range(num_samples):
        samples[i,:,:] = data[i:i+context_length]

    return samples,num_samples
def create_targets(data,context_length,prediction_length):
    num_days, num_products = data.shape
    num_samples = num_days - context_length - prediction_length + 1
    

    targets = torch.zeros((num_samples, prediction_length,num_products))

    for i in range(num_samples):
        targets[i, :,:] = data[i+context_length:i+prediction_length+context_length]

    return targets




def split_train_val(data,prediction_period,batchs,context_length):
 
    whole,m,std=normalize(data.iloc[:,1:])
    whole=torch.tensor(whole.values)
    # whole=whole.transpose(0,1)
   
    inputs,period=create_inputs(whole,context_length,prediction_period)
    target=create_targets(whole,context_length,prediction_period)
    inputs=inputs.reshape(period,1,200*context_length)
    target=target.reshape(period,1,prediction_period*200)
    # print(inputs.shape)
    # train_input=inputs[:period-prediction_period].transpose(0,1)
    # test_input=inputs[:period-prediction_period].transpose(0,1)
    # train_target=target[:period-prediction_period].transpose(0,1)
    # test_target=target[period-prediction_period:].transpose(0,1)
    train_input=inputs[:period-prediction_period]
    test_input=inputs[-1:]
    train_target=target[:period-prediction_period]
    test_target=target[-1:]
    print(test_input.shape,test_target.shape)
    
    traindict={'target':train_target,'input':train_input}
    testdict={'target':test_target,'input':test_input}
    train=Dataset.from_dict(traindict)
    train=train.with_format('torch')
    test=Dataset.from_dict(testdict)
    test=test.with_format('torch')
    

    train_loader = DataLoader(train, batch_size=batchs, shuffle=False)
    test_loader = DataLoader(test, batch_size=batchs, shuffle=False)
    return train_loader, test_loader,m,std

class S4Model(nn.Module):

    def __init__(
        self, 
        d_input, 
        d_output=10, 
        d_model=256, 
        n_layers=4, 
        dropout=0.2,
        prenorm=False,
    ):
        super().__init__()

        self.prenorm = prenorm

        # Linear encoder (d_input = 1 for grayscale and 3 for RGB)
        self.encoder = nn.Linear(d_input, d_model)

        # Stack S4 layers as residual blocks
        self.s4_layers = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.dropouts = nn.ModuleList()
        for _ in range(n_layers):
            self.s4_layers.append(
                S4.S4(
                    d_model=d_model, 
                    l_max=1024, 
                    bidirectional=True,
                    postact='glu',
                    dropout=dropout, 
                    transposed=True,
                )
            )
            self.norms.append(nn.LayerNorm(d_model))
            self.dropouts.append(nn.Dropout2d(dropout))

        # Linear decoder
        self.decoder = nn.Linear(d_model, d_output)

    def forward(self, x):
        """
        Input x is shape (B, L, d_input)
        """
        x = self.encoder(x)  # (B, L, d_input) -> (B, L, d_model)
        
        x = x.transpose(-1, -2)  # (B, L, d_model) -> (B, d_model, L)
        for layer, norm, dropout in zip(self.s4_layers, self.norms, self.dropouts):
            # Each iteration of this loop will map (B, d_model, L) -> (B, d_model, L)

            z = x
            if self.prenorm:
                # Prenorm
                z = norm(z.transpose(-1, -2)).transpose(-1, -2)
            
            # Apply S4 block: we ignore the state input and output
            z, _ = layer(z)

            # Dropout on the output of the S4 block
            z = dropout(z)

            # Residual connection
            x = z + x

            if not self.prenorm:
                # Postnorm
                x = norm(x.transpose(-1, -2)).transpose(-1, -2)

        x = x.transpose(-1, -2)

        # Pooling: average pooling over the sequence length
        x = x.mean(dim=1)

        # Decode the outputs
        x = self.decoder(x)  # (B, d_model) -> (B, d_output)

        return x

In [26]:
def train_model(epoch,train,ct,pt):
    model = S4Model(
    d_input=200*ct, 
    d_output=pt*200, 
    d_model=64, 
    n_layers=4, 

    dropout=0.1,
    prenorm=False
    )
    device='cpu'
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=0.001,)

    model.train()
    for epoch in range(epoch):
        for ind,batch in enumerate(train):
            optimizer.zero_grad()
            target=batch['target']
            inputs=batch['input']
            target_mask = ~torch.isnan(target)
            # target_mask = target_mask.view(target.shape[0],target.shape[1],target.shape[2])
            valid_target = target[target_mask]
            # input_mask=target_mask.reshape()
            # valid_inputs = inputs[target_mask]
        
            
            # inputs=Variable(inputs,requires_grad=True)
            # input_mask = ~torch.isnan(inputs)
            # input_mask=input_mask.reshape(inputs.shape[0],inputs.shape[1],inputs.shape[2])
            # print(input_mask.shape)
            # valid_inputs = inputs[input_mask]
            # print(valid_inputs.shape)
            inputs=inputs.nan_to_num()
            inputs=Variable(inputs,requires_grad=True)
            
            outputs = model(inputs)
            # print(outputs)
            output_mask=target_mask.reshape(outputs.shape[0],outputs.shape[1])
            criterion = nn.MSELoss()
            valid_outputs=outputs[output_mask]
            # print(valid_outputs)
            # break
            loss =criterion(valid_outputs,valid_target)

            loss.backward()
            optimizer.step()
        
        if epoch % 2 == 0:
            print(loss.item())

        model.eval()
    return model

In [31]:
def predict(model,test,pt):
    pred=[]
    for ind,batch in enumerate(test):
        inputs=batch['input']
        inputs=inputs.nan_to_num()
        print(inputs,inputs.shape)
        out=model(inputs)
        out=out.reshape(pt,200)
        out=out.T
        pred=pred+list(out)
        
    return pred
            
def denormalize(data,mean,std):
    for i in range(len(data)):
        print(data[i],std[i],mean[i])
        data[i]=data[i]*std[i]+mean[i]
    return data       


In [32]:
def S4_run(data,pred_length,batch_size,context_l,epoch):
    train,test,m,std=split_train_val(data,pred_length,batch_size,context_l)
    model=train_model(epoch,train,context_l,pred_length)
    pred=predict(model,test,pred_length)
    denormalize_pred=denormalize(pred,m,std)
    return model,pred,denormalize_pred,test,m,std
    

    

In [40]:
get=S4_run(data,1,50,2,20)


torch.Size([1, 1, 400]) torch.Size([1, 1, 200])
1.4655784368515015
1.0363974571228027
0.795599639415741
0.8695045709609985
0.8285287618637085
0.6728562116622925
0.5595833659172058
0.475406676530838
0.6038219332695007
0.5538503527641296
tensor([[[-1.3807,  0.0000, -2.2558, -2.7749, -2.3478, -2.0596, -2.4736,
          -6.5312, -2.4400, -1.2093, -1.9307, -1.3097, -0.7255,  0.0000,
          -1.6630,  0.3356, -2.3994, -0.9367, -1.4892, -1.2223, -0.4660,
          -1.3072, -1.2659, -1.8598, -1.2002, -2.3777, -2.6416, -1.2747,
          -2.2976, -2.4221, -1.4364, -2.1847, -2.5928, -2.0102,  2.3824,
          -2.2012, -4.3677, -1.6630, -1.1870, -2.0778, -2.5532, -2.1227,
          -1.6650, -3.8979, -1.0991, -0.5008, -1.2060,  1.9325, -1.4256,
          -1.6206,  2.0791, -1.5203, -1.8614, -1.5339, -0.7087, -2.1707,
          -0.4593, -2.2442, -1.7802, -1.7730, -2.3163, -2.6467, -1.5884,
          -1.7526, -2.2285, -2.2102, -1.1118, -2.3826, -1.5347, -1.5440,
          -1.4871, -1.5623, -2.070

In [19]:

def mape(A,F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
    # print(A[i],"A",maskf_sub[i],"ma")
    if not torch.isnan(A[i]):
    # if maskf_sub[i]!=0:
      sum+=abs(A[i] - F[i]) / abs(A[i])
      length+=1
  if length>0:
    return 100/length*sum
  
  return 0
def smape(A, F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
    if not torch.isnan(A[i]):
    # if maskf_sub[i]!=0:
      sum+=2 * abs(F[i] - A[i]) / (abs(A[i]) + abs(F[i]))
      length+=1
  if length>0:

    return 100/length * sum
  return 0

In [18]:
act_mask

tensor([ True,  True,  True,  ..., False,  True,  True])

In [41]:
smpl=[]
mpl=[]
m=get[4]
std=get[5]
act=next(iter(get[3]))
act=(act['target']).reshape(1,200)
act=act.T
deno_pred=get[2]
act=denormalize(act,m,std)
act_mask=(~torch.isnan(act))
act_mask=act_mask.reshape(200*1)



for i in range(200):

    mp=mape(act[i],deno_pred[i],act_mask)
    smp=smape(act[i],deno_pred[i],act_mask)
    
    smpl.append(float(smp))
    mpl.append(float(mp))

tensor([-1.0417]) 22.566338194292367 54.660836036
tensor([-1.5064]) 21.352693682121906 71.59599817895273
tensor([-3.0452]) 15.31383884856238 46.5442520624942
tensor([-2.1719]) 10.513081911778668 33.70292012136068
tensor([-2.2360]) 10.982356564803199 50.82534551553106
tensor([-2.0331]) 14.698812830526599 34.944050536168085
tensor([-2.4536]) 10.493594970680324 30.787306447587937
tensor([-6.2491]) 0.8862187721802829 0.08809326210609945
tensor([-2.3508]) 14.248636550049314 38.55614075761523
tensor([-1.2246]) 262.49632385764136 1104.4498448810755
tensor([-1.9424]) 12.18852233023381 58.42064347979275
tensor([-1.2680]) 17.730894021658784 149.1290710984456
tensor([-0.7283]) 103.6610553176997 392.6060791139896
tensor([-1.5229]) 20.97140193198374 71.63655011983889
tensor([-1.6660]) 12.258903970249484 49.575400147012985
tensor([-1.5479]) 13.92103561011443 39.35825250155078
tensor([-2.4305]) 11.275746424417642 31.905509165365363
tensor([-0.2563]) 12.874735493777337 44.10011888110944
tensor([-1.564

In [42]:
for i in range(len(deno_pred)):
    for j in range(len(deno_pred[i])):
        deno_pred[i][j]=float(deno_pred[i][j])
deno_pred


[tensor([40.5384], grad_fn=<CopySlices>),
 tensor([44.0737], grad_fn=<CopySlices>),
 tensor([33.2871], grad_fn=<CopySlices>),
 tensor([19.4688], grad_fn=<CopySlices>),
 tensor([37.1147], grad_fn=<CopySlices>),
 tensor([18.1134], grad_fn=<CopySlices>),
 tensor([16.0869], grad_fn=<CopySlices>),
 tensor([-0.9569], grad_fn=<CopySlices>),
 tensor([15.2743], grad_fn=<CopySlices>),
 tensor([907.1180], grad_fn=<CopySlices>),
 tensor([50.1646], grad_fn=<CopySlices>),
 tensor([129.1405], grad_fn=<CopySlices>),
 tensor([365.9274], grad_fn=<CopySlices>),
 tensor([45.4108], grad_fn=<CopySlices>),
 tensor([36.6822], grad_fn=<CopySlices>),
 tensor([27.9767], grad_fn=<CopySlices>),
 tensor([11.5234], grad_fn=<CopySlices>),
 tensor([35.5931], grad_fn=<CopySlices>),
 tensor([180.6326], grad_fn=<CopySlices>),
 tensor([69.2079], grad_fn=<CopySlices>),
 tensor([41.3855], grad_fn=<CopySlices>),
 tensor([50.1941], grad_fn=<CopySlices>),
 tensor([9.6958], grad_fn=<CopySlices>),
 tensor([309.9542], grad_fn=<Co

In [43]:
new_pred=[]
for i in range(1):
    new_pred.append([])
    for j in range(200):
        new_pred[i].append(float(deno_pred[j][i]))

In [44]:
act=act.transpose(0,1)
act.shape

torch.Size([1, 200])

tensor([[-1.3829,     nan, -2.3504,  ...,     nan,  0.7469,     nan],
        [-1.8033, -1.4076, -3.9960,  ...,  3.6882,  0.8189, -1.4243],
        [-1.2896, -1.4015, -2.2819,  ...,  3.7455,  0.8489, -1.4094],
        ...,
        [-1.3807,     nan, -2.2558,  ...,  3.4600,  0.9728,     nan],
        [-1.0184, -1.4947, -2.0200,  ...,  3.2164,  0.8149, -1.5013],
        [-1.0417, -1.5064, -3.0452,  ...,     nan,  0.5111, -1.5087]])

In [39]:
# act=act.T
result= pd.DataFrame({'S4_mape':mpl,'S4_smape':smpl})
result.to_csv('D:/timeseries/result/temp_2.csv')
res2= pd.DataFrame({'name':data.columns[1:],'day1/13_pred':act[0], 'day1/1_actu':new_pred[0]
                       })
res2.to_csv('D:/timeseries/result/temp.csv')