In [None]:
import torch
import torch.nn as nn
import math
import numpy as np
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
import pickle
import tqdm
import pandas as pd

In [None]:
class TemporalDependency(nn.Module):
    """LSTM encoder that returns the LSTM output at every time step.

    The wrapped ``nn.LSTM`` is created with ``batch_first=True``, so
    ``forward`` expects ``inputs`` laid out as ``(batch, steps, data_size)``
    and returns ``(batch, steps, layer_size)``.

    Args:
        data_size: number of features per time step (LSTM ``input_size``).
        layer_size: LSTM hidden size.
        layer_num: number of stacked LSTM layers.
        batch_size: default batch size used by ``init_hidden`` when the caller
            does not pass one explicitly.
        cuda: default placement used by ``init_hidden`` when no device is given.
    """

    def __init__(self,data_size,layer_size,layer_num,batch_size,cuda):
        super(TemporalDependency,self).__init__()
        self.batch_size = batch_size
        self.hidden_size = layer_size
        self.layer_num = layer_num
        # Stored as ``use_cuda``: assigning a bool to ``self.cuda`` would shadow
        # the ``nn.Module.cuda()`` method and make ``module.cuda()`` uncallable.
        self.use_cuda = cuda
        self.data_size = data_size
        self.Dependency = nn.LSTM(batch_first=True,
                                  input_size=self.data_size,
                                  num_layers=self.layer_num,
                                  hidden_size=self.hidden_size)

    def init_hidden(self,cuda,batch_size=None,device=None,dtype=None):
        """Return a fresh all-zero ``(h_0, c_0)`` pair for the LSTM.

        Args:
            cuda: when ``device`` is not given, place the state on CUDA if true,
                otherwise on the CPU.
            batch_size: batch dimension of the state; defaults to the
                ``batch_size`` given to the constructor.
            device: explicit device for the state (overrides ``cuda``).
            dtype: explicit dtype for the state (torch default when ``None``).

        Returns:
            Tuple of two tensors shaped ``(layer_num, batch_size, hidden_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        if device is None:
            device = torch.device('cuda' if cuda else 'cpu')
        shape = (self.layer_num, batch_size, self.Dependency.hidden_size)
        # The state is re-created on every forward pass and never optimised, so
        # it does not need ``requires_grad``.
        return (torch.zeros(shape, device=device, dtype=dtype),
                torch.zeros(shape, device=device, dtype=dtype))

    def forward(self,inputs):
        """Run the LSTM over ``inputs`` starting from a zero state.

        The initial state is sized and placed after ``inputs`` itself, so any
        batch size (e.g. a smaller final batch) and any device work.
        """
        hidden = self.init_hidden(self.use_cuda,
                                  batch_size=inputs.size(0),
                                  device=inputs.device,
                                  dtype=inputs.dtype)
        output, _ = self.Dependency(inputs,hidden)
        return output
    
class ContentBasedAttention(nn.Module):
    """Additive (content-based) attention over the states of an LSTM cell.

    ``forward`` feeds ``period_length`` consecutive slices ``inputs[p]`` through
    an ``nn.LSTMCell``, scores every resulting hidden state ``h_p`` against
    ``target`` with ``v^T tanh(Wh h_p + Wx target + bx)``, soft-maxes the scores
    over the steps and returns the score-weighted sum of the hidden states.

    Changes from the earlier revision:
      * ``Wh``/``Wx``/``bx``/``v`` used to be all-zero. With ``v == 0`` and
        ``tanh(0) == 0`` every gradient through the score is exactly zero, so
        the attention could never move away from uniform weights. ``Wh``, ``Wx``
        and ``v`` are now randomly initialised.
      * The weights used to carry a leading ``batch_size`` axis (one independent
        set per batch slot). They are now shared by all samples, which also
        lets ``forward`` accept any batch size.

    Args:
        period_length: number of steps read from ``inputs`` in ``forward``.
        data_length: feature size of every step (LSTMCell ``input_size``).
        layer_size: hidden size of the cell and of ``target``.
        batch_size: default batch size used by ``init_hidden``.
        cuda: default placement used by ``init_hidden``.
    """

    def __init__(self,period_length,data_length,layer_size,batch_size,cuda):
        super(ContentBasedAttention,self).__init__()
        self.batch_size = batch_size
        self.hidden_size = layer_size
        # ``use_cuda`` instead of ``cuda`` so nn.Module.cuda() stays callable.
        self.use_cuda = cuda
        self.period_length = period_length
        self.data_length = data_length
        self.Attention = nn.LSTMCell(input_size=self.data_length,
                                    hidden_size=layer_size)

        self.Wh = torch.nn.Parameter(torch.empty(layer_size,layer_size))
        self.Wx = torch.nn.Parameter(torch.empty(layer_size,layer_size))
        self.bx = torch.nn.Parameter(torch.zeros(layer_size))
        self.v = torch.nn.Parameter(torch.empty(layer_size))
        nn.init.xavier_uniform_(self.Wh)
        nn.init.xavier_uniform_(self.Wx)
        bound = 1.0 / math.sqrt(layer_size)
        nn.init.uniform_(self.v, -bound, bound)

        # Scores are stacked as (steps, batch), hence the softmax over dim 0.
        self.softmax = nn.Softmax(dim=0)

    def init_hidden(self,cuda,batch_size=None,device=None,dtype=None):
        """Return a fresh all-zero ``(h, c)`` pair for the LSTM cell.

        Args:
            cuda: when ``device`` is not given, use CUDA if true, else the CPU.
            batch_size: batch dimension; defaults to the constructor value.
            device: explicit device for the state (overrides ``cuda``).
            dtype: explicit dtype for the state.

        Returns:
            Tuple of two tensors shaped ``(batch_size, hidden_size)``.
        """
        if batch_size is None:
            batch_size = self.batch_size
        if device is None:
            device = torch.device('cuda' if cuda else 'cpu')
        shape = (batch_size, self.Attention.hidden_size)
        return (torch.zeros(shape, device=device, dtype=dtype),
                torch.zeros(shape, device=device, dtype=dtype))

    def forward(self,inputs,target):
        """Attend over the cell states produced from ``inputs``.

        Args:
            inputs: indexable along its first axis; ``inputs[p]`` must be a
                ``(batch, data_length)`` tensor for ``p < period_length``.
            target: ``(batch, layer_size)`` query tensor.

        Returns:
            Tensor of shape ``(batch, 1, layer_size)``.
        """
        hidden = self.init_hidden(self.use_cuda,
                                  batch_size=target.size(0),
                                  device=target.device,
                                  dtype=target.dtype)
        # The query term does not depend on the step, so compute it once.
        target_term = target @ self.Wx.t() + self.bx
        score = []
        h_ps = []
        for p in range(self.period_length):
            h_p, cell = self.Attention(inputs[p],hidden)
            # (batch, layer_size) @ (layer_size,) -> one score per sample.
            score.append(torch.tanh(h_p @ self.Wh.t() + target_term) @ self.v)
            h_ps.append(h_p)
            hidden = (h_p,cell)
        score = torch.stack(score)                  # (steps, batch)
        h_ps = torch.stack(h_ps).permute(1,0,2)     # (batch, steps, layer_size)
        softmax_s = self.softmax(score).permute(1,0)  # (batch, steps)
        represent = softmax_s.unsqueeze(1).bmm(h_ps)

        return represent
    
class Regressor(nn.Module):
    """Single linear layer followed by ReLU.

    Because of the ReLU the output is never negative.

    Args:
        input_shape: number of input features of the linear layer.
        output_shape: number of output features of the linear layer.
        cuda: placement flag, only stored on the instance.
    """

    def __init__(self,input_shape,output_shape,cuda):
        super(Regressor,self).__init__()
        self.input_shape = input_shape
        self.output_shape = output_shape
        # Kept as ``use_cuda``: a bool stored in ``self.cuda`` would shadow
        # nn.Module.cuda() and make ``module.cuda()`` raise.
        self.use_cuda = cuda
        self.layer = nn.Linear(self.input_shape,self.output_shape)
        self.activation = nn.ReLU()

    def forward(self,inputs):
        """Return ``relu(layer(inputs))``."""
        return self.activation(self.layer(inputs))
    
class TemporalDynamicNetwork(nn.Module):
    """Short-term LSTM plus an optional periodic attention branch.

    ``forward`` encodes ``x`` with ``ShortTerm`` and keeps the last step. When
    ``PSAM`` is true, every element of ``att`` is summarised by ``Attention``
    (queried with that short-term representation), the summaries are run
    through ``LongTerm`` and the last step is added to the short-term
    representation before regression.

    Args:
        data_size: number of features per time step.
        layer_size: hidden size shared by all sub-modules.
        period_length: stored on the instance (not read by ``forward``).
        term_length: number of steps the attention module reads per period.
        batch_size: default batch size handed to the sub-modules.
        PSAM: enable the attention / long-term branch.
        cuda: placement flag handed to the sub-modules.
    """

    def __init__(self,data_size,layer_size,period_length,term_length,batch_size,PSAM,cuda):
        super(TemporalDynamicNetwork,self).__init__()
        self.data_size = data_size
        self.layer_size = layer_size
        self.period_length = period_length
        self.term_length = term_length
        self.batch_size = batch_size
        self.PSAM = PSAM
        # ``use_cuda`` rather than ``cuda`` so nn.Module.cuda() is not shadowed.
        self.use_cuda = cuda

        self.ShortTerm = TemporalDependency(data_size=data_size,layer_size=layer_size,layer_num=1,batch_size=batch_size,cuda=cuda)

        self.LongTerm = TemporalDependency(data_size=layer_size,layer_size=layer_size,layer_num=1,batch_size=batch_size,cuda=cuda)

        # The attention module iterates over ``term_length`` steps per period.
        self.Attention = ContentBasedAttention(period_length=term_length,data_length=data_size,layer_size=layer_size,batch_size=batch_size,cuda=cuda)

        self.regressor = Regressor(input_shape=layer_size,output_shape=data_size,cuda=cuda)

    def forward(self,att,x,y):
        """Predict from ``x`` (and ``att``) and score the prediction against ``y``.

        Args:
            att: iterable of per-period inputs for the attention module; only
                read when ``PSAM`` is true.
            x: input of the short-term encoder.
            y: regression target, compared with the prediction by mean MSE.

        Returns:
            ``(loss, pred)`` where ``loss`` is a scalar tensor attached to the
            graph and ``pred`` is the prediction as a detached NumPy array.
        """
        short_rep = self.ShortTerm(x)[:,-1,:]
        if self.PSAM:
            att_rep = torch.stack([self.Attention(a,short_rep) for a in att])
            att_rep = att_rep.permute(1,0,2,3)
            # Drop only the singleton axis produced by the attention module. A
            # bare squeeze() would also remove the period or batch axis whenever
            # either has size 1 and break the indexing below.
            long_rep = self.LongTerm(att_rep.squeeze(2))[:,-1,:]
            pred = self.regressor(short_rep+long_rep)
        else:
            pred = self.regressor(short_rep)

        loss_fn = torch.nn.MSELoss(reduction='mean')
        loss = loss_fn(pred, y)

        return loss,pred.detach().cpu().numpy()
    
def Train(total_train_batches,model,optimizer,dataLoader):
    """Run ``total_train_batches`` optimisation steps on freshly sampled batches.

    Each batch comes from ``dataLoader.sample_train_batch()`` as three NumPy
    arrays ``(att, x, y)``. ``att`` must be 4-D and is permuted with
    ``(1, 2, 0, 3)`` before it is handed to the model.

    The model and every batch are moved to CUDA when it is available and to the
    CPU otherwise. The tensors used to be sent to CUDA unconditionally, which
    crashed on CPU-only machines.

    Args:
        total_train_batches: number of batches / optimiser steps.
        model: module whose call returns ``(loss, pred)`` for ``(att, x, y)``.
        optimizer: optimiser bound to ``model``'s parameters.
        dataLoader: object providing ``sample_train_batch()``.

    Returns:
        ``(mean_loss, pred)``: the batch losses averaged over all batches and
        the prediction of the last batch (``None`` if no batch was run).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    total_loss = 0
    pred = None
    for _ in range(total_train_batches):
        att,x,y = dataLoader.sample_train_batch()
        att = torch.from_numpy(att).float().permute(1,2,0,3).to(device)
        x = torch.from_numpy(x).float().to(device)
        y = torch.from_numpy(y).float().to(device)

        loss,pred = model(att,x,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()/total_train_batches
    return total_loss,pred

def Test(total_batches,model,optimizer,dataLoader):
    """Evaluate ``model`` on ``total_batches`` batches from the test sampler.

    Each batch comes from ``dataLoader.sample_test_batch()`` as three NumPy
    arrays ``(att, x, y)``. ``att`` must be 4-D and is permuted with
    ``(1, 2, 0, 3)`` before it is handed to the model.

    The model and every batch are moved to CUDA when it is available and to the
    CPU otherwise. The pass runs under ``torch.no_grad()`` because nothing is
    back-propagated here.

    Args:
        total_batches: number of batches to evaluate.
        model: module whose call returns ``(loss, pred)`` for ``(att, x, y)``.
        optimizer: unused; kept so the signature mirrors ``Train``.
        dataLoader: object providing ``sample_test_batch()``.

    Returns:
        ``(mean_loss, pred)``: the batch losses averaged over all batches and
        the prediction of the LAST batch only (``None`` if no batch was run).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    total_loss = 0
    pred = None
    with torch.no_grad():
        for _ in range(total_batches):
            att,x,y = dataLoader.sample_test_batch()
            att = torch.from_numpy(att).float().permute(1,2,0,3).to(device)
            x = torch.from_numpy(x).float().to(device)
            y = torch.from_numpy(y).float().to(device)

            loss,pred = model(att,x,y)
            total_loss += loss.item()/total_batches

    return total_loss,pred

In [None]:
class NYCLoader():
    """Batch sampler for the NYC taxi / bike volume series of one grid cell.

    ``init_data`` picks a random grid cell and loads its train and test series.
    ``sample_train_batch`` draws random positions from the train series and
    ``sample_test_batch`` walks through the test series in order.

    Every sample consists of
      * ``interval_length`` consecutive steps starting at position ``i``,
      * the single step at ``i + interval_length + 1`` as regression target,
      * for each of the ``period_length`` previous periods, a window of
        ``2 * int(term_length / 2) + 1`` steps centred ``48 * d`` steps before
        ``i``.

    The period windows are ordered oldest first. ``reverse()`` used to be
    called inside the period loop, which scrambled the order instead.
    """

    # Distance in samples between two consecutive periods
    # (presumably 48 half-hour bins per day -- TODO confirm against the data).
    PERIOD_STEPS = 48
    # Number of time steps the train / test series are assumed to contain.
    TRAIN_STEPS = 1920
    TEST_STEPS = 960

    def __init__(self,batch_size,period_length,term_length,interval_length,seed,data_source):
        self.batch_size = batch_size
        self.period_length = period_length
        self.term_length = term_length
        self.interval_length = interval_length
        self.seed = seed
        self.source = data_source
        self.pointer = {'train': 0, 'test': 0}
        # Seeds NumPy's global generator: region choice and batches depend on it.
        np.random.seed(self.seed)

    def _first_index(self):
        """Smallest position that still has every period window before it."""
        return int(self.PERIOD_STEPS*self.period_length) + int(self.term_length/2)

    def _load_region(self,path):
        """Load ``volume`` from an ``.npz`` file and keep the selected grid cell."""
        # The context manager closes the archive; the previous
        # ``np.load(open(...))`` never closed the file handle.
        with np.load(path) as archive:
            return archive['volume'][:,self.x,self.y,:]

    def _build_batch(self,data,indices):
        """Cut attention windows, input sequences and targets out of ``data``."""
        half = int(self.term_length/2)
        attention_samples = []
        prediction_samples = []
        result = []

        for i in indices:
            prediction_samples.append(data[i:i+self.interval_length])
            result.append(data[i+self.interval_length+1])
            # Farthest period first, so the windows are in chronological order.
            attention_samples.append([
                data[i-int(self.PERIOD_STEPS*d)-half:i-int(self.PERIOD_STEPS*d)+half+1]
                for d in range(self.period_length,0,-1)
            ])

        return np.array(attention_samples),np.array(prediction_samples),np.array(result)

    def init_data(self):
        """Pick a random grid cell, load its series and return the usable lengths.

        Returns:
            ``(train_data_length, test_data_length)``: number of valid sample
            positions in the train and test series.
        """
        self.x = np.random.randint(10)
        self.y = np.random.randint(20)
        print('selected region: ({},{})'.format(self.x,self.y))
        if self.source=='taxi':
            train_file, test_file = 'volume_train.npz', 'volume_test.npz'
        else:
            # The bike test split used to be read from 'bike_volume_train.npz',
            # i.e. the model was evaluated on its own training data.
            # NOTE(review): file name inferred from the taxi naming -- confirm.
            train_file, test_file = 'bike_volume_train.npz', 'bike_volume_test.npz'

        self.train_data = self._load_region(train_file)
        self.test_data = self._load_region(test_file)

        reserved = self.interval_length + self._first_index() + 1
        train_data_length = self.TRAIN_STEPS - reserved
        test_data_length = self.TEST_STEPS - reserved

        return train_data_length,test_data_length

    def sample_train_batch(self):
        """Return ``batch_size`` randomly positioned training samples.

        Returns:
            ``(attention_samples, prediction_samples, result)`` as NumPy arrays;
            see the class docstring for their contents.
        """
        available_start = self._first_index()
        available_end = self.TRAIN_STEPS - self.interval_length - 1
        rand_samples = np.random.randint(available_start,available_end,self.batch_size)
        return self._build_batch(self.train_data,rand_samples)

    def sample_test_batch(self):
        """Return the next ``batch_size`` consecutive test samples.

        ``pointer['test']`` advances by ``batch_size`` per call and wraps to 0
        once another full batch would run past the end of the series.
        """
        available_start = self._first_index()
        available_end = self.TEST_STEPS - self.interval_length - 1
        offset = available_start + self.pointer['test']
        samples = np.arange(offset,offset + self.batch_size)
        self.pointer['test'] += self.batch_size
        if available_start + self.pointer['test'] + self.batch_size > available_end:
            self.pointer['test'] = 0
        return self._build_batch(self.test_data,samples)

In [None]:
class ElectricityLoader():
    """Batch sampler for the consumption series of one randomly chosen user.

    ``init_data`` reads ``Electricity.txt``, sums every 4 consecutive readings
    and splits the result 70 % / 30 % into train and test series.

    Every sample consists of
      * ``interval_length`` consecutive steps starting at position ``i``,
      * the single step at ``i + interval_length + 1`` as regression target,
      * for each of the ``period_length`` previous periods, a window of
        ``2 * int(term_length / 2) + 1`` steps centred ``24 * d`` steps before
        ``i``.

    The attention windows and input sequences are min-max scaled per batch; the
    targets are returned unscaled. The period windows are ordered oldest first.
    ``reverse()`` used to be called inside the period loop, which scrambled the
    order instead.
    """

    # Distance in samples between two consecutive periods
    # (presumably 24 hourly values per day -- TODO confirm against the data).
    PERIOD_STEPS = 24
    # Number of time steps the train / test series are assumed to contain.
    TRAIN_STEPS = 6132
    TEST_STEPS = 2628

    def __init__(self,batch_size,period_length,term_length,interval_length,seed):
        self.batch_size = batch_size
        self.period_length = period_length
        self.term_length = term_length
        self.interval_length = interval_length
        self.seed = seed
        self.pointer = {'train': 0, 'test': 0}
        # Seeds NumPy's global generator: user choice and batches depend on it.
        np.random.seed(self.seed)

    def _first_index(self):
        """Smallest position that still has every period window before it."""
        return int(self.PERIOD_STEPS*self.period_length) + int(self.term_length/2)

    @staticmethod
    def _min_max_scale(samples):
        """Scale ``samples`` to [0, 1] using the batch-wide minimum and maximum."""
        samples = np.array(samples)
        low = np.min(samples)
        span = np.max(samples) - low
        # A constant batch would divide by zero; it is mapped to all zeros.
        return (samples - low)/(span if span != 0 else 1)

    def _build_batch(self,data,indices):
        """Cut attention windows, input sequences and targets out of ``data``."""
        half = int(self.term_length/2)
        attention_samples = []
        prediction_samples = []
        result = []

        for i in indices:
            prediction_samples.append(data[i:i+self.interval_length])
            result.append(data[i+self.interval_length+1])
            # Farthest period first, so the windows are in chronological order.
            attention_samples.append([
                data[i-int(self.PERIOD_STEPS*d)-half:i-int(self.PERIOD_STEPS*d)+half+1]
                for d in range(self.period_length,0,-1)
            ])

        attention_samples = self._min_max_scale(attention_samples)
        prediction_samples = self._min_max_scale(prediction_samples)
        result = np.array(result)

        return attention_samples,prediction_samples,result

    def init_data(self):
        """Pick a random user, load and split the series, return usable lengths.

        Returns:
            ``(train_data_length, test_data_length)``: number of valid sample
            positions in the train and test series.
        """
        self.user = np.random.randint(370)
        print('selected user: {}'.format(self.user))

        # Rows -35041 .. -2 of the selected user's column, i.e. 35040 readings.
        raw_data = pd.read_table('Electricity.txt',sep=';',index_col=0).iloc[-35041:-1,self.user].values
        # The values are strings with a decimal comma.
        data = np.array([item.replace(',', '.') for item in raw_data], dtype=np.float32)
        # Aggregate every 4 consecutive readings into one value.
        data = np.array([np.sum(data[i:i+4]) for i in range(0,len(data),4)])

        split = int(len(data)*0.7)
        self.train_data = data[:split].reshape(-1,1)
        self.test_data = data[split:].reshape(-1,1)

        reserved = self.interval_length + self._first_index() + 1
        train_data_length = self.TRAIN_STEPS - reserved
        test_data_length = self.TEST_STEPS - reserved

        return train_data_length,test_data_length

    def sample_train_batch(self):
        """Return ``batch_size`` randomly positioned training samples.

        Returns:
            ``(attention_samples, prediction_samples, result)`` as NumPy arrays;
            see the class docstring for their contents.
        """
        available_start = self._first_index()
        available_end = self.TRAIN_STEPS - self.interval_length - 1
        rand_samples = np.random.randint(available_start,available_end,self.batch_size)
        return self._build_batch(self.train_data,rand_samples)

    def sample_test_batch(self):
        """Return the next ``batch_size`` consecutive test samples.

        ``pointer['test']`` advances by ``batch_size`` per call and wraps to 0
        once another full batch would run past the end of the series.
        """
        available_start = self._first_index()
        available_end = self.TEST_STEPS - self.interval_length - 1
        offset = available_start + self.pointer['test']
        samples = np.arange(offset,offset + self.batch_size)
        self.pointer['test'] += self.batch_size
        if available_start + self.pointer['test'] + self.batch_size > available_end:
            self.pointer['test'] = 0
        return self._build_batch(self.test_data,samples)

In [None]:
class SolarLoader():
    """Batch sampler for the ``NY_Solar.csv`` series.

    ``init_data`` reads the CSV, sums every block of 6 consecutive rows into
    one value and splits the result 70 % / 30 % into train and test series.

    Every sample consists of
      * ``interval_length`` consecutive steps starting at position ``i``,
      * the single step at ``i + interval_length + 1`` as regression target,
      * for each of the ``period_length`` previous periods, a window of
        ``2 * int(term_length / 2) + 1`` steps centred ``48 * d`` steps before
        ``i``.

    The attention windows and input sequences are min-max scaled per batch; the
    targets are returned unscaled. The period windows are ordered oldest first.
    ``reverse()`` used to be called inside the period loop, which scrambled the
    order instead.
    """

    # Distance in samples between two consecutive periods
    # (presumably 48 half-hour values per day -- TODO confirm against the data).
    PERIOD_STEPS = 48
    # Number of time steps the train / test series are assumed to contain.
    TRAIN_STEPS = 12264
    TEST_STEPS = 5256

    def __init__(self,batch_size,period_length,term_length,interval_length,seed):
        self.batch_size = batch_size
        self.period_length = period_length
        self.term_length = term_length
        self.interval_length = interval_length
        self.seed = seed
        self.pointer = {'train': 0, 'test': 0}
        # Seeds NumPy's global generator: the training batches depend on it.
        np.random.seed(self.seed)

    def _first_index(self):
        """Smallest position that still has every period window before it."""
        return int(self.PERIOD_STEPS*self.period_length) + int(self.term_length/2)

    @staticmethod
    def _min_max_scale(samples):
        """Scale ``samples`` to [0, 1] using the batch-wide minimum and maximum."""
        samples = np.array(samples)
        low = np.min(samples)
        span = np.max(samples) - low
        # A constant batch would divide by zero; it is mapped to all zeros.
        return (samples - low)/(span if span != 0 else 1)

    def _build_batch(self,data,indices):
        """Cut attention windows, input sequences and targets out of ``data``."""
        half = int(self.term_length/2)
        attention_samples = []
        prediction_samples = []
        result = []

        for i in indices:
            prediction_samples.append(data[i:i+self.interval_length])
            result.append(data[i+self.interval_length+1])
            # Farthest period first, so the windows are in chronological order.
            attention_samples.append([
                data[i-int(self.PERIOD_STEPS*d)-half:i-int(self.PERIOD_STEPS*d)+half+1]
                for d in range(self.period_length,0,-1)
            ])

        attention_samples = self._min_max_scale(attention_samples)
        prediction_samples = self._min_max_scale(prediction_samples)
        result = np.array(result)

        return attention_samples,prediction_samples,result

    def init_data(self):
        """Load and split the series and return the usable lengths.

        Returns:
            ``(train_data_length, test_data_length)``: number of valid sample
            positions in the train and test series.
        """
        raw_data = pd.read_csv('NY_Solar.csv',index_col=0).values
        # np.sum without an axis adds up ALL values of the 6-row block,
        # i.e. across every column as well.
        data = np.array([np.sum(raw_data[i:i+6]) for i in range(0,len(raw_data),6)])

        split = int(len(data)*0.7)
        self.train_data = data[:split].reshape(-1,1)
        self.test_data = data[split:].reshape(-1,1)

        reserved = self.interval_length + self._first_index() + 1
        train_data_length = self.TRAIN_STEPS - reserved
        test_data_length = self.TEST_STEPS - reserved

        return train_data_length,test_data_length

    def sample_train_batch(self):
        """Return ``batch_size`` randomly positioned training samples.

        Returns:
            ``(attention_samples, prediction_samples, result)`` as NumPy arrays;
            see the class docstring for their contents.
        """
        available_start = self._first_index()
        available_end = self.TRAIN_STEPS - self.interval_length - 1
        rand_samples = np.random.randint(available_start,available_end,self.batch_size)
        return self._build_batch(self.train_data,rand_samples)

    def sample_test_batch(self):
        """Return the next ``batch_size`` consecutive test samples.

        ``pointer['test']`` advances by ``batch_size`` per call and wraps to 0
        once another full batch would run past the end of the series.
        """
        available_start = self._first_index()
        available_end = self.TEST_STEPS - self.interval_length - 1
        offset = available_start + self.pointer['test']
        samples = np.arange(offset,offset + self.batch_size)
        self.pointer['test'] += self.batch_size
        if available_start + self.pointer['test'] + self.batch_size > available_end:
            self.pointer['test'] = 0
        return self._build_batch(self.test_data,samples)

In [None]:
# Experiment: NYC taxi volume, network WITH the periodic attention branch (PSAM=True).
NYC = NYCLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=111,data_source='taxi')
train_length, test_length = NYC.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (9 for these settings).
total_test_batches = test_length // NYC.batch_size

TTD_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TDNet = TemporalDynamicNetwork(data_size=2,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=True,cuda=torch.cuda.is_available())
    TDNet_optimizer = torch.optim.Adam(TDNet.parameters(), lr=1e-3)
    TD_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TDNet,TDNet_optimizer,NYC)
        # Train returns the mean batch MSE; its square root is stored.
        TD_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TDNet,TDNet_optimizer,NYC)
    test_losses.append(test)
    predictions.append(pred)

    TTD_train_loss_history.append(TD_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(TD_train_loss)),TD_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: NYC taxi volume, network WITHOUT the periodic attention branch (PSAM=False).
NYC = NYCLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=111,data_source='taxi')
train_length, test_length = NYC.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (9 for these settings).
total_test_batches = test_length // NYC.batch_size

TT_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TNet = TemporalDynamicNetwork(data_size=2,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=False,cuda=torch.cuda.is_available())
    TNet_optimizer = torch.optim.Adam(TNet.parameters(), lr=1e-3)
    T_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TNet,TNet_optimizer,NYC)
        # Train returns the mean batch MSE; its square root is stored.
        T_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TNet,TNet_optimizer,NYC)
    test_losses.append(test)
    predictions.append(pred)

    TT_train_loss_history.append(T_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(T_train_loss)),T_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: NYC bike volume, network WITH the periodic attention branch (PSAM=True).
NYC = NYCLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=1111,data_source='bike')
train_length, test_length = NYC.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (9 for these settings).
total_test_batches = test_length // NYC.batch_size

TTB_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TDNet = TemporalDynamicNetwork(data_size=2,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=True,cuda=torch.cuda.is_available())
    TDNet_optimizer = torch.optim.Adam(TDNet.parameters(), lr=1e-3)
    TD_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TDNet,TDNet_optimizer,NYC)
        # Train returns the mean batch MSE; its square root is stored.
        TD_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TDNet,TDNet_optimizer,NYC)
    test_losses.append(test)
    predictions.append(pred)

    TTB_train_loss_history.append(TD_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(TD_train_loss)),TD_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: NYC bike volume, network WITHOUT the periodic attention branch (PSAM=False).
NYC = NYCLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=1111,data_source='bike')
train_length, test_length = NYC.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (9 for these settings).
total_test_batches = test_length // NYC.batch_size

TB_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TNet = TemporalDynamicNetwork(data_size=2,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=False,cuda=torch.cuda.is_available())
    TNet_optimizer = torch.optim.Adam(TNet.parameters(), lr=1e-3)
    T_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TNet,TNet_optimizer,NYC)
        # Train returns the mean batch MSE; its square root is stored.
        T_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TNet,TNet_optimizer,NYC)
    test_losses.append(test)
    predictions.append(pred)

    TB_train_loss_history.append(T_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(T_train_loss)),T_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: electricity consumption, network WITH the periodic attention branch (PSAM=True).
Ele = ElectricityLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=1111)
train_length, test_length = Ele.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (38 for these settings).
total_test_batches = test_length // Ele.batch_size

TTE_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TDNet = TemporalDynamicNetwork(data_size=1,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=True,cuda=torch.cuda.is_available())
    TDNet_optimizer = torch.optim.Adam(TDNet.parameters(), lr=1e-3)
    TD_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TDNet,TDNet_optimizer,Ele)
        # Train returns the mean batch MSE; its square root is stored.
        TD_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TDNet,TDNet_optimizer,Ele)
    test_losses.append(test)
    predictions.append(pred)

    TTE_train_loss_history.append(TD_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(TD_train_loss)),TD_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: electricity consumption, network WITHOUT the periodic attention branch (PSAM=False).
Ele = ElectricityLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=1111)
train_length, test_length = Ele.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (38 for these settings).
total_test_batches = test_length // Ele.batch_size

TE_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TNet = TemporalDynamicNetwork(data_size=1,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=False,cuda=torch.cuda.is_available())
    TNet_optimizer = torch.optim.Adam(TNet.parameters(), lr=1e-3)
    T_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TNet,TNet_optimizer,Ele)
        # Train returns the mean batch MSE; its square root is stored.
        T_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TNet,TNet_optimizer,Ele)
    test_losses.append(test)
    predictions.append(pred)

    TE_train_loss_history.append(T_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(T_train_loss)),T_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: NY solar series, network WITH the periodic attention branch (PSAM=True).
Solar = SolarLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=547)
train_length, test_length = Solar.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (76 for these settings).
total_test_batches = test_length // Solar.batch_size

TTS_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TDNet = TemporalDynamicNetwork(data_size=1,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=True,cuda=torch.cuda.is_available())
    TDNet_optimizer = torch.optim.Adam(TDNet.parameters(), lr=1e-3)
    TD_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TDNet,TDNet_optimizer,Solar)
        # Train returns the mean batch MSE; its square root is stored.
        TD_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TDNet,TDNet_optimizer,Solar)
    test_losses.append(test)
    predictions.append(pred)

    TTS_train_loss_history.append(TD_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(TD_train_loss)),TD_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()

In [None]:
# Experiment: NY solar series, network WITHOUT the periodic attention branch (PSAM=False).
Solar = SolarLoader(batch_size=64,period_length=7,term_length=5,interval_length=7,seed=547)
train_length, test_length = Solar.init_data()
print('train_length:{}'.format(train_length))
print('test_length:{}'.format(test_length))

total_train_batches = 128
episode = 500
# Whole sequential batches that fit into the test series (76 for these settings).
total_test_batches = test_length // Solar.batch_size

TS_train_loss_history =[]
predictions = []
test_losses = []
for run in range(10):
    # Follow the hardware instead of hard-coding cuda=True.
    TNet = TemporalDynamicNetwork(data_size=1,layer_size=32,period_length=7,term_length=5,batch_size=64,PSAM=False,cuda=torch.cuda.is_available())
    TNet_optimizer = torch.optim.Adam(TNet.parameters(), lr=1e-3)
    T_train_loss = []
    for epi in range(episode):
        print('current episode: {}'.format(epi))
        train,_ = Train(total_train_batches,TNet,TNet_optimizer,Solar)
        # Train returns the mean batch MSE; its square root is stored.
        T_train_loss.append(np.sqrt(train))

    test,pred = Test(total_test_batches,TNet,TNet_optimizer,Solar)
    test_losses.append(test)
    predictions.append(pred)

    TS_train_loss_history.append(T_train_loss)
    # x-axis follows the recorded history instead of a second hard-coded 500.
    plt.plot(np.arange(len(T_train_loss)),T_train_loss)
    plt.xlabel('episode')
    plt.ylabel('sqrt(mean train MSE)')
    plt.show()