In [2]:
'''
Author: Wenbo Yu
This project is used to do regression of sensor data collected by vehicles.
Sensors are divided in to control sensors which are X, and dependent sensors which are Y.
The network used is LSTM. Besides, CNN and NN examples are provided.
The script could be directly ran with the provided picke data.

Version information:
PyTorch = 1.4.0
'''
import time
import os
import numpy as np
import torch.nn as nn
import pickle
import random
import torch
from torch.autograd import Variable

# Pytorch LSTM network
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, batch_size=1, num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.output_dim = output_dim
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        # Define output layer
        self.linear = nn.Linear(self.hidden_dim, self.output_dim)
        self.tanh = nn.Tanh()

    def init_hidden(self): # Not required
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim).cuda(), torch.zeros(self.num_layers, self.batch_size, self.hidden_dim).cuda())

    def point_grad_to(self, target):
        '''
        #### This function is important for meta learning. If other model is used, this one could be directly copied into other models ###
        Set .grad attribute of each parameter to be proportional
        to the difference between self and target
        '''
        for p, target_p in zip(self.parameters(), target.parameters()):
            if p.grad is None:
                # if self.is_cuda():
                p.grad = Variable(torch.zeros(p.size())).cuda()
                # else:
                #     p.grad = Variable(torch.zeros(p.size()))
            p.grad.data.zero_()  # not sure this is required
            p.grad.data.add_(p.data - target_p.data)

    def forward(self, input, hidden_state):
        # shape of input  (seq_length, batch, input_dim)
        # shape of output (seq_length, batch, hidden_dim)
        lstm_out, hidden_state = self.lstm(input)
        self.hidden = hidden_state
        y_pred = self.linear(lstm_out)
        y_pred = self.tanh(y_pred)
        return y_pred, hidden_state

###### Example of Convolutional network ######
# class Generator(nn.Module):
#     def __init__(self):
#         super(Generator, self).__init__()
#         self.l1 = nn.Sequential(
#             nn.Linear(35, 256),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#             )
#         self.l2 = nn.Sequential(
#             nn.Linear(256, 256),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#             )
#         self.l3 = nn.Sequential(
#             nn.Linear(256, 64),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#             )
#         self.l4 = nn.Sequential(
#             nn.Linear(64, 1),
#             nn.Tanh()
#             )
#
#     def forward(self, input):
#         x = self.l1(input)
#         x = self.l2(x)
#         x = self.l3(x)
#         output = self.l4(x)
#         return output

###### Example of Linear network ######
# class NN(nn.Module):
#     def __init__(self, features):
#         super(NN, self).__init__()
#         self.n = 3000
#         self.features = features
#         self.l1 = nn.Sequential(
#             nn.Linear(self.features, self.n),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#         )
#         self.l2 = nn.Sequential(
#             nn.Linear(self.n, self.n),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#             )
#         self.l3 = nn.Sequential(
#             nn.Linear(self.n, 100),
#             # nn.BatchNorm1d(256),
#             nn.LeakyReLU(0.01),
#         )
#         self.l4 = nn.Sequential(
#             nn.Linear(100, 1),
#             nn.Sigmoid()
#         )

#     def forward(self, input):
#         x = self.l1(input)
#         x = self.l2(x)
#         x = self.l3(x)
#         output = self.l4(x)
#         return output


models = [607, 612] # Also refer to tasks/classes, indicating how many sources of data are used
# models = [607, 611, 612, 613, 614, 615, 616, 617, 618, 619]
sensor = 14 # The selected target sensor
traininig_size = 1000 # Number of training data points for eacg task
test_size = 200
chosen_size = traininig_size + test_size
data_path = 'meta_pickle'
X = {}
Y = {}
flag = 0

# Built training and test sets
for model in models:
    with open(os.path.join(data_path, '%d.pkl' % model), 'rb') as f:
        all_data = pickle.load(f)
        chosen_idx = random.sample([item for item in range(all_data.shape[0])], chosen_size)
        chosen_data = all_data[chosen_idx, :, :]
        # Put np data to tensor
        all_data = torch.from_numpy(chosen_data).to(dtype=torch.float32)
        x = all_data[:, :, :14]
        y = all_data[:, :, 14:]
    X[model] = {}
    Y[model] = {}
    X[model]['training'] = x[:traininig_size, :, :]
    Y[model]['training'] = y[:traininig_size, :, :]
    if flag == 0:
        x_test = x[traininig_size:, :, :]
        y_test = y[traininig_size:, :, :]
        flag = 1
    else:
        x_test = torch.cat([x_test, x[traininig_size:, :, :]], dim=0)
        y_test = torch.cat([y_test, y[traininig_size:, :, :]], dim=0)

# Start training
# Set hyperparameters
task_num = len(models)
Step = 10000
batch_size = 5
meta_lr = 1e-5
update_lr = 1e-5
update_step = 5
input_dim = 14
hidden_dim = 500
output_dim = 16
net = LSTM(input_dim, hidden_dim, output_dim).cuda()

# Optimizers
meta_optimizer = torch.optim.Adam(net.parameters(), lr=meta_lr)
L1Loss = torch.nn.MSELoss()

x_test = x_test.cuda().transpose(0, 1) # Required for LSTM network, the input dimension is [length, batch, feature]
y_test = y_test.cuda()

print('Start training')
for step in range(Step):
    meta_loss_total = 0
    task_order = [i for i in range(len(models))]
    random.shuffle(task_order)
    for i in task_order:
        # Choose data
        vehicle = models[i]
        num = X[vehicle]['training'].shape[0]
        batch_list = [item for item in range(traininig_size)]
        batch_content = random.sample(batch_list, batch_size)
        x_batch = X[vehicle]['training'][batch_content, :].cuda().transpose(0, 1)
        y_batch = Y[vehicle]['training'][batch_content, :].cuda()

        # Initialize network. This network 'net_i' is used to train several steps, and the learnt parameters are used to update 'net'
        net_i = LSTM(input_dim, hidden_dim, output_dim).cuda()
        hidden_state = net_i.init_hidden()
        net_i.load_state_dict(net.state_dict())
        update_optimizer = torch.optim.Adam(net_i.parameters(), lr=update_lr)
        

        # Update process
        start = time.time()
        for j in range(update_step):
            y_pred, hidden_state_i = net_i(x_batch, hidden_state)
            y_pred = y_pred.transpose(0, 1)
            update_loss = L1Loss(y_pred, y_batch)
            update_optimizer.zero_grad()
            update_loss.backward(retain_graph=True)
            update_optimizer.step()
            # print('epoch: %d || loss_1: %f' % (step, update_loss.data))
        net.point_grad_to(net_i)
        meta_optimizer.step()
        end = time.time()
        print(start-end)
    # 1st order update
    y_pred, hidden_state_i = net(x_test, hidden_state)
    y_pred = y_pred.transpose(0, 1)
    update_loss = L1Loss(y_pred, y_test)
    
#     print('epoch: %d || loss_1: %f' % (step, update_loss.data))
#         # file = open('out' + '/log.txt', 'a')
#         # file.write('task: %d || epoch: %d || loss_1: %f\n' % (i, step, meta_loss.data))
#         # file.close()
#     if step % 100 == 0:
#         file = open('out2/log.txt', 'a')
#         file.write('epoch: %d || loss_1: %f\n' % (step, update_loss.data))
#         file.close()
#     if step > 2000 and step % 200 == 0:
#         # Save weights
#         weight_name = 'out2/model_%d.ckpt' % (step)
#         torch.save(net.state_dict(), weight_name)

Start training
-0.09624052047729492
-0.08042263984680176
-0.07466483116149902
-0.10356855392456055
-0.10634970664978027
-0.09688758850097656
-0.1005105972290039
-0.10463261604309082
-0.10004711151123047
-0.10182332992553711
-0.0933375358581543
-0.10653805732727051
-0.09725022315979004
-0.10520386695861816
-0.08891844749450684
-0.10292935371398926
-0.10332226753234863
-0.09438848495483398
-0.0967566967010498
-0.09603571891784668
-0.09934449195861816
-0.10131192207336426
-0.09852313995361328
-0.09572243690490723
-0.09847307205200195
-0.10033893585205078
-0.09519791603088379
-0.10734987258911133
-0.0988607406616211
-0.10141491889953613
-0.10067033767700195


KeyboardInterrupt: 