In [1]:
import os
# load data from file
import numpy as np

save_file_name = ['fea_seq.npy', 'last_observation_seq.npy', 'label_seq.npy', 'masking_seq.npy',
                   'delta_seq.npy', 'train_valid_test_split.npy']
save_folder = 'data/raw/met-search'

# All paths in this notebook are relative to the parent of the start-up directory.
# Step up only while the data folder is not visible from the current directory, so that
# re-running this cell does not keep climbing the directory tree.
if not os.path.isdir(save_folder):
    os.chdir(os.path.pardir)

# one array per file, loaded in the order listed in save_file_name
saved_arrays = [np.load(os.path.join(save_folder, file_name)) for file_name in save_file_name]
[fea_seq, last_observation_seq, label_seq, masking_seq, delta_seq, train_valid_test_split] = saved_arrays


In [2]:
# train / dev / test index lists
# train_valid_test_split[0..2] are used as the three set sizes (assumed to be
# non-negative counts); the sets occupy consecutive index ranges in that order.
train_index = list(range(train_valid_test_split[0]))
dev_index = list(range(len(train_index),
                       len(train_index) + train_valid_test_split[1]))
test_index = list(range(len(train_index) + len(dev_index),
                        len(train_index) + len(dev_index) + train_valid_test_split[2]))

In [3]:
def get_array_by_index_range(nparray_list, label_array, index_range):
    '''
    Keep only the samples of index_range whose label is not NaN.

    nparray_list: list of nparrays, each indexed along its first axis
    label_array: array of labels; an entry that is NaN marks a sample to drop
    index_range: iterable of candidate sample indices

    returns: (list with the selected rows of every array in nparray_list,
              the selected labels flattened to 1-D)
    '''
    kept = [idx for idx in index_range if not np.isnan(label_array[idx])]

    selected_arrays = [arr[kept] for arr in nparray_list]
    selected_labels = label_array[kept].reshape(-1)
    return selected_arrays, selected_labels

In [4]:
# Build the train / dev / test sets. Every call selects, from the feature sequences and
# the last-observation sequences, the samples of one split whose label is not NaN.
# train set
(fea_train, last_train), label_train = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, train_index)
# dev set
(fea_dev, last_dev), label_dev = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, dev_index)
# test set
(fea_test, last_test), label_test = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, test_index)

In [5]:
def normalize_feature(fea_train, array_list):
    """
    Standardise arrays with statistics estimated on the training features only.

    fea_train: training feature array; the NaN-ignoring mean and std are taken over axis 0
    array_list: [fea_dev, fea_test, last_train, last_dev, last_test] to normalize
                with those same training statistics

    returns: (normalized fea_train, list of normalized arrays in the order of array_list)
    """
    train_mean = np.nanmean(fea_train, axis=0)
    train_std = np.nanstd(fea_train, axis=0)
    # A feature that is constant over the training set has std == 0; dividing by it
    # would produce NaN/inf. Divide by 1 instead, so such a feature is only centred.
    safe_std = np.where(train_std == 0, 1.0, train_std)

    def norm_arr(nparr):
        return (nparr - train_mean) / safe_std

    return (norm_arr(fea_train), [norm_arr(k) for k in array_list])

In [6]:
# Normalize every split with the statistics of the (un-normalized) training features.
# Note: the names are rebound to the normalized arrays.
fea_train, (fea_dev, fea_test, last_train, last_dev, last_test) = normalize_feature(
    fea_train,
    [fea_dev, fea_test, last_train, last_dev, last_test],
)

In [7]:
# record the NaN-ignoring mean (over axis 0) of the training features after normalization
x_mean_aft_nor = np.nanmean(fea_train, axis=0)

In [8]:
# control experiment using last observed value for missing data imputation 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from torch.autograd import Variable, grad
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math

In [9]:
class MFN(nn.Module):
    """
    Recurrent regressor over two input streams with a gated shared memory
    (presumably a Memory Fusion Network, judging by the class name).

    Each stream ("l" and "a") is processed by its own LSTMCell. At every time step the
    previous and the new cell states of both streams are concatenated, re-weighted by a
    softmax attention, and used to update a memory vector through two sigmoid gates.
    The final hidden states and the final memory feed a two-layer output head that
    produces one value per sample.

    config keys read here:
        "input_dims": [d_l, d_a]   feature width of each stream
        "h_dims":     [dh_l, dh_a] LSTM hidden size of each stream
        "memsize":    width of the memory vector
        "windowsize": number of cell-state snapshots fed to the attention; forward()
                      always concatenates exactly two (previous and current), so the
                      layer shapes only match for windowsize == 2
    NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig each provide:
        "shapes": hidden width of that sub-network
        "drop":   dropout probability of that sub-network
    """

    def __init__(self,config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig):
        super(MFN, self).__init__()
        [self.d_l,self.d_a] = config["input_dims"]
        [self.dh_l,self.dh_a] = config["h_dims"]
        total_h_dim = self.dh_l+self.dh_a

        self.mem_dim = config["memsize"]
        window_dim = config["windowsize"]
        output_dim = 1
        attInShape = total_h_dim*window_dim
        gammaInShape = attInShape+self.mem_dim
        final_out = total_h_dim+self.mem_dim
        h_att1 = NN1Config["shapes"]
        h_att2 = NN2Config["shapes"]
        h_gamma1 = gamma1Config["shapes"]
        h_gamma2 = gamma2Config["shapes"]
        h_out = outConfig["shapes"]
        att1_dropout = NN1Config["drop"]
        att2_dropout = NN2Config["drop"]
        gamma1_dropout = gamma1Config["drop"]
        gamma2_dropout = gamma2Config["drop"]
        out_dropout = outConfig["drop"]

        # NOTE: layers are created in a fixed order so that seeded weight
        # initialisation stays reproducible; do not reorder.
        self.lstm_l = nn.LSTMCell(self.d_l, self.dh_l)
        self.lstm_a = nn.LSTMCell(self.d_a, self.dh_a)

        # attention over the concatenated cell states
        self.att1_fc1 = nn.Linear(attInShape, h_att1)
        self.att1_fc2 = nn.Linear(h_att1, attInShape)
        self.att1_dropout = nn.Dropout(att1_dropout)

        # proposal for the memory update
        self.att2_fc1 = nn.Linear(attInShape, h_att2)
        self.att2_fc2 = nn.Linear(h_att2, self.mem_dim)
        self.att2_dropout = nn.Dropout(att2_dropout)

        # gate applied to the old memory
        self.gamma1_fc1 = nn.Linear(gammaInShape, h_gamma1)
        self.gamma1_fc2 = nn.Linear(h_gamma1, self.mem_dim)
        self.gamma1_dropout = nn.Dropout(gamma1_dropout)

        # gate applied to the proposed update
        self.gamma2_fc1 = nn.Linear(gammaInShape, h_gamma2)
        self.gamma2_fc2 = nn.Linear(h_gamma2, self.mem_dim)
        self.gamma2_dropout = nn.Dropout(gamma2_dropout)

        # output head
        self.out_fc1 = nn.Linear(final_out, h_out)
        self.out_fc2 = nn.Linear(h_out, output_dim)
        self.out_dropout = nn.Dropout(out_dropout)

    def forward(self,x):
        """
        x: tensor of shape t x n x d (time-major); the first d_l features along the last
           axis go to stream "l", the next d_a features to stream "a"

        returns: tensor of shape n x 1

        The final states are left on the module as self.h_l, self.h_a, self.c_l,
        self.c_a and self.mem. For t == 0 the output is computed from the all-zero
        initial state.
        """
        x_l = x[:,:,:self.d_l]
        x_a = x[:,:,self.d_l:self.d_l+self.d_a]
        # x is t x n x d
        n = x.shape[1]
        t = x.shape[0]
        # new_zeros creates the state on the same device and with the same dtype as x,
        # so the module also works when model and input live off the default device
        self.h_l = x.new_zeros(n, self.dh_l)
        self.h_a = x.new_zeros(n, self.dh_a)

        self.c_l = x.new_zeros(n, self.dh_l)
        self.c_a = x.new_zeros(n, self.dh_a)

        self.mem = x.new_zeros(n, self.mem_dim)

        for i in range(t):
            # prev time step
            prev_cs = torch.cat([self.c_l,self.c_a], dim=1)

            # curr time step
            new_h_l, new_c_l = self.lstm_l(x_l[i], (self.h_l, self.c_l))
            new_h_a, new_c_a = self.lstm_a(x_a[i], (self.h_a, self.c_a))
            new_cs = torch.cat([new_c_l,new_c_a], dim=1)

            # attention over previous + current cell states
            cStar = torch.cat([prev_cs,new_cs], dim=1)
            attention = F.softmax(self.att1_fc2(self.att1_dropout(F.relu(self.att1_fc1(cStar)))),dim=1)
            attended = attention*cStar

            # proposed memory content
            cHat = torch.tanh(self.att2_fc2(self.att2_dropout(F.relu(self.att2_fc1(attended)))))

            # retain / update gates computed from the attended states and the old memory
            both = torch.cat([attended,self.mem], dim=1)
            gamma1 = torch.sigmoid(self.gamma1_fc2(self.gamma1_dropout(F.relu(self.gamma1_fc1(both)))))
            gamma2 = torch.sigmoid(self.gamma2_fc2(self.gamma2_dropout(F.relu(self.gamma2_fc1(both)))))

            self.mem = gamma1*self.mem + gamma2*cHat
            # update
            self.h_l, self.c_l = new_h_l, new_c_l
            self.h_a, self.c_a = new_h_a, new_c_a

        # last hidden states and last memory, concatenated to n x (dh_l + dh_a + mem_dim)
        last_hs = torch.cat([self.h_l,self.h_a,self.mem], dim=1)
        output = self.out_fc2(self.out_dropout(F.relu(self.out_fc1(last_hs))))
        return output

In [15]:
def train_mfn(X_train, y_train, X_valid, y_valid, X_test, y_test, configs):
    """
    Train an MFN regressor with Adam + MSE loss, keep the weights of the epoch with the
    lowest validation loss, and print MAE / MSE of that model on the test set.

    X_train, X_valid, X_test: numpy arrays of shape n x t x d (sample-major); they are
        swapped to t x n x d before being fed to the model
    y_train, y_valid, y_test: numpy label arrays, one value per sample (flattened here)
    configs: [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig];
        besides the MFN settings, config provides "lr", "batchsize" and "num_epochs"

    Side effects: prints one line per epoch, and writes the best model so far to
    'models/temp_models/mfn_<random int>.pt' (the directory is created if missing).
    Returns None.
    """
    # no shuffle, keep original order
    # swap axes: the model consumes time-major input
    def swap_axes(nparr):
        return nparr.swapaxes(0,1)
    X_train = swap_axes(X_train)
    X_valid = swap_axes(X_valid)
    X_test = swap_axes(X_test)

    # Flatten labels so they line up with the squeezed predictions; a column-vector
    # label would otherwise broadcast to an n x n difference.
    y_train = np.asarray(y_train).reshape(-1)
    y_valid = np.asarray(y_valid).reshape(-1)
    y_test = np.asarray(y_test).reshape(-1)

    [config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig] = configs

    model = MFN(config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig)

    optimizer = optim.Adam(model.parameters(),lr=config["lr"])

    criterion = nn.MSELoss()
    device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)
    # `verbose=True` is deprecated in recent PyTorch releases, so learning-rate
    # reductions are reported manually in the epoch loop below.
    scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=10, factor=0.5)

    def to_tensor(nparr):
        # always copies into a float32 tensor on the training device
        return torch.tensor(nparr, dtype=torch.float32, device=device)

    def train(model, batchsize, X_train, y_train, optimizer, criterion):
        # one pass over the training set in fixed order; returns the mean batch loss
        epoch_loss = 0
        model.train()
        total_n = X_train.shape[1]
        num_batches = math.ceil(total_n / batchsize)
        for batch in range(num_batches):
            start = batch*batchsize
            end = (batch+1)*batchsize
            optimizer.zero_grad()
            batch_X = to_tensor(X_train[:,start:end])
            batch_y = to_tensor(y_train[start:end])
            predictions = model.forward(batch_X).squeeze(1)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / num_batches

    def evaluate(model, X_valid, y_valid, criterion):
        # loss over the whole set in a single batch, without gradients or dropout
        model.eval()
        with torch.no_grad():
            batch_X = to_tensor(X_valid)
            batch_y = to_tensor(y_valid)
            predictions = model.forward(batch_X).squeeze(1)
            epoch_loss = criterion(predictions, batch_y).item()
        return epoch_loss

    def predict(model, X_test):
        # predictions for the whole set as a 1-D numpy array
        model.eval()
        with torch.no_grad():
            batch_X = to_tensor(X_test)
            predictions = model.forward(batch_X).squeeze(1)
            predictions = predictions.cpu().data.numpy()
        return predictions

    best_valid = 999999.0
    # In-memory snapshot of the best weights; stays None if no epoch ever improves
    # (e.g. NaN losses or num_epochs == 0), in which case the final weights are used.
    best_state = None
    rand = random.randint(0,100000)
    model_path = 'models/temp_models/mfn_%d.pt' %rand
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print('epoch train_loss valid_loss test_loss')
    for epoch in range(config["num_epochs"]):
        train_loss = train(model, config["batchsize"], X_train, y_train, optimizer, criterion)
        valid_loss = evaluate(model, X_valid, y_valid, criterion)
        test_loss = evaluate(model, X_test, y_test, criterion)

        lrs_before = [group["lr"] for group in optimizer.param_groups]
        scheduler.step(valid_loss)
        for group_id, (old_lr, group) in enumerate(zip(lrs_before, optimizer.param_groups)):
            if group["lr"] < old_lr:
                print('Epoch %5d: reducing learning rate of group %d to %.4e.'
                      % (epoch + 1, group_id, group["lr"]))

        if valid_loss <= best_valid:
            # save model
            best_valid = valid_loss
            print(epoch, train_loss, valid_loss, test_loss, 'saving model')
            best_state = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}
            torch.save(model, model_path)
        else:
            print(epoch, train_loss, valid_loss, test_loss)

    # Restore the best weights from memory rather than un-pickling the file: this works
    # regardless of torch.load's default settings and when no checkpoint was written.
    if best_state is not None:
        model.load_state_dict(best_state)

    predictions = predict(model, X_test)
    mae = np.mean(np.absolute(predictions-y_test))
    print("mae: ", mae)
    mse = np.mean((predictions - y_test)**2)
    print("mse: ", mse)

In [20]:
# Experiment: 50 epochs, learning rate 0.0005, last-observation-imputed inputs.
hl = 256    # hidden size of stream "l"; also reused for memory size, batch size and sub-network widths
ha = 256    # hidden size of stream "a"
drop = 0.7  # dropout probability shared by all sub-networks

config = {
    "input_dims": [5, 47],   # feature split between the two streams
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,         # MFN concatenates the previous and the current cell state
    "batchsize": hl,
    "num_epochs": 50,
    "lr": 0.0005,
}
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# fix the numpy / torch seeds before the model is built and trained
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss test_loss
0 140.88166046142578 111.26560974121094 89.40806579589844 saving model
1 136.88693491617838 107.45386505126953 86.0100326538086 saving model
2 132.5866724650065 102.74201965332031 81.72261047363281 saving model
3 127.05015055338542 96.2884521484375 75.81144714355469 saving model
4 118.77826182047527 86.91610717773438 67.29901123046875 saving model
5 107.15186564127605 72.70096588134766 54.58787536621094 saving model
6 89.29688262939453 51.30961227416992 36.29313659667969 saving model
7 62.16184743245443 25.88995933532715 18.863813400268555 saving model
8 37.9138126373291 26.237396240234375 37.65473937988281
9 39.766815185546875 36.205902099609375 54.5194206237793
10 35.47192128499349 21.870532989501953 29.35205841064453 saving model
11 26.16613833109538 18.805349349975586 18.300262451171875 saving model
12 26.37489954630534 20.234281539916992 16.827665328979492
13 28.813406626383465 19.88355255126953 16.761201858520508
14 28.116995493570965 18.7653

In [19]:
# Experiment: 50 epochs, learning rate 0.001, last-observation-imputed inputs.
hl = 256    # hidden size of stream "l"; also reused for memory size, batch size and sub-network widths
ha = 256    # hidden size of stream "a"
drop = 0.7  # dropout probability shared by all sub-networks

config = {
    "input_dims": [5, 47],   # feature split between the two streams
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,         # MFN concatenates the previous and the current cell state
    "batchsize": hl,
    "num_epochs": 50,
    "lr": 0.001,
}
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# fix the numpy / torch seeds before the model is built and trained
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss test_loss
0 139.72117360432944 107.54705810546875 86.13550567626953 saving model
1 131.24529774983725 97.54459381103516 76.99809265136719 saving model
2 118.00240580240886 79.36834716796875 60.548675537109375 saving model
3 93.41127522786458 45.00913619995117 31.297805786132812 saving model
4 50.83070500691732 23.95153045654297 33.15581130981445 saving model
5 47.50562349955241 36.789405822753906 54.63804244995117
6 30.936551411946613 19.174989700317383 19.545486450195312 saving model
7 27.02692222595215 22.424211502075195 17.118722915649414
8 32.8072083791097 21.796279907226562 16.806575775146484
9 29.349951426188152 18.90011215209961 17.3961124420166 saving model
10 25.77461878458659 20.06362533569336 23.018266677856445
11 26.367183685302734 21.279848098754883 25.095792770385742
12 24.068212509155273 19.32607078552246 19.757123947143555
13 23.675694783528645 19.46296501159668 16.933671951293945
14 24.992013931274414 20.04755401611328 16.514951705932617
15 

In [13]:
# Experiment: 150 epochs, learning rate 0.001, last-observation-imputed inputs.
hl = 256    # hidden size of stream "l"; also reused for memory size, batch size and sub-network widths
ha = 256    # hidden size of stream "a"
drop = 0.7  # dropout probability shared by all sub-networks

config = {
    "input_dims": [5, 47],   # feature split between the two streams
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,         # MFN concatenates the previous and the current cell state
    "batchsize": hl,
    "num_epochs": 150,
    "lr": 0.001,
}
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# fix the numpy / torch seeds before the model is built and trained
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss
0 139.72117360432944 107.54705810546875 saving model
1 131.24529774983725 97.54459381103516 saving model
2 118.00240580240886 79.36834716796875 saving model
3 93.41127522786458 45.00913619995117 saving model
4 50.83070500691732 23.95153045654297 saving model
5 47.50562349955241 36.789405822753906
6 30.936551411946613 19.174989700317383 saving model
7 27.02692222595215 22.424211502075195
8 32.8072083791097 21.796279907226562
9 29.349951426188152 18.90011215209961 saving model
10 25.77461878458659 20.06362533569336
11 26.367183685302734 21.279848098754883
12 24.068212509155273 19.32607078552246
13 23.675694783528645 19.46296501159668
14 24.992013931274414 20.04755401611328
15 24.89605967203776 19.78915023803711
16 25.064217885335285 19.91752052307129
17 22.890396118164062 20.342605590820312
18 23.806041081746418 20.133739471435547
19 23.821678161621094 20.165884017944336
Epoch    21: reducing learning rate of group 0 to 5.0000e-04.
20 23.063982645670574 20.271

In [11]:
# sanity check: shape of the training inputs passed to train_mfn
last_train.shape

(664, 7, 52)