In [1]:
import os
# NOTE: the target is relative to the current working directory, so every
# re-run of this cell moves one more level up. Run it once per kernel.
os.chdir(os.path.pardir)

import numpy as np

# Load the saved arrays from disk; the file order below must match the
# unpacking order at the end of the cell.
save_folder = 'data/raw/pol_temp_rh'
save_file_name = [
    'fea_seq.npy',
    'last_observation_seq.npy',
    'label_seq.npy',
    'masking_seq.npy',
    'delta_seq.npy',
    'train_valid_test_split.npy',
]
saved_arrays = [np.load(os.path.join(save_folder, name)) for name in save_file_name]
(fea_seq, last_observation_seq, label_seq,
 masking_seq, delta_seq, train_valid_test_split) = saved_arrays


In [2]:
# Contiguous index lists for the three splits. train_valid_test_split is read
# as three consecutive segment lengths: train, then dev, then test.
train_index = list(range(train_valid_test_split[0]))
dev_index = list(range(
    train_valid_test_split[0],
    train_valid_test_split[0] + train_valid_test_split[1],
))
test_index = list(range(
    train_valid_test_split[0] + train_valid_test_split[1],
    train_valid_test_split[0] + train_valid_test_split[1] + train_valid_test_split[2],
))

In [3]:
def get_array_by_index_range(nparray_list, label_array, index_range):
    """Select the rows in `index_range` whose label is not NaN.

    nparray_list: list of arrays; each is indexed along axis 0 with the
        surviving indices.
    label_array: array of labels; entries at `index_range` that are NaN are
        dropped together with the matching rows of every array.
    index_range: iterable of integer row indices to consider.

    Returns (selected_arrays, selected_labels): a list with one selected array
    per input array, and the surviving labels flattened to 1-D.
    """
    # Keep only the positions that actually have a label.
    kept = [idx for idx in index_range if not np.isnan(label_array[idx])]

    selected_arrays = [arr[kept] for arr in nparray_list]
    selected_labels = label_array[kept].reshape(-1)
    return selected_arrays, selected_labels

In [4]:
# Materialize the train / dev / test sets. Each call returns the feature and
# last-observation arrays for the split (rows with a NaN label removed)
# together with the matching flattened labels.
(fea_train, last_train), label_train = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, train_index)

(fea_dev, last_dev), label_dev = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, dev_index)

(fea_test, last_test), label_test = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, test_index)

In [5]:
def normalize_feature(fea_train, array_list):
    """Standardize arrays with statistics computed on the training features.

    The mean and standard deviation are taken over axis 0 of `fea_train`,
    ignoring NaNs, and the same statistics are applied to every array.

    fea_train: training feature array that defines the statistics.
    array_list: further arrays to normalize with the training statistics,
        e.g. [fea_dev, fea_test, last_train, last_dev, last_test]; each must
        broadcast against the axis-0 statistics of `fea_train`.

    Returns (normalized_fea_train, [normalized arrays in `array_list` order]).
    """
    train_mean = np.nanmean(fea_train, axis=0)
    train_std = np.nanstd(fea_train, axis=0)
    # A feature that is constant on the training set has std == 0; dividing by
    # it would turn the whole column into NaN/inf. Use 1 instead so the centred
    # values pass through unscaled (0 for the training set itself).
    safe_std = np.where(train_std == 0, 1.0, train_std)

    def norm_arr(nparr):
        return (nparr - train_mean) / safe_std

    return (norm_arr(fea_train), [norm_arr(k) for k in array_list])

In [6]:
# Standardize every split (features and last-observation arrays) with the
# statistics of the training features. The names are rebound in place, so the
# un-normalized arrays are no longer available after this cell.
fea_train, (fea_dev, fea_test, last_train, last_dev, last_test) = normalize_feature(
    fea_train,
    [fea_dev, fea_test, last_train, last_dev, last_test],
)

In [7]:
# Record the NaN-ignoring mean over axis 0 of the training features after
# normalization.
x_mean_aft_nor = np.nanmean(fea_train, axis=0)

In [8]:
# control experiment using last observed value for missing data imputation 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from torch.autograd import Variable, grad
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math

In [9]:
class MFN(nn.Module):
    """Two-stream recurrent regressor with an attention-gated shared memory.

    (The name presumably stands for Memory Fusion Network.)

    The last input axis is split into two groups of features. Each group is
    fed to its own LSTMCell. At every time step the previous and the new cell
    states of both LSTMs are concatenated, re-weighted by a softmax attention,
    and used to update a memory vector through two sigmoid gates. The final
    hidden states and the final memory are mapped to one scalar per sample.

    Parameters
    ----------
    config : dict
        "input_dims": [d_l, d_a]   feature counts of the two streams
        "h_dims":     [dh_l, dh_a] hidden sizes of the two LSTM cells
        "memsize":    size of the memory vector
        "windowsize": number of time steps seen by the attention; must be 2,
                      because forward() concatenates exactly the previous and
                      the current cell states
    NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig : dict
        Each provides "shapes" (hidden width) and "drop" (dropout probability)
        for the attention net, the memory-candidate net, the two gate nets and
        the output net respectively.

    Raises
    ------
    ValueError
        If config["windowsize"] is not 2.
    """

    def __init__(self,config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig):
        super(MFN, self).__init__()
        [self.d_l,self.d_a] = config["input_dims"]
        [self.dh_l,self.dh_a] = config["h_dims"]
        total_h_dim = self.dh_l+self.dh_a

        self.mem_dim = config["memsize"]
        window_dim = config["windowsize"]
        if window_dim != 2:
            # forward() always builds a (previous, current) pair of cell
            # states; any other window would only fail later with an opaque
            # shape mismatch inside the first attention layer.
            raise ValueError(
                'MFN only supports config["windowsize"] == 2, got %r' % (window_dim,))
        output_dim = 1
        attInShape = total_h_dim*window_dim
        gammaInShape = attInShape+self.mem_dim
        final_out = total_h_dim+self.mem_dim
        h_att1 = NN1Config["shapes"]
        h_att2 = NN2Config["shapes"]
        h_gamma1 = gamma1Config["shapes"]
        h_gamma2 = gamma2Config["shapes"]
        h_out = outConfig["shapes"]
        att1_dropout = NN1Config["drop"]
        att2_dropout = NN2Config["drop"]
        gamma1_dropout = gamma1Config["drop"]
        gamma2_dropout = gamma2Config["drop"]
        out_dropout = outConfig["drop"]

        # NOTE: layers are created in a fixed order so that, for a given seed,
        # the random initialization is reproducible.
        self.lstm_l = nn.LSTMCell(self.d_l, self.dh_l)
        self.lstm_a = nn.LSTMCell(self.d_a, self.dh_a)

        # attention over the concatenated (previous, current) cell states
        self.att1_fc1 = nn.Linear(attInShape, h_att1)
        self.att1_fc2 = nn.Linear(h_att1, attInShape)
        self.att1_dropout = nn.Dropout(att1_dropout)

        # candidate memory computed from the attended cell states
        self.att2_fc1 = nn.Linear(attInShape, h_att2)
        self.att2_fc2 = nn.Linear(h_att2, self.mem_dim)
        self.att2_dropout = nn.Dropout(att2_dropout)

        # gate applied to the previous memory
        self.gamma1_fc1 = nn.Linear(gammaInShape, h_gamma1)
        self.gamma1_fc2 = nn.Linear(h_gamma1, self.mem_dim)
        self.gamma1_dropout = nn.Dropout(gamma1_dropout)

        # gate applied to the candidate memory
        self.gamma2_fc1 = nn.Linear(gammaInShape, h_gamma2)
        self.gamma2_fc2 = nn.Linear(h_gamma2, self.mem_dim)
        self.gamma2_dropout = nn.Dropout(gamma2_dropout)

        # regression head on [h_l, h_a, mem]
        self.out_fc1 = nn.Linear(final_out, h_out)
        self.out_fc2 = nn.Linear(h_out, output_dim)
        self.out_dropout = nn.Dropout(out_dropout)

    def forward(self,x):
        """Run the network over a full sequence.

        x: tensor of shape (t, n, d) -- time steps, batch size, features.
           The first d_l features go to the first LSTM, the following d_a
           features to the second one.

        Returns a tensor of shape (n, 1).

        After the call the final states are available as self.h_l, self.c_l,
        self.h_a, self.c_a and self.mem.
        """
        x_l = x[:,:,:self.d_l]
        x_a = x[:,:,self.d_l:self.d_l+self.d_a]
        # x is t x n x d
        n = x.shape[1]
        t = x.shape[0]

        # Allocate the initial states next to the input (same device and
        # dtype) so the module also works after being moved off the CPU.
        h_l = x.new_zeros(n, self.dh_l)
        h_a = x.new_zeros(n, self.dh_a)
        c_l = x.new_zeros(n, self.dh_l)
        c_a = x.new_zeros(n, self.dh_a)
        mem = x.new_zeros(n, self.mem_dim)

        for i in range(t):
            # cell states of the previous time step
            prev_cs = torch.cat([c_l,c_a], dim=1)

            # advance both LSTMs by one step
            new_h_l, new_c_l = self.lstm_l(x_l[i], (h_l, c_l))
            new_h_a, new_c_a = self.lstm_a(x_a[i], (h_a, c_a))
            new_cs = torch.cat([new_c_l,new_c_a], dim=1)

            # attention weights over every dimension of the two-step window
            cStar = torch.cat([prev_cs,new_cs], dim=1)
            attention = F.softmax(self.att1_fc2(self.att1_dropout(F.relu(self.att1_fc1(cStar)))),dim=1)
            attended = attention*cStar

            # candidate memory and the two gates
            cHat = torch.tanh(self.att2_fc2(self.att2_dropout(F.relu(self.att2_fc1(attended)))))
            both = torch.cat([attended,mem], dim=1)
            gamma1 = torch.sigmoid(self.gamma1_fc2(self.gamma1_dropout(F.relu(self.gamma1_fc1(both)))))
            gamma2 = torch.sigmoid(self.gamma2_fc2(self.gamma2_dropout(F.relu(self.gamma2_fc1(both)))))

            # gated memory update
            mem = gamma1*mem + gamma2*cHat

            h_l, c_l = new_h_l, new_c_l
            h_a, c_a = new_h_a, new_c_a

        # Keep the final states on the module, as earlier versions did, for
        # code that inspects them after a forward pass.
        self.h_l, self.c_l = h_l, c_l
        self.h_a, self.c_a = h_a, c_a
        self.mem = mem

        # last hidden states plus last memory, n x (dh_l + dh_a + mem_dim)
        last_hs = torch.cat([h_l,h_a,mem], dim=1)
        output = self.out_fc2(self.out_dropout(F.relu(self.out_fc1(last_hs))))
        return output

In [10]:
# Sanity check: shape of the training feature array.
fea_train.shape

(664, 7, 5)

In [11]:
def train_mfn(X_train, y_train, X_valid, y_valid, X_test, y_test, configs):
    """Train an MFN regressor and report its error on the test set.

    The model is trained with Adam and an MSE loss on the CPU. After every
    epoch it is evaluated on the validation set; whenever the validation loss
    does not get worse, the current weights are written to a checkpoint. After
    the last epoch the best checkpoint is restored and MAE / MSE on the test
    set are printed.

    Parameters
    ----------
    X_train, X_valid, X_test : np.ndarray
        Feature arrays. Axes 0 and 1 are swapped before use, because
        MFN.forward treats axis 0 as time and axis 1 as the batch (in this
        notebook the inputs are (samples, time steps, features)).
    y_train, y_valid, y_test : np.ndarray
        Targets, one value per sample.
    configs : list
        [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]
        as expected by MFN. `config` must additionally provide "lr",
        "batchsize" and "num_epochs".

    Side effects
    ------------
    Prints one line per epoch plus the final metrics, and writes the best
    weights to models/temp_models/mfn_<random int>.pt (the directory is
    created if it does not exist).

    Training samples are deliberately not shuffled; batches follow the
    original sample order.
    """
    [config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig] = configs

    def to_time_major(nparr):
        # swap axes 0 and 1 and convert to a float32 tensor once, instead of
        # converting every batch again in every epoch
        return torch.Tensor(nparr.swapaxes(0,1))

    X_train = to_time_major(X_train)
    X_valid = to_time_major(X_valid)
    X_test = to_time_major(X_test)
    y_train = torch.Tensor(y_train)
    y_valid = torch.Tensor(y_valid)

    model = MFN(config,NN1Config,NN2Config,gamma1Config,gamma2Config,outConfig)

    optimizer = optim.Adam(model.parameters(),lr=config["lr"])

    criterion = nn.MSELoss()
    device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)
    scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=10, factor=0.5, verbose=True)

    def train(model, batchsize, X_train, y_train, optimizer, criterion):
        """Run one epoch over the training set; return the mean batch loss."""
        epoch_loss = 0
        model.train()
        total_n = X_train.shape[1]
        num_batches = math.ceil(total_n / batchsize)
        for batch in range(num_batches):
            start = batch*batchsize
            end = start + batchsize
            optimizer.zero_grad()
            predictions = model(X_train[:,start:end]).squeeze(1)
            loss = criterion(predictions, y_train[start:end])
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / num_batches

    def evaluate(model, X_valid, y_valid, criterion):
        """Return the loss on the whole validation set (single batch)."""
        model.eval()
        with torch.no_grad():
            predictions = model(X_valid).squeeze(1)
            return criterion(predictions, y_valid).item()

    def predict(model, X_test):
        """Return the predictions for X_test as a 1-D numpy array."""
        model.eval()
        with torch.no_grad():
            predictions = model(X_test).squeeze(1)
        return predictions.cpu().numpy()

    # The random suffix keeps checkpoints of different runs apart.
    rand = random.randint(0,100000)
    model_path = 'models/temp_models/mfn_%d.pt' %rand
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    best_valid = 999999.0
    checkpoint_written = False
    print('epoch train_loss valid_loss')
    for epoch in range(config["num_epochs"]):
        train_loss = train(model, config["batchsize"], X_train, y_train, optimizer, criterion)
        valid_loss = evaluate(model, X_valid, y_valid, criterion)
        scheduler.step(valid_loss)
        if valid_loss <= best_valid:
            best_valid = valid_loss
            print(epoch, train_loss, valid_loss, 'saving model')
            # Save only the weights. Pickling the whole module ties the file
            # to this exact class definition, and recent torch.load defaults
            # (weights_only=True) refuse to unpickle arbitrary classes.
            torch.save(model.state_dict(), model_path)
            checkpoint_written = True
        else:
            print(epoch, train_loss, valid_loss)

    # Restore the best weights. Nothing is on disk if no epoch ran or the
    # validation loss never compared as an improvement (e.g. it was NaN); in
    # that case the model is evaluated as it is.
    if checkpoint_written:
        model.load_state_dict(torch.load(model_path))

    predictions = predict(model, X_test)
    mae = np.mean(np.absolute(predictions-y_test))
    print("mae: ", mae)
    mse = np.mean((predictions - y_test)**2)
    print("mse: ", mse)

In [58]:
# Sanity check: shape of the dev feature array.
fea_dev.shape

(118, 7, 5)

In [59]:
# Sanity check: shape of the test feature array.
fea_test.shape

(121, 7, 5)

In [13]:
# Experiment: both LSTMs 256 units, memory 256, batch size 256, dropout 0.7.
hl = 256
ha = 256
drop = 0.7

# input_dims [1, 4]: the first feature feeds the first LSTM, the next four
# feed the second one.
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": hl,
    "num_epochs": 50,
    "lr": 0.0001,
}
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Fix the numpy and torch seeds before the model is built.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss
0 138.94296010335287 111.36531829833984 saving model
1 137.82977294921875 110.49248504638672 saving model
2 137.01726786295572 109.66050720214844 saving model
3 136.0467071533203 108.86024475097656 saving model
4 135.12869771321616 108.07041931152344 saving model
5 134.25636291503906 107.2625961303711 saving model
6 133.40024058024088 106.4128189086914 saving model
7 132.40770975748697 105.5063705444336 saving model
8 131.41480509440103 104.52982330322266 saving model
9 130.3074213663737 103.48682403564453 saving model
10 129.0336430867513 102.34229278564453 saving model
11 127.77091979980469 101.06938934326172 saving model
12 126.33504740397136 99.64335632324219 saving model
13 124.54395548502605 98.02600860595703 saving model
14 122.75349934895833 96.17029571533203 saving model
15 120.59367116292317 94.01270294189453 saving model
16 118.17789967854817 91.48675537109375 saving model
17 114.96636199951172 88.4975814819336 saving model
18 111.62861887613933 8

In [12]:
# Experiment: both LSTMs 128 units, memory 128, batch size 128, dropout 0.7.
hl = 128
ha = 128
drop = 0.7

# input_dims [1, 4]: the first feature feeds the first LSTM, the next four
# feed the second one.
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": hl,
    "windowsize": 2,
    "batchsize": hl,
    "num_epochs": 50,
    "lr": 0.0001,
}
NN1Config = {"shapes": hl, "drop": drop}
NN2Config = {"shapes": 32, "drop": drop}
gamma1Config = {"shapes": hl, "drop": drop}
gamma2Config = {"shapes": hl, "drop": drop}
outConfig = {"shapes": hl, "drop": drop}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Fix the numpy and torch seeds before the model is built.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss




0 156.11405563354492 112.65887451171875 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 155.20471954345703 111.86585235595703 saving model
2 154.27276102701822 111.05146026611328 saving model
3 153.34173075358072 110.19532775878906 saving model
4 152.46337381998697 109.27770233154297 saving model
5 151.3701604207357 108.2803726196289 saving model
6 150.23250579833984 107.18980407714844 saving model
7 148.7600466410319 105.98526000976562 saving model
8 147.48532740275064 104.63453674316406 saving model
9 145.8607610066732 103.10432434082031 saving model
10 143.8507563273112 101.32836151123047 saving model
11 141.74614461263022 99.2261962890625 saving model
12 139.60418319702148 96.70524597167969 saving model
13 135.94170888264975 93.63931274414062 saving model
14 132.91644287109375 89.83513641357422 saving model
15 128.44332122802734 85.05644226074219 saving model
16 121.9296391805013 78.93667602539062 saving model
17 113.12656720479329 71.0453872680664 saving model
18 104.2041409810384 61.046142578125 saving model
19 92.30810991923015 49.000938415527344 saving model
20 

In [57]:
# Experiment: LSTMs with 64 / 32 units, memory 64, batch size 16,
# all sub-networks 32 wide with dropout 0.7.
hl = 64
ha = 32

# input_dims [1, 4]: the first feature feeds the first LSTM, the next four
# feed the second one.
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": 64,
    "windowsize": 2,
    "batchsize": 16,
    "num_epochs": 50,
    "lr": 0.0001,
}
NN1Config = {"shapes": 32, "drop": 0.7}
NN2Config = {"shapes": 32, "drop": 0.7}
gamma1Config = {"shapes": 32, "drop": 0.7}
gamma2Config = {"shapes": 32, "drop": 0.7}
outConfig = {"shapes": 32, "drop": 0.7}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Fix the numpy and torch seeds before the model is built.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss
0 141.46016729445685 111.05241394042969 saving model
1 139.5429970877511 109.15363311767578 saving model
2 137.1174102056594 106.47740936279297 saving model
3 133.2586486453102 102.0577163696289 saving model
4 126.23620950608026 92.49103546142578 saving model
5 109.34628759111676 65.91061401367188 saving model
6 75.19069113050189 32.051841735839844 saving model
7 47.68678733280727 18.42310905456543 saving model
8 40.83152021680559 17.621511459350586 saving model
9 39.16866366068522 17.819971084594727
10 38.2137383052281 17.699769973754883
11 40.398299512409025 17.7195987701416
12 39.65765592030117 17.71005630493164
13 39.91312195005871 17.810375213623047
14 40.267848764147075 17.874692916870117
15 39.71537649063837 17.826465606689453
16 37.678475879487536 18.002872467041016
17 38.408209755307155 17.90630531311035
18 38.586224260784334 17.899261474609375
Epoch    20: reducing learning rate of group 0 to 5.0000e-05.
19 39.1255780401684 17.900102615356445
20 37

In [56]:
# Experiment: LSTMs with 64 / 32 units, memory 64, batch size 32,
# all sub-networks 32 wide with dropout 0.5.
hl = 64
ha = 32

# input_dims [1, 4]: the first feature feeds the first LSTM, the next four
# feed the second one.
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": 64,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.0001,
}
NN1Config = {"shapes": 32, "drop": 0.5}
NN2Config = {"shapes": 32, "drop": 0.5}
gamma1Config = {"shapes": 32, "drop": 0.5}
gamma2Config = {"shapes": 32, "drop": 0.5}
outConfig = {"shapes": 32, "drop": 0.5}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Fix the numpy and torch seeds before the model is built.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss




0 143.4269805181594 111.73497772216797 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 142.4031746273949 110.84442901611328 saving model
2 141.34705861409506 109.79991912841797 saving model
3 140.01563444591704 108.49638366699219 saving model
4 138.56229727608817 106.8376235961914 saving model
5 136.42176419212706 104.7589111328125 saving model
6 133.8928749447777 102.01949310302734 saving model
7 130.19739786783853 97.88046264648438 saving model
8 124.62183053152901 91.02835083007812 saving model
9 114.91846320742653 79.38703918457031 saving model
10 99.70678529285249 61.57096862792969 saving model
11 79.85024107070197 41.91075134277344 saving model
12 57.072988146827335 26.158111572265625 saving model
13 42.631982440040225 18.200637817382812 saving model
14 35.34261085873558 17.902345657348633 saving model
15 32.36609377179827 18.72093963623047
16 31.483746755690802 18.558666229248047
17 33.00557127453032 18.6918888092041
18 33.97688620431082 18.692129135131836
19 31.181169237409318 18.46806526184082
20 30.53945055462065 18.52680015563965
21 31.976759365626744 18.634

In [55]:
# Inspect the first training sample of the last-observation features.
last_train[0]

array([[ 0.21854222,  0.49295122,  0.12868645, -0.46155388, -0.30762506],
       [-0.24856643,  0.7073676 , -0.24226635, -0.46902672, -0.91637258],
       [-0.71349436,  0.52921296, -0.06427684,  0.07079409, -0.11530977],
       [-0.53756466,  0.6018833 ,  0.43019764,  0.00278131,  0.15688786],
       [-0.72005529,  0.58860501, -0.07057453, -0.60066422, -0.62825199],
       [-2.0939877 ,  0.80690814, -0.40207978, -0.4524199 , -0.68732983],
       [-1.21070575,  0.49426344, -1.34088481, -0.45297472, -0.48903942]])

In [52]:
# Experiment: both LSTMs 32 units, memory 64, batch size 32,
# all sub-networks 32 wide with dropout 0.5.
hl = 32
ha = 32

# input_dims [1, 4]: the first feature feeds the first LSTM, the next four
# feed the second one.
config = {
    "input_dims": [1, 4],
    "h_dims": [hl, ha],
    "memsize": 64,
    "windowsize": 2,
    "batchsize": 32,
    "num_epochs": 50,
    "lr": 0.0001,
}
NN1Config = {"shapes": 32, "drop": 0.5}
NN2Config = {"shapes": 32, "drop": 0.5}
gamma1Config = {"shapes": 32, "drop": 0.5}
gamma2Config = {"shapes": 32, "drop": 0.5}
outConfig = {"shapes": 32, "drop": 0.5}
configs = [config, NN1Config, NN2Config, gamma1Config, gamma2Config, outConfig]

# Fix the numpy and torch seeds before the model is built.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_mfn(last_train, label_train, last_dev, label_dev, last_test, label_test, configs)

epoch train_loss valid_loss




0 148.02859606061662 115.98423767089844 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 147.50207846505302 115.56013488769531 saving model
2 147.06520044235955 115.17402648925781 saving model
3 146.61236935570128 114.72411346435547 saving model
4 146.07836986723402 114.17180633544922 saving model
5 145.40250723702567 113.51115417480469 saving model
6 144.6554728916713 112.66338348388672 saving model
7 143.56147112165178 111.53781127929688 saving model
8 142.12942722865515 110.00544738769531 saving model
9 140.02900986444382 107.83295440673828 saving model
10 137.35363151913597 104.84867095947266 saving model
11 134.32220241001673 100.68287658691406 saving model
12 128.79708099365234 94.39310455322266 saving model
13 121.39551035563152 84.52606201171875 saving model
14 108.39922986711774 68.81757354736328 saving model
15 89.18752688453311 46.43396759033203 saving model
16 67.97932406834194 25.161245346069336 saving model
17 51.22806340172177 17.280485153198242 saving model
18 41.087117513020836 18.57951545715332
19 43.430546261015394 18.96548843383789
20 38.3778433118547