In [1]:
import os
import numpy as np

# Move to the parent directory exactly once per kernel session.
# A bare os.chdir(os.path.pardir) climbs one more level every time the cell is
# re-executed; resolving the target to an absolute path on the first run and
# reusing it afterwards makes the cell safe to run repeatedly.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.pardir)
os.chdir(PROJECT_ROOT)

# load data from file
save_file_name = ['fea_seq.npy', 'last_observation_seq.npy', 'label_seq.npy', 'masking_seq.npy',
                   'delta_seq.npy', 'train_valid_test_split.npy']
save_folder = 'data/raw'
# One array per file, in the same order as save_file_name.
saved_arrays = [np.load(os.path.join(save_folder, file_name)) for file_name in save_file_name]
[fea_seq, last_observation_seq, label_seq, masking_seq, delta_seq, train_valid_test_split] = saved_arrays

In [2]:
# train-test-split
# The three entries of train_valid_test_split are the sizes of consecutive
# train / dev / test index ranges.
n_train, n_dev, n_test = (train_valid_test_split[i] for i in range(3))
train_index = list(range(n_train))
dev_index = list(range(n_train, n_train + n_dev))
test_index = list(range(n_train + n_dev, n_train + n_dev + n_test))

In [26]:
last_observation_seq[0]

array([[14.3],
       [14.3],
       [14.3],
       [ 9.7],
       [ 9.7],
       [ 9.7],
       [ 7.4]])

In [25]:
fea_seq[0]

array([[14.3],
       [ nan],
       [ nan],
       [ 9.7],
       [ nan],
       [ nan],
       [ 7.4]])

In [7]:
def get_array_by_index_range(nparray_list, label_array, index_range):
    '''
    Select the samples of index_range whose label is not NaN.

    nparray_list: list of nparrays, each indexed along its first axis
    label_array: array of labels, indexed the same way
    index_range: iterable of candidate sample indices

    returns: (list of selected arrays, flattened array of the selected labels)
    '''
    # keep only the indices that carry a usable label
    keep = [idx for idx in index_range if not np.isnan(label_array[idx])]
    selected_arrays = [arr[keep] for arr in nparray_list]
    selected_labels = label_array[keep].reshape(-1)
    return selected_arrays, selected_labels

In [27]:
# split set to train, test and dev sets
# Each call keeps only the samples of the given index range that have a non-NaN label.
# train set
(fea_train, last_train), label_train = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, train_index)
# dev set
(fea_dev, last_dev), label_dev = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, dev_index)
# test set
(fea_test, last_test), label_test = get_array_by_index_range(
    [fea_seq, last_observation_seq], label_seq, test_index)


In [28]:
fea_train[0]

array([[nan],
       [9.7],
       [nan],
       [nan],
       [7.4],
       [nan],
       [nan]])

In [29]:
def normalize_feature(fea_train, array_list):
    """
    Z-score fea_train and every array in array_list with statistics of fea_train.

    The mean and standard deviation are taken along axis 0 of fea_train while
    ignoring NaNs, and the same statistics are applied to all arrays.

    fea_train: array whose NaN-aware mean / std along axis 0 define the scaling
    array_list: [fea_dev, fea_test, last_train, last_dev, last_test] to normalize
    returns: (normalized fea_train, list of normalized arrays in input order)
    """
    train_mean = np.nanmean(fea_train, axis=0)
    train_std = np.nanstd(fea_train, axis=0)
    # A constant feature has zero spread; dividing by it would produce NaN/inf.
    # Use a unit scale for those positions so such features map to 0 instead.
    safe_std = np.where(train_std == 0, 1.0, train_std)

    def norm_arr(nparr):
        return (nparr - train_mean) / safe_std

    return (norm_arr(fea_train), [norm_arr(k) for k in array_list])

In [30]:
# Z-score every split using statistics of the training features only.
# NOTE(review): the inputs are rebound to their normalized versions, so executing
# this cell a second time would normalize already-normalized arrays.
fea_train, [fea_dev, fea_test, last_train, last_dev, last_test] = normalize_feature(
    fea_train,
    [fea_dev, fea_test, last_train, last_dev, last_test],
)

In [37]:
# record mean after normalization 
x_mean_aft_nor = np.nanmean(fea_train, axis=0)

In [44]:
# control experiment using last observed value for missing data imputation 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from torch.autograd import Variable, grad
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math

In [45]:
class LSTM(nn.Module):
    """LSTMCell unrolled over time, followed by a two-layer head on the final hidden state."""

    def __init__(self, input_size, hidden_size, output_dim, dropout):
        """
        input_size - the number of expected features in the input x
        hidden_size - the number of hidden units in state h
        output_dim - size of the prediction produced for each sequence
        dropout - dropout probability applied between the two linear layers
        """
        super(LSTM, self).__init__()
        self.h = hidden_size
        self.lstm = nn.LSTMCell(input_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        x: tensor of shape (time_step, batch_size, n_features)
        returns: tensor of shape (batch_size, output_dim), computed from the
                 hidden state after the last time step
        """
        n = x.shape[1]
        # Allocate the initial state with the dtype and device of x so the model
        # keeps working after .double() or .to(device); torch.zeros would always
        # create float32 CPU tensors.
        hx = x.new_zeros(n, self.h)
        cx = x.new_zeros(n, self.h)
        # iterate through cells, one time step at a time
        for step_input in x:
            hx, cx = self.lstm(step_input, (hx, cx))
        # The final states stay available as attributes, as before.
        self.hx, self.cx = hx, cx
        # hx is the last hidden state, shape n * h
        output = F.relu(self.fc1(hx))
        output = self.dropout(output)
        output = self.fc2(output)
        return output
    
def train_lstm(X_train, y_train, X_valid, y_valid, X_test, y_test, config):
    """
    Train the LSTM regressor with MSE loss and print test-set MAE / MSE.

    X_train, X_valid, X_test: arrays of shape (n_samples, time_step, n_features)
    y_train, y_valid, y_test: one target per sample (flattened to 1-D here)
    config: dict with keys "h" (hidden size), "drop" (dropout probability),
            "lr" (Adam learning rate), "batchsize" and "num_epochs"

    After every epoch the validation loss drives a ReduceLROnPlateau scheduler.
    The weights with the lowest validation loss are written to
    'models/lstm_<random id>.pt' and restored before the test evaluation.
    Nothing is returned; progress and metrics are printed.
    """
    # no shuffle, keep original order
    # The model consumes time-major input: (time_step, n_samples, n_features).
    X_train, X_valid, X_test = (arr.swapaxes(0, 1) for arr in (X_train, X_valid, X_test))
    # Flatten the targets so that predictions of shape (n,) can never broadcast
    # against targets of shape (n, 1) in the loss or in the metrics below.
    y_train, y_valid, y_test = (np.asarray(y).reshape(-1) for y in (y_train, y_valid, y_test))

    # model parameters
    input_size = X_train.shape[2]
    h = config["h"]
    output_dim = 1
    dropout = config["drop"]

    model = LSTM(input_size, h, output_dim, dropout)

    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    criterion = nn.MSELoss()

    device = torch.device('cpu')
    model = model.to(device)
    criterion = criterion.to(device)
    try:
        scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=10, factor=0.5, verbose=True)
    except TypeError:
        # NOTE(review): recent PyTorch releases deprecate `verbose`; fall back
        # to the silent scheduler if the installed version rejects the argument.
        scheduler = ReduceLROnPlateau(optimizer, mode="min", patience=10, factor=0.5)

    def to_tensor(nparr):
        # float32 copy placed on the same device as the model
        return torch.tensor(nparr, dtype=torch.float32, device=device)

    def train(model, batchsize, X_train, y_train, optimizer, criterion):
        """Run one epoch over consecutive mini-batches; return the mean batch loss."""
        epoch_loss = 0
        model.train()
        total_n = X_train.shape[1]
        num_batches = math.ceil(total_n / batchsize)
        for batch in range(num_batches):
            start = batch*batchsize
            end = (batch+1)*batchsize
            optimizer.zero_grad()
            batch_X = to_tensor(X_train[:, start:end])
            batch_y = to_tensor(y_train[start:end])
            # call the module rather than .forward() so that hooks are honoured
            predictions = model(batch_X).squeeze(1)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        return epoch_loss / num_batches

    def evaluate(model, X_valid, y_valid, criterion):
        """Return the loss on the whole validation set, computed in a single pass."""
        model.eval()
        with torch.no_grad():
            predictions = model(to_tensor(X_valid)).squeeze(1)
            epoch_loss = criterion(predictions, to_tensor(y_valid)).item()
        return epoch_loss

    def predict(model, X_test):
        """Return the model predictions for X_test as a 1-D numpy array."""
        model.eval()
        with torch.no_grad():
            predictions = model(to_tensor(X_test)).squeeze(1)
            predictions = predictions.cpu().numpy()
        return predictions

    best_valid = float("inf")
    rand = random.randint(0,100000)
    checkpoint_path = 'models/lstm_%d.pt' % rand
    # The checkpoint directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    checkpoint_saved = False
    print('epoch train_loss valid_loss')
    for epoch in range(config["num_epochs"]):
        train_loss = train(model, config["batchsize"], X_train, y_train, optimizer, criterion)
        valid_loss = evaluate(model, X_valid, y_valid, criterion)
        scheduler.step(valid_loss)
        if valid_loss <= best_valid:
            # save model
            best_valid = valid_loss
            print(epoch, train_loss, valid_loss, 'saving model')
            # Store only the weights: a state_dict does not depend on pickling
            # the class definition and loads with torch.load's default settings.
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            print(epoch, train_loss, valid_loss)

    # Restore the best weights. If no epoch ever improved (e.g. NaN validation
    # loss or num_epochs == 0) there is no checkpoint, so keep the current model.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path))

    predictions = predict(model, X_test)
    mae = np.mean(np.absolute(predictions-y_test))
    print("mae: ", mae)
    mse = np.mean((predictions - y_test)**2)
    print("mse: ", mse)

In [48]:
# Last-observation-imputed inputs: hidden size 64, dropout 0.2.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 64,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.2,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 142.87357802618118 111.47429656982422 saving model
1 142.34687877836683 110.97308349609375 saving model
2 141.76615397135416 110.4107666015625 saving model
3 141.09373728434244 109.73616027832031 saving model
4 140.27906181698754 108.87891387939453 saving model
5 139.21773928687685 107.75979614257812 saving model
6 137.77382478259858 106.08363342285156 saving model
7 135.49786921909876 103.17031860351562 saving model
8 131.27459099179222 97.47034454345703 saving model
9 123.0447983514695 86.34819793701172 saving model
10 107.89436521984283 67.90489959716797 saving model
11 85.38680884951637 46.99235153198242 saving model
12 63.24389203389486 31.502910614013672 saving model
13 45.89570054553804 22.629810333251953 saving model
14 36.13741179874965 18.74222183227539 saving model
15 30.911359469095867 17.6124324798584 saving model
16 28.188276835850306 17.65468406677246
17 26.426051684788295 18.0338191986084
18 26.41553565434047 18.366445541381836
19 26.640444

In [53]:
# Last-observation-imputed inputs: hidden size 32, no dropout.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 32,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.0,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 143.43412453787667 112.07876586914062 saving model
1 142.94505600702195 111.62542724609375 saving model
2 142.4531231834775 111.1673812866211 saving model
3 141.94688996814546 110.69254302978516 saving model
4 141.41649482363746 110.18985748291016 saving model
5 140.84891146705263 109.6414794921875 saving model
6 140.23351542154947 109.06795501708984 saving model
7 139.60275195893786 108.4738540649414 saving model
8 138.90728142147972 107.7906494140625 saving model
9 138.0716073172433 106.92293548583984 saving model
10 136.97825513567244 105.73148345947266 saving model
11 135.4612750098819 104.03018188476562 saving model
12 133.2705096290225 101.46720886230469 saving model
13 130.0471885317848 97.89236450195312 saving model
14 125.5413803827195 92.89722442626953 saving model
15 119.30120159330822 86.22345733642578 saving model
16 111.24217551095145 78.21641540527344 saving model
17 101.92916924612862 69.75833892822266 saving model
18 92.32010741460891 61.6

In [51]:
# Last-observation-imputed inputs: hidden size 128, dropout 0.7.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 128,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.7,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 144.39079393659318 112.57483673095703 saving model
1 143.39836956205824 111.5570068359375 saving model
2 142.06606510707311 110.19802856445312 saving model
3 140.3621092296782 108.07511138916016 saving model
4 137.22879972912017 103.65140533447266 saving model
5 129.07548159644716 90.4023666381836 saving model
6 104.15223403204055 51.921749114990234 saving model
7 58.56558318365188 19.55475425720215 saving model
8 34.08881414504278 18.185840606689453 saving model
9 29.982911200750443 18.70934295654297
10 32.714118866693404 18.23284149169922
11 30.1445411046346 17.961305618286133 saving model
12 31.42852011181059 17.957353591918945 saving model
13 30.38989580245245 18.134645462036133
14 30.47426137470064 18.018064498901367
15 29.4111457098098 18.10401725769043
16 31.926031067257835 18.09047508239746
17 29.365579514276412 18.22480010986328
18 30.566774731590634 18.208377838134766
19 28.990326018560502 18.11907196044922
20 30.41956919715518 18.101903915405273

In [50]:
# Last-observation-imputed inputs: hidden size 64, dropout 0.5.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 64,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.5,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 142.89171346028647 111.48150634765625 saving model
1 142.3996865408761 110.99354553222656 saving model
2 141.8175266810826 110.44922637939453 saving model
3 141.1881615774972 109.79469299316406 saving model
4 140.42027864002046 108.98526763916016 saving model
5 139.38258071172805 107.94928741455078 saving model
6 138.0432891845703 106.4462661743164 saving model
7 136.12773931594123 103.93438720703125 saving model
8 132.54222942533949 99.1956787109375 saving model
9 125.57864198230561 89.82416534423828 saving model
10 112.49674388340541 73.36267852783203 saving model
11 92.31576610746838 52.74055862426758 saving model
12 70.4735354468936 35.652896881103516 saving model
13 52.00707590012323 25.145174026489258 saving model
14 40.096625736781526 20.009614944458008 saving model
15 36.333033107575915 18.082090377807617 saving model
16 31.548081761314755 17.58267593383789 saving model
17 30.50357264564151 17.658864974975586
18 30.03938184465681 17.91042709350586


In [49]:
# Last-observation-imputed inputs: hidden size 64, no dropout.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 64,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.0,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 142.8925276256743 111.47087097167969 saving model
1 142.3457725161598 110.96311950683594 saving model
2 141.757325671968 110.38832092285156 saving model
3 141.07856823149183 109.6970443725586 saving model
4 140.239629473005 108.81123352050781 saving model
5 139.15494065057663 107.64851379394531 saving model
6 137.6426511492048 105.87740325927734 saving model
7 135.18729872930618 102.74186706542969 saving model
8 130.65362694149925 96.54956817626953 saving model
9 121.77558026994977 84.55403900146484 saving model
10 105.54756655011859 65.30238342285156 saving model
11 82.60005478631882 44.67216110229492 saving model
12 60.39279065813337 29.97088050842285 saving model
13 44.318261419023784 21.850908279418945 saving model
14 34.68320910135905 18.452030181884766 saving model
15 29.637947037106468 17.56721305847168 saving model
16 27.225909959702264 17.71426773071289
17 26.13494346255348 18.11783218383789
18 25.642766180492583 18.48337173461914
19 25.4021542412

In [47]:
# Last-observation-imputed inputs: hidden size 128, dropout 0.7.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 128,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.7,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 144.3682334536598 112.52589416503906 saving model
1 143.26119595482237 111.4061279296875 saving model
2 141.8498996552967 109.84474182128906 saving model
3 139.6847679501488 107.14594268798828 saving model
4 135.3698225475493 100.72005462646484 saving model
5 123.05448986235119 79.72200775146484 saving model
6 87.02027366274879 34.20840835571289 saving model
7 41.48923669542585 17.409717559814453 saving model
8 27.530917213076638 19.884645462036133
9 25.840191977364675 19.44789695739746
10 25.427451587858656 18.974510192871094
11 25.319975807553245 18.907392501831055
12 25.20515977768671 18.928972244262695
13 25.093356768290203 18.93532943725586
14 25.002363068716868 18.930465698242188
15 24.928135009039018 18.9260196685791
16 24.864187558492024 18.924219131469727
17 24.806933902558825 18.924285888671875
Epoch    19: reducing learning rate of group 0 to 5.0000e-05.
18 24.754547028314498 18.925783157348633
19 24.674888656252907 18.95649528503418
20 24.65278

In [46]:
# Last-observation-imputed inputs: hidden size 128, dropout 0.2.
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
config = {
    'h': 128,
    'lr': 0.0001,
    'num_epochs': 50,
    'batchsize': 32,
    'drop': 0.2,
}
train_lstm(last_train, label_train, last_dev, label_dev, last_test, label_test, config)

epoch train_loss valid_loss
0 144.36276354108537 112.53226470947266 saving model


  "type " + obj.__name__ + ". It won't be checked "


1 143.28201184953963 111.42823791503906 saving model
2 141.90025765555245 109.90135955810547 saving model
3 139.80965241931733 107.2958984375 saving model
4 135.58907717750185 101.19510650634766 saving model
5 124.14143480573382 81.43133544921875 saving model
6 89.72457522437686 36.430763244628906 saving model
7 43.93294266292027 17.380149841308594 saving model
8 28.500378336225236 19.599977493286133
9 26.739432244073775 19.336875915527344
10 25.75014064425514 18.891464233398438
11 25.6573364621117 18.85200309753418
12 26.068444297427224 18.799102783203125
13 25.555141040257045 18.78846549987793
14 25.792273203531902 18.735254287719727
15 25.41020375206357 18.80600929260254
16 25.3397159576416 18.889617919921875
17 25.647798946925572 18.902498245239258
Epoch    19: reducing learning rate of group 0 to 5.0000e-05.
18 25.452332996186755 18.81488800048828
19 24.807841800508044 18.847740173339844
20 25.465374719528924 18.854116439819336
21 24.91729495638893 18.908771514892578
22 25.4078637

In [31]:
fea_train[0]

array([[        nan],
       [-0.24856643],
       [        nan],
       [        nan],
       [-0.72005529],
       [        nan],
       [        nan]])

In [32]:
last_train[0]

array([[ 0.21854222],
       [-0.24856643],
       [-0.71349436],
       [-0.53756466],
       [-0.72005529],
       [-2.0939877 ],
       [-1.21070575]])

In [19]:
last_train[0]

array([[nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan]])

In [None]:
# normalize_feature takes (fea_train, array_list) and returns
# (normalized_train, [normalized_others]); passing three positional arrays
# raises TypeError, so the extra arrays are passed as a list and unpacked likewise.
# NOTE(review): an earlier cell already normalizes these arrays - run only one of the two.
fea_train, [fea_dev, fea_test] = normalize_feature(fea_train, [fea_dev, fea_test])

In [None]:
# fea_train contains NaN for unobserved time steps, so a plain mean would be
# NaN wherever any sample is missing; use the NaN-aware mean instead.
np.nanmean(fea_train, axis=0)

In [None]:
# get mean_after_norm in train set
# NaN-aware mean along axis 0, since missing observations are stored as NaN.
mean_after_norm = np.nanmean(fea_train, axis=0)

In [None]:
fea_train[0]

In [None]:

 = ()

In [None]:
# Z-score X_train with its own NaN-aware mean and standard deviation along axis 0.
# NOTE(review): X_train is not defined at top level in the cells visible here
# (it only appears as a parameter of train_lstm) - confirm where it should come from.
m, s = np.nanmean(X_train, axis=0), np.nanstd(X_train, axis=0)
X_train_z = (X_train - m) / s

In [None]:
fea_train.shape

In [None]:
# fea_seq[0]