In [61]:
import pandas as pd
import numpy as np
import sklearn.preprocessing
import sklearn.metrics
import torch
from torch.autograd import Variable
from data_split_utils import X_y_site_split
from CNN_utils import split_sizes_site, split_data, pad_stack_splits, get_monitorData_indices, r2, get_nonConst_vars
#from CNN_architecture import CNN 

### read in train, test, and val
train = pd.read_csv('../data/trainV_ridgeImp.csv')
val = pd.read_csv('../data/valV_ridgeImp.csv')
test = pd.read_csv('../data/testV_ridgeImp.csv')

### split train, val, and test into x, y, and sites
train_x, train_y, train_sites = X_y_site_split(train, y_var_name='MonitorData', site_var_name='site')
val_x, val_y, val_sites = X_y_site_split(val, y_var_name='MonitorData', site_var_name='site')
test_x, test_y, test_sites = X_y_site_split(test, y_var_name='MonitorData', site_var_name='site')

### get dataframes with non-constant features only
nonConst_vars = get_nonConst_vars(train, site_var_name='site', y_var_name='MonitorData', cutoff=1000)
train_x_nonConst = train_x.loc[:, nonConst_vars]
val_x_nonConst = val_x.loc[:, nonConst_vars]
test_x_nonConst = test_x.loc[:, nonConst_vars]

### standardize all features
standardizer_all = sklearn.preprocessing.StandardScaler(with_mean = True, with_std = True)
train_x_std_all = standardizer_all.fit_transform(train_x)
val_x_std_all = standardizer_all.transform(val_x)
test_x_std_all = standardizer_all.transform(test_x)

### standardize non-constant features
standardizer_nonConst = sklearn.preprocessing.StandardScaler(with_mean = True, with_std = True)
train_x_std_nonConst = standardizer_nonConst.fit_transform(train_x_nonConst)
val_x_std_nonConst = standardizer_nonConst.transform(val_x_nonConst)
test_x_std_nonConst = standardizer_nonConst.transform(test_x_nonConst)




### get split sizes for TRAIN data (splitting by site)
train_split_sizes = split_sizes_site(train_sites.values)

### get tuples by site
train_x_std_tuple_nonConst = split_data(torch.from_numpy(train_x_std_nonConst).float(), train_split_sizes, dim = 0)
train_x_std_tuple = split_data(torch.from_numpy(train_x_std_all).float(), train_split_sizes, dim = 0)
train_y_tuple = split_data(torch.from_numpy(train_y.values), train_split_sizes, dim = 0)

### get site sequences stacked into matrix to go through CNN
train_x_std_stack_nonConst = pad_stack_splits(train_x_std_tuple_nonConst, np.array(train_split_sizes), 'x')
train_x_std_stack_nonConst = Variable(torch.transpose(train_x_std_stack_nonConst, 1, 2))


### get split sizes for VALIDATION data (splitting by site)
val_split_sizes = split_sizes_site(val_sites.values)

### get tuples by site
val_x_std_tuple_nonConst = split_data(torch.from_numpy(val_x_std_nonConst).float(), val_split_sizes, dim = 0)
val_x_std_tuple = split_data(torch.from_numpy(val_x_std_all).float(), val_split_sizes, dim = 0)
val_y_tuple = split_data(torch.from_numpy(val_y.values), val_split_sizes, dim = 0)

### get site sequences stacked into matrix to go through CNN
val_x_std_stack_nonConst = pad_stack_splits(val_x_std_tuple_nonConst, np.array(val_split_sizes), 'x')
val_x_std_stack_nonConst = Variable(torch.transpose(val_x_std_stack_nonConst, 1, 2))


### get split sizes for TEST data (splitting by site)
test_split_sizes = split_sizes_site(test_sites.values)

### get tuples by site
test_x_std_tuple_nonConst = split_data(torch.from_numpy(test_x_std_nonConst).float(), test_split_sizes, dim = 0)
test_x_std_tuple = split_data(torch.from_numpy(test_x_std_all).float(), test_split_sizes, dim = 0)
test_y_tuple = split_data(torch.from_numpy(test_y.values), test_split_sizes, dim = 0)

### get site sequences stacked into matrix to go through CNN
test_x_std_stack_nonConst = pad_stack_splits(test_x_std_tuple_nonConst, np.array(test_split_sizes), 'x')
test_x_std_stack_nonConst = Variable(torch.transpose(test_x_std_stack_nonConst, 1, 2))

SystemError: <class 'torch.FloatTensor'> returned a result with an error set

In [27]:
import torch

### CNN model architecture
class CNN(torch.nn.Module):
    def __init__(self, input_size_conv, hidden_size_conv, kernel_size, padding, input_size_full, hidden_size_full, hidden_size_combo):
        super(CNN, self).__init__()
        
        #self.conv1d = torch.nn.Conv1d(in_channels=input_size_conv, out_channels=hidden_size_conv, kernel_size=kernel_size, padding=padding, bias=True)
        #self.norm_conv = torch.nn.BatchNorm1d(num_features = hidden_size_conv)
        #self.relu_conv = torch.nn.ReLU()
        
        #self.linear_full = torch.nn.Linear(in_features = input_size_full, out_features = hidden_size_full, bias = True)
        #self.norm_full = torch.nn.BatchNorm1d(num_features = hidden_size_full)
        #self.relu_full = torch.nn.ReLU()
        
        #self.linear1_combo = torch.nn.Linear(in_features = hidden_size_conv+hidden_size_full, out_features = hidden_size_combo, bias = True)
        #self.norm_combo = torch.nn.BatchNorm1d(num_features = hidden_size_combo)
        #self.relu_combo = torch.nn.ReLU()
        self.linear2_combo = torch.nn.Linear(in_features = input_size_full, out_features = 1, bias = True)
        
    def forward(self, input_conv, input_full, y_ind_by_site):
        #hidden_conv = self.conv1d(input_conv)
        #hidden_conv = self.norm_conv(hidden_conv)
        #hidden_conv = self.relu_conv(hidden_conv)
        
        #hidden_full = self.linear_full(input_full)
        #hidden_full = self.norm_full(hidden_full)
        #hidden_full = self.relu_full(hidden_full)
        
        #hidden_conv_w_response = []
        #for i in range(hidden_conv.size()[0]):
        #    hidden_conv_w_response.append(torch.transpose(hidden_conv[i][:, y_ind_by_site[i]], 0, 1)) 
        #hidden_conv_w_response = torch.cat(hidden_conv_w_response, dim = 0)
                
        #hidden_conv_w_response__hidden_full = torch.cat([hidden_conv_w_response, hidden_full], dim = 1)
        #hidden_combo = self.linear1_combo(hidden_conv_w_response__hidden_full)
        #hidden_combo = self.norm_combo(hidden_combo)
        #hidden_combo = self.relu_combo(hidden_combo)
        output = self.linear2_combo(input_full)

        return output

    
def r2(model, batch_size, x_stack_nonConst, x_tuple, y_tuple):
    """Computes R-squared
    
    Arguments:
        model (torch): model to test
        batch_size (int): to determine how many sequences to read in at a time
        x_stack (tensor): stack of site data sequences
        y_tuple (tuple): tuple of true y values by sequence, including NaNs
    
    """
    y = []
    pred = []
    
    # get number of batches
    if x_stack_nonConst.size()[0] % batch_size != 0:
        num_batches = int(np.floor(x_stack_nonConst.size()[0]/batch_size) + 1)
    else:
        num_batches = int(x_stack_nonConst.size()[0]/batch_size)
        
    for batch in range(num_batches):
        # get x and y for this batch
        x_stack_batch_nonConst = x_stack_nonConst[batch_size * batch:batch_size * (batch+1)]
        x_tuple_batch = x_tuple[batch_size * batch:batch_size * (batch+1)]
        y_tuple_nans = y_tuple[batch_size * batch:batch_size * (batch+1)]
        
        # get indices for monitor data and actual monitor data
        y_by_site = []
        x_by_site = []
        y_ind_by_site = []
        for i in range(len(y_tuple_nans)):
            y_ind = get_monitorData_indices(y_tuple_nans[i])
            y_by_site.append(y_tuple_nans[i][y_ind])
            y_ind_by_site.append(y_ind)
            x_by_site.append(x_tuple_batch[i][y_ind])
        y_batch = list(Variable(torch.cat(y_by_site, dim=0)).data.numpy())
        x_batch = Variable(torch.cat(x_by_site, dim=0)).float()
        
        # get model output
        pred_batch = list(model(x_stack_batch_nonConst, x_batch, y_ind_by_site).data.numpy())
        
        # concatenate new predictions with ones from previous batches
        y += y_batch
        pred += pred_batch
        
    return sklearn.metrics.r2_score(y, pred)

In [84]:
# CNN parameters
input_size_conv = train_x_std_nonConst.shape[1]
hidden_size_conv = 20
kernel_size = 3
padding = 1
input_size_full = train_x_std_all.shape[1]
hidden_size_full = 30
hidden_size_combo = 30

# instantiate model
cnn = CNN(input_size_conv, hidden_size_conv, kernel_size, padding, input_size_full, hidden_size_full, hidden_size_combo)

# Loss function
mse_loss = torch.nn.MSELoss(size_average=True)

# Optimizer
lr = 0.000001
weight_decay = 0.000001
optimizer = torch.optim.SGD(cnn.parameters(), lr=lr, weight_decay=weight_decay)

In [87]:
num_epochs = 50
batch_size = 50

# get number of batches
if train_x_std_stack_nonConst.size()[0] % batch_size != 0:
    num_batches = int(np.floor(train_x_std_stack_nonConst.size()[0]/batch_size) + 1)
else:
    num_batches = int(train_x_std_stack_nonConst.size()[0]/batch_size)
    
    
for epoch in range(num_epochs):
    epoch_loss = 0
    previous_ind = 0
    for batch in range(num_batches):
        # get x and y for this batch
        x_stack_batch_nonConst = train_x_std_stack_nonConst[batch_size * batch:batch_size * (batch+1)]
        x_tuple_batch = train_x_std_tuple[batch_size * batch:batch_size * (batch+1)]
        y_tuple_nans = train_y_tuple[batch_size * batch:batch_size * (batch+1)]
        
        # get indices for monitor data and actual monitor data
        y_by_site = []
        x_by_site = []
        y_ind_by_site = []
        for i in range(len(y_tuple_nans)):
            y_ind = get_monitorData_indices(y_tuple_nans[i])
            y_by_site.append(y_tuple_nans[i][y_ind])
            y_ind_by_site.append(y_ind)
            x_by_site.append(x_tuple_batch[i][y_ind])
        y_batch = Variable(torch.cat(y_by_site, dim=0)).float()
        x_batch = Variable(torch.cat(x_by_site, dim=0)).float()
        
        # get model output
        pred_batch = cnn(x_stack_batch_nonConst, x_batch, y_ind_by_site)
        
        # compute loss, backprop, and update parameters
        loss_batch = mse_loss(pred_batch, y_batch)
        loss_batch.backward()
        optimizer.step()
        
        # accumulate loss over epoch
        epoch_loss += loss_batch.data[0]
    
    print('Train R^2 after epoch ' + str(epoch) + ': ' + str(r2(cnn, batch_size, train_x_std_stack_nonConst, train_x_std_tuple, train_y_tuple)))
    print('Validation R^2 after epoch ' + str(epoch) + ': ' + str(r2(cnn, batch_size, val_x_std_stack_nonConst, val_x_std_tuple, val_y_tuple)))
    print('Epoch loss after epoch ' + str(epoch) + ': ' + str(epoch_loss))
    print()

print('Test R^2: ' + str(r2(cnn, batch_size, test_x_std_stack_nonConst[:-1], test_x_std_tuple[:-1], test_y_tuple[:-1])))

Train R^2 after epoch 0: -0.637913280052
Validation R^2 after epoch 0: -0.464547260105
Epoch loss after epoch 0: 408.4560546875

Train R^2 after epoch 1: -0.626780185663
Validation R^2 after epoch 1: -0.451543759152
Epoch loss after epoch 1: 404.3051300048828

Train R^2 after epoch 2: -0.615880303368
Validation R^2 after epoch 2: -0.438764696877
Epoch loss after epoch 2: 400.26110458374023

Train R^2 after epoch 3: -0.60521424435
Validation R^2 after epoch 3: -0.42621194804
Epoch loss after epoch 3: 396.326904296875

Train R^2 after epoch 4: -0.594782448922
Validation R^2 after epoch 4: -0.413887221127
Epoch loss after epoch 4: 392.49771881103516

Train R^2 after epoch 5: -0.584585037926
Validation R^2 after epoch 5: -0.401791926711
Epoch loss after epoch 5: 388.77449798583984

Train R^2 after epoch 6: -0.57462199402
Validation R^2 after epoch 6: -0.389927358578
Epoch loss after epoch 6: 385.16035079956055

Train R^2 after epoch 7: -0.564892597003
Validation R^2 after epoch 7: -0.37829