In [1]:
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import pandas as pd
import numpy as np
import sklearn.preprocessing
import sklearn.metrics

In [2]:
# read in imputed data
sensor_census_imp = pd.read_csv('../data/sensor_census_imputed_rf.csv')

In [3]:
np.random.seed(1)

# get sites for val/test data
val_test_sites = np.random.choice(np.unique(sensor_census_imp['site'].values), round(len(np.unique(sensor_census_imp['site'].values))/5), replace = False)

# get sites for test data
test_sites = np.random.choice(np.unique(val_test_sites), round(len(np.unique(val_test_sites))/2), replace = False)

# train sites/rows and x/y split
sensor_census_imp_train = sensor_census_imp[~sensor_census_imp['site'].isin(val_test_sites)]
sensor_census_imp_train_x = sensor_census_imp_train.iloc[:, 2:]
sensor_census_imp_train_y = sensor_census_imp_train.iloc[:, 1]

# val sites/rows and x/y split
sensor_census_imp_val = sensor_census_imp[(sensor_census_imp['site'].isin(val_test_sites)) & (~sensor_census_imp['site'].isin(test_sites))]
sensor_census_imp_val_x = sensor_census_imp_val.iloc[:, 2:]
sensor_census_imp_val_y = sensor_census_imp_val.iloc[:, 1]

# test sites/rows and x/y split
sensor_census_imp_test = sensor_census_imp[sensor_census_imp['site'].isin(test_sites)]
sensor_census_imp_test_x = sensor_census_imp_test.iloc[:, 2:]
sensor_census_imp_test_y = sensor_census_imp_test.iloc[:, 1]

# standardize train, val, and test data
standardizer = sklearn.preprocessing.StandardScaler(with_mean = True, with_std = True)
sensor_census_imp_train_x_stand = standardizer.fit_transform(sensor_census_imp_train_x)
sensor_census_imp_val_x_stand = standardizer.transform(sensor_census_imp_val_x)
sensor_census_imp_test_x_stand = standardizer.transform(sensor_census_imp_test_x)

In [9]:
# create torch tensor tuples for train, val, test
train = TensorDataset(torch.from_numpy(sensor_census_imp_train_x_stand), torch.from_numpy(sensor_census_imp_train_y.values))
val = TensorDataset(torch.from_numpy(sensor_census_imp_val_x_stand), torch.from_numpy(sensor_census_imp_val_y.values))
test = TensorDataset(torch.from_numpy(sensor_census_imp_test_x_stand), torch.from_numpy(sensor_census_imp_test_y.values))

# create batches
batch_size = 100
train_loader = torch.utils.data.DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
val_loader = torch.utils.data.DataLoader(dataset = val, batch_size = batch_size, shuffle = False)
test_loader = torch.utils.data.DataLoader(dataset = test, batch_size = batch_size, shuffle = False)

In [10]:
# function for computing accuracy
def compute_accuracy(data_loader, model):
    model_pred = []
    targets = []
    for batch in data_loader:
        model_output = model(Variable(batch[0]).float()).data.numpy() # model preds
        model_pred += model_output.tolist() # add to list of preds
        targets += batch[1].numpy().tolist() # true digit values
        
    return sklearn.metrics.r2_score(targets, model_pred)

In [18]:
num_epochs = 25

# hyperparams to test
lrs = [0.1, 0.01, 0.001]
lams = [0.1, 0.001, 0.00001, 0]
hiddens = [33, 67, 100, 133, 166]
activations = ['tanh', 'ReLU']

count = 0
max_val_r2 = 0
for lr in lrs:
    for lam in lams:
        for hidden in hiddens:
            for activation in activations:

                # model architecture
                ff_nn = torch.nn.Sequential()
                ff_nn.add_module(name = '1st linear', module = torch.nn.Linear(in_features = sensor_census_imp_train_x_stand.shape[1], out_features = hidden, bias = True)) # linear layer
                ff_nn.add_module(name = 'norm', module = torch.nn.BatchNorm1d(num_features = hidden)) # normalize before tanh
                
                if activation == 'tanh':
                    ff_nn.add_module(name = 'tanh', module = torch.nn.Tanh()) # tanh function for hidden layer
                    
                elif activation == 'ReLU':
                    ff_nn.add_module(name = 'ReLU', module = torch.nn.ReLU()) # tanh function for hidden layer
                    
                ff_nn.add_module(name = '2nd linear', module = torch.nn.Linear(in_features = hidden, out_features = 1, bias = True)) # 2nd linear layer

                # mse loss
                loss_function = torch.nn.MSELoss()

                # adam optimizer
                adam = torch.optim.Adam(ff_nn.parameters(), lr=lr, weight_decay=lam)

                for epoch in range(num_epochs): 
                    for batch in train_loader:
                        model_output = ff_nn(Variable(batch[0]).float()) # model predictions
                        targets = Variable(batch[1].float()) # true digit values

                        adam.zero_grad() # zero gradient
                        loss_batch = loss_function(model_output, targets) # compute loss
                        loss_batch.backward() # take the gradient wrt parameters
                        adam.step() # update parameters
                
                # get validation R2
                val_r2 = compute_accuracy(val_loader, ff_nn)
                
                print('lambda=' + str(lam) + ', learning rate=' + str(lr) + ', hidden units=' + str(hidden) + ', activation=' + activation)
                print(str(val_r2) + ' ' + str(count))
                count += 1
                
                if val_r2 > max_val_r2: # compare current validation R2 to best R2 obtained so far
                    final_lr = lr
                    final_lam = lam
                    final_hidden = hidden
                    final_activation = activation
                    train_r2 = compute_accuracy(val_loader, ff_nn)
                    max_val_r2 = compute_accuracy(val_loader, ff_nn)
                    test_r2 = compute_accuracy(test_loader, ff_nn)
                        

print('lambda=' + str(final_lam) + ', learning rate=' + str(final_lr) + ', hidden units=' + str(final_hidden) + ', activation=' + final_activation)
print('Train R2: ' + str(train_r2))
print('Val R2: ' + str(max_val_r2))
print('Test R2: ' + str(test_r2))

lambda=0.1, learning rate=0.1, hidden units=33, activation=tanh
0.671635039623 0
lambda=0.1, learning rate=0.1, hidden units=33, activation=ReLU
0.697424717558 1
lambda=0.1, learning rate=0.1, hidden units=67, activation=tanh
0.668153921417 2
lambda=0.1, learning rate=0.1, hidden units=67, activation=ReLU
0.694663874097 3
lambda=0.1, learning rate=0.1, hidden units=100, activation=tanh
0.684034833585 4
lambda=0.1, learning rate=0.1, hidden units=100, activation=ReLU
0.715791100132 5
lambda=0.1, learning rate=0.1, hidden units=133, activation=tanh
0.696561124378 6
lambda=0.1, learning rate=0.1, hidden units=133, activation=ReLU
0.695907746724 7
lambda=0.1, learning rate=0.1, hidden units=166, activation=tanh
0.575040774688 8
lambda=0.1, learning rate=0.1, hidden units=166, activation=ReLU
0.707583862344 9
lambda=0.001, learning rate=0.1, hidden units=33, activation=tanh
0.757113680256 10
lambda=0.001, learning rate=0.1, hidden units=33, activation=ReLU
0.705366835038 11
lambda=0.001, le

lambda=0.001, learning rate=0.001, hidden units=166, activation=tanh
0.767097956802 98
lambda=0.001, learning rate=0.001, hidden units=166, activation=ReLU
0.767450629618 99
lambda=1e-05, learning rate=0.001, hidden units=33, activation=tanh
0.759880321532 100
lambda=1e-05, learning rate=0.001, hidden units=33, activation=ReLU
0.762608342514 101
lambda=1e-05, learning rate=0.001, hidden units=67, activation=tanh
0.763984215251 102
lambda=1e-05, learning rate=0.001, hidden units=67, activation=ReLU
0.766231591504 103
lambda=1e-05, learning rate=0.001, hidden units=100, activation=tanh
0.764337798884 104
lambda=1e-05, learning rate=0.001, hidden units=100, activation=ReLU
0.768048106217 105
lambda=1e-05, learning rate=0.001, hidden units=133, activation=tanh
0.744271760693 106
lambda=1e-05, learning rate=0.001, hidden units=133, activation=ReLU
0.750333061205 107
lambda=1e-05, learning rate=0.001, hidden units=166, activation=tanh
0.771503876317 108
lambda=1e-05, learning rate=0.001, hid