In [16]:
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import pandas as pd
import numpy as np
import sklearn.preprocessing

In [8]:
# load the already-imputed sensor/census table from disk (path is relative to the notebook)
sensor_census_imp = pd.read_csv(filepath_or_buffer = '../data/sensor_census_imputed_rf.csv')

In [18]:
# Site-level train/val/test split: whole sites are held out, so rows from one
# site never land in more than one split.
np.random.seed(1)  # fixed seed so the site draw below is reproducible

# one fifth of the distinct sites are held out for validation + test
site_ids = np.unique(sensor_census_imp['site'].values)
val_test_sites = np.random.choice(site_ids, round(len(site_ids) / 5), replace = False)

# half of the held-out sites become the test sites (the rest are validation sites)
held_out_ids = np.unique(val_test_sites)
test_sites = np.random.choice(held_out_ids, round(len(held_out_ids) / 2), replace = False)

# row-level membership masks, computed once and reused for all three splits
in_held_out = sensor_census_imp['site'].isin(val_test_sites)
in_test = sensor_census_imp['site'].isin(test_sites)

# train rows: every site that was not held out
sensor_census_imp_train = sensor_census_imp.loc[~in_held_out]
# val rows: held-out sites that were not drawn for test
sensor_census_imp_val = sensor_census_imp.loc[in_held_out & ~in_test]
# test rows: the test sites
sensor_census_imp_test = sensor_census_imp.loc[in_test]

# x/y split by position: column 1 is the target, columns 2 onward are the features
sensor_census_imp_train_y = sensor_census_imp_train.iloc[:, 1]
sensor_census_imp_train_x = sensor_census_imp_train.iloc[:, 2:]

sensor_census_imp_val_y = sensor_census_imp_val.iloc[:, 1]
sensor_census_imp_val_x = sensor_census_imp_val.iloc[:, 2:]

sensor_census_imp_test_y = sensor_census_imp_test.iloc[:, 1]
sensor_census_imp_test_x = sensor_census_imp_test.iloc[:, 2:]

# standardize features: mean/std are estimated on the training rows only and
# then applied unchanged to val and test, so no held-out statistics leak in
standardizer = sklearn.preprocessing.StandardScaler(with_mean = True, with_std = True)
sensor_census_imp_train_x_stand = standardizer.fit_transform(sensor_census_imp_train_x)
sensor_census_imp_val_x_stand = standardizer.transform(sensor_census_imp_val_x)
sensor_census_imp_test_x_stand = standardizer.transform(sensor_census_imp_test_x)

In [17]:
# pair each split's standardized features with its targets in a TensorDataset
# NOTE(review): torch.from_numpy keeps the numpy dtype, so float64 arrays give
# double tensors -- confirm the training code casts them to match the model.
train, val, test = (
    TensorDataset(torch.from_numpy(features), torch.from_numpy(targets.values))
    for features, targets in (
        (sensor_census_imp_train_x_stand, sensor_census_imp_train_y),
        (sensor_census_imp_val_x_stand, sensor_census_imp_val_y),
        (sensor_census_imp_test_x_stand, sensor_census_imp_test_y),
    )
)

# mini-batch loaders: only the training loader shuffles; val/test keep row order
batch_size = 100
train_loader, validation_loader, test_loader = (
    DataLoader(dataset = split, batch_size = batch_size, shuffle = reshuffle)
    for split, reshuffle in ((train, True), (val, False), (test, False))
)

In [None]:
# model architecture: linear -> batch norm -> tanh -> linear (single output).
# The layers are registered on a Sequential container, which calls its children
# in insertion order. Registering them with add_module on a bare torch.nn.Linear
# would only attach them as children: Linear.forward never invokes them, so the
# network would silently stay a single linear layer with 100 outputs.
hidden_size = 100  # width of the hidden layer, shared by the three layers below

ff_nn = torch.nn.Sequential()
ff_nn.add_module(name = 'linear', module = torch.nn.Linear(in_features = sensor_census_imp_train_x_stand.shape[1], out_features = hidden_size, bias = True)) # linear layer
ff_nn.add_module(name = 'norm', module = torch.nn.BatchNorm1d(num_features = hidden_size)) # normalize before tanh
ff_nn.add_module(name = 'tanh', module = torch.nn.Tanh()) # tanh function for hidden layer
ff_nn.add_module(name = '2nd linear', module = torch.nn.Linear(in_features = hidden_size, out_features = 1, bias = True)) # 2nd linear layer