# Predict Chart Type with a Neural Network Model

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import datetime
import copy
import torch
from torch import nn, optim
import torch.nn.functional as F
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
import os
from os.path import join
import sys
base_path = os.path.abspath(os.path.join('..'))
if base_path not in sys.path:
    sys.path.append(base_path)
    
import neural_network.util as util
import neural_network.train as train
import neural_network.evaluate as evaluate
from helpers.processing import *
from helpers.analysis import *

RANDOM_STATE = 42

features_directory = '../features/processed'
saves_directory = '../neural_network/saves'
num_datapoints = None  # None if you want all of the data
model_prefix = 'agg'

In [298]:
# all the parameters can be adjusted here

# num_epochs: the max number of epochs we will train the NN for
# hidden_sizes: the number of neurons in each hidden layer, enter it as a list
# output_dim: the dimension of the output. Since outputs are 'line', 'scatter', 'bar', it's 9
#                                                            + 'circos' ; +  'table' will be 5
# weight_decay: how much to decay LR in the NN. This can be set to 0 since we decrease LR already through
#   the ReduceLROnPlateau() function
# dropout: the dropout in each layer
# patience: how many epochs we go through (with a near constant learning rate, this threshold is adjusted using
#   threshold) before dropping learning rate by a factor of 10
# model_prefix: all models will be loaded/saved with the prefix of the file in the beginning
# save_model: save each epoch's model onto models/ folder.
# print_test: print test accuracies into test.txt
# test_best: test the test accuracy of the best model we've found (best model determined using val accuracy)

# note: training is automatically stopped when learning rate < 0.01 *
# batch_size is determined in the dataloader, so the variable is irrelevant here
batch_size = 200
num_epochs = 100
hidden_sizes = [800, 800, 800] # AdvancedNet (from vizML) uses a list of hidden size values
hidden_size = 800 # FeedForward model uses a single hidden size value
learning_rate = 5e-4
weight_decay = 0
dropout = 0.0
patience = 10
threshold = 1e-3
input_dim = X_train.shape[1]
input_size = X_train.shape[1]
output_dim = len(chart_names)
output_period = 0 # output_period: output training loss every x batches
model_prefix = 'agg'
only_train = False
save_model = False
test_best = False
print_test = True

num_workers = 0

criterion = torch.nn.BCEWithLogitsLoss()                                      # Define the criterion (loss function). Here we will use Binary Cross Entropy with Logits
optimizer = torch.optim.Adam(Feedforward(input_dim, hidden_size, output_dim).parameters(), lr=learning_rate)    # Define the optimizer with model parameters and learning rate.

In [254]:
# for vizML model: 
parameters = {
    'batch_size': 200,
    'num_epochs': 100,
    'hidden_sizes': [800, 800, 800],
    'learning_rate': 5e-4,
    'output_dim': len(chart_names),
    'weight_decay': 0,
    'dropout': 0.00,
    'patience': 20,
    'threshold': 1e-3,
    'model_prefix': 'agg',
    'save_model': False,
    'print_test': True,
    'test_best': False
}

### Load Data
This data is created from vizML's features data using the notebook "Load and Clean Plotly Data".
Start there, or download features_with_9_chart_type_labels_888k.csv here (still waiting for it to upload, link coming soon)

In [3]:
data_dir_name = '../data'
data_file_name = 'features_with_chart_type_labels_888k.csv'
df_full = pd.read_csv(os.path.join(data_dir_name, data_file_name))
df_full.info()
# df = df_full # uncomment this one to run everything on the full data set of 888k+ charts

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 888484 entries, 0 to 888483
Columns: 849 entries, Unnamed: 0 to labels
dtypes: bool(120), float64(726), int64(1), object(2)
memory usage: 4.9+ GB


### Subset Data
Create a subset of the data for testing the notebook on a laptop. 
On a bigger machine, you can run the model on full data by uncommenting the df = df_full above

In [233]:
# take a random subset of data to be able to make and run models on my laptop
# later, skip this step and run df=df_full to run model on full data set

# create a series of weights corresponding to the frequency of each label in the data
label_weights = df_full.groupby('labels')['labels'].transform('count')
# create random sample 
subset_size = 10000
RANDOM_STATE = 42
df = df_full.sample(n=subset_size, replace=False, weights=label_weights, random_state=RANDOM_STATE)

In [19]:
### Prepare features and labels

In [346]:
# make features data frame without ids or chart type labels
# save a list of column labels for later
chart_names = df['labels'].unique().tolist()

# one-hot encode labels
labels = pd.get_dummies(df['labels'])
labels.head() # take a peek at one-hot encoded labels

features = df.iloc[:,2:]
features.drop('labels', axis=1, inplace=True)
# we need to encode label names to a zero-indexed series of intigers
# create a dictionary of classes to indexes
## class2idx = {val : idx for idx, val in enumerate(chart_names)}
# and a dictionary of the reverse to use later to reverse the encoding
## idx2class = {v: k for k, v in class2idx.items()}
# create (encoded) labels data frame
## labels = df['labels'].replace(class2idx)
## print(class2idx)

labels.head() # take a peek at encoded labels

Unnamed: 0,bar,box,heatmap,histogram,line,pie,scatter
338859,0,0,0,0,0,0,1
845535,0,0,0,0,1,0,0
654110,0,0,0,0,0,0,1
537280,0,0,0,0,0,0,1
143080,0,0,0,0,1,0,0


In [347]:
### Split train/val/test data and save matricies to disk

In [348]:
# Convert pandas dataframe data to numpy datatypes
X = features.to_numpy()                     # X is the image ID (the first column in the dataframe)
y = labels.to_numpy()   # y is the one-hot encoded label vector (all but the first column in the dataframe)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)               # Create train and test sets from entire dataset
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)    # Create train and val sets from train set. Here is where you could also do k-fold cross validation.

print(f'X_train: {len(X_train)}, X_val: {len(X_val)}, X_test: {len(X_test)}, Total: {len(X_train)+len(X_val)+len(X_test)}')
print(f'y_train: {len(y_train)}, y_val: {len(y_val)}, y_test: {len(y_test)}, Total: {len(y_train)+len(y_val)+len(y_test)}')

X_train: 7650, X_val: 1350, X_test: 1000, Total: 10000
y_train: 7650, y_val: 1350, y_test: 1000, Total: 10000


In [349]:
# cast data as torch tensors
X_train, y_train = torch.from_numpy(
    X_train.astype(np.float64, copy=False)), torch.from_numpy(y_train.astype(np.float64))
X_val, y_val = torch.from_numpy(X_val.astype(np.float64, copy=False)), torch.from_numpy(y_val)
X_test, y_test = torch.from_numpy(X_test.astype(np.float64, copy=False)), torch.from_numpy(y_test)

# load datasets
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

# create dataloaders that handle batch sizes
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers)
test_dataloader = torch.utils.data.DataLoader(
    test_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers)

### Set Device and Define Model Parameters

In [350]:
# set device to cuda if available, cpu if not
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
print("device is ", device, "  dtype is ", dtype)	

device is  cpu   dtype is  <class 'torch.FloatTensor'>


### Define model and train, test, and fit options

In [351]:
# define a model that can be passed the above parameters
class Feedforward(nn.Module):
    '''
    This is my own model, more explicitly written
    Less flexible than AdvancedNet (from VizML, below) becayse you can't change the 
    architecture with a change to hidden_sizes -- but identical otherwise.
    Useful for debugging the cuda stuff.
    Change models below 
    '''
    def __init__(self, input_dim, hidden_size, output_dim, dropout=0.0):
        super(Feedforward, self).__init__()
        self.input_dim = input_dim
        self.hidden_size = hidden_size
        self.output_dim = output_dim
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, hidden_size)
        self.fc5 = nn.Linear(hidden_size, output_dim)
            
    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = nn.functional.relu(self.fc4(x))
        x = self.fc5(x)
        return F.log_softmax(x, dim=0)
    
def train(model, dataloader, optimizer, criterion, device):
    model.train()                                                  # Set model parameters to training mode
    running_loss = 0.0                                             # Keep track of running loss so average loss can be calculated at the end of the epoch
    all_true = []                                                  # Keep track of all of the true and predicted labels
    all_pred = []                                                  #   so AUC score can be calculated at the end of the epoch
    all_scores = []
    
    for i, (inputs, labels) in enumerate(dataloader):              # Loop through all batches in the dataloader
        
        all_true += np.argmax(labels, axis=1).tolist()                                # Save true labels
        inputs = inputs.to(device)                                 # Send inputs and labels to computing device. If the model is on 'cuda',
        labels = labels.to(device)                                 #   the inputs and labels must also be on cuda
        
        optimizer.zero_grad()                                      # Zero the gradients so the optimizer can keep track of a new pass of data through the network
        pred = model(inputs.to(dtype=torch.float, device=device))  # Pass a batch through the model
        loss = criterion(pred, labels)        # Calculate the loss between the ground truth labels and the model predictions
        loss.backward()                                            # Perform backpropagation on the loss to train the network
        optimizer.step()                                           # Step the optimizer forward
        
        all_pred += np.argmax(pred.detach().numpy(), axis=1).tolist()                   # Keep track of model predictions to calculate AUC later.
                                                                   # When we want to make predictions with our model without calculating BCEWithLogitsLoss,
                                                                   #   we must manually apply the sigmoid function to the model output to get predicted values
                                                                   #   between 0 and 1. tolist() simply converts the result to a normal Python list.
        
        running_loss += loss.item()
    return running_loss / (i+1) , f1_score(all_true, all_pred, average='micro')  # Return the average loss over the epoch and the f1 micro score

def test(model, dataloader, criterion, device):
    model.eval()                                                   # Set model parameters to evaluation mode
    running_loss = 0.0
    all_true = []
    all_pred = []
    for i, (inputs, labels) in enumerate(dataloader):
        all_true += np.argmax(labels, axis=1).tolist()
        inputs = inputs.to(device)
        labels = labels.to(device=device)
        
        with torch.no_grad():                                      # Don't track gradients through the model
            model(inputs.to(dtype=torch.float, device=device)) 
            loss = criterion(pred, labels)
            all_pred += np.argmax(pred.detach().numpy(), axis=1).tolist()

        
        running_loss += loss.item()
    return running_loss / (i+1), f1_score(all_true, all_pred, average='micro') 

def fit(model, train_dataloader, val_dataloader, optimizer, criterion, device, num_epochs):
    # Log start time
    start_time = str(datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
    msg = f'Start model training {start_time}'
    with open('log.txt', 'a') as f: f.write(msg+'\n')
    print(msg)
    
    # Initialize best model weights, AUC score, and keep track of train/val loss and AUC
    best_model_wts = copy.deepcopy(model.state_dict())
    best_model_name = ''
    best_score = -1
    track_values = {'train_loss': [],
                    'val_loss': [],
                    'train_score': [],
                    'val_score': []}

    # Calculate initial loss and score on train and validation sets
    start = time.time()
    train_loss, train_score = test(model, train_dataloader, criterion, device)
    val_loss, val_score = test(model, val_dataloader, criterion, device)
    
    # Store initial losses and AUC scores
    track_values['train_loss'].append(train_loss)
    track_values['val_loss'].append(val_loss)
    track_values['train_score'].append(train_score)
    track_values['val_score'].append(val_score)
    
    # Print training status and write to a log file
    msg = f'Epoch 0/{num_epochs} Train Loss: {train_loss:.4f}, Train AUC: {train_score:.4f}, Val Loss: {val_loss:.4f}, Val AUC: {val_score:.4f} Time: {time.time()-start:.2f}s'
    with open('log.txt', 'a') as f: f.write(msg+'\n')
    print(msg)

    # Loop over the specified number of epochs
    for epoch in range(num_epochs):
        start = time.time() # Start timer to keep track of how long an epoch takes

        # Run train and test functions on train and val sets
        train_loss, train_score = train(model, train_dataloader, optimizer, criterion, device)
        val_loss, val_score = test(model, val_dataloader, criterion, device)
        
        # Store losses and AUC scores
        track_values['train_loss'].append(train_loss)
        track_values['val_loss'].append(val_loss)
        track_values['train_auc'].append(train_score)
        track_values['val_auc'].append(val_score)

        # Save the model weights if the AUC score on the validation set is higher than the previous best model
        if val_score > best_score:
            best_score = val_score
            best_model_wts = copy.deepcopy(model.state_dict())
            if os.path.exists(best_model_name): os.remove(best_model_name)
            best_model_name = f'./best_model_weights_epoch_{epoch+1}_auc_{val_score:.4f}.pt'
            torch.save(model.state_dict(), best_model_name)

        # Print training status and write to a log file
        msg = f'Epoch {epoch+1}/{num_epochs} Train Loss: {train_loss:.4f}, Train AUC: {train_score:.4f}, Val Loss: {val_loss:.4f}, Val AUC: {val_score:.4f} Time: {time.time()-start:.2f}s'
        with open('log.txt', 'a') as f: f.write(msg+'\n')
        print(msg)
    
    return track_values


In [356]:
all_true = []                                                  # Keep track of all of the true and predicted labels
all_pred = []                                                  #   so AUC score can be calculated at the end of the epoch
all_scores = []
    
for i, (inputs, labels) in enumerate(train_dataloader):              # Loop through all batches in the dataloader
        
    all_true += np.argmax(labels, axis=1).tolist()                                # Save true labels
    inputs = inputs.to(device)                                 # Send inputs and labels to computing device. If the model is on 'cuda',
    labels = labels.to(device)                                 #   the inputs and labels must also be on cuda
        
    optimizer.zero_grad()                                      # Zero the gradients so the optimizer can keep track of a new pass of data through the network
    pred = model(inputs.to(dtype=torch.float, device=device))  # Pass a batch through the model
    loss = criterion(pred, labels)        # Calculate the loss between the ground truth labels and the model predictions
    loss.backward()                                            # Perform backpropagation on the loss to train the network
    optimizer.step()                                           # Step the optimizer forward
        
    all_pred += np.argmax(pred.detach().numpy(), axis=1).tolist()                   # Keep track of model predictions to calculate AUC later.
                                                                   # When we want to make predictions with our model without calculating BCEWithLogitsLoss,
                                                                   #   we must manually apply the sigmoid function to the model output to get predicted values
                                                                   #   between 0 and 1. tolist() simply converts the result to a normal Python list. 
    running_loss += loss.item()

In [360]:
for i, (inputs,lables) in enumerate(train_dataloader:
    

AttributeError: 'DataLoader' object has no attribute 'inputs'

## Train Model

In [354]:
train(model, train_dataloader, optimizer, criterion, device)

(0.7571690674515563, 0.14143790849673202)

In [355]:
# start training based on my model and train/test/fit functions: 
model = Feedforward(input_dim, hidden_size, output_dim)
track_values = fit(model, train_dataloader, val_dataloader, optimizer, criterion, device, num_epochs)


Start model training 2020-07-01-11-49-23


ValueError: Target size (torch.Size([200, 7])) must be the same as input size (torch.Size([50, 7]))

## Alternate method: The VizML model, dataloaders, evaluations 

In [238]:
# split 10% of examples into val, and 10% into test
# save matrecies to disk as np arrays using custom function from vizML
util.save_matrices_to_disk(
    features, labels, [0.1, 0.1], saves_directory, parameters['model_prefix'], num_datapoints)

number of total examples is  10000
indexes for splitting between train/val/test are  [8000, 9000]


In [239]:
# load those back from disk using another function from vizML
X_train, y_train, X_val, y_val, X_test, y_test = util.load_matrices_from_disk(
        saves_directory, parameters['model_prefix'], num_datapoints)

In [None]:
class AdvancedNet(nn.Module):
    '''
    I'm borrowing AdvancedNet from vizML. 
    It uses nn.ModuleList to construct a list of modules/layers according to the above paramiters. 
    hidden_sizes is a list where each intiger becomes the number of neurons in a hidden layer. 
    By default it is [800, 800, 800] so makes a network with 3 hidden layers.
    '''
    def __init__(self, input_size, hidden_sizes, num_classes, dropout=0.0):
        super(AdvancedNet, self).__init__()
        self.nn_list = nn.ModuleList()
        self.nn_list.append(nn.Linear(input_size, hidden_sizes[0]))
        self.nn_list.append(nn.ReLU())
        if dropout:
            self.nn_list.append(nn.Dropout(p=dropout))

        for i in range(1, len(hidden_sizes)):
            self.nn_list.append(
                nn.Linear(hidden_sizes[i - 1], hidden_sizes[i]))
            self.nn_list.append(nn.ReLU())
            if dropout:
                self.nn_list.append(nn.Dropout(p=dropout))
        self.nn_list.append(nn.Linear(hidden_sizes[-1], num_classes))

    def forward(self, x):
        for module in self.nn_list:
            x = x.float()
            x = module(x)
        return x

In [240]:
def load_datasets(X_train, y_train, X_val, y_val,
                  parameters, X_test=None, y_test=None):

    # calculate output dim
    y_combined = np.concatenate((y_train, y_val))
    if y_test is not None:
        y_combined = np.concatenate((y_combined, y_test))
    output_dim = len(np.unique(y_combined))
    print('output_dim is', output_dim)
    parameters['input_dim'] = X_train.shape[1]
    parameters['output_dim'] = output_dim

    # datasets
    # convert np matrices into torch Variables, and then feed them into a
    # dataloader
    # photon note: changed to cast X as float64
    X_train, y_train = torch.from_numpy(
        X_train.astype(np.float64, copy=False)), torch.from_numpy(y_train.astype(np.float64))
    X_val, y_val = torch.from_numpy(X_val.astype(np.float64, copy=False)), torch.from_numpy(y_val)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, shuffle=True, batch_size=batch_size, num_workers=0)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, shuffle=True, batch_size=batch_size, num_workers=0)
    test_dataloader = None

    if X_test is not None:
        X_test, y_test = torch.from_numpy(
            X_test.astype(np.float64, copy=False)), torch.from_numpy(y_test)
        test_dataset = torch.utils.data.TensorDataset(X_test, y_test)
        test_dataloader = torch.utils.data.DataLoader(
            test_dataset, shuffle=True, batch_size=batch_size, num_workers=0)

    return train_dataloader, val_dataloader, test_dataloader

In [248]:
train_dataloader, val_dataloader, test_dataloader = load_datasets(
    X_train.astype(np.float64), y_train.astype(np.float64), X_val.astype(np.float64), y_val.astype(np.float64), parameters, X_test=X_test.astype(np.float64), y_test=y_test.astype(np.float64))

output_dim is 2


In [183]:
# start training 
# using vizML's model and train functions

# nets and optimizers
criterion = nn.CrossEntropyLoss().to(device)
net = AdvancedNet(
    input_dim,
    hidden_sizes,
    output_dim,
    dropout=dropout).to(device)
optimizer = optim.Adam(
    net.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay)
# ReduceLROnPlateau reduces learning rate by factor of 10 once val loss
# has plateaued
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=patience, threshold=threshold)

num_train_batches = len(train_dataloader)
epoch = 1
best_epoch, best_acc = 0, 0
train_acc = [0]

print('Starting training at ' + util.get_time())
print(', '.join(['{}={!r}'.format(k, v)
                for k, v in sorted(parameters.items())]))

# print out test accuracies to a separate file
suffix=''
test_file = None
if print_test:
    test_file = open('test{}.txt'.format(suffix), 'a')
    test_file.write('\n\n')
    test_file.write('Starting at ' + util.get_time() + '\n')
    test_file.write(', '.join(['{}={!r}'.format(k, v) for k, v in sorted(parameters.items())]) + '\n\n')

print('starting training')
while epoch <= num_epochs:
    running_loss = 0.0
    epoch_acc = 0.0

    net.train()
    print(
        'epoch: %d, lr: %.1e' %
        (epoch,
        optimizer.param_groups[0]['lr']) +
        '    ' +
        util.get_time())
    for batch_num, (inputs, labels) in enumerate(train_dataloader, 1):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(dtype=torch.long, device=device) # changing labels dtype to torch.long seems like it worked
        outputs = net(inputs) # here's where it was hanging up before
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
        # output is 2D array (logsoftmax output), so we flatten it to a 1D to get the max index for each example
        # and then calculate accuracy off that
        max_index = outputs.max(dim=1)[1]
        epoch_acc += np.sum(max_index.data.cpu().numpy()
                           == labels.data.cpu().numpy()) / inputs.size()[0]

        # output every output_period batches
        if output_period:
            if batch_num % output_period == 0:
                print('[%d:%.2f] loss: %.3f' % (
                    epoch, batch_num * 1.0 / num_train_batches,
                    running_loss / output_period))
                running_loss = 0.0
                gc.collect()

    # save model after every epoch in models/ folder
    if save_model:
        torch.save(
            net.state_dict(),
            models_directory +
            '/' +
            model_prefix +
            ".%d" %
            epoch)

    # print training/val accuracy
    epoch_acc = epoch_acc / num_train_batches
    train_acc.append(epoch_acc)
    print('train acc: %.4f' % (epoch_acc))
    if only_train:
        scheduler.step(loss)
    else:
        val_accuracy, total_loss = evaluate.eval_error(
            net, val_dataloader, criterion)
        print('val acc: %.4f, loss: %.4f' % (val_accuracy, total_loss))
        # remember: feed val loss into scheduler
        scheduler.step(total_loss)
        if val_accuracy > best_acc:
            best_epoch, best_acc = epoch, val_accuracy
        print()

        # write test accuracy
        if print_test:
            test_accuracy, total_loss = evaluate.eval_error(
                net, test_dataloader, criterion)
            test_file.write(
                'epoch: %d' %
                (epoch) +
                '    ' +
                util.get_time() +
                '\n')
            test_file.write('train acc: %.4f' % (epoch_acc) + '\n')
            test_file.write('val acc: %.4f' % (val_accuracy) + '\n')
            test_file.write('test acc: %.4f' % (test_accuracy) + '\n')
            test_file.write('loss: %.4f' % (test_accuracy) + '\n')

        gc.collect()
            
    # perform early stopping here if our learning rate is below a threshold
    # because small lr means little change in accuracy anyways
    if optimizer.param_groups[0]['lr'] < (0.9 * 0.01 * learning_rate):
        print('Low LR reached, finishing training early')
        break
    epoch += 1

print('best epoch: %d' % best_epoch)
print('best val accuracy: %.4f' % best_acc)
print('train accuracy at that epoch: %.4f' % train_acc[best_epoch])
print('ending at', time.ctime())

if test_best:
    net.load_state_dict(
        torch.load(
            models_directory +
            '/' +
            model_prefix +
            '.' +
            str(best_epoch)))
    best_test_accuracy, total_loss = evaluate.eval_error(
        net, test_dataloader, criterion)
    test_file.write('*****\n')
    test_file.write(
        'best test acc: %.4f, loss: %.4f' %
        (best_test_accuracy, total_loss) + '\n')
    test_file.write('*****\n')
    print('best test acc: %.4f, loss: %.4f' %
        (best_test_accuracy, total_loss))

if print_test:
    test_file.write('\n')
    test_file.close()

print('\n\n\n')




Starting training at 2020-07-01 00:33:10
batch_size=200, dropout=0.0, hidden_sizes=[800, 800, 800], input_dim=846, learning_rate=0.0005, model_prefix='agg', num_epochs=100, output_dim=7, patience=20, print_test=True, save_model=False, test_best=False, threshold=0.001, weight_decay=0
starting training
epoch: 1, lr: 5.0e-04    2020-07-01 00:33:10
train acc: 0.6425


AssertionError: Torch not compiled with CUDA enabled

In [116]:
AdvancedNet(input_dim, hidden_sizes, output_dim).eval()

AdvancedNet(
  (nn_list): ModuleList(
    (0): Linear(in_features=846, out_features=800, bias=True)
    (1): ReLU()
    (2): Linear(in_features=800, out_features=800, bias=True)
    (3): ReLU()
    (4): Linear(in_features=800, out_features=800, bias=True)
    (5): ReLU()
    (6): Linear(in_features=800, out_features=7, bias=True)
  )
)