In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
#Switching directories for easy access to the data
# The default below is the original author's local path. Set the BETO_DATA_DIR
# environment variable to point at the data folder on any other machine so the
# notebook can run without editing this cell.
data = os.environ.get('BETO_DATA_DIR', '/Users/alexchoe/Desktop/Capstone/BETO2020-master/data/carbon/')
# NOTE: os.chdir changes the working directory for the whole kernel; every
# relative path used in later cells is resolved against this folder.
os.chdir(data)

In [None]:
# Load the spreadsheet into a DataFrame so pre-processing can be done with pandas.
# 'Data.xlsx' is a relative path, resolved against the current working directory.
W2V_df = pd.read_excel('Data.xlsx')
n_rows, n_cols = W2V_df.shape
print((n_rows, n_cols))  # quick sanity check on the table size
W2V_df.head(5)

In [None]:
# Training hyper-parameters, kept together so they are easy to tune:
#   num_epochs    - number of epochs to train for
#   batch_size    - samples per DataLoader batch
#   learning_rate - learning rate value
num_epochs, batch_size, learning_rate = 100, 50, 0.008

In [None]:
X = W2V_df[['Word 1', 'Word 2']] #Input features used to make predictions
Y = W2V_df[['Syn','NonSyn']] #Target features to be predicted

#split dataset into separate testing and training datasets (80% train / 20% test, shuffled)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, shuffle = True)

#separate the two target columns for each split
syn_train = y_train['Syn']
syn_test = y_test['Syn']
nonsyn_train = y_train['NonSyn']
nonsyn_test = y_test['NonSyn']

#convert pd.DataFrame/Series -> np.ndarray (float32) -> torch.tensor
# NOTE(review): the float32 cast assumes 'Word 1' / 'Word 2' already hold numeric values - confirm.
x_train_tensor = torch.tensor(x_train.values.astype(np.float32))
syn_train_tensor = torch.tensor(syn_train.values.astype(np.float32))
# Fix: this tensor used to be named `ant_train_tensor` and was built from the 'Syn'
# column, leaving `nonsyn_train_tensor` (used just below) undefined.
nonsyn_train_tensor = torch.tensor(nonsyn_train.values.astype(np.float32))

#create tensor with features and targets; each sample is (features, syn, nonsyn)
train_tensor = torch.utils.data.TensorDataset(x_train_tensor, syn_train_tensor, nonsyn_train_tensor)
#create iterable dataset with batches
training_data_set = torch.utils.data.DataLoader(dataset = train_tensor, batch_size = batch_size, shuffle = True)

x_test_tensor = torch.tensor(x_test.values.astype(np.float32))
syn_test_tensor = torch.tensor(syn_test.values.astype(np.float32))
nonsyn_test_tensor = torch.tensor(nonsyn_test.values.astype(np.float32))

# Fix: the test dataset now carries both targets, mirroring the training dataset,
# so every batch has the layout (features, syn, nonsyn).
test_tensor = torch.utils.data.TensorDataset(x_test_tensor, syn_test_tensor, nonsyn_test_tensor)
testing_data_set = torch.utils.data.DataLoader(dataset = test_tensor, batch_size = batch_size, shuffle = True)

In [None]:
#Defining the neural network
class syn_NN(nn.Module):
    """Small fully connected network producing two output values per sample.

    Layer sizes: ``in_dims -> out_dims -> 32 -> 16 -> 2``.

    NOTE(review): no activation function is applied between the linear layers,
    so the whole stack is mathematically a single affine map of the input.
    If non-linear capacity is intended, add activations in ``forward``.

    Args:
        in_dims: number of input features per sample.
        out_dims: width of the first ("embedding") linear layer.
    """

    def __init__(self, in_dims, out_dims):
        # nn.Module must be initialised before sub-modules are assigned;
        # without this call the first `self.<layer> = nn.Linear(...)` raises
        # AttributeError ("cannot assign module before Module.__init__() call").
        super().__init__()

        #embedding layer
        self.em_layer = nn.Linear(in_dims, out_dims)

        #hidden layers
        self.h_layer1 = nn.Linear(out_dims, 32)
        self.h_layer2 = nn.Linear(32, 16)

        #output layer
        self.o_layer = nn.Linear(16, 2)

    def forward(self, x):
        """Pass ``x`` through every layer in order.

        Args:
            x: tensor whose last dimension has size ``in_dims``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 2.
        """
        #data enters embedding layer
        out = self.em_layer(x)

        #embedded data is passed to hidden layers
        out = self.h_layer1(out)
        out = self.h_layer2(out)

        #hidden representation is passed to the output layer
        syn_out = self.o_layer(out)

        return syn_out

In [None]:
def train_model(model, training_data_set, optimizer, syn_criterion=None):
    """Train ``model`` for one epoch over ``training_data_set``.

    Args:
        model: network to train; called as ``model(inputs)``.
        training_data_set: iterable of batches. Each batch is a sequence whose
            first item is the input tensor and whose remaining items are the
            label tensors, one 1-D tensor per model output column (for example
            ``(features, syn, nonsyn)``).
        optimizer: optimizer whose ``step()`` applies the parameter update.
        syn_criterion: optional loss callable, invoked as
            ``syn_criterion(predictions, labels)``. When omitted,
            ``PhysLoss.ThresholdedMSELoss(lower=0, upper=6)`` is used.
            ``PhysLoss`` is not imported in the notebook cells visible here, so
            it must already be available in the namespace for the default to work.
            NOTE(review): assumes that loss accepts (predictions, labels)
            positionally - confirm against PhysLoss.

    Returns:
        Tuple ``(train_epoch_loss, syn_train_epoch_loss)``. Each is a
        one-element list holding the mean per-batch loss for this epoch.

    Raises:
        ValueError: if ``training_data_set`` yields no batches.
    """
    train_epoch_loss = []
    syn_train_epoch_loss = []

    # per-batch losses collected over the epoch
    train_losses = []
    syn_train_losses = []
    train_total = 0

    #switch model to training mode
    model.train()
    if syn_criterion is None:
        syn_criterion = PhysLoss.ThresholdedMSELoss(lower = 0, upper = 6)

    for batch in training_data_set:
        # split the batch into inputs and label tensors; stacking the labels
        # column-wise lines them up with the columns of the model output
        inputs, *label_parts = batch
        labels = torch.stack(label_parts, dim = 1)

        model.zero_grad() #zero out any gradients from prior loops
        syn_out = model(inputs) #gather model predictions for this loop

        #calculate error in the predictions
        syn_loss = syn_criterion(syn_out, labels)

        # only one loss term exists, so the total loss equals the syn loss
        total_loss = syn_loss

        #backpropagate and update the parameters
        total_loss.backward()
        optimizer.step()

        #save loss for this batch
        train_losses.append(total_loss.item())
        syn_train_losses.append(syn_loss.item())
        train_total += 1

    if train_total == 0:
        raise ValueError("training_data_set yielded no batches")

    #calculate and save total error for this epoch of training
    epoch_loss = sum(train_losses)/train_total
    train_epoch_loss.append(epoch_loss)

    syn_train_epoch_loss.append(sum(syn_train_losses)/train_total)

    #report progress
    print(f"Total Epoch Training Loss = {train_epoch_loss}")

    return train_epoch_loss, syn_train_epoch_loss

In [None]:
def eval_model(model, testing_data_set, optimizer, syn_criterion=None, accuracy=None):
    """Evaluate ``model`` over ``testing_data_set`` without updating it.

    Args:
        model: network to evaluate; called as ``model(inputs)``.
        testing_data_set: iterable of batches. Each batch is a sequence whose
            first item is the input tensor and whose remaining items are the
            label tensors, one 1-D tensor per model output column (for example
            ``(features, syn, nonsyn)``).
        optimizer: unused; kept so existing callers keep working.
        syn_criterion: optional loss callable, invoked as
            ``syn_criterion(predictions, labels)``. Defaults to
            ``PhysLoss.ThresholdedMSELoss(lower=0, upper=6)``.
        accuracy: optional metric callable, invoked as
            ``accuracy(predictions, labels)``. Defaults to ``PhysLoss.MAPE()``.
            ``PhysLoss`` is not imported in the notebook cells visible here, so
            it must already be available in the namespace for the defaults to work.
            NOTE(review): assumes both PhysLoss callables accept
            (predictions, labels) positionally - confirm against PhysLoss.

    Returns:
        Tuple ``(test_epoch_loss, syn_test_epoch_loss, syn_epoch_acc)`` of
        floats: the mean per-batch total loss, syn loss and accuracy metric.

    Raises:
        ValueError: if ``testing_data_set`` yields no batches.
    """
    #evaluate the model
    model.eval()

    if syn_criterion is None:
        syn_criterion = PhysLoss.ThresholdedMSELoss(lower = 0, upper = 6)
    if accuracy is None:
        accuracy = PhysLoss.MAPE()

    # run on whatever device the model lives on instead of relying on a global
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    #don't track gradients during evaluation b/c not training
    with torch.no_grad():
        test_losses = []
        syn_test_losses = []
        syn_test_acc_list = []

        test_total = 0

        for batch in testing_data_set:
            # split the batch into inputs and label tensors; stacking the labels
            # column-wise lines them up with the columns of the model output
            inputs, *label_parts = batch
            inputs = inputs.to(device)
            syn_labels = torch.stack(label_parts, dim = 1).to(device)

            syn_out = model(inputs)

            # calculate loss per batch of testing data
            syn_test_loss = syn_criterion(syn_out, syn_labels)

            # only one loss term exists, so the total loss equals the syn loss
            test_loss = syn_test_loss

            test_losses.append(test_loss.item())
            syn_test_losses.append(syn_test_loss.item())
            test_total += 1

            syn_acc = accuracy(syn_out, syn_labels)
            syn_test_acc_list.append(syn_acc.item())

        if test_total == 0:
            raise ValueError("testing_data_set yielded no batches")

        test_epoch_loss = sum(test_losses)/test_total
        syn_test_epoch_loss = sum(syn_test_losses)/test_total

        syn_epoch_acc = sum(syn_test_acc_list)/test_total

        print(f"Total Epoch Testing Loss = {test_epoch_loss}")
        print(f"Epoch MAPE: Syn = {syn_epoch_acc}")
    return test_epoch_loss, syn_test_epoch_loss, syn_epoch_acc