In [1]:
!pip install tqdm



In [2]:
from datetime import datetime
import os
import pickle
import math
import time

from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import ParserModel
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

In [3]:
# -----------------
# Primary Functions
# -----------------
def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005):
    """ Train the neural dependency parser.

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param output_path (str): Path to which model weights and results are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    """
    best_dev_UAS = 0


    ### YOUR CODE HERE (~2-7 lines)
    ### TODO:
    ###      1) Construct Adam Optimizer in variable `optimizer`
    
    # we will need to pass our parameters to the optimizer
    #  pass it an iterable containining parameters we want optimized
    # in our case this is going to take the parser model.model.param, which comes from nn.module()
    # because our neural parser inherits from torch.nn - https://pytorch.org/docs/stable/nn.html
    optimizer = optim.Adam(parser.model.parameters())
    
    ###      2) Construct the Cross Entropy Loss Function in variable `loss_func`
    ### we end up passing this into the train_for_epoch
    # just simply instantiation of class CEL, which we then use for output loss 
    loss_func = nn.CrossEntropyLoss()
    
    
    
    ### Hint: Use `parser.model.parameters()` to pass optimizer
    ###       necessary parameters to tune.
    ### Please see the following docs for support:
    ###     Adam Optimizer: https://pytorch.org/docs/stable/optim.html
    ###     Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss


    ### END YOUR CODE

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
        dev_UAS = train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            print("New best dev UAS! Saving model.")
            torch.save(parser.model.state_dict(), output_path)
        print("")


In [4]:
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train the neural dependency parser for single epoch.

    Note: In PyTorch we can signify train versus test and automatically have
    the Dropout Layer applied and removed, accordingly, by specifying
    whether we are training, `model.train()`, or evaluating, `model.eval()`

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size
    @param lr (float): learning rate

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    parser.model.train() # Places model in "train" mode, i.e. apply dropout layer
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()

    with tqdm(total=(n_minibatches)) as prog:
        for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
            
            # this just makes sure we have fresh gradients each run....zeroes out existing grad 
            # recommended in documentation 
            optimizer.zero_grad()   # remove any baggage in the optimizer
            loss = 0. # store loss for this batch here -> don't see the point of this given what we do below
            train_x = torch.from_numpy(train_x).long()
            train_y = torch.from_numpy(train_y.nonzero()[1]).long()

            ### YOUR CODE HERE (~5-10 lines)
            ### TODO:
            ###      1) Run train_x forward through model to produce `logits`
            
            logits = parser.model.forward(train_x)
            
            ###      2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function.
            ###         This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss
            ###         between softmax(`logits`) and `train_y`. Remember that softmax(`logits`)
            ###         are the predictions (y^ from the PDF).
            
            # loss_func is just nn.CrossEntropyLoss: https://pytorch.org/docs/stable/nn.html#crossentropyloss
            # output = loss(input, target)
            # output.backward()
            loss = loss_func(logits, train_y)
            
            
            ###      3) Backprop losses - running 
            loss.backward()
            
            ###      4) Take step with the optimizer
            ### Please see the following docs for support:
            ###     Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step
            
            # Assumption: We pass in an instance of Optimizer which stores our weights....
            # I think i will build optimizer in earlier functions
            optimizer.step()
            
            

            ### END YOUR CODE
            prog.update(1)
            loss_meter.update(loss.item())

    print ("Average Train Loss: {}".format(loss_meter.avg))

    print("Evaluating on dev set",)
    parser.model.eval() # Places model in "eval" mode, i.e. don't apply dropout layer
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS

### Running the full process:

- This should just output a very small set.

- From assignment: 
    - When running with debug=True, you should be able to get a loss smaller than 0.2 and a UAS
larger than 65 on the dev set (although in rare cases your results may be lower, there is some
randomness when training).
   - It should take about 1 hour to train the model on the entire the training dataset, i.e., when
debug=False.

   

In [5]:
debug = False

# hope this is not an issue
#assert(torch.__version__ == "1.0.0"),  "Please install torch version 1.0.0"

print(80 * "=")
print("INITIALIZING")
print(80 * "=")
parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug)

start = time.time()
model = ParserModel(embeddings)
parser.model = model
print("took {:.2f} seconds\n".format(time.time() - start))

print(80 * "=")
print("TRAINING")
print(80 * "=")
output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
output_path = output_dir + "model.weights"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005)

if not debug:
    print(80 * "=")
    print("TESTING")
    print(80 * "=")
    print("Restoring the best model weights found on the dev set")
    parser.model.load_state_dict(torch.load(output_path))
    print("Final evaluation on test set",)
    parser.model.eval()
    UAS, dependencies = parser.parse(test_data)
    print("- test UAS: {:.2f}".format(UAS * 100.0))
    print("Done!")


INITIALIZING
Loading data...
took 2.95 seconds
Building parser...
took 1.50 seconds
Loading pretrained embeddings...
took 3.59 seconds
Vectorizing data...
took 2.06 seconds
Preprocessing training data...


  0%|          | 0/1848 [00:00<?, ?it/s]

took 59.45 seconds
took 0.03 seconds

TRAINING
Epoch 1 out of 10


100%|██████████| 1848/1848 [04:44<00:00,  6.49it/s]


Average Train Loss: 0.17285270159620614
Evaluating on dev set


1445850it [00:00, 8026607.10it/s]       
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 84.56
New best dev UAS! Saving model.

Epoch 2 out of 10


100%|██████████| 1848/1848 [04:42<00:00,  6.55it/s]


Average Train Loss: 0.11140619965826536
Evaluating on dev set


1445850it [00:00, 24322229.46it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 86.65
New best dev UAS! Saving model.

Epoch 3 out of 10


100%|██████████| 1848/1848 [04:41<00:00,  6.57it/s]


Average Train Loss: 0.09637250826448138
Evaluating on dev set


1445850it [00:00, 42238388.83it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 87.29
New best dev UAS! Saving model.

Epoch 4 out of 10


100%|██████████| 1848/1848 [04:41<00:00,  6.56it/s]


Average Train Loss: 0.08726071365161504
Evaluating on dev set


1445850it [00:00, 35639641.26it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 87.49
New best dev UAS! Saving model.

Epoch 5 out of 10


100%|██████████| 1848/1848 [04:42<00:00,  6.55it/s]


Average Train Loss: 0.08039719181022409
Evaluating on dev set


1445850it [00:00, 28147032.46it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 87.99
New best dev UAS! Saving model.

Epoch 6 out of 10


100%|██████████| 1848/1848 [04:39<00:00,  6.61it/s]


Average Train Loss: 0.07483406893675139
Evaluating on dev set


1445850it [00:00, 40325930.71it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 87.99

Epoch 7 out of 10


100%|██████████| 1848/1848 [04:40<00:00,  6.58it/s]


Average Train Loss: 0.07007720324440629
Evaluating on dev set


1445850it [00:00, 29419522.53it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 87.98

Epoch 8 out of 10


100%|██████████| 1848/1848 [04:41<00:00,  6.56it/s]


Average Train Loss: 0.06603788080306731
Evaluating on dev set


1445850it [00:00, 28442877.89it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 88.19
New best dev UAS! Saving model.

Epoch 9 out of 10


100%|██████████| 1848/1848 [04:40<00:00,  6.59it/s]


Average Train Loss: 0.06218883276663043
Evaluating on dev set


1445850it [00:00, 24192115.81it/s]      
  0%|          | 0/1848 [00:00<?, ?it/s]

- dev UAS: 88.32
New best dev UAS! Saving model.

Epoch 10 out of 10


100%|██████████| 1848/1848 [04:46<00:00,  6.46it/s]


Average Train Loss: 0.05876554490280745
Evaluating on dev set


1445850it [00:00, 45239684.28it/s]      


- dev UAS: 87.95

TESTING
Restoring the best model weights found on the dev set
Final evaluation on test set


2919736it [00:00, 39760843.85it/s]      

- test UAS: 88.82
Done!



