In [2]:
!pip install tqdm

Collecting tqdm
  Downloading https://files.pythonhosted.org/packages/4a/1c/6359be64e8301b84160f6f6f7936bbfaaa5e9a4eab6cbc681db07600b949/tqdm-4.45.0-py2.py3-none-any.whl (60kB)
[K    100% |████████████████████████████████| 61kB 3.1MB/s ta 0:00:011
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.45.0


In [3]:
from datetime import datetime
import os
import pickle
import math
import time

from torch import nn, optim
import torch
from tqdm import tqdm

from parser_model import ParserModel
from utils.parser_utils import minibatches, load_and_preprocess_data, AverageMeter

In [4]:
?train_for_epoch

Object `train_for_epoch` not found.


In [8]:
# -----------------
# Primary Functions
# -----------------
def train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005):
    """ Train the neural dependency parser.

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param output_path (str): Path to which model weights and results are written.
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    """
    best_dev_UAS = 0


    ### YOUR CODE HERE (~2-7 lines)
    ### TODO:
    ###      1) Construct Adam Optimizer in variable `optimizer`
    
    # we will need to pass our parameters to the optimizer
    #  pass it an iterable containining parameters we want optimized
    # in our case this is going to take the parser model.model.param, which comes from nn.module()
    # because our neural parser inherits from torch.nn - https://pytorch.org/docs/stable/nn.html
    optimizer = optim.Adam(parser.model.parameters())
    
    ###      2) Construct the Cross Entropy Loss Function in variable `loss_func`
    ### we end up passing this into the train_for_epoch
    # just simply instantiation of class CEL, which we then use for output loss 
    loss_func = nn.CrossEntropyLoss()
    
    
    
    ### Hint: Use `parser.model.parameters()` to pass optimizer
    ###       necessary parameters to tune.
    ### Please see the following docs for support:
    ###     Adam Optimizer: https://pytorch.org/docs/stable/optim.html
    ###     Cross Entropy Loss: https://pytorch.org/docs/stable/nn.html#crossentropyloss


    ### END YOUR CODE

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch + 1, n_epochs))
        dev_UAS = train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size)
        if dev_UAS > best_dev_UAS:
            best_dev_UAS = dev_UAS
            print("New best dev UAS! Saving model.")
            torch.save(parser.model.state_dict(), output_path)
        print("")


In [16]:
def train_for_epoch(parser, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train the neural dependency parser for single epoch.

    Note: In PyTorch we can signify train versus test and automatically have
    the Dropout Layer applied and removed, accordingly, by specifying
    whether we are training, `model.train()`, or evaluating, `model.eval()`

    @param parser (Parser): Neural Dependency Parser
    @param train_data ():
    @param dev_data ():
    @param optimizer (nn.Optimizer): Adam Optimizer
    @param loss_func (nn.CrossEntropyLoss): Cross Entropy Loss Function
    @param batch_size (int): batch size
    @param lr (float): learning rate

    @return dev_UAS (float): Unlabeled Attachment Score (UAS) for dev data
    """
    parser.model.train() # Places model in "train" mode, i.e. apply dropout layer
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()

    with tqdm(total=(n_minibatches)) as prog:
        for i, (train_x, train_y) in enumerate(minibatches(train_data, batch_size)):
            
            # this just makes sure we have fresh gradients each run....zeroes out existing grad 
            # recommended in documentation 
            optimizer.zero_grad()   # remove any baggage in the optimizer
            loss = 0. # store loss for this batch here -> don't see the point of this given what we do below
            train_x = torch.from_numpy(train_x).long()
            train_y = torch.from_numpy(train_y.nonzero()[1]).long()

            ### YOUR CODE HERE (~5-10 lines)
            ### TODO:
            ###      1) Run train_x forward through model to produce `logits`
            
            logits = parser.model.forward(train_x)
            
            ###      2) Use the `loss_func` parameter to apply the PyTorch CrossEntropyLoss function.
            ###         This will take `logits` and `train_y` as inputs. It will output the CrossEntropyLoss
            ###         between softmax(`logits`) and `train_y`. Remember that softmax(`logits`)
            ###         are the predictions (y^ from the PDF).
            
            # loss_func is just nn.CrossEntropyLoss: https://pytorch.org/docs/stable/nn.html#crossentropyloss
            # output = loss(input, target)
            # output.backward()
            loss = loss_func(logits, train_y)
            
            
            ###      3) Backprop losses - running 
            loss.backward()
            
            ###      4) Take step with the optimizer
            ### Please see the following docs for support:
            ###     Optimizer Step: https://pytorch.org/docs/stable/optim.html#optimizer-step
            
            # Assumption: We pass in an instance of Optimizer which stores our weights....
            # I think i will build optimizer in earlier functions
            optimizer.step()
            
            

            ### END YOUR CODE
            prog.update(1)
            loss_meter.update(loss.item())

    print ("Average Train Loss: {}".format(loss_meter.avg))

    print("Evaluating on dev set",)
    parser.model.eval() # Places model in "eval" mode, i.e. don't apply dropout layer
    dev_UAS, _ = parser.parse(dev_data)
    print("- dev UAS: {:.2f}".format(dev_UAS * 100.0))
    return dev_UAS

### Running the debugger:

- This should just output a very small set.

- From assignment: 
    - When running with debug=True, you should be able to get a loss smaller than 0.2 and a UAS
larger than 65 on the dev set (although in rare cases your results may be lower, there is some
randomness when training).
   - It should take about 1 hour to train the model on the entire the training dataset, i.e., when
debug=False.

    - 

In [17]:
debug = True
    # debug = False

# hope this is not an issue
#assert(torch.__version__ == "1.0.0"),  "Please install torch version 1.0.0"

print(80 * "=")
print("INITIALIZING")
print(80 * "=")
parser, embeddings, train_data, dev_data, test_data = load_and_preprocess_data(debug)

start = time.time()
model = ParserModel(embeddings)
parser.model = model
print("took {:.2f} seconds\n".format(time.time() - start))

print(80 * "=")
print("TRAINING")
print(80 * "=")
output_dir = "results/{:%Y%m%d_%H%M%S}/".format(datetime.now())
output_path = output_dir + "model.weights"

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

train(parser, train_data, dev_data, output_path, batch_size=1024, n_epochs=10, lr=0.0005)


INITIALIZING
Loading data...
took 3.06 seconds
Building parser...
took 0.04 seconds
Loading pretrained embeddings...
took 3.21 seconds
Vectorizing data...
took 0.07 seconds
Preprocessing training data...


  0%|          | 0/48 [00:00<?, ?it/s]

took 1.90 seconds
took 0.01 seconds

TRAINING
Epoch 1 out of 10


100%|██████████| 48/48 [00:07<00:00,  6.61it/s]


Average Train Loss: 0.6727653381725153
Evaluating on dev set


125250it [00:00, 9085567.11it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 53.73
New best dev UAS! Saving model.

Epoch 2 out of 10


100%|██████████| 48/48 [00:07<00:00,  6.81it/s]


Average Train Loss: 0.3198740941782792
Evaluating on dev set


125250it [00:00, 8209153.61it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 62.43
New best dev UAS! Saving model.

Epoch 3 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.09it/s]


Average Train Loss: 0.2537969369441271
Evaluating on dev set


125250it [00:00, 12684387.10it/s]      
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 66.34
New best dev UAS! Saving model.

Epoch 4 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.03it/s]


Average Train Loss: 0.21584195860972008
Evaluating on dev set


125250it [00:00, 11744351.26it/s]      
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 68.52
New best dev UAS! Saving model.

Epoch 5 out of 10


100%|██████████| 48/48 [00:06<00:00,  6.93it/s]


Average Train Loss: 0.18658663984388113
Evaluating on dev set


125250it [00:00, 9624722.00it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 71.54
New best dev UAS! Saving model.

Epoch 6 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.12it/s]


Average Train Loss: 0.1691342949246367
Evaluating on dev set


125250it [00:00, 8014043.45it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 71.05

Epoch 7 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.08it/s]


Average Train Loss: 0.14968902291730046
Evaluating on dev set


125250it [00:00, 5180118.88it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 73.20
New best dev UAS! Saving model.

Epoch 8 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.21it/s]


Average Train Loss: 0.13530923270930847
Evaluating on dev set


125250it [00:00, 5661442.54it/s]       
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 73.42
New best dev UAS! Saving model.

Epoch 9 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.15it/s]


Average Train Loss: 0.12109526122609775
Evaluating on dev set


125250it [00:00, 12537865.78it/s]      
  0%|          | 0/48 [00:00<?, ?it/s]

- dev UAS: 74.22
New best dev UAS! Saving model.

Epoch 10 out of 10


100%|██████████| 48/48 [00:06<00:00,  7.16it/s]


Average Train Loss: 0.11045274324715137
Evaluating on dev set


125250it [00:00, 5267482.61it/s]       

- dev UAS: 75.51
New best dev UAS! Saving model.






In [12]:
torch.__version__ 

'1.4.0+cpu'