In [None]:
# Import the Colab Drive helper (only available inside a Google Colab runtime).
from google.colab import drive

# Mount Google Drive on the Colab VM at /gdrive. The later cells read and write
# everything under '/gdrive/My Drive/Project/', so this cell must run first.
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [None]:
# Copy the project's Python modules from Drive into the working directory so the
# import cell's `from model import training` can find model.py.
# NOTE(review): attention.py, decoder.py and encoder.py are presumably imported by
# model.py - confirm.
!cp '/gdrive/My Drive/Project/attention.py' .
!cp '/gdrive/My Drive/Project/decoder.py' .
!cp '/gdrive/My Drive/Project/encoder.py' .
!cp '/gdrive/My Drive/Project/model.py' .

In [None]:
import os
import torch
import torchtext
from torch.nn.utils import clip_grad_norm_
from model import training
from datetime import datetime
import pickle
import collections

In [None]:
def check_dir_exists(path):
    """Make sure the directory ``path`` exists, creating missing parents as needed.

    Args:
        path: Directory to create. An empty string (which is what
            ``os.path.dirname`` returns for a bare file name) means the current
            working directory and is treated as already existing.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    if not path:
        # os.makedirs('') raises FileNotFoundError; there is nothing to create.
        return
    # exist_ok=True removes the race between a separate isdir() check and makedirs().
    os.makedirs(path, exist_ok=True)

def save_object_to_Models(obj, path):
    """Pickle ``obj`` into the file at ``path``.

    The parent directory of ``path`` is handed to ``check_dir_exists`` first so
    that it is created when missing.

    Args:
        obj: Any picklable Python object.
        path: Destination file path; an existing file is overwritten.
    """
    parent_dir = os.path.dirname(path)
    check_dir_exists(parent_dir)

    with open(path, mode='wb') as handle:
        pickle.dump(obj, handle)

def save_model_to_Models(model, epoch, train_loss, val_loss, models_dir='/gdrive/My Drive/Project/Models/'):
    """Write ``model.state_dict()`` to a checkpoint file named after the epoch and losses.

    The file is called ``seq2seq-<epoch>-<train_loss>-<val_loss>.pt`` (losses
    formatted with ``%f``) and is placed in ``models_dir``.

    Args:
        model: Module whose ``state_dict()`` is saved. If the model is wrapped
            (e.g. in ``torch.nn.DataParallel``) the wrapper's state dict is what
            gets written.
        epoch: Epoch number used in the file name.
        train_loss: Training loss used in the file name.
        val_loss: Validation loss used in the file name.
        models_dir: Directory that receives the checkpoint. Defaults to the
            project's Drive folder; it is created when missing.
    """
    print('(Saving model...', end='')
    checkpoint_name = 'seq2seq-%d-%f-%f.pt' % (epoch, train_loss, val_loss)
    if models_dir:
        # torch.save does not create directories; do it here so a fresh Drive works.
        os.makedirs(models_dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(models_dir, checkpoint_name))
    print('done)', end='')

def save_vocab_to_Models(vocab, path):
    """Pickle a torchtext Field vocabulary to ``path`` without its ``vectors``.

    ``vocab.vectors`` is set to ``None`` only while the object is being pickled,
    so the vectors are left out of the saved file. The attribute is put back
    afterwards, so the caller's vocabulary object is not left permanently
    stripped of its vectors.

    Args:
        vocab: Vocabulary object to save; only its ``vectors`` attribute is
            touched here.
        path: Destination file path, forwarded to ``save_object_to_Models``.
    """
    original_vectors = getattr(vocab, 'vectors', None)
    vocab.vectors = None
    try:
        save_object_to_Models(vocab, path)
    finally:
        # Restore even if pickling fails so the in-memory vocab stays usable.
        vocab.vectors = original_vectors

In [None]:
def load_object(path):
    """Return the object unpickled from the file at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading, so
    this must only be pointed at files the project wrote itself.
    """
    with open(path, mode='rb') as handle:
        return pickle.load(handle)

def load_dataset(args, device):
    """
    Loads the question/answer CSV splits and wraps them in bucketed batch iterators.

    Args:
        args: mapping of run settings. ``args['batch_size']`` is required. The
            following keys are optional and default to the values that used to
            be hard-coded here:
              * ``'dataset'``    - file-name prefix of the CSV splits ('applesupport');
                the files read are ``<dataset>_train.csv``, ``<dataset>_val.csv``
                and ``<dataset>_test.csv``.
              * ``'data_path'``  - directory holding those CSV files.
              * ``'vocab_path'`` - pickled vocabulary file to attach to the field.
        device: torch device passed to the batch iterators.

    Returns:
        Tuple ``(metadata, vocab, train1, val1, test1)`` where ``metadata`` is a
        namedtuple with ``vocab_size``, ``padding_idx`` (index of '<pad>') and
        ``vectors`` (``vocab.vectors``), ``vocab`` is the loaded vocabulary and
        the last three items are the train/validation/test iterators.
    """
    dataset = args.get('dataset', 'applesupport')
    data_path = args.get('data_path', '/gdrive/My Drive/Project/Data/')
    vocab_path = args.get('vocab_path', '/gdrive/My Drive/Project/Models/' + 'vocab')

    # '<sos>' --> start of sentence token
    # '<eos>' --> end of sentence token
    # '<pad>' --> pad token
    field = torchtext.data.Field(init_token='<sos>', eos_token='<eos>', pad_token='<pad>', tokenize='spacy', lower=True)

    # The first two CSV columns are mapped to None (ignored); question and answer
    # share the same field and therefore the same vocabulary.
    train, val, test = torchtext.data.TabularDataset.splits(
        path=data_path,
        format='csv',
        train=dataset + '_train.csv',
        validation=dataset + '_val.csv',
        test=dataset + '_test.csv',
        fields=[('', None), ('author_id', None), ('question', field), ('answer', field)],
        skip_header=True)

    # The vocabulary is loaded from a pickle instead of being rebuilt from the
    # training split (presumably so token indices stay the same as in earlier
    # runs - confirm). To build it from scratch instead:
    #   field.build_vocab(train, vectors='glove.twitter.27B.200d', min_freq=2, max_size=20000)
    # NOTE(review): load_object unpickles the file; only use a trusted vocab file.
    field.vocab = load_object(vocab_path)

    # create batches; the iterators use the question length as sort key
    train1, val1, test1 = torchtext.data.BucketIterator.splits(
        (train, val, test),
        batch_size=args['batch_size'],
        sort_key=lambda x: len(x.question),
        device=device,
        repeat=False)

    vocab = field.vocab
    Metadata = collections.namedtuple('Metadata', 'vocab_size padding_idx vectors')
    metadata = Metadata(vocab_size=len(vocab), padding_idx=vocab.stoi['<pad>'], vectors=vocab.vectors)

    return metadata, vocab, train1, val1, test1

In [None]:
def evaluate(model, val1, metadata):
    """Return the average per-batch cross-entropy loss of ``model`` over ``val1``.

    The model is switched to eval mode (so dropout is disabled) and no gradients
    are tracked. Each batch contributes one loss value; targets equal to
    ``metadata.padding_idx`` are ignored. The result is the sum of the batch
    losses divided by ``len(val1)``.

    Args:
        model: Callable as ``model(question, answer)`` returning logits that can
            be viewed as ``(-1, metadata.vocab_size)``.
        val1: Sized iterable of batches exposing ``.question`` and ``.answer``.
        metadata: Object providing ``vocab_size`` and ``padding_idx``.
    """
    model.eval()  # eval mode matters because of dropout

    batch_losses = []
    with torch.no_grad():
        for batch in val1:
            scores = model(batch.question, batch.answer)

            # answer[1:] drops the first row, i.e. the <sos> token
            flat_targets = batch.answer[1:].view(-1)
            flat_scores = scores.view(-1, metadata.vocab_size)
            batch_loss = torch.nn.functional.cross_entropy(
                flat_scores,
                flat_targets,
                ignore_index=metadata.padding_idx,
            )
            batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(val1)

In [None]:
def train(model, optimizer, train1, metadata, grad_clip):
    """Run one optimisation pass over ``train1`` and return the average batch loss.

    For every batch: forward pass, gradient reset, cross-entropy loss (targets
    equal to ``metadata.padding_idx`` are ignored), backward pass, gradient-norm
    clipping to ``grad_clip`` and an optimizer step. The returned value is the
    sum of the batch losses divided by ``len(train1)``.

    Args:
        model: Callable as ``model(question, answer)`` returning logits that can
            be viewed as ``(-1, metadata.vocab_size)``.
        optimizer: Optimizer over the model's parameters.
        train1: Sized iterable of batches exposing ``.question`` and ``.answer``.
        metadata: Object providing ``vocab_size`` and ``padding_idx``.
        grad_clip: Maximum gradient norm passed to ``clip_grad_norm_``.
    """
    model.train()

    loss_fn = torch.nn.functional.cross_entropy
    running_loss = 0
    for batch in train1:
        # forward pass
        scores = model(batch.question, batch.answer)

        optimizer.zero_grad()

        # answer[1:] drops the first row, i.e. the <sos> token
        flat_scores = scores.view(-1, metadata.vocab_size)
        flat_targets = batch.answer[1:].view(-1)
        batch_loss = loss_fn(flat_scores, flat_targets, ignore_index=metadata.padding_idx)
        batch_loss.backward()
        running_loss += batch_loss.item()

        # keep the gradient norm bounded before updating the weights
        clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()

    return running_loss / len(train1)

In [None]:
def _align_state_dict_keys(state_dict, model):
    """Add or strip the DataParallel 'module.' key prefix so the keys match ``model``."""
    prefix = 'module.'
    wrapped = isinstance(model, torch.nn.DataParallel)
    prefixed = bool(state_dict) and all(key.startswith(prefix) for key in state_dict)
    if wrapped and not prefixed:
        return collections.OrderedDict((prefix + key, value) for key, value in state_dict.items())
    if prefixed and not wrapped:
        return collections.OrderedDict((key[len(prefix):], value) for key, value in state_dict.items())
    return state_dict


def main(checkpoint_name='seq2seq-63-2.229289-2.533210.pt', start_epoch=65):
    """Resume seq2seq training from a checkpoint, save after every epoch, report test loss.

    Steps: pick the device, build the hyper-parameter dict, load the data
    iterators, persist vocab and hyper-parameters, build the model (wrapped in
    DataParallel when CUDA is available), optionally load a checkpoint, train
    from ``start_epoch`` up to ``hyper_parameter['epochs']`` while saving a
    checkpoint after each epoch, and finally evaluate on the test iterator.
    Ctrl-C (KeyboardInterrupt) or a BrokenPipeError stops training early and
    still runs the test evaluation.

    Args:
        checkpoint_name: File name of a checkpoint inside
            ``hyper_parameter['path']`` to load before training. Pass ``None``
            to train the freshly constructed model instead.
        start_epoch: Zero-based index of the first epoch to run; checkpoints are
            named with ``epoch + 1``.
            NOTE(review): the defaults pair checkpoint "63" with start epoch 65,
            which skips checkpoint numbers 64 and 65 - confirm this is intended.
    """
    cuda = torch.cuda.is_available()

    torch.set_default_tensor_type(torch.cuda.FloatTensor if cuda else torch.FloatTensor)
    device = torch.device('cuda' if cuda else 'cpu')
    print("DEVICE = ", device)

    hyper_parameter = {'epochs':300,
            'grad_clip':5,
            'batch_size':64,
            'learning_rate':1e-4,
            'path' : '/gdrive/My Drive/Project/Models/',
            'multi_gpu' : True,
            'encoder_hidden_size' : 1024,
            'encoder_num_layers' : 2,
            'encoder_rnn_dropout' : 0.2,
            'decoder_hidden_size' : 512,
            'decoder_num_layers' : 2,
            'decoder_rnn_dropout' : 0.2,
            'attn_hidden_size': 512,
            'embedding_size': 200,
            'cuda':cuda,}

    metadata, vocab, train1, val1, test1 = load_dataset(hyper_parameter, device)

    print('Saving vocab and args...', end='')
    save_vocab_to_Models(vocab, os.path.join(hyper_parameter['path'], 'vocab'))
    save_object_to_Models(hyper_parameter, os.path.join(hyper_parameter['path'], 'args'))
    print('Done')

    model = training(hyper_parameter, metadata)

    if cuda and hyper_parameter['multi_gpu']:
        model = torch.nn.DataParallel(model, dim=1)  # if we were using batch_first we'd have to use dim=0
        print('Use model DataParallel')

    if checkpoint_name:
        checkpoint_path = os.path.join(hyper_parameter['path'], checkpoint_name)
        # map_location lets a checkpoint written on a GPU be restored on a CPU-only VM.
        state_dict = torch.load(checkpoint_path, map_location=device)
        # Checkpoints are written from whatever model object was trained, so their keys
        # may or may not carry the DataParallel 'module.' prefix; align them with the
        # current model before loading.
        model.load_state_dict(_align_state_dict_keys(state_dict, model))

    print(model)  # print models summary

    # Only model weights are checkpointed, so the optimizer always starts fresh.
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper_parameter['learning_rate'], amsgrad=True)

    try:
        for epoch in range(start_epoch, hyper_parameter['epochs']):
            begin = datetime.now()
            print('Training Epoch %d' % epoch)
            # calculate train and val loss
            train_loss = train(model, optimizer, train1, metadata, hyper_parameter['grad_clip'])
            val_loss = evaluate(model, val1, metadata)
            print("With epoch %d (%d): TRAIN-LOSS %f ** VALIDATION-LOSS %f (TIME = %s)" % (epoch + 1, hyper_parameter['epochs'], train_loss, val_loss, datetime.now() - begin), end='')

            # a checkpoint is written after every epoch, regardless of validation loss
            save_model_to_Models(model, epoch + 1, train_loss, val_loss)
            print()
            print("***********************")
    except (KeyboardInterrupt, BrokenPipeError):
        print('Training stopped!!!!!!!!!!!!!!!.')

    test_loss = evaluate(model, test1, metadata)
    print("TEST-LOSS %f" % test_loss)

In [None]:
# Copy the amazonhelp train/val/test CSV splits from Drive into the working directory.
# NOTE(review): load_dataset() reads the 'applesupport' CSVs directly from
# '/gdrive/My Drive/Project/Data/', so these local copies look unused - confirm
# which dataset this run is meant to train on.
!cp '/gdrive/My Drive/Project/Data/amazonhelp_train.csv' .
!cp '/gdrive/My Drive/Project/Data/amazonhelp_val.csv' .
!cp '/gdrive/My Drive/Project/Data/amazonhelp_test.csv' .

In [None]:
# Run the training entry point: trains, saves a checkpoint after every epoch and
# prints the test loss at the end. Interrupting the cell stops training early and
# still runs the test evaluation.
main()

DEVICE =  cuda
Saving vocab and args...Done
Use model DataParallel
DataParallel(
  (module): SequenceToSequenceTrain(
    (encoder): Encoder(
      (embed): Embedding(10998, 200, padding_idx=1)
      (rnn): RNN(
        (rnn): LSTM(200, 1024, num_layers=2, dropout=0.2, bidirectional=True)
      )
    )
    (decoder): Decoder(
      (initial_hidden): DecoderInit(
        (linear): Linear(in_features=1024, out_features=512, bias=True)
      )
      (embed): Embedding(10998, 200, padding_idx=1)
      (rnn): LSTM(712, 512, num_layers=2, dropout=0.2)
      (attn): Attention(
        (attn_score): generalAttentionScore()
      )
      (attn_hidden_lin): Linear(in_features=2560, out_features=512, bias=True)
      (out): Linear(in_features=512, out_features=10998, bias=True)
    )
  )
)
Training Epoch 65
With epoch 66 (300): TRAIN-LOSS 2.214210 ** VALIDATION-LOSS 2.549388 (TIME = 0:17:06.519266)(Saving model...done)
***********************
Training Epoch 66
With epoch 67 (300): TRAIN-LOSS 2.21