# Attention-Based Bidirectional Long Short-Term Memory Networks for Relation Classification   ACL  2016

# 1 前言

### 1.1 课程回顾

<img src='imgs/overall.png' width="800" height="800" align="bottom">

### 1.2 模型结构

<img src="./imgs/model.png"  width="600" height="600" align="bottom" />

### 1.3 代码结构展示

<img src="./imgs/dir.png"  width="300" height="300" align="bottom" />

# 2 准备工作
### 2.1 项目环境配置

* Python3.8
* jupyter notebook
* torch            1.6.0+cu10.2
* numpy            1.18.5
* ConfigArgParse      1.5.2
* torchtext         0.7.0

代码运行环境建议使用Visual Studio Code(VScode)

### 2.2 数据集下载
* 执行文件 `python reader.py`

# 3 项目代码结构（VScode中演示）

>1）是什么？

　　我们首先会在VScode环境中让代码跑一下，直观感受到项目的训练，并展示前向推断的输出，让大家看到模型的效果。
>2）怎么构成的？

　　然后介绍项目代码的构成，介绍项目有哪些文件夹，包含哪些文件，这些文件构成了什么功能模块如：数据预处理模块，模型设计模块，损失函数模块，推断与评估模块。
>3）小结

　　在主文件中再过一下启动训练的流程。

# 4 算法模块及细节（jupyter和VScode中演示）

　　在jupyter notebook中细致地讲解每一个模块。
  
　　以实现模块功能为目的，来讲解每个函数的执行流程，呈现中间数据，方便同学们理解学习。
  
　　内容分为以下几个模块：**超参数设置，数据读取与处理，模型定义，模型训练，模型评价**。

### 4.1 超参数设置

In [1]:
import os
import sys
import json
import random
import configargparse
from utils import show_time, fwrite, shell

In [7]:
def get_args():
    """Declare every command-line option of the run and parse them.

    Options are organised in four groups (model hyperparameters, training
    specs, files, run specs).  The default of ``-uid`` is whatever
    ``show_time(printout=False)`` returns at call time.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    run_id = show_time(printout=False)
    parser = configargparse.ArgumentParser(
        description='Args for Text Classification')

    # ---- model ---------------------------------------------------------
    model_opts = parser.add_argument_group('Model Hyperparameters')
    # In the notebook run shown below, ``f`` holds the Jupyter kernel
    # connection file path; declaring it keeps parse_args() from failing.
    model_opts.add_argument('-f')
    model_opts.add_argument(
        '-init_xavier', action='store_true', default=False,
        help='whether to use xavier normal as initiator for model weights')
    model_opts.add_argument(
        '-emb_dropout', type=float, default=0.3,
        help='dropout of the embedding layer')
    model_opts.add_argument(
        '-emb_dim', type=int, default=100,
        help='dimension of embedding vectors')
    model_opts.add_argument(
        '-vocab_max_size', type=int, default=100000,
        help='max number of words in vocab')
    model_opts.add_argument(
        '-lstm_n_layer', type=int, default=1,
        help='num of layers in LSTM')
    model_opts.add_argument(
        '-lstm_dropout', type=float, default=0.3,
        help='dropout in >=1th LSTM layer')
    model_opts.add_argument(
        '-lstm_dim', type=int, default=100,
        help='dimension of the lstm hidden states')
    model_opts.add_argument(
        '-lstm_combine', type=str, default='add',
        choices=['add', 'concat'],
        help='the way to combine bidirectional lstm outputs')
    model_opts.add_argument(
        '-n_linear', type=int, default=1,
        help='number of linear layers after lstm')
    model_opts.add_argument(
        '-linear_dropout', type=float, default=0.5,
        help='dropout of the penultimate layer')
    model_opts.add_argument(
        '-n_classes', type=int, default=2,
        help='number of classes to predict')

    # ---- training ------------------------------------------------------
    train_opts = parser.add_argument_group('Training Specs')
    train_opts.add_argument('-seed', type=int, default=0, help='random seed')
    train_opts.add_argument('-batch_size', type=int, default=10,
                            help='batch size')
    train_opts.add_argument(
        '-epochs', type=int, default=100,
        help='number of epochs to train the model')
    train_opts.add_argument('-lr', type=float, default=1.0,
                            help='learning rate')
    train_opts.add_argument(
        '-weight_decay', type=float, default=1e-5,
        help='weight decay')

    # ---- input / output files -----------------------------------------
    file_opts = parser.add_argument_group('Files')
    file_opts.add_argument(
        '-data_dir', type=str, default='data/re_semeval/',
        help='the directory for data files')
    file_opts.add_argument(
        '-train_fname', type=str, default='train.csv',
        help='training file name')
    file_opts.add_argument(
        '-data_sizes', type=int, nargs=3, default=[None, None, None],
        help='# samples to use in train/dev/test files')
    # NOTE: store_false -- the value is True unless the flag is supplied.
    file_opts.add_argument(
        '-preprocessed', action='store_false', default=True,
        help='whether input data is preprocessed by spacy')
    file_opts.add_argument(
        '-lower', action='store_true', default=False,
        help='whether to lowercase the input data')

    file_opts.add_argument(
        '-uid', type=str, default=run_id,
        help='the id of this run')
    file_opts.add_argument(
        '-save_dir', type=str, default='tmp/',
        help='directory to save output files')
    file_opts.add_argument(
        '-save_dir_cp', type=str, default='tmp_cp/',
        help='directory to backup output files')
    file_opts.add_argument(
        '-save_meta_fname', type=str, default='run_meta.txt',
        help='file name to save arguments and model structure')
    file_opts.add_argument(
        '-save_log_fname', type=str, default='run_log.txt',
        help='file name to save training logs')
    file_opts.add_argument(
        '-save_valid_fname', type=str, default='valid_e00.txt',
        help='file name to save valid outputs')
    file_opts.add_argument(
        '-save_vis_fname', type=str, default='example.txt',
        help='file name to save visualization outputs')
    file_opts.add_argument(
        '-save_model_fname', type=str, default='model',
        help='file to torch.save(model)')
    file_opts.add_argument(
        '-save_vocab_fname', type=str, default='vocab.json',
        help='file name to save vocab')

    # ---- runtime -------------------------------------------------------
    run_opts = parser.add_argument_group('Run specs')
    run_opts.add_argument('-n_gpus', type=int, default=1,
                          help='# gpus to run on')
    run_opts.add_argument(
        '-load_model', type=str, default='',
        help='path to pretrained model')
    run_opts.add_argument(
        '-verbose', action='store_true', default=False,
        help='whether to show pdb.set_trace() or not')

    return parser.parse_args()

In [8]:
def select_data(save_dir='./tmp', data_dir='./data/wiki_person',
                train_fname='train.csv', data_sizes=(None, None, None),
                skip_header=True, verbose=True):
    """Copy a shuffled (optionally truncated) train/valid/test split.

    For each of ``train``, ``valid`` and ``test`` the file
    ``data_dir/<train_fname with 'train' replaced by the split name>`` is
    read, its body rows are shuffled with ``random.shuffle`` and the first
    ``data_size`` rows are written (after the header, if any) to
    ``save_dir/<split><suffix>`` via ``fwrite``.

    Args:
        save_dir: directory the selected files are written to.
        data_dir: directory the source files are read from.
        train_fname: name of the training file; the other two names are
            derived from it by replacing ``'train'``.
        data_sizes: three row limits (train, valid, test); ``None`` keeps
            every row.  The default is an immutable tuple so that it cannot
            be shared and mutated between calls.
        skip_header: treat the first line of each file as a header that is
            kept in place and excluded from shuffling and counting.
        verbose: print the resulting number of samples per split.

    Returns:
        A list with the number of data rows written for each split, in the
        order train, valid, test.
    """
    splits = ['train', 'valid', 'test']
    suffix = '.' + train_fname.split('.')[-1]
    n_lines = {}

    def _get_num_lines(path):
        # Lines read from a file keep their newline, so a blank line is
        # still truthy; test the stripped text to really skip blank lines.
        with open(path) as f:
            n_rows = sum(1 for line in f if line.strip())
        if skip_header:
            n_rows -= 1
        # An empty file must report 0 rows, not -1.
        return max(n_rows, 0)

    for split, data_size in zip(splits, data_sizes):
        read_from = os.path.join(data_dir,
                                 train_fname.replace('train', split))
        save_to = os.path.join(save_dir, split + suffix)

        with open(read_from) as f:
            # Make sure every row ends with a newline: the last line of a
            # file may not, and once shuffled into the middle it would be
            # concatenated with the row that follows it.
            rows = [line if line.endswith('\n') else line + '\n'
                    for line in f]
        if skip_header:
            header, body = rows[:1], rows[1:]
        else:
            header, body = [], rows
        random.shuffle(body)

        fwrite(''.join(header + body[:data_size]), save_to)

        n_lines[split] = _get_num_lines(save_to)

    if verbose:
        writeout = ', '.join(
            '{}: {}'.format(*item) for item in n_lines.items())
        print('[Info] #samples in', writeout)
    return list(n_lines.values())

In [9]:
def setup():
    """Parse the arguments and prepare the output directory and data files.

    Steps:
      1. parse the command line with ``get_args()``;
      2. create ``args.save_dir`` if it does not exist, otherwise (unless a
         model is being loaded via ``args.load_model``) clear it;
      3. prefix every ``save_*_fname`` argument with ``args.save_dir``;
      4. copy the selected data into ``args.save_dir`` with
         ``select_data`` and store the resulting sample counts in
         ``args.data_sizes``;
      5. unless ``args.verbose`` is set, turn ``pdb.set_trace`` into a no-op.

    Returns:
        The updated argument namespace.
    """
    args = get_args()

    if not os.path.isdir(args.save_dir):
        # makedirs also creates missing parent directories.
        os.makedirs(args.save_dir, exist_ok=True)
    elif not args.load_model:
        # Fresh run: remove the previous outputs.  This mirrors what
        # ``rm <save_dir>/*`` does (non-hidden files and symlinks only,
        # sub-directories are left alone) without building a shell command
        # from the path, so directories with spaces or shell metacharacters
        # are handled safely.
        for entry in os.listdir(args.save_dir):
            if entry.startswith('.'):
                continue
            path = os.path.join(args.save_dir, entry)
            if os.path.isfile(path) or os.path.islink(path):
                os.remove(path)

    args.save_meta_fname = os.path.join(args.save_dir, args.save_meta_fname)
    args.save_log_fname = os.path.join(args.save_dir, args.save_log_fname)
    args.save_valid_fname = os.path.join(args.save_dir, args.save_valid_fname)
    args.save_vis_fname = os.path.join(args.save_dir, args.save_vis_fname)
    args.save_model_fname = os.path.join(args.save_dir, args.save_model_fname)
    args.save_vocab_fname = os.path.join(args.save_dir, args.save_vocab_fname)

    # data_sizes goes in as the requested limits and comes back as the
    # number of rows actually written for train/valid/test.
    args.data_sizes = \
        select_data(save_dir=args.save_dir, data_dir=args.data_dir,
                    train_fname=args.train_fname, data_sizes=args.data_sizes,
                    skip_header=True, verbose=True)

    if not args.verbose:
        # Silence any pdb.set_trace() breakpoints left in the code.
        import pdb
        pdb.set_trace = lambda: None

    return args

In [10]:
args = setup()

[Info] #samples in train: 7200, valid: 800, test: 2717


In [11]:
vars(args)

{'f': '/home/niuhao/.local/share/jupyter/runtime/kernel-fcf17709-a65c-4a9c-ac13-4ff09331c3c5.json',
 'init_xavier': False,
 'emb_dropout': 0.3,
 'emb_dim': 100,
 'vocab_max_size': 100000,
 'lstm_n_layer': 1,
 'lstm_dropout': 0.3,
 'lstm_dim': 100,
 'lstm_combine': 'add',
 'n_linear': 1,
 'linear_dropout': 0.5,
 'n_classes': 2,
 'seed': 0,
 'batch_size': 10,
 'epochs': 100,
 'lr': 1.0,
 'weight_decay': 1e-05,
 'data_dir': 'data/re_semeval/',
 'train_fname': 'train.csv',
 'data_sizes': [7200, 800, 2717],
 'preprocessed': True,
 'lower': False,
 'uid': '10102031',
 'save_dir': 'tmp/',
 'save_dir_cp': 'tmp_cp/',
 'save_meta_fname': 'tmp/run_meta.txt',
 'save_log_fname': 'tmp/run_log.txt',
 'save_valid_fname': 'tmp/valid_e00.txt',
 'save_vis_fname': 'tmp/example.txt',
 'save_model_fname': 'tmp/model',
 'save_vocab_fname': 'tmp/vocab.json',
 'n_gpus': 1,
 'load_model': '',
 'verbose': False}

select_data

In [15]:
data_dir =args.data_dir
save_dir=args.save_dir
train_fname=args.train_fname
data_sizes=args.data_sizes
skip_header=True
verbose=True

In [16]:
files = ['train', 'valid', 'test']

In [17]:
file = files[0]

In [18]:
read_from = os.path.join(data_dir,
                                 train_fname.replace('train', file))

In [19]:
read_from

'data/re_semeval/train.csv'

In [20]:
suffix = '.' + train_fname.split('.')[-1]
n_lines = {}

In [21]:
suffix

'.csv'

In [22]:
save_to = os.path.join(save_dir, file + suffix)
save_to

'tmp/train.csv'

In [23]:
with open(read_from) as f:
    data = [line for line in f]

In [24]:
data[0]

'tgt,input,show_inp,ent1,ent2,id\n'

In [41]:
data[3]

'"Product-Producer(e2,e1)",The ENT_1_START builder ENT_1_END has now completed the ENT_2_START townhouse ENT_2_END .,The ENT_1_START builder ENT_1_END has now completed the ENT_2_START townhouse ENT_2_END .,builder,townhouse,2569'

In [26]:
header, body = data[:1], data[1:]

In [29]:
len(body)

7200

In [26]:
random.shuffle(body)
# ``data_size`` is the loop variable inside select_data(); in this
# step-by-step walkthrough it has to be picked explicitly for the current
# split, otherwise the next line raises NameError.
data_size = data_sizes[files.index(file)]
data = header + body[:data_size]

In [42]:
len(body[:None])

7200

In [30]:
 fwrite(''.join(data), save_to)

In [31]:
with open(save_to) as f:
    data = [line.strip() for line in f if line]

In [43]:
len(data)

7201

In [33]:
num_lines = len(data) if not skip_header else len(data) - 1

In [34]:
num_lines

7200

In [34]:
n_lines[file] = num_lines

In [35]:
writeout = ['{}: {}'.format(*item) for item in n_lines.items()]
writeout = ', '.join(writeout)
print('[Info] #samples in', writeout)

[Info] #samples in train: 7200


### 4.2 数据读取与处理

In [49]:
import json
import torch
import torchtext

from torchtext.data import Field, RawField, TabularDataset, \
    BucketIterator, Iterator
from torchtext.vocab import Vectors, GloVe
from utils import show_time, fwrite

In [131]:
class Dataset:
    """Torchtext fields, datasets and vocabularies for the classification data.

    The constructor reads ``train``/``valid``/``test`` tabular files from
    ``data_dir``, builds the input vocabulary (with GloVe 6B vectors) and
    the target vocabulary, and exposes them as ``self.INPUT``, ``self.TGT``,
    ``self.train_ds``, ``self.valid_ds`` and ``self.test_ds``.
    """

    def __init__(self, proc_id=0, data_dir='tmp/', train_fname='train.csv',
                 preprocessed=True, lower=True,
                 vocab_max_size=100000, emb_dim=100,
                 save_vocab_fname='vocab.json', verbose=True, ):
        """Load the three splits and build both vocabularies.

        Args:
            proc_id: index of the current process; only process 0 logs and
                writes the vocab file.
            data_dir: directory containing the three data files.
            train_fname: training file name; the validation and test names
                are derived by replacing ``'train'``, and the text after
                the last ``'.'`` is used as the TabularDataset format.
            preprocessed: if true the input text is split on whitespace,
                otherwise torchtext's ``'spacy'`` tokenizer is requested.
            lower: whether the input field lowercases its text.
            vocab_max_size: ``max_size`` passed to the input vocab builder.
            emb_dim: dimension of the GloVe 6B vectors to attach.
            save_vocab_fname: where to dump both vocabularies as JSON
                (skipped when empty or when not verbose).
            verbose: enable logging (effective on process 0 only).
        """
        self.verbose = verbose and (proc_id == 0)
        # The parentheses matter: without them the conditional becomes part
        # of the lambda body, and with preprocessed=False the "tokenizer"
        # would be a function returning the string 'spacy' for every input
        # instead of the 'spacy' tokenizer name understood by Field.
        tokenize = (lambda x: x.split()) if preprocessed else 'spacy'

        INPUT = Field(sequential=True, batch_first=True, tokenize=tokenize,
                      lower=lower)
        # The label column is a sequential field too, so labels get their
        # own vocabulary (including the <unk>/<pad> specials).
        TGT = Field(sequential=True, batch_first=True)
        # Raw copy of the sentence, kept for display only.
        SHOW_INP = RawField()
        fields = [
            ('tgt', TGT),
            ('input', INPUT),
            ('show_inp', SHOW_INP), ]

        if self.verbose:
            # Report the file that is actually read, not a fixed name.
            show_time("[Info] Start building TabularDataset from: {}{}"
                      .format(data_dir, train_fname))
        datasets = TabularDataset.splits(
            fields=fields,
            path=data_dir,
            format=train_fname.rsplit('.')[-1],
            train=train_fname,
            validation=train_fname.replace('train', 'valid'),
            test=train_fname.replace('train', 'test'),
            skip_header=True,
        )
        # The input vocab is built over all three splits; tokens without a
        # pretrained vector are initialised with Tensor.normal_.
        # Author's notes: the vocab could instead be loaded from disk
        # (as OpenNMT does); emb_dim was tried with {50, 100}; ELMo is a
        # possible alternative.
        INPUT.build_vocab(*datasets, max_size=vocab_max_size,
                          vectors=GloVe(name='6B', dim=emb_dim),
                          unk_init=torch.Tensor.normal_, )
        TGT.build_vocab(*datasets)

        self.INPUT = INPUT
        self.TGT = TGT
        self.train_ds, self.valid_ds, self.test_ds = datasets

        if save_vocab_fname and self.verbose:
            writeout = {
                'tgt_vocab': {
                    'itos': TGT.vocab.itos, 'stoi': TGT.vocab.stoi,
                },
                'input_vocab': {
                    'itos': INPUT.vocab.itos, 'stoi': INPUT.vocab.stoi,
                },
            }
            fwrite(json.dumps(writeout, indent=4), save_vocab_fname)

        if self.verbose:
            msg = "[Info] Finished building vocab: {} INPUT, {} TGT" \
                .format(len(INPUT.vocab), len(TGT.vocab))
            show_time(msg)

    def get_dataloader(self, proc_id=0, n_gpus=1, device=torch.device('cpu'),
                       batch_size=64):
        """Create the train/valid/test iterators.

        Args:
            proc_id: index of the current process; selects which contiguous
                slice of the training set this process iterates over.
            n_gpus: number of processes the training set is divided among.
            device: device the batches are created on.
            batch_size: batch size for the train and valid iterators (the
                test iterator always uses batches of one example).

        Returns:
            A ``(train_iter, valid_iter, test_iter)`` tuple.
        """
        def _distribute_dataset(dataset):
            # Contiguous shard number ``proc_id`` out of ``n_gpus``.
            n = len(dataset)
            part = dataset[n * proc_id // n_gpus: n * (proc_id + 1) // n_gpus]
            return torchtext.data.Dataset(part, dataset.fields)

        train_ds = _distribute_dataset(self.train_ds)
        self.verbose = self.verbose and (proc_id == 0)
        # Train/valid batches are sorted by input length within each batch.
        train_iter, valid_iter = BucketIterator.splits(
            (train_ds, self.valid_ds),
            batch_sizes=(batch_size, batch_size),
            sort_within_batch=True,
            sort_key=lambda x: len(x.input),
            device=device
        )

        test_iter = BucketIterator(
            self.test_ds,
            batch_size=1,
            sort=False,
            sort_within_batch=False,
            device=device
        )
        return train_iter, valid_iter, test_iter

In [132]:
proc_id = 0

In [133]:
dataset = Dataset(proc_id=proc_id, data_dir=args.save_dir,
                      train_fname=args.train_fname,
                      preprocessed=args.preprocessed, lower=args.lower,
                      vocab_max_size=args.vocab_max_size, emb_dim=args.emb_dim,
                      save_vocab_fname=args.save_vocab_fname, verbose=True, )

⏰ Time: 10112105-54	[Info] Start building TabularDataset from: tmp/train.csv
⏰ Time: 10112105-56	[Info] Finished building vocab: 24887 INPUT, 21 TGT


In [134]:
n_gpus = 1
device = torch.device('cpu')

In [135]:
train_dl, valid_dl, test_dl = \
        dataset.get_dataloader(proc_id=proc_id, n_gpus=n_gpus, device=device,
                               batch_size=args.batch_size)

In [136]:
len(train_dl)   # batch_size == 10

720

In [137]:
[i for i in train_dl][1]


[torchtext.data.batch.Batch of size 10]
	[.tgt]:[torch.LongTensor of size 10x1]
	[.input]:[torch.LongTensor of size 10x25]
	[.show_inp]:['The second ENT_1_START sentence ENT_1_END tells us about heart ENT_2_START disease ENT_2_END , which is an illness of late middle age and old age .', "The Royal Navy 's newest 1bn GBP ENT_1_START warship ENT_1_END has been handed over to the new ENT_2_START owner ENT_2_END in a formal ceremony .", 'The ENT_1_START activities ENT_1_END were documented on the ENT_2_START newsreels ENT_2_END of the day , and form part of a new BBC television series .', "The biggest broadcast was on election eve , when a ENT_1_START hookup ENT_1_END of twenty - six ENT_2_START stations ENT_2_END carried Coolidge 's speech .", 'Continuous ENT_1_START improvement ENT_1_END starts with ENT_2_START measuring ENT_2_END process performance , and instigating a robust process for reviewing further changes logically and quickly .', 'From time to time a ENT_1_START drove ENT_1_EN

Dataset

In [57]:
proc_id=proc_id    #0
data_dir=args.save_dir
train_fname=args.train_fname
preprocessed=args.preprocessed    # True
lower=args.lower   # False
vocab_max_size=args.vocab_max_size    # 100000
emb_dim=args.emb_dim    #(100,)
save_vocab_fname=args.save_vocab_fname   #'tmp/vocab.json'
verbose=True

In [58]:
verbose = verbose and (proc_id == 0)

In [59]:
verbose

True

In [60]:
# Parenthesised so the conditional chooses between the whitespace tokenizer
# and the 'spacy' tokenizer name; without the parentheses the conditional is
# part of the lambda body and 'spacy' would be returned as the "tokens".
tokenize = (lambda x: x.split()) if preprocessed else 'spacy'

In [61]:
INPUT = Field(sequential=True, batch_first=True, tokenize=tokenize,
                      lower=lower)       #数据预处理配置信息

In [62]:
TGT = Field(sequential=True, batch_first=True)

In [63]:
SHOW_INP = RawField()

In [64]:
fields = [
            ('tgt', TGT),
            ('input', INPUT),
            ('show_inp', SHOW_INP), ]

In [65]:
if verbose:
    show_time("[Info] Start building TabularDataset from: {}{}"
              .format(data_dir, 'train.csv'))

⏰ Time: 10112014-31	[Info] Start building TabularDataset from: tmp/train.csv


In [66]:
data_dir

'tmp/'

In [67]:
train_fname

'train.csv'

In [68]:
train_fname.replace('train', 'valid')

'valid.csv'

In [69]:
datasets = TabularDataset.splits(
            fields=fields,
            path=data_dir,
            format=train_fname.rsplit('.')[-1],
            train=train_fname,
            validation=train_fname.replace('train', 'valid'),
            test=train_fname.replace('train', 'test'),
            skip_header=True,  
        )

In [70]:
datasets

(<torchtext.data.dataset.TabularDataset at 0x7fee828cac70>,
 <torchtext.data.dataset.TabularDataset at 0x7fee828cae50>,
 <torchtext.data.dataset.TabularDataset at 0x7fef0d49f0d0>)

In [71]:
datasets[0].__dict__.keys()

dict_keys(['examples', 'fields'])

In [132]:
datasets[0].fields

{'tgt': <torchtext.data.field.Field at 0x7febe3f03c70>,
 'input': <torchtext.data.field.Field at 0x7febe3f03ca0>,
 'show_inp': <torchtext.data.field.RawField at 0x7febe3f03430>}

In [84]:
datasets[0].examples[0].input

['ENT_1_START',
 'Paralysis',
 'ENT_1_END',
 'or',
 'convulsions',
 'are',
 'caused',
 'by',
 'hormone',
 'deficiencies',
 'and',
 'ENT_2_START',
 'imbalances',
 'ENT_2_END',
 '.']

In [93]:
INPUT.build_vocab(*datasets, max_size=vocab_max_size,
                          vectors=GloVe(name='6B', dim=emb_dim),
                          unk_init=torch.Tensor.normal_, )

In [94]:
INPUT.vocab.stoi

defaultdict(<bound method Vocab._default_unk_index of <torchtext.vocab.Vocab object at 0x7fee7b3774f0>>,
            {'<unk>': 0,
             '<pad>': 1,
             'the': 2,
             'ENT_1_END': 3,
             'ENT_1_START': 4,
             'ENT_2_END': 5,
             'ENT_2_START': 6,
             '.': 7,
             'of': 8,
             'a': 9,
             ',': 10,
             'and': 11,
             'The': 12,
             'in': 13,
             'to': 14,
             'is': 15,
             'was': 16,
             'from': 17,
             'by': 18,
             'with': 19,
             '-': 20,
             'on': 21,
             'that': 22,
             'into': 23,
             'for': 24,
             "'s": 25,
             'an': 26,
             'are': 27,
             'as': 28,
             'has': 29,
             'have': 30,
             'A': 31,
             'it': 32,
             'at': 33,
             'his': 34,
             'caused': 35,
             '"': 36,


In [95]:
INPUT.vocab.vectors.shape

torch.Size([24887, 100])

In [96]:
TGT.build_vocab(*datasets)

In [110]:
INPUT = INPUT
TGT = TGT
train_ds, valid_ds, test_ds = datasets

In [98]:
TGT.vocab.stoi

defaultdict(<bound method Vocab._default_unk_index of <torchtext.vocab.Vocab object at 0x7fef0d498fa0>>,
            {'<unk>': 0,
             '<pad>': 1,
             'Other': 2,
             'Entity-Destination(e1,e2)': 3,
             'Cause-Effect(e2,e1)': 4,
             'Member-Collection(e2,e1)': 5,
             'Entity-Origin(e1,e2)': 6,
             'Message-Topic(e1,e2)': 7,
             'Component-Whole(e1,e2)': 8,
             'Component-Whole(e2,e1)': 9,
             'Instrument-Agency(e2,e1)': 10,
             'Content-Container(e1,e2)': 11,
             'Product-Producer(e2,e1)': 12,
             'Cause-Effect(e1,e2)': 13,
             'Product-Producer(e1,e2)': 14,
             'Content-Container(e2,e1)': 15,
             'Entity-Origin(e2,e1)': 16,
             'Message-Topic(e2,e1)': 17,
             'Instrument-Agency(e1,e2)': 18,
             'Member-Collection(e1,e2)': 19,
             'Entity-Destination(e2,e1)': 20})

In [99]:
if save_vocab_fname and verbose:
    writeout = {
        'tgt_vocab': {
            'itos': TGT.vocab.itos, 'stoi': TGT.vocab.stoi,
        },
        'input_vocab': {
            'itos': INPUT.vocab.itos, 'stoi': INPUT.vocab.stoi,
        },
    }
    fwrite(json.dumps(writeout, indent=4), save_vocab_fname)

In [100]:
if verbose:
    msg = "[Info] Finished building vocab: {} INPUT, {} TGT" \
        .format(len(INPUT.vocab), len(TGT.vocab))
    show_time(msg)

⏰ Time: 10112040-18	[Info] Finished building vocab: 24887 INPUT, 21 TGT


get_dataloader

In [126]:
proc_id=proc_id   #0
n_gpus=n_gpus  # 1
device=device  
batch_size=args.batch_size   #10

In [102]:
def _distribute_dataset(dataset):
    """Return the contiguous shard of ``dataset`` for the current process.

    The shard boundaries are computed from the notebook-level ``proc_id``
    and ``n_gpus`` variables; the shard keeps the original ``fields``.
    """
    total = len(dataset)
    start = total * proc_id // n_gpus
    stop = total * (proc_id + 1) // n_gpus
    return torchtext.data.Dataset(dataset[start:stop], dataset.fields)

In [103]:
train_ds = _distribute_dataset(train_ds)

In [127]:
len(train_ds)

7200

In [105]:
train_iter, valid_iter = BucketIterator.splits(
        (train_ds, valid_ds),
        batch_sizes=(batch_size, batch_size),
        sort_within_batch=True,
        sort_key=lambda x: len(x.input),
        device=device
    )

In [128]:
item = next(iter(train_iter))
item


[torchtext.data.batch.Batch of size 10]
	[.tgt]:[torch.LongTensor of size 10x1]
	[.input]:[torch.LongTensor of size 10x19]
	[.show_inp]:['It is a controversial topic that provokes strong ENT_1_START arguments ENT_1_END for and against the ENT_2_START practice ENT_2_END .', "Whenever we try a new butcher we always buy the ENT_1_START butcher ENT_1_END 's ENT_2_START sausage ENT_2_END first .", 'The second ENT_1_START author ENT_1_END constructed the web ENT_2_START site ENT_2_END using a new open - source toolkit .', "Vietnam 's response on the ENT_1_START toll ENT_1_END caused by the ENT_2_START earthquake ENT_2_END in Sichuan , China .", 'Oh , so this was all about a ENT_1_START trash bag ENT_1_END with ENT_2_START money ENT_2_END in it .', 'The ENT_1_START assassination ENT_1_END resulted in extensive ENT_2_START arrests ENT_2_END of governmental , security , and criminal figures .', 'A superblack ENT_1_START material ENT_1_END made from microscopic carbon nanotubes is produced by a

In [129]:
item.tgt

tensor([[ 7],
        [12],
        [12],
        [ 4],
        [15],
        [13],
        [14],
        [ 7],
        [ 3],
        [ 9]])

In [190]:
item.input.shape

torch.Size([10, 18])

In [108]:
item.input

tensor([[    4, 16430,     3,   359,    19,     9,   349,   483,     8,     6,
            79,     5,     7],
        [   12,     4,  5988,     3,    16,  1322,    23,     2,  5988,     6,
           196,     5,     7],
        [   12,  9173,  6580,     4,  4836,     3,    15,    52,    17,     6,
          5721,     5,     7],
        [   12,   626,     4,  1046,     3,   246,    23,     9,   579,     6,
           706,     5,     7],
        [   12,     4, 18239,     3,    16,  1136,    19,     9, 18200,     6,
           625,     5,     7],
        [ 8163,    10,     4,   731,     3,    11,   995,   103,   601,     6,
          1654,     5,     7],
        [  855,    15,     9,     4, 18803,     3,     8,   571,   951,     6,
          3558,     5,     7],
        [   12,     4,   358,     3,    15,   568,    17,     2,     6,   301,
             5,     7,     1],
        [  407,     4,   667,     3, 24691,     2,   327,    19,     6, 20881,
             5,     7,     1],
        [ 

In [192]:
item.show_inp[0]

'One ENT_1_START examination paper ENT_1_END was in ENT_2_START physical geography ENT_2_END , the other in political geography .'

In [159]:
test_iter = BucketIterator(
            test_ds,
            batch_size=1,
            sort=False,
            sort_within_batch=False,
            device=device
        )

In [161]:
next(iter(test_iter))


[torchtext.data.batch.Batch of size 1]
	[.tgt]:[torch.LongTensor of size 1x1]
	[.input]:[torch.LongTensor of size 1x20]
	[.show_inp]:["Seasonal and nocturnal ENT_1_START migrations ENT_1_END cause sleep ENT_2_START deprivation ENT_2_END in diurnal Swainson 's thrush , Catharus ustulatus ."]

### 4.3 模型定义

In [139]:
from model import LSTMClassifier

In [171]:
# Build the classifier from the parsed hyperparameters.  The embedding
# vectors come from the input vocabulary, and the number of classes is the
# size of the target vocabulary (21 in the run shown here, which includes
# the <unk>/<pad> entries) rather than args.n_classes.
model = LSTMClassifier(emb_vectors=dataset.INPUT.vocab.vectors,
                           emb_dropout=args.emb_dropout,
                           lstm_dim=args.lstm_dim,
                           lstm_n_layer=args.lstm_n_layer,
                           lstm_dropout=args.lstm_dropout,
                           lstm_combine=args.lstm_combine,
                           linear_dropout=args.linear_dropout,
                           n_linear=args.n_linear,
                           n_classes=len(dataset.TGT.vocab))    # vscode

In [173]:
model = model.to(device)

In [174]:
def model_setup(proc_id, model, args):
    """Record the model size on ``args`` and, on process 0, dump run metadata.

    Args:
        proc_id: index of the current process; only process 0 writes the
            metadata file and prints the parameter count.
        model: object exposing ``parameters()``; parameters with
            ``requires_grad`` set are counted via ``numel()``.
        args: argument namespace; gains an ``n_params`` attribute and must
            provide ``save_meta_fname`` when ``proc_id == 0``.

    Returns:
        The same ``args`` object, updated in place.
    """
    def _count_parameters(model):
        return sum(
            p.numel() for p in model.parameters() if p.requires_grad)

    args.n_params = _count_parameters(model)

    if proc_id == 0:
        # Re-format the command line with one option per backslash-continued
        # line.  The backslash is written as '\\' because '\ ' is an invalid
        # escape sequence (it only works by accident and triggers a warning).
        writeout = " ".join(sys.argv[1:]).replace(' -', ' \\ \n-')
        writeout += '\n' * 3 + \
                    json.dumps(args.__dict__, indent=4, sort_keys=True)
        writeout += '\n' * 3 + repr(model)

        fwrite(writeout, args.save_meta_fname)

        print('[Info] Model has {} trainable parameters'.format(args.n_params))

    return args

In [175]:
args = model_setup(proc_id, model, args)

[Info] Model has 2652521 trainable parameters


In [176]:
model

LSTMClassifier(
  (embedding_layer): Embedding(24887, 100)
  (embedding_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.3, inplace=False)
  (linear_layers): ModuleList()
  (linear_dropout): Dropout(p=0.5, inplace=False)
  (label): Linear(in_features=100, out_features=21, bias=True)
  (crit): CrossEntropyLoss()
)

### 4.4 模型训练

In [140]:
from tqdm import tqdm

from utils import shell, init_weights, set_seed

from get_args import setup, model_setup, clean_up
from dataloader import Dataset
from evaluate import Validator, Predictor

In [177]:
epochs=args.epochs
lr=args.lr
weight_decay=args.weight_decay

In [178]:
# Adadelta over the trainable parameters only.  Use the learning rate read
# from the arguments above instead of a hard-coded 1.0 (the default of -lr
# is 1.0, so the default behaviour is unchanged).
opt = torch.optim.Adadelta(
        filter(lambda p: p.requires_grad, model.parameters()), lr=lr, rho=0.9,
        eps=1e-6, weight_decay=weight_decay)

In [179]:
model.train()

LSTMClassifier(
  (embedding_layer): Embedding(24887, 100)
  (embedding_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.3, inplace=False)
  (linear_layers): ModuleList()
  (linear_dropout): Dropout(p=0.5, inplace=False)
  (label): Linear(in_features=100, out_features=21, bias=True)
  (crit): CrossEntropyLoss()
)

In [180]:
total_loss = 0
n_correct = 0
cnt = 0

In [181]:
pbar = tqdm(train_dl)

  0%|          | 0/720 [00:00<?, ?it/s]

In [184]:
batch = next(iter(pbar))

  0%|          | 0/720 [00:39<?, ?it/s]


In [185]:
batch.tgt

tensor([[ 7],
        [ 2],
        [14],
        [ 4],
        [ 8],
        [ 9],
        [ 2],
        [ 4],
        [ 3],
        [ 2]])

In [193]:
batch_size = len(batch.tgt)

In [202]:
loss, acc = model.loss_n_acc(batch.input, batch.tgt)   #vscode

In [203]:
total_loss += loss.item() * batch_size
cnt += batch_size
n_correct += acc

In [200]:
def clip_gradient(model, clip_value):
    """Clamp every existing gradient of ``model`` in place.

    Each gradient element is limited to ``[-clip_value, clip_value]``;
    parameters whose ``grad`` is ``None`` are skipped.
    """
    lower, upper = -clip_value, clip_value
    for param in model.parameters():
        if param.grad is None:
            continue
        param.grad.data.clamp_(lower, upper)

In [204]:
opt.zero_grad()
loss.backward()
clip_gradient(model, 1)
opt.step()

### 4.5 模型评价
* 4.5.1 验证
* 4.5.2 最终测试

#### 4.5.1 验证

In [205]:
model.eval()

LSTMClassifier(
  (embedding_layer): Embedding(24887, 100)
  (embedding_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.3, inplace=False)
  (linear_layers): ModuleList()
  (linear_dropout): Dropout(p=0.5, inplace=False)
  (label): Linear(in_features=100, out_features=21, bias=True)
  (crit): CrossEntropyLoss()
)

In [206]:
validator = Validator(dataloader=valid_dl, save_dir=args.save_dir,
                          save_log_fname=args.save_log_fname,
                          save_model_fname=args.save_model_fname,
                          valid_or_test='valid',
                          vocab_itos=dataset.INPUT.vocab.itos,
                          label_itos=dataset.TGT.vocab.itos)

In [209]:
validator.evaluate(model, 1)   # vscode

validator

In [225]:
dataloader=valid_dl
save_dir=args.save_dir
save_log_fname=args.save_log_fname
save_model_fname=args.save_model_fname
valid_or_test='valid'
vocab_itos=dataset.INPUT.vocab.itos
label_itos=dataset.TGT.vocab.itos
best_loss = float('inf')
best_epoch = 0
epoch = 0

In [211]:
error = 0
count = 0
n_correct = 0

In [212]:
batch = next(iter(dataloader))

In [213]:
batch


[torchtext.data.batch.Batch of size 10]
	[.tgt]:[torch.LongTensor of size 10x1]
	[.input]:[torch.LongTensor of size 10x11]
	[.show_inp]:['The ENT_1_START carbon ENT_1_END sank into the ENT_2_START emitter ENT_2_END .', 'The ENT_1_START headmaster ENT_1_END made the formal ENT_2_START announcement ENT_2_END .', 'ENT_1_START Prospectors ENT_1_END have arrived in midland ENT_2_START cars ENT_2_END .', 'Its ENT_1_START introduction ENT_1_END supplies useful historical ENT_2_START background ENT_2_END .', 'ENT_1_START Biodiesel ENT_1_END is derived from vegetable ENT_2_START oils ENT_2_END .', 'Sometimes ENT_1_START joy ENT_1_END comes from unexpected ENT_2_START places ENT_2_END .', 'The ENT_1_START train ENT_1_END started to its ENT_2_START destination ENT_2_END .', 'The ENT_1_START party ENT_1_END starts in an ENT_2_START hour ENT_2_END .', 'ENT_1_START Rainwater ENT_1_END falls into special ENT_2_START use ENT_2_END .', 'The ENT_1_START nurse ENT_1_END wields the ENT_2_START scalpel EN

In [214]:
loss, acc = model.loss_n_acc(batch.input, batch.tgt)

In [215]:
loss

tensor(3.0777, grad_fn=<NllLossBackward>)

In [216]:
acc

0

In [217]:
error += loss.item() * batch_size
count += batch_size
n_correct += acc

In [219]:
avg_loss = (error / count)
acc = (n_correct / count)

In [226]:
if (valid_or_test == 'valid') and (avg_loss < best_loss):
            best_loss = avg_loss
            best_epoch = epoch

            checkpoint = {
                'model': model.state_dict(),
                'model_opt': model.opts,
                'epoch': epoch,
            }
            torch.save(checkpoint, save_model_fname)

#### 4.5.2 最终测试

In [230]:
predictor = Predictor(args.save_vocab_fname)

In [234]:
predictor.use_pretrained_model(args.save_model_fname, device=device)

Predictor

In [235]:
args.save_model_fname

'tmp/model'

In [238]:
checkpoint = torch.load(args.save_model_fname)

In [239]:
checkpoint

{'model': OrderedDict([('att_w',
               tensor([[[ 2.1291],
                        [ 0.3747],
                        [ 0.3253],
                        [-1.5362],
                        [ 0.8928],
                        [ 0.1631],
                        [-0.1253],
                        [ 0.8173],
                        [-0.5952],
                        [-0.5248],
                        [ 0.5028],
                        [ 0.6665],
                        [ 0.4987],
                        [-0.1424],
                        [ 0.5954],
                        [ 0.0886],
                        [-0.7207],
                        [-1.4731],
                        [-0.6029],
                        [ 0.9682],
                        [ 0.6790],
                        [ 0.1617],
                        [ 0.8173],
                        [-0.2477],
                        [-0.0329],
                        [ 0.5171],
                        [ 2.5996],
                      

In [243]:
checkpoint.keys()

dict_keys(['model', 'model_opt', 'epoch'])

In [245]:
checkpoint['model'].keys()

odict_keys(['att_w', 'embedding_layer.weight', 'lstm.weight_ih_l0', 'lstm.weight_hh_l0', 'lstm.bias_ih_l0', 'lstm.bias_hh_l0', 'lstm.weight_ih_l0_reverse', 'lstm.weight_hh_l0_reverse', 'lstm.bias_ih_l0_reverse', 'lstm.bias_hh_l0_reverse', 'label.weight', 'label.bias'])

In [240]:
checkpoint['model_opt']

{'vocab_size': 24887,
 'emb_dim': 100,
 'emb_dropout': 0.3,
 'emb_vectors': tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
         ...,
         [ 0.3643,  0.1154, -0.0702,  ..., -0.3755,  0.8278, -0.0084],
         [-0.1020,  0.7700,  0.1169,  ..., -0.1416, -0.1932, -0.4225],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]]),
 'lstm_dim': 100,
 'lstm_n_layer': 1,
 'lstm_dropout': 0.3,
 'lstm_combine': 'add',
 'n_linear': 1,
 'linear_dropout': 0.5,
 'n_classes': 21,
 'crit': CrossEntropyLoss()}

In [241]:
model = LSTMClassifier(**checkpoint['model_opt'])

In [242]:
model

LSTMClassifier(
  (embedding_layer): Embedding(24887, 100)
  (embedding_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.3, inplace=False)
  (linear_layers): ModuleList()
  (linear_dropout): Dropout(p=0.5, inplace=False)
  (label): Linear(in_features=100, out_features=21, bias=True)
  (crit): CrossEntropyLoss()
)

In [246]:
model.load_state_dict(checkpoint['model'])

<All keys matched successfully>

In [247]:
model = model.to(device)
model.eval()

LSTMClassifier(
  (embedding_layer): Embedding(24887, 100)
  (embedding_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (lstm_dropout): Dropout(p=0.3, inplace=False)
  (linear_layers): ModuleList()
  (linear_dropout): Dropout(p=0.5, inplace=False)
  (label): Linear(in_features=100, out_features=21, bias=True)
  (crit): CrossEntropyLoss()
)

In [268]:
predictor.pred_sent(dataset.INPUT)   # vscode

test_sentence: 'The most common ENT_1_START audits ENT_1_END were about ENT_2_START waste ENT_2_END and recycling .'
test_label: 'Product-Producer(e2,e1)'
prediction: 'Message-Topic(e2,e1)'


# 5 代码梳理及细节回顾(在VScode中演示)

　　在VScode环境中的训练文件里再回顾训练流程。

# 6 作业
  
`【思考题】`思考这篇文章的模型有什么不足，有什么可以改进的地方，包括LSTM的部分以及attention的部分。

`【代码实践】`复现该文章的模型部分代码。

`【画图】`不看文章原图，按照自己的理解画出模型的结构图。

`【总结】`对这篇文章进行回顾，思考并学习文章写作总体结构，模型设计等部分，并对相关工作进行总结（涉及使用RNN的工作）

---