# Classifying Relations by Ranking with Convolutional Neural Networks  ACL 2015

# 1前言

### 1.1课程回顾

<img src='imgs/overall.png' width="800" height="800" align="bottom">

### 1.2 模型结构

<img src="./imgs/model.png"  width="300" height="300" align="bottom" />

### 1.3 代码结构

<img src="./imgs/dir.png"  width="300" height="300" align="bottom" />

# 2 准备工作
### 2.1项目环境配置

* Python3.8
* jupyter notebook
* torch            1.6.0+cu10.2
* numpy            1.18.5

代码运行环境建议使用Visual Studio Code(VScode)

# 3 项目代码结构（VScode中演示）

>1）是什么？

　　我们首先会在VScode环境中让代码跑一下，直观感受到项目的训练，并展示前向推断的输出，让大家看到模型的效果。
>2）怎么构成的？

　　然后介绍项目代码的构成：项目有哪些文件夹、包含哪些文件，以及这些文件构成了哪些功能模块，例如数据预处理模块、模型设计模块、损失函数模块、推断与评估模块。
>3）小结

　　在主文件中再过一遍启动训练的流程。

# 4 算法模块及细节（jupyter和VScode中演示）

　　在jupyter notebook中细致地讲解每一个模块。
  
　　以实现模块功能为目的，来讲解每个函数的执行流程，呈现中间数据，方便同学们理解学习。
  
　　内容分为以下几个模块：**超参数设置，数据读取与处理，模型定义，模型训练，模型评价**。

### 4.1 超参数设置

In [105]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as D
from torch.autograd import Variable
from sklearn.metrics import f1_score
import data_pro as pro
import pyt_CR_CNN as pa
import json

In [6]:
DW = 50
NDW = 100
NN = 123
NDP = 70
NNP = 123
NNR = 19
NDC = 1000
NKP = 0.5
NK = 3
NLR = 0.025
N_BATCH_SIZE = 80
epochs = 100

### 4.2 数据读取与处理

#### 4.2.1 读入数据

In [7]:
# Load the label dictionaries; the `with` blocks close each file as soon as
# it has been parsed instead of leaving the handles open.
with open('./target_dict.txt', 'r', encoding='utf-8') as dict_file:
    target_dict = json.load(dict_file)
# Inverse mapping (value -> key) of target_dict.
c_target_dict = {value: key for key, value in target_dict.items()}
with open('./tr_target_dict.txt', 'r', encoding='utf-8') as dict_file:
    tr_target_dict = json.load(dict_file)

In [36]:
c_target_dict

{0: 'Message-Topic(e1,e2)',
 1: 'Product-Producer(e2,e1)',
 2: 'Instrument-Agency(e2,e1)',
 3: 'Entity-Destination(e1,e2)',
 4: 'Cause-Effect(e2,e1)',
 5: 'Component-Whole(e1,e2)',
 6: 'Product-Producer(e1,e2)',
 7: 'Member-Collection(e2,e1)',
 8: 'Other',
 9: 'Entity-Origin(e1,e2)',
 10: 'Content-Container(e1,e2)',
 11: 'Entity-Origin(e2,e1)',
 12: 'Cause-Effect(e1,e2)',
 13: 'Component-Whole(e2,e1)',
 14: 'Content-Container(e2,e1)',
 15: 'Instrument-Agency(e1,e2)',
 16: 'Message-Topic(e2,e1)',
 17: 'Member-Collection(e1,e2)',
 18: 'Entity-Destination(e2,e1)'}

In [35]:
tr_target_dict   # 不考虑方向

{'Message-Topic(e1,e2)': 0,
 'Product-Producer(e2,e1)': 1,
 'Instrument-Agency(e2,e1)': 2,
 'Entity-Destination(e1,e2)': 3,
 'Cause-Effect(e2,e1)': 4,
 'Component-Whole(e1,e2)': 5,
 'Product-Producer(e1,e2)': 1,
 'Member-Collection(e2,e1)': 7,
 'Other': 8,
 'Entity-Origin(e1,e2)': 9,
 'Content-Container(e1,e2)': 6,
 'Entity-Origin(e2,e1)': 9,
 'Cause-Effect(e1,e2)': 4,
 'Component-Whole(e2,e1)': 5,
 'Content-Container(e2,e1)': 6,
 'Instrument-Agency(e1,e2)': 2,
 'Message-Topic(e2,e1)': 0,
 'Member-Collection(e1,e2)': 7,
 'Entity-Destination(e2,e1)': 3}

In [24]:
sentences = []
relations = []
e1_pos = []
e2_pos = []

In [10]:
file = './nine_train.txt'

In [19]:
f = open(file, 'r', encoding='utf-8', errors='ignore')

In [20]:
line = [line for line in f.readlines()][0]

In [21]:
line

'13 12 12 15 15 the system as described above has its greatest application in an arrayed configuration of antenna elements .\n'

In [22]:
line = line.strip().lower().split()
line

['13',
 '12',
 '12',
 '15',
 '15',
 'the',
 'system',
 'as',
 'described',
 'above',
 'has',
 'its',
 'greatest',
 'application',
 'in',
 'an',
 'arrayed',
 'configuration',
 'of',
 'antenna',
 'elements',
 '.']

In [25]:
relations.append(int(line[0]))
relations

[13]

In [26]:
e1_pos.append((int(line[1]), int(line[2])))  # (start_pos, end_pos)
e2_pos.append((int(line[3]), int(line[4])))  # (start_pos, end_pos)
e1_pos

[(12, 12)]

In [27]:
sentences.append(line[5:])
sentences

[['the',
  'system',
  'as',
  'described',
  'above',
  'has',
  'its',
  'greatest',
  'application',
  'in',
  'an',
  'arrayed',
  'configuration',
  'of',
  'antenna',
  'elements',
  '.']]

In [8]:
def load_data(file):
    """Read a relation-extraction corpus file into parallel lists.

    Every non-blank line is lower-cased, split on whitespace and parsed as
    ``<relation> <e1_start> <e1_end> <e2_start> <e2_end> <token> ...``.
    Blank lines are skipped.

    Args:
        file: Path of the text file to read (decoded as UTF-8, undecodable
            bytes are ignored).

    Returns:
        A tuple ``(sentences, relations, e1_pos, e2_pos)``: a list of token
        lists, a list of int labels, and two lists of ``(start, end)`` int
        tuples for the first and second entity.

    Raises:
        ValueError: If one of the five leading fields is not an integer.
        IndexError: If a non-blank line has fewer than five fields.
    """
    sentences = []
    relations = []
    e1_pos = []
    e2_pos = []

    with open(file, 'r', encoding='utf-8', errors='ignore') as f:
        # Iterate the handle directly rather than loading everything with
        # readlines() first.
        for raw_line in f:
            fields = raw_line.strip().lower().split()
            if not fields:
                # A blank line (e.g. a trailing newline) carries no example.
                continue
            relations.append(int(fields[0]))
            e1_pos.append((int(fields[1]), int(fields[2])))  # (start_pos, end_pos)
            e2_pos.append((int(fields[3]), int(fields[4])))  # (start_pos, end_pos)
            sentences.append(fields[5:])

    return sentences, relations, e1_pos, e2_pos

In [28]:
data = pro.load_data('./nine_train.txt')
t_data = pro.load_data('./nine_test.txt')

In [33]:
len(data[0])

8000

In [34]:
data[1]

[13,
 8,
 2,
 8,
 17,
 8,
 4,
 3,
 10,
 3,
 17,
 8,
 0,
 4,
 2,
 0,
 2,
 1,
 13,
 7,
 9,
 7,
 12,
 8,
 7,
 8,
 12,
 0,
 0,
 5,
 16,
 4,
 6,
 3,
 5,
 9,
 8,
 13,
 12,
 2,
 12,
 10,
 7,
 9,
 4,
 2,
 2,
 4,
 8,
 8,
 4,
 13,
 4,
 4,
 7,
 0,
 8,
 0,
 10,
 8,
 3,
 10,
 7,
 3,
 8,
 8,
 9,
 0,
 3,
 9,
 4,
 7,
 4,
 3,
 9,
 4,
 16,
 1,
 4,
 4,
 2,
 7,
 12,
 7,
 6,
 8,
 3,
 4,
 4,
 8,
 9,
 5,
 9,
 2,
 9,
 2,
 4,
 7,
 4,
 4,
 8,
 3,
 4,
 2,
 4,
 10,
 5,
 2,
 3,
 10,
 10,
 9,
 0,
 11,
 1,
 8,
 12,
 8,
 5,
 7,
 8,
 9,
 3,
 0,
 16,
 2,
 7,
 3,
 12,
 3,
 9,
 5,
 1,
 6,
 7,
 8,
 9,
 8,
 12,
 8,
 8,
 8,
 7,
 9,
 4,
 7,
 7,
 3,
 8,
 10,
 9,
 12,
 0,
 3,
 1,
 3,
 9,
 1,
 0,
 0,
 13,
 11,
 2,
 13,
 12,
 4,
 10,
 9,
 7,
 0,
 4,
 13,
 2,
 11,
 9,
 13,
 10,
 3,
 8,
 0,
 4,
 5,
 8,
 8,
 1,
 5,
 8,
 8,
 4,
 14,
 13,
 3,
 1,
 3,
 7,
 5,
 9,
 4,
 6,
 7,
 2,
 9,
 12,
 8,
 10,
 4,
 17,
 1,
 3,
 3,
 8,
 13,
 7,
 13,
 4,
 9,
 9,
 1,
 0,
 2,
 8,
 7,
 4,
 8,
 8,
 13,
 5,
 2,
 7,
 3,
 10,
 13,
 3,
 9,
 0,
 6,
 8,
 15,
 

#### 4.2.4 构建词表

In [40]:
from collections import Counter

In [41]:
word_count = Counter()

In [42]:
sentences = data[0]

In [43]:
for sent in sentences:
    for w in sent:
        word_count[w] += 1

In [44]:
word_count

Counter({'the': 13022,
         'system': 103,
         'as': 719,
         'described': 22,
         'above': 30,
         'has': 629,
         'its': 219,
         'greatest': 16,
         'application': 12,
         'in': 3081,
         'an': 780,
         'arrayed': 1,
         'configuration': 5,
         'of': 5492,
         'antenna': 10,
         'elements': 8,
         '.': 8015,
         'child': 30,
         'was': 1446,
         'carefully': 11,
         'wrapped': 9,
         'and': 3380,
         'bound': 5,
         'into': 939,
         'cradle': 7,
         'by': 1268,
         'means': 18,
         'a': 5377,
         'cord': 9,
         'author': 49,
         'keygen': 1,
         'uses': 52,
         'disassembler': 1,
         'to': 2595,
         'look': 23,
         'at': 469,
         'raw': 11,
         'assembly': 15,
         'code': 19,
         'misty': 1,
         'ridge': 1,
         'uprises': 3,
         'from': 1446,
         'surge': 2,
         'stud

In [45]:
ls = word_count.most_common()
ls

[('the', 13022),
 ('.', 8015),
 ('of', 5492),
 ('a', 5377),
 (',', 4758),
 ('and', 3380),
 ('in', 3081),
 ('to', 2595),
 ('is', 1647),
 ('was', 1446),
 ('from', 1446),
 ('by', 1268),
 ('with', 1254),
 ('on', 1016),
 ('that', 992),
 ('for', 941),
 ('into', 939),
 ("'s", 787),
 ('an', 780),
 ('as', 719),
 ('are', 693),
 ('this', 675),
 ('it', 630),
 ('has', 629),
 ('his', 485),
 ('have', 481),
 ('at', 469),
 ('caused', 423),
 ('or', 417),
 ('i', 373),
 ('been', 367),
 ('which', 365),
 ('he', 355),
 ('were', 350),
 ('their', 323),
 ('one', 299),
 ('(', 279),
 (')', 279),
 ('who', 263),
 ('had', 262),
 ('made', 252),
 ('they', 244),
 ('we', 239),
 ('out', 237),
 ('after', 234),
 ('my', 225),
 ('when', 221),
 ('its', 219),
 ('all', 215),
 ('other', 214),
 ('up', 214),
 ('but', 212),
 ('new', 204),
 ('about', 201),
 ('inside', 200),
 ('two', 199),
 ('be', 197),
 ('her', 196),
 ('first', 192),
 ('some', 186),
 ('also', 174),
 ('over', 170),
 ('not', 168),
 ('these', 159),
 ('people', 158),
 (

In [46]:
word_dict = {w[0]: index + 1 for (index, w) in enumerate(ls)}
word_dict

{'the': 1,
 '.': 2,
 'of': 3,
 'a': 4,
 ',': 5,
 'and': 6,
 'in': 7,
 'to': 8,
 'is': 9,
 'was': 10,
 'from': 11,
 'by': 12,
 'with': 13,
 'on': 14,
 'that': 15,
 'for': 16,
 'into': 17,
 "'s": 18,
 'an': 19,
 'as': 20,
 'are': 21,
 'this': 22,
 'it': 23,
 'has': 24,
 'his': 25,
 'have': 26,
 'at': 27,
 'caused': 28,
 'or': 29,
 'i': 30,
 'been': 31,
 'which': 32,
 'he': 33,
 'were': 34,
 'their': 35,
 'one': 36,
 '(': 37,
 ')': 38,
 'who': 39,
 'had': 40,
 'made': 41,
 'they': 42,
 'we': 43,
 'out': 44,
 'after': 45,
 'my': 46,
 'when': 47,
 'its': 48,
 'all': 49,
 'other': 50,
 'up': 51,
 'but': 52,
 'new': 53,
 'about': 54,
 'inside': 55,
 'two': 56,
 'be': 57,
 'her': 58,
 'first': 59,
 'some': 60,
 'also': 61,
 'over': 62,
 'not': 63,
 'these': 64,
 'people': 65,
 'water': 66,
 'more': 67,
 'through': 68,
 'so': 69,
 'used': 70,
 'time': 71,
 'most': 72,
 'many': 73,
 'like': 74,
 'years': 75,
 'there': 76,
 'book': 77,
 'put': 78,
 'them': 79,
 'than': 80,
 'you': 81,
 'our': 82,

In [37]:
def build_dict(sentences):
    """Assign each token an integer id ranked by descending frequency.

    Args:
        sentences: Iterable of token sequences.

    Returns:
        A dict mapping token -> id. The most frequent token gets id 1, the
        next one id 2, and so on; id 0 is never assigned so that it stays
        free for padding.
    """
    frequencies = Counter(token for sentence in sentences for token in sentence)
    ranked = frequencies.most_common()
    # Ranks start at 1: 0 is left to PAD.
    return {token: rank for rank, (token, _count) in enumerate(ranked, start=1)}

In [38]:
word_dict = pro.build_dict(data[0])

#### 4.2.5 向量化

In [47]:
sentences, relations, e1_pos, e2_pos = data

In [50]:
e1_vec = []
e2_vec = []

In [51]:
num_data = len(sentences)
num_data

8000

In [53]:
sents_vec = np.zeros((num_data, NN), dtype=int)

In [54]:
dt = [(sent, pos1, pos2) for (sent, pos1, pos2)  in zip(sentences, e1_pos, e2_pos)]

In [55]:
sent, pos1, pos2 = dt[0]

In [56]:
sent

['the',
 'system',
 'as',
 'described',
 'above',
 'has',
 'its',
 'greatest',
 'application',
 'in',
 'an',
 'arrayed',
 'configuration',
 'of',
 'antenna',
 'elements',
 '.']

In [57]:
pos1

(12, 12)

In [58]:
pos2

(15, 15)

In [59]:
vec = [word_dict[w] if w in word_dict else 0 for w in sent]
vec

[1, 94, 20, 656, 436, 24, 48, 969, 1402, 7, 19, 9139, 3221, 3, 1690, 2105, 2]

In [60]:
sents_vec[0, :len(vec)] = vec

In [63]:
e1_vec.append(vec[pos1[1]])
e2_vec.append(vec[pos2[1]])

In [66]:
# compute relative distance
dist1 = []
dist2 = []

for sent, p1, p2 in zip(sents_vec, e1_pos, e2_pos):
    # current word position - last word position of e1 or e2
    dist1.append([pos(p1[1] - idx) for idx, _ in enumerate(sent)])
    dist2.append([pos(p2[1] - idx) for idx, _ in enumerate(sent)])


In [68]:
pos(e1_pos[0][1] - 0)

73

In [61]:
def vectorize(data, word_dict, max_len):
    """Convert tokenised examples into padded id rows and position features.

    Args:
        data: Tuple ``(sentences, relations, e1_pos, e2_pos)``; each
            ``*_pos`` entry is a ``(start, end)`` tuple of token indices.
        word_dict: Mapping from token to integer id. Tokens missing from it
            are encoded as 0, the same value used for padding.
        max_len: Number of columns of the padded sentence matrix.

    Returns:
        Tuple ``(sents_vec, relations, e1_vec, e2_vec, dist1, dist2)``:
        ``sents_vec`` is an int array of shape ``(len(sentences), max_len)``,
        ``relations`` is passed through unchanged, ``e1_vec`` / ``e2_vec``
        hold the word id of the last token of each entity, and ``dist1`` /
        ``dist2`` are lists of ``max_len`` position indices per example.

    Raises:
        IndexError: If an entity end index lies outside its sentence.
    """
    # Imported locally because no file-level import of logging is visible.
    import logging

    sentences, relations, e1_pos, e2_pos = data

    e1_vec = []
    e2_vec = []

    num_data = len(sentences)
    sents_vec = np.zeros((num_data, max_len), dtype=int)

    # Lazy %-style arguments: the message is only formatted if emitted.
    logging.debug('data shape: (%d, %d)', num_data, max_len)

    for idx, (sent, pos1, pos2) in enumerate(zip(sentences, e1_pos, e2_pos)):
        vec = [word_dict.get(w, 0) for w in sent]
        # Clip sentences longer than max_len; assigning the full list would
        # raise a broadcasting error.
        clipped = vec[:max_len]
        sents_vec[idx, :len(clipped)] = clipped

        # last word of e1 and e2 (looked up in the unclipped sentence)
        e1_vec.append(vec[pos1[1]])
        e2_vec.append(vec[pos2[1]])

    # compute relative distance
    dist1 = []
    dist2 = []

    for _row, p1, p2 in zip(sents_vec, e1_pos, e2_pos):
        # last word position of e1 or e2 - current column position,
        # mapped to a non-negative index by pos()
        dist1.append([pos(p1[1] - col) for col in range(max_len)])
        dist2.append([pos(p2[1] - col) for col in range(max_len)])

    return sents_vec, relations, e1_vec, e2_vec, dist1, dist2


In [65]:
def pos(x):
    """Map a relative distance to an index in the range [0, 123).

    Distances within [-60, 60] are shifted to ``x + 61`` (1..121); anything
    below -60 collapses to 0 and anything above 60 collapses to 122.
    """
    if -60 <= x <= 60:
        return x + 61
    if x > 60:
        return 122
    if x < -60:
        return 0

In [62]:
x, y, e1, e2, dist1, dist2 = pro.vectorize(data, word_dict, NN)

In [69]:
y = np.array(y).astype(np.int64)

In [70]:
y

array([13,  8,  2, ...,  3,  8,  1])

In [71]:
np_cat = np.concatenate((x, np.array(dist1), np.array(dist2)), 1)

In [73]:
np_cat.shape

(8000, 369)

In [77]:
e_x, e_y, e_e1, e_e2, e_dist1, e_dist2 = pro.vectorize(t_data, word_dict, NN)

In [78]:
eval_cat = np.concatenate((e_x, np.array(e_dist1), np.array(e_dist2)), 1)

#### 4.2.6 导入glove

In [79]:
glove = '/home/niuhao/project/v2_ABSA_baseline/InterGCN-ABSA/glove.42B.300d.txt'

In [81]:
word_dict

{'the': 1,
 '.': 2,
 'of': 3,
 'a': 4,
 ',': 5,
 'and': 6,
 'in': 7,
 'to': 8,
 'is': 9,
 'was': 10,
 'from': 11,
 'by': 12,
 'with': 13,
 'on': 14,
 'that': 15,
 'for': 16,
 'into': 17,
 "'s": 18,
 'an': 19,
 'as': 20,
 'are': 21,
 'this': 22,
 'it': 23,
 'has': 24,
 'his': 25,
 'have': 26,
 'at': 27,
 'caused': 28,
 'or': 29,
 'i': 30,
 'been': 31,
 'which': 32,
 'he': 33,
 'were': 34,
 'their': 35,
 'one': 36,
 '(': 37,
 ')': 38,
 'who': 39,
 'had': 40,
 'made': 41,
 'they': 42,
 'we': 43,
 'out': 44,
 'after': 45,
 'my': 46,
 'when': 47,
 'its': 48,
 'all': 49,
 'other': 50,
 'up': 51,
 'but': 52,
 'new': 53,
 'about': 54,
 'inside': 55,
 'two': 56,
 'be': 57,
 'her': 58,
 'first': 59,
 'some': 60,
 'also': 61,
 'over': 62,
 'not': 63,
 'these': 64,
 'people': 65,
 'water': 66,
 'more': 67,
 'through': 68,
 'so': 69,
 'used': 70,
 'time': 71,
 'most': 72,
 'many': 73,
 'like': 74,
 'years': 75,
 'there': 76,
 'book': 77,
 'put': 78,
 'them': 79,
 'than': 80,
 'you': 81,
 'our': 82,

In [82]:
fin = open(glove, 'r', encoding='utf-8', newline='\n', errors='ignore')

In [84]:
num_words = len(word_dict) + 1
num_words

19216

In [85]:
dim = 300

In [86]:
embeddings = np.random.uniform(-0.01, 0.01, size=(num_words, dim))
embeddings.shape

(19216, 300)

In [87]:
pre_trained = 0

In [89]:
line = fin.readline()

In [92]:
tokens = line.rstrip().split()

In [95]:
tokens[0]

'the'

In [96]:
# if tokens[0] in word_dict:
embeddings[word_dict[tokens[0]]] = np.asarray(tokens[1:], dtype='float32')

In [97]:
embeddings[word_dict[tokens[0]]]

array([-2.08379999e-01, -1.49320006e-01, -1.75279994e-02, -2.84320004e-02,
       -6.01040013e-02, -2.64600009e-01, -4.14449978e+00,  6.29320025e-01,
        3.36719990e-01, -4.33950007e-01,  3.98990005e-01, -1.95730001e-01,
        1.39770001e-01, -2.15189997e-02,  3.78230006e-01, -5.52500010e-01,
       -1.12300001e-01, -8.14430043e-03,  2.90589988e-01,  6.68170005e-02,
        1.04649998e-01, -8.69430006e-02, -4.89830002e-02, -2.67569989e-01,
       -4.70380008e-01,  2.74690002e-01,  6.92450032e-02, -2.79670004e-02,
       -1.97190002e-01,  1.67490002e-02, -2.96810001e-01,  1.78379998e-01,
        5.83739989e-02, -2.48060003e-01,  8.58459994e-02,  3.50430012e-01,
        4.91570011e-02, -1.64309993e-01,  5.00119984e-01, -1.80529997e-01,
        3.14220011e-01,  1.06710002e-01,  3.18519995e-02,  7.42779970e-02,
        2.79560000e-01,  8.03169981e-02,  5.47799990e-02, -3.03490013e-01,
       -4.32150006e-01,  3.24169993e-01,  4.08560008e-01,  3.61919999e-01,
        1.34450004e-01, -

In [99]:
embeddings[0] = np.zeros(dim, dtype='float32')

In [80]:
def load_embedding(glove, word_dict):
    """Build a word-embedding matrix for ``word_dict`` from a GloVe text file.

    Each non-blank line of the file is split on whitespace; the first field
    is the token and the remaining fields are its vector components.

    Args:
        glove: Path of the GloVe text file (300 components per token).
        word_dict: Mapping from token to row index in the returned matrix.

    Returns:
        A float32 array of shape ``(len(word_dict) + 1, 300)``. Row 0 is all
        zeros, rows of tokens found in the file hold their GloVe vectors, and
        all other rows keep a uniform random initialisation in
        [-0.01, 0.01).

    Raises:
        ValueError: If a matching line does not carry exactly 300 numeric
            components.
    """
    num_words = len(word_dict) + 1
    dim = 300
    embeddings = np.random.uniform(-0.01, 0.01, size=(num_words, dim))

    # The context manager closes the (potentially very large) file even if
    # parsing fails part-way through.
    with open(glove, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        for line in fin:
            tokens = line.rstrip().split()
            if not tokens:
                # Blank line: nothing to parse.
                continue
            row = word_dict.get(tokens[0])
            if row is not None:
                embeddings[row] = np.asarray(tokens[1:], dtype='float32')

    # Index 0 always maps to the zero vector.
    embeddings[0] = np.zeros(dim, dtype='float32')

    return embeddings.astype('float32')

In [100]:
embedding = pro.load_embedding(glove, word_dict)

### 4.3 模型定义

In [106]:
class CR_CNN(nn.Module):
    """Convolutional network that scores a sentence against every relation class.

    Word embeddings are concatenated with two position embeddings, passed
    through a single convolution + tanh, max-pooled over the sentence and
    multiplied with a learned class matrix ``rel_weight``.

    Args:
        max_len: Sentence length ``n`` the pooling layer is built for.
        embedding: 2-D NumPy array of pre-trained word vectors; its shape
            gives the vocabulary size and the word dimension.
        pos_embed_size: Dimension of each position embedding.
        pos_embed_num: Number of distinct position indices.
        slide_window: Convolution window size ``k`` over tokens.
        class_num: Number of relation classes.
        num_filters: Number of convolution filters ``dc``.
        keep_prob: Value handed to ``nn.Dropout``.
            NOTE(review): ``nn.Dropout`` takes the probability of dropping,
            so the name suggests the opposite of what is applied - confirm
            the intent (the two coincide only at 0.5).
    """

    def __init__(self, max_len, embedding, pos_embed_size,
                 pos_embed_num, slide_window, class_num,
                 num_filters, keep_prob):
        super().__init__()
        # Plain hyper-parameters.
        self.vac_len, self.dw = embedding.shape[0], embedding.shape[1]
        self.dp = pos_embed_size
        self.d = self.dw + 2 * self.dp  # width of one concatenated token vector
        self.np = pos_embed_num
        self.nr = class_num
        self.dc = num_filters
        self.keep_prob = keep_prob
        self.k = slide_window
        self.p = (self.k - 1) // 2  # padding along the token axis
        self.n = max_len

        # Word embedding, overwritten with the pre-trained weights.
        self.x_embedding = nn.Embedding(self.vac_len, self.dw)
        self.x_embedding.weight = nn.Parameter(torch.from_numpy(embedding))
        # One position embedding per entity.
        self.d1_embedding = nn.Embedding(self.np, self.dp)
        self.d2_embedding = nn.Embedding(self.np, self.dp)

        # Class matrix, uniform in [-init_r / 2, init_r / 2).
        self.init_r = np.sqrt(6 / (self.nr + self.dc))
        self.rel_weight = nn.Parameter(self.init_r * (torch.rand(self.dc, self.nr) - 0.5))

        self.dropout = nn.Dropout(self.keep_prob)
        # Each filter spans k tokens and the full feature width d.
        self.conv = nn.Conv2d(1, self.dc, (self.k, self.d), (1, self.d), (self.p, 0), bias=True)
        self.tanh = nn.Tanh()
        self.max_pool = nn.MaxPool2d((1, self.n), (1, self.dc))

    def concat_input(self, x, dist1, dist2, is_training=True):
        """Embed words and both distances and join them on the last axis.

        Dropout is applied to the joined tensor only when ``is_training``
        is true.
        """
        pieces = [
            self.x_embedding(x),  # (bz, n, dw)
            self.d1_embedding(dist1),
            self.d2_embedding(dist2),
        ]
        features = torch.cat(pieces, 2)
        return self.dropout(features) if is_training else features

    def convolution(self, R):
        """Convolve a (bz, n, d) input and return tanh features as (bz, dc, n)."""
        dims = R.size()
        feature_maps = self.conv(R.view(dims[0], 1, dims[1], dims[2]))
        return self.tanh(feature_maps.view(dims[0], self.dc, dims[1]))

    def max_pooling(self, rx, rel_weight):
        """Max-pool over the token axis and project onto ``rel_weight``.

        Returns a (bz, nr) score matrix when ``rel_weight`` is (dc, nr).
        """
        batch = rx.size(0)
        pooled = self.max_pool(rx.view(batch, 1, self.dc, self.n))
        return torch.mm(pooled.view(batch, self.dc), rel_weight)

    def forward(self, x, dist1, dist2, is_training=True):
        """Return the class scores for a batch of word and distance indices."""
        embedded = self.concat_input(x, dist1, dist2, is_training)
        conv_features = self.convolution(embedded)
        return self.max_pooling(conv_features, self.rel_weight)

In [102]:
model = pa.CR_CNN(NN, embedding, NDP, NNP, NK, NNR, NDC, NKP).cuda()

In [107]:
optimizer = torch.optim.SGD(model.parameters(), lr=NLR, weight_decay=0.001)  # plain SGD with weight decay over all parameters of the model

In [108]:
class PairwiseRankingLoss(nn.Module):
    """Ranking loss over class scores.

    For every example it combines two softplus terms: one that pushes the
    score selected for the true class above ``pos_margin`` and one that
    pushes the best remaining score below ``-neg_margin``, both scaled by
    ``gamma``. The result is averaged over the batch.

    Args:
        nr: Number of classes.
        pos_margin: Margin for the true-class score.
        neg_margin: Margin for the best competing score.
        gamma: Scaling factor applied inside both softplus terms.
    """

    def __init__(self, nr, pos_margin=2.5, neg_margin=0.5, gamma=2):
        super().__init__()
        self.nr = nr
        self.pos_margin = pos_margin
        self.neg_margin = neg_margin
        self.gamma = gamma

    def forward(self, sc, in_y):
        """Return the mean loss for scores ``sc`` and labels ``in_y``.

        ``one_hot`` is a helper defined outside this block; it is presumably
        called as (indices, depth, on_value, off_value) and returns a
        (bz, nr) mask - TODO confirm against its definition.
        """
        pos_mask = one_hot(in_y, self.nr, 1000, 0)  # (bz, nr)
        neg_mask = one_hot(in_y, self.nr, 0, 1000)
        # Subtracting 1000 takes the masked entries out of the max.
        sc_neg = torch.max(sc - pos_mask, 1)[0]
        sc_pos = torch.max(sc - neg_mask, 1)[0]
        pos_ele = torch.mul((self.pos_margin - sc_pos), self.gamma)
        neg_ele = torch.mul((self.neg_margin + sc_neg), self.gamma)
        # softplus(x) == log(1 + exp(x)) but does not overflow for large x,
        # unlike log1p(exp(x)).
        softplus = nn.functional.softplus
        loss = torch.mean(softplus(pos_ele) + softplus(neg_ele))
        return loss

In [109]:
loss_func = pa.PairwiseRankingLoss(NNR)

### 4.4 模型训练

In [110]:
acc = 0
loss = 0

In [111]:
train = torch.from_numpy(np_cat.astype(np.int64))

In [112]:
train.shape

torch.Size([8000, 369])

In [113]:
y_tensor = torch.LongTensor(y)

In [114]:
train_datasets = D.TensorDataset(train, y_tensor)

In [115]:
train_dataloader = D.DataLoader(train_datasets, N_BATCH_SIZE, True, num_workers=2)

In [116]:
j = 0

In [120]:
# Fetch a single batch for inspection; next(iter(...)) stops after the first
# batch instead of running the whole epoch just to keep element 0.
b_x_cat, b_y = next(iter(train_dataloader))

In [122]:
b_x_cat.shape

torch.Size([80, 369])

In [123]:
b_y.shape

torch.Size([80])

In [125]:
list_x = np.split(b_x_cat.numpy(), [NN, NN + NNP], 1)
list_x

[array([[  491,  1559,  1524, ...,     0,     0,     0],
        [    1, 15243,    10, ...,     0,     0,     0],
        [ 9671,     9,     4, ...,     0,     0,     0],
        ...,
        [   43,    26,   608, ...,     0,     0,     0],
        [   99,  3162,  2086, ...,     0,     0,     0],
        [15193, 15194,   576, ...,     0,     0,     0]]),
 array([[71, 70, 69, ...,  0,  0,  0],
        [62, 61, 60, ...,  0,  0,  0],
        [61, 60, 59, ...,  0,  0,  0],
        ...,
        [65, 64, 63, ...,  0,  0,  0],
        [69, 68, 67, ...,  0,  0,  0],
        [65, 64, 63, ...,  0,  0,  0]]),
 array([[75, 74, 73, ...,  0,  0,  0],
        [67, 66, 65, ...,  0,  0,  0],
        [67, 66, 65, ...,  0,  0,  0],
        ...,
        [67, 66, 65, ...,  0,  0,  0],
        [72, 71, 70, ...,  0,  0,  0],
        [68, 67, 66, ...,  0,  0,  0]])]

In [128]:
list_x[1].shape

(80, 123)

In [131]:
bx = Variable(torch.from_numpy(list_x[0])).cuda()
bd1 = Variable(torch.from_numpy(list_x[1])).cuda()
bd2 = Variable(torch.from_numpy(list_x[2])).cuda()
target = Variable(b_y).cuda()

In [124]:
def data_unpack(cat_data, target):
    """Split a concatenated batch into its three inputs and move it to the GPU.

    Args:
        cat_data: 2-D tensor whose columns are the word ids followed by the
            two distance sequences, each ``NN`` columns wide.
        target: Tensor of labels for the batch.

    Returns:
        Tuple ``(bx, bd1, bd2, target)`` of CUDA tensors: word ids, distances
        to the first entity, distances to the second entity, and labels.
    """
    # Every segment has the padded sentence length NN. The split point must
    # not depend on NNP (the number of position indices), which only happens
    # to have the same value.
    bx = cat_data[:, :NN].cuda()
    bd1 = cat_data[:, NN:2 * NN].cuda()
    bd2 = cat_data[:, 2 * NN:].cuda()
    target = target.cuda()
    return bx, bd1, bd2, target

In [132]:
bx, bd1, bd2, by = data_unpack(b_x_cat, b_y)

In [133]:
wo = model(bx, bd1, bd2)  #vscode

In [135]:
wo.shape

torch.Size([80, 19])

In [138]:
ay = list(by.cpu().data.numpy())
ay

[3,
 8,
 8,
 7,
 3,
 0,
 0,
 4,
 2,
 3,
 5,
 2,
 8,
 14,
 16,
 3,
 9,
 7,
 5,
 2,
 8,
 8,
 3,
 8,
 7,
 5,
 15,
 3,
 3,
 3,
 8,
 12,
 8,
 12,
 3,
 8,
 16,
 3,
 12,
 6,
 3,
 13,
 7,
 8,
 5,
 8,
 10,
 6,
 1,
 6,
 8,
 13,
 8,
 9,
 5,
 10,
 10,
 2,
 2,
 4,
 5,
 13,
 1,
 16,
 10,
 8,
 10,
 8,
 0,
 10,
 4,
 4,
 1,
 12,
 0,
 3,
 6,
 3,
 8,
 0]

In [139]:
c_y = [c_target_dict[item] for item in ay]

In [140]:
c_y

['Entity-Destination(e1,e2)',
 'Other',
 'Other',
 'Member-Collection(e2,e1)',
 'Entity-Destination(e1,e2)',
 'Message-Topic(e1,e2)',
 'Message-Topic(e1,e2)',
 'Cause-Effect(e2,e1)',
 'Instrument-Agency(e2,e1)',
 'Entity-Destination(e1,e2)',
 'Component-Whole(e1,e2)',
 'Instrument-Agency(e2,e1)',
 'Other',
 'Content-Container(e2,e1)',
 'Message-Topic(e2,e1)',
 'Entity-Destination(e1,e2)',
 'Entity-Origin(e1,e2)',
 'Member-Collection(e2,e1)',
 'Component-Whole(e1,e2)',
 'Instrument-Agency(e2,e1)',
 'Other',
 'Other',
 'Entity-Destination(e1,e2)',
 'Other',
 'Member-Collection(e2,e1)',
 'Component-Whole(e1,e2)',
 'Instrument-Agency(e1,e2)',
 'Entity-Destination(e1,e2)',
 'Entity-Destination(e1,e2)',
 'Entity-Destination(e1,e2)',
 'Other',
 'Cause-Effect(e1,e2)',
 'Other',
 'Cause-Effect(e1,e2)',
 'Entity-Destination(e1,e2)',
 'Other',
 'Message-Topic(e2,e1)',
 'Entity-Destination(e1,e2)',
 'Cause-Effect(e1,e2)',
 'Product-Producer(e1,e2)',
 'Entity-Destination(e1,e2)',
 'Component-Whole(

In [141]:
new_y = np.array([tr_target_dict[item] for item in c_y])
new_y    # 不考虑方向

array([3, 8, 8, 7, 3, 0, 0, 4, 2, 3, 5, 2, 8, 6, 0, 3, 9, 7, 5, 2, 8, 8,
       3, 8, 7, 5, 2, 3, 3, 3, 8, 4, 8, 4, 3, 8, 0, 3, 4, 1, 3, 5, 7, 8,
       5, 8, 6, 1, 1, 1, 8, 5, 8, 9, 5, 6, 6, 2, 2, 4, 5, 5, 1, 0, 6, 8,
       6, 8, 0, 6, 4, 4, 1, 4, 0, 3, 1, 3, 8, 0])

In [142]:
predict = torch.max(wo, 1)[1].long()
predict

tensor([14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14, 14, 14, 14, 14], device='cuda:0')

In [143]:
ap = list(predict.cpu().data.numpy())

In [144]:
c_target_dict

{0: 'Message-Topic(e1,e2)',
 1: 'Product-Producer(e2,e1)',
 2: 'Instrument-Agency(e2,e1)',
 3: 'Entity-Destination(e1,e2)',
 4: 'Cause-Effect(e2,e1)',
 5: 'Component-Whole(e1,e2)',
 6: 'Product-Producer(e1,e2)',
 7: 'Member-Collection(e2,e1)',
 8: 'Other',
 9: 'Entity-Origin(e1,e2)',
 10: 'Content-Container(e1,e2)',
 11: 'Entity-Origin(e2,e1)',
 12: 'Cause-Effect(e1,e2)',
 13: 'Component-Whole(e2,e1)',
 14: 'Content-Container(e2,e1)',
 15: 'Instrument-Agency(e1,e2)',
 16: 'Message-Topic(e2,e1)',
 17: 'Member-Collection(e1,e2)',
 18: 'Entity-Destination(e2,e1)'}

In [145]:
c_p = [c_target_dict[item] for item in ap]
c_p

['Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Container(e2,e1)',
 'Content-Cont

In [146]:
new_p = np.array([tr_target_dict[item] for item in c_p])

In [147]:
new_p

array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6])

In [148]:
f1 = f1_score(new_y, new_p, average='micro')
f1

0.0875

In [137]:
def prediction(sc, y):
    """Score a batch with micro-averaged F1 on direction-agnostic labels.

    Both the gold labels ``y`` and the arg-max of the scores ``sc`` are
    translated id -> relation name (``c_target_dict``) -> merged id
    (``tr_target_dict``) before being compared.

    Returns:
        The micro F1 of the batch as a percentage (0-100).
    """
    def merge_direction(class_ids):
        # Two lookups per id: directed name first, then its merged id.
        names = [c_target_dict[class_id] for class_id in class_ids]
        return np.array([tr_target_dict[name] for name in names])

    gold = merge_direction(list(y.cpu().data.numpy()))
    best_class = torch.max(sc, 1)[1].long()
    guessed = merge_direction(list(best_class.cpu().data.numpy()))
    return f1_score(gold, guessed, average='micro') * 100

In [149]:
acc += prediction(wo, by)

In [150]:
acc

8.75

In [151]:
l = loss_func(wo, by)   #vscode

In [152]:
l

tensor(9.0330, device='cuda:0', grad_fn=<MeanBackward0>)

In [153]:
j += 1
optimizer.zero_grad()
l.backward()
optimizer.step()
# Accumulate a detached value: adding `l` itself would keep every batch's
# autograd graph reachable through the running sum.
loss += l.detach()

### 4.5 模型评价

In [154]:
eval = torch.from_numpy(eval_cat.astype(np.int64))

In [155]:
eval_acc = 0
ti = 0
y_tensor = torch.LongTensor(e_y)

In [156]:
eval_datasets = D.TensorDataset(eval, y_tensor)
eval_dataloader = D.DataLoader(eval_datasets, N_BATCH_SIZE, True, num_workers=2)

In [157]:
# Evaluation only needs forward passes; no_grad() stops autograd from
# recording them, which saves memory and time.
with torch.no_grad():
    for (b_x_cat, b_y) in eval_dataloader:
        bx, bd1, bd2, by = data_unpack(b_x_cat, b_y)
        wo = model(bx, bd1, bd2, False)  # False: skip dropout
        eval_acc += prediction(wo, by)
        ti += 1

In [158]:
eval_acc

568.0357142857143

In [160]:
i = 0

In [161]:
print('epoch:', i, 'f1:', acc / j, '%   loss:', loss.cpu().data.numpy() / j, 'test_f1:', eval_acc / ti, '%')

epoch: 0 f1: 8.75 %   loss: 9.032968521118164 test_f1: 16.706932773109244 %


# 5 代码梳理及细节回顾(在VScode中演示)

　　在VScode环境中的训练文件里再回顾训练流程。

# 6 作业
  
`【思考题】`思考这篇文章的模型在关系抽取任务上还有什么可以改进的地方。

`【代码实践】`复现该文章代码的CR_CNN和PairwiseRankingLoss部分。

`【画图】`不看文章原图，按照自己的理解画出模型整体的结构图。

`【总结】`对这篇文章进行回顾总结，思考并学习文章写作总体结构，学习实验设计等内容。

---