In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
import numpy as np
import os
import json
import random
    
def load_pickle(path):
    """Deserialize and return the single object stored in the pickle file at ``path``.

    SECURITY: ``pickle.load`` can execute arbitrary code while deserializing.
    Only use this on vocabulary files that you produced yourself or otherwise trust.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# The three vocabularies are expected next to the notebook (current working directory).

# load source words (input-token vocabulary)
source_words_path = os.path.join(os.getcwd(), 'source_words.pkl')
source_words = load_pickle(source_words_path)

# load target words (slot-tag vocabulary)
target_words_path = os.path.join(os.getcwd(), 'target_words.pkl')
target_words = load_pickle(target_words_path)

# load label words (intent-label vocabulary)
label_words_path = os.path.join(os.getcwd(), 'label_words.pkl')
label_words = load_pickle(label_words_path)

In [4]:
# Vocabulary sizes, in order: source tokens, slot tags, intent labels.
for vocab in (source_words, target_words, label_words):
    print(len(vocab))

# Indices of the special tokens in the source vocabulary.
for special_token in ('<pad>', '<sep>', '<cls>', '<unk>'):
    print(source_words[special_token])

# Padding index of the slot-tag vocabulary.
print(target_words['<pad>'])

945
133
27
1
3
2
0
1


In [6]:
# Model definition: Transformer encoder building blocks and the BERT-style
# joint intent / slot model built on top of them.

# Run on the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

class Encoder(nn.Module):
    """Transformer encoder: scaled token embeddings plus learned position
    embeddings, followed by a stack of ``EncoderLayer`` blocks.

    Args:
        input_dim: size of the token vocabulary (rows of the token embedding).
        hid_dim: embedding / hidden size.
        n_layers: number of stacked ``EncoderLayer`` blocks.
        n_heads: attention heads per layer.
        pf_dim: inner size of the position-wise feed-forward network.
        dropout: dropout probability applied to the summed embeddings
            (and passed on to every layer).
        max_length: number of positions in the learned position table;
            longer inputs are rejected.
    """

    def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, max_length=100):
        super(Encoder, self).__init__()
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)

        # Stack of encoder layers.
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, n_heads, pf_dim, dropout) for _ in range(n_layers)])

        self.dropout = nn.Dropout(dropout)

        # sqrt(hid_dim) factor applied to the token embeddings. Registered as a
        # NON-persistent buffer so that it follows the module across devices
        # (``.to()`` / ``.cuda()``) without adding a key to the state_dict, which
        # keeps previously saved checkpoints loadable.
        self.register_buffer('scale', torch.sqrt(torch.FloatTensor([hid_dim])), persistent=False)

    def forward(self, src, src_mask):
        """Encode a batch of token-id sequences.

        Args:
            src: LongTensor ``[batch_size, src_len]`` of token ids.
            src_mask: mask forwarded unchanged to every layer
                (``[batch_size, 1, 1, src_len]``), or ``None``.

        Returns:
            Tensor ``[batch_size, src_len, hid_dim]``.

        Raises:
            IndexError: if ``src_len`` exceeds the size of the position table.
        """
        src_len = src.shape[1]

        max_length = self.pos_embedding.num_embeddings
        if src_len > max_length:
            # Fail early with a readable message instead of an opaque
            # out-of-range embedding lookup.
            raise IndexError(
                'sequence length {} exceeds the maximum supported length {}'.format(src_len, max_length)
            )

        # Position ids live on the same device as the input; the leading
        # singleton axis broadcasts over the batch.
        pos = torch.arange(0, src_len, device=src.device).unsqueeze(0)  # [1, src_len]

        # Token embedding (scaled) + position embedding.
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))  # [batch_size, src_len, hid_dim]

        for layer in self.layers:
            src = layer(src, src_mask)  # [batch_size, src_len, hid_dim]

        return src
            
class EncoderLayer(nn.Module):
    """One encoder block: multi-head self-attention and a position-wise
    feed-forward network, each wrapped in dropout + residual + LayerNorm."""

    def __init__(self, hid_dim, n_heads, pf_dim, dropout):
        super().__init__()
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def _add_and_norm(self, norm, residual, sublayer_out):
        """Apply dropout to the sub-layer output, add the residual, then normalize."""
        return norm(residual + self.dropout(sublayer_out))

    def forward(self, src, src_mask):
        """Run one encoder block.

        src:      [batch_size, src_len, hid_dim]
        src_mask: [batch_size, 1, 1, src_len]
        returns:  [batch_size, src_len, hid_dim]
        """
        # Step 1: self-attention (query = key = value = src), then add & norm.
        attended = self.self_attention(src, src, src, src_mask)
        hidden = self._add_and_norm(self.self_attn_layer_norm, src, attended)

        # Step 2: feed-forward network, then add & norm.
        transformed = self.positionwise_feedforward(hidden)
        return self._add_and_norm(self.ff_layer_norm, hidden, transformed)

class MultiHeadAttentionLayer(nn.Module):
    """Scaled dot-product multi-head attention.

    Args:
        hid_dim: model hidden size; must be divisible by ``n_heads``.
        n_heads: number of attention heads.
        dropout: dropout probability applied to the attention weights.
    """

    def __init__(self, hid_dim, n_heads, dropout):
        super(MultiHeadAttentionLayer, self).__init__()
        assert hid_dim % n_heads == 0, 'hid_dim must be divisible by n_heads'

        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads

        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)

        self.fc_o = nn.Linear(hid_dim, hid_dim)

        self.dropout = nn.Dropout(dropout)

        # sqrt(head_dim) divisor for the attention scores. A NON-persistent buffer
        # follows the module across devices without adding a state_dict key, so
        # existing checkpoints still load with strict key matching.
        self.register_buffer('scale', torch.sqrt(torch.FloatTensor([self.head_dim])), persistent=False)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` over ``key`` / ``value``.

        Args:
            query: [batch_size, query_len, hid_dim]
            key:   [batch_size, key_len, hid_dim]
            value: [batch_size, key_len, hid_dim]
            mask:  optional tensor broadcastable to
                [batch_size, n_heads, query_len, key_len]; positions where the
                mask equals 0 are excluded from attention.

        Returns:
            Tensor [batch_size, query_len, hid_dim].
        """
        batch_size = query.shape[0]

        Q = self.fc_q(query)
        K = self.fc_k(key)
        V = self.fc_v(value)

        # Split the hidden dimension into heads: [batch_size, n_heads, seq_len, head_dim]
        Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
        V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)

        energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale  # [batch_size, n_heads, query_len, key_len]

        if mask is not None:
            # Fixed: the tensor method is ``masked_fill`` (``mask_fill`` does not exist).
            # A very negative score becomes ~0 probability after the softmax.
            energy = energy.masked_fill(mask == 0, -1e10)

        attention = torch.softmax(energy, dim=-1)  # [batch_size, n_heads, query_len, key_len]

        x = torch.matmul(self.dropout(attention), V)  # [batch_size, n_heads, query_len, head_dim]

        # Merge the heads back into a single hidden dimension.
        x = x.permute(0, 2, 1, 3).contiguous()  # [batch_size, query_len, n_heads, head_dim]

        x = x.view(batch_size, -1, self.hid_dim)  # [batch_size, query_len, hid_dim]

        x = self.fc_o(x)  # [batch_size, query_len, hid_dim]

        return x


class PositionwiseFeedforwardLayer(nn.Module):
    """Two-layer feed-forward network applied independently at every position:
    Linear(hid_dim -> pf_dim), GELU, dropout, Linear(pf_dim -> hid_dim)."""

    def __init__(self, hid_dim, pf_dim, dropout):
        super(PositionwiseFeedforwardLayer, self).__init__()
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)
        self.gelu = nn.GELU()

    def forward(self, x):
        """Map ``x`` [batch_size, seq_len, hid_dim] to a tensor of the same shape."""
        x = self.dropout(self.gelu(self.fc_1(x)))  # [batch_size, seq_len, pf_dim]
        x = self.fc_2(x)  # [batch_size, seq_len, hid_dim]

        return x


class BERT(nn.Module):
    """Encoder with two heads: intent classification from the first (<cls>)
    position and per-token slot prediction from the remaining positions.

    Args:
        input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout: forwarded to ``Encoder``.
        slot_size: nominal number of slot classes (see the NOTE in ``__init__``).
        intent_size: number of intent classes.
        src_pad_idx: id of the padding token in the source vocabulary.
    """

    def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, slot_size, intent_size, src_pad_idx):
        super(BERT, self).__init__()
        self.src_pad_idx = src_pad_idx
        self.encoder = Encoder(input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout)
        self.gelu = nn.GELU()

        # Intent head: transform of the <cls> hidden state, then a linear classifier.
        self.fc = nn.Sequential(nn.Linear(hid_dim, hid_dim), nn.Dropout(dropout), nn.Tanh())
        self.intent_out = nn.Linear(hid_dim, intent_size)

        # Slot head: hidden transform followed by a projection tied to the token embedding.
        self.linear = nn.Linear(hid_dim, hid_dim)

        embed_weight = self.encoder.tok_embedding.weight
        self.slot_out = nn.Linear(hid_dim, slot_size, bias=False)
        # NOTE(review): tying replaces the [slot_size, hid_dim] weight with the
        # [input_dim, hid_dim] embedding matrix, so ``slot_out`` actually emits
        # ``input_dim`` logits per token rather than ``slot_size``. Kept as-is
        # because saved checkpoints were produced with this layout -- confirm
        # whether the tying is intentional before changing it.
        self.slot_out.weight = embed_weight

    def make_src_mask(self, src):
        """Build the padding mask for ``src``.

        Args:
            src: LongTensor [batch_size, src_len] of token ids.

        Returns:
            Bool tensor [batch_size, 1, 1, src_len]; True for real tokens,
            False at padding positions.
        """
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)  # [batch_size, 1, 1, src_len]
        # Fixed: the mask was computed but never returned, so ``forward`` always
        # passed ``None`` to the encoder and padding was never masked.
        return src_mask

    def forward(self, src):
        """Predict intent and slot logits for a batch of token-id sequences.

        Args:
            src: LongTensor [batch_size, src_len]; position 0 is expected to be <cls>.

        Returns:
            Tuple ``(intent_output, slot_output)`` where ``intent_output`` is
            [batch_size, intent_size] and ``slot_output`` is
            [batch_size, src_len - 1, n_logits] (``n_logits`` is the row count of
            the tied embedding matrix, see ``__init__``).
        """
        src_mask = self.make_src_mask(src)
        encoder_out = self.encoder(src, src_mask)  # [batch_size, src_len, hid_dim]

        # Use the <cls> position for intent classification.
        cls_hidden = self.fc(encoder_out[:, 0])  # [batch_size, hid_dim]
        intent_output = self.intent_out(cls_hidden)  # [batch_size, intent_size]

        # Use every position except <cls> for slot prediction.
        other_hidden = self.gelu(self.linear(encoder_out[:, 1:]))  # [batch_size, src_len-1, hid_dim]
        slot_output = self.slot_out(other_hidden)  # [batch_size, src_len-1, n_logits]
        return intent_output, slot_output

In [7]:
# Hyper-parameters; they must match the ones the checkpoint was saved with,
# otherwise load_state_dict below reports missing / mismatched keys.
n_layers = 6  # number of transformer-encoder layers
n_heads = 12  # number of self-attention heads
hid_dim = 768
dropout = 0.5
pf_dim = 768 * 4

# Checkpoint file expected in the current working directory; it is read with
# torch.load despite the .h5 extension.
model_path = os.path.join(os.getcwd(), "model.h5")

input_dim = len(source_words)  # size of the source vocabulary (number of tokens)
slot_size = len(target_words)  # size of the target vocabulary (number of slot tags)
intent_size = len(label_words)  # size of the label vocabulary (number of intent classes)

src_pad_idx = source_words['<pad>']

model = BERT(input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, slot_size, intent_size, src_pad_idx).to(DEVICE)

# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on a
# GPU can still be loaded on a CPU-only machine.
# SECURITY: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location=DEVICE))


<All keys matched successfully>

In [11]:
sentence = "i would like to find a flight from charlotte to las vegas that makes a stop in st. louis"
sentence2 = "which airlines have first class flights today"
sentence3 = 'on april first i need a ticket from tacoma to san jose departing before 7 am'
sentence4 = 'what are the departure times from detroit to westchester county'
sentence5 = 'please find a flight round trip from los angeles to tacoma washington with a stopover in san francisco not exceeding the price of 300 dollars for june tenth 1993'
sentence6 = 'show me flight us 1207 from indianapolis to charlotte on monday and flight us 1500 from charlotte to minneapolis on monday and flight twa 639 from minneapolis to indianapolis'
sentence7 = 'i need to fly from denver to westchester county on june seventh after 3 pm'
sentence8 = 'what meals are served on american flight 811 from tampa to milwaukee'
sentence9 = 'meals are served on american flight 665 673 from milwaukee to seattle'


def encode_sentence(text):
    """Whitespace-tokenize ``text``, wrap it in <cls> ... <sep>, and look every
    token up in ``source_words``.

    Returns:
        (tokens, ids): the token strings and their vocabulary indices.
    """
    tokens = ['<cls>'] + text.split() + ['<sep>']
    ids = [source_words[tok] for tok in tokens]
    return tokens, ids


def predict(token_ids):
    """Run the model on a single encoded sentence.

    Args:
        token_ids: list of source-vocabulary indices, including <cls> and <sep>.

    Returns:
        (intent_id, slot_ids): the arg-max intent index (int) and a 1-D tensor of
        arg-max slot indices, one per position after <cls> (so the last entry
        belongs to the <sep> position).
    """
    # Add a batch axis: [1, src_len].
    src_tensor = torch.LongTensor(token_ids).unsqueeze(0).to(DEVICE)

    model.eval()  # disable dropout for inference
    with torch.no_grad():
        intent_output, slot_output = model(src_tensor)

    # squeeze(0) drops only the batch axis. A bare squeeze() would also drop a
    # length-1 sequence axis (empty sentence) and make the slot arg-max fail.
    intent_id = intent_output.squeeze(0).argmax().item()
    slot_ids = slot_output.squeeze(0).argmax(dim=-1)
    return intent_id, slot_ids


model.eval()
tokenized, indexed = encode_sentence(sentence)
print(tokenized)
print(indexed)
print('len source :{}'.format(len(indexed)))

intent, pred_token = predict(indexed)

slot_prediction = [target_words.itos[t.item()] for t in pred_token]

print('slot_prediciton:{}'.format(' '.join(slot_prediction)))
print('intent_prediction:{}'.format(label_words.itos[intent]))

['<cls>', 'i', 'would', 'like', 'to', 'find', 'a', 'flight', 'from', 'charlotte', 'to', 'las', 'vegas', 'that', 'makes', 'a', 'stop', 'in', 'st.', 'louis', '<sep>']
[2, 13, 40, 29, 4, 87, 16, 11, 5, 100, 4, 90, 89, 34, 345, 16, 127, 18, 67, 144, 3]
len source :21
slot_prediciton:o o o o o o o o o o o o o o o o o o o o
intent_prediction:flight


In [None]:
O O O B-airline_code B-flight_number O B-fromloc.city_name O B-toloc.city_name O B-depart_date.day_name O O O B-flight_number O B-fromloc.city_name O B-toloc.city_name O B-depart_date.day_name O O B-airline_code B-flight_number O B-fromloc.city_name O B-toloc.city_name