In [1]:
from data import *
import matplotlib.pyplot as plt
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import numpy as np
from model import Encoder,Decoder

# Load Training Data:

In [2]:
train_raw = load_data("dataset/atis-2.train.w-intent.iob")

In [3]:
LENGTH=30
train_processed, word2index, slot2index, intent2index = process_data(train_raw, LENGTH)
index2slot = {v:k for k,v in slot2index.items()}
index2intent = {v:k for k,v in intent2index.items()}
index2word = {v:k for k,v in word2index.items()}

# Training

In [439]:
LEARNING_RATE=0.001
EMBEDDING_SIZE=64
HIDDEN_SIZE=32
BATCH_SIZE=16
EPOCHS=10
DROPOUT_P=0.1

In [440]:
encoder = Encoder(len(word2index),EMBEDDING_SIZE,HIDDEN_SIZE)
decoder = Decoder(len(slot2index),len(intent2index),len(slot2index)//3,HIDDEN_SIZE*2, dropout_p=DROPOUT_P)

encoder.init_weights()
decoder.init_weights()

loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
loss_function_2 = nn.CrossEntropyLoss()
enc_optim= optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
dec_optim = optim.Adam(decoder.parameters(),lr=LEARNING_RATE)

In [442]:
intent_acc = []
slot_acc = []
for epoch in range(EPOCHS):
    losses=[]
    intent_truly_labeled = 0
    intent_mislabeled = 0
    slot_truly_labeled = 0
    slot_mislabeled = 0
    for i, batch in enumerate(getBatch(BATCH_SIZE,train_processed)):
        x,y_1,y_2 = zip(*batch)
        x = torch.cat(x)
        slot_target = torch.cat(y_1)
        intent_target = torch.cat(y_2)
        x_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, t.data)))) for t in x]).view(BATCH_SIZE,-1)
        y_1_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, t.data)))) for t in slot_target]).view(BATCH_SIZE,-1)
 
        encoder.zero_grad()
        decoder.zero_grad()

        output, hidden_c = encoder(x,x_mask) # hidden_c : last hidden state of encoder to start decoder
        
        start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).transpose(1,0)
        # start_decode.shape = torch.Size([16, 1]) ==> start_decode = [2, 2, 2, 2, ..., 2] (word2index['<SOS>'] = 2)
        slot_score, intent_score = decoder(start_decode,hidden_c,output,x_mask)

        #print(slot_target.size()) ===> torch.Size([16, 50])
        #print(slot_score.size()) ===> torch.Size([800, 122]) (800 = B*T = 16*50)
        #print(intent_score.size()) ===> torch.Size([16, 22])

        #print(intent_score)
        _,intent_predicted = torch.max(intent_score,1)
        
        intent_truly_labeled += sum(intent_target == intent_predicted).item()
        intent_mislabeled += sum(intent_target != intent_predicted).item()
        
                 
        _,slot_predicted = torch.max(slot_score,1)

        #print(slot_target.size())
        true = 0
        false = 0
        for j in range(len(slot_target.view(-1))):
            if slot_target.view(-1)[j] != 0:
                if slot_target.view(-1)[j] == slot_predicted[j].item():
                    true += 1
                else:
                    false += 1

        slot_truly_labeled += true
        slot_mislabeled += false
        
        
        loss_1 = loss_function_1(slot_score,slot_target.view(-1))
        loss_2 = loss_function_2(intent_score,intent_target)

        loss = loss_1+loss_2
        losses.append(loss.data.numpy())
        loss.backward()

        torch.nn.utils.clip_grad_norm(encoder.parameters(), 5.0)
        torch.nn.utils.clip_grad_norm(decoder.parameters(), 5.0)

        enc_optim.step()
        dec_optim.step()

        if i % 100==0:
            print("EPOCH",epoch," batch",i," : ",np.mean(losses))
            
            losses=[]
                 
    intent_acc.append(intent_truly_labeled / (intent_truly_labeled + intent_mislabeled))
    slot_acc.append(slot_truly_labeled / (slot_truly_labeled + slot_mislabeled))



EPOCH 0  batch 0  :  7.9664974
EPOCH 0  batch 100  :  4.1804395
EPOCH 0  batch 200  :  2.634928
EPOCH 1  batch 0  :  2.244953
EPOCH 1  batch 100  :  2.1919923
EPOCH 1  batch 200  :  1.9852215
EPOCH 2  batch 0  :  2.1585941
EPOCH 2  batch 100  :  1.7021719
EPOCH 2  batch 200  :  1.4902431
EPOCH 3  batch 0  :  0.89181226
EPOCH 3  batch 100  :  1.0672523
EPOCH 3  batch 200  :  0.998114
EPOCH 4  batch 0  :  0.8869954
EPOCH 4  batch 100  :  0.7939249
EPOCH 4  batch 200  :  0.8265474
EPOCH 5  batch 0  :  1.2666262
EPOCH 5  batch 100  :  0.67856055
EPOCH 5  batch 200  :  0.6208588
EPOCH 6  batch 0  :  0.68808115
EPOCH 6  batch 100  :  0.5541038
EPOCH 6  batch 200  :  0.48256585
EPOCH 7  batch 0  :  0.15562415
EPOCH 7  batch 100  :  0.46624243
EPOCH 7  batch 200  :  0.41408074
EPOCH 8  batch 0  :  0.3710662
EPOCH 8  batch 100  :  0.37416816
EPOCH 8  batch 200  :  0.37432072
EPOCH 9  batch 0  :  0.40323794
EPOCH 9  batch 100  :  0.30311328
EPOCH 9  batch 200  :  0.3096535


In [443]:
intent_acc, slot_acc

([0.7155017921146953,
  0.8362455197132617,
  0.8891129032258065,
  0.9023297491039427,
  0.9222670250896058,
  0.9310035842293907,
  0.9372759856630825,
  0.9498207885304659,
  0.9614695340501792,
  0.9670698924731183],
 [0.6203436398749677,
  0.6601539961401485,
  0.7700356055934597,
  0.8478100809901896,
  0.884009233095873,
  0.9107050439906047,
  0.9297620942075111,
  0.9421313334261672,
  0.9499492729117349,
  0.9583416225679385])

In [None]:
train_intent_acc = intent_acc
train_slot_acc = slot_acc

Plot accuracy vs epoch graph:

In [None]:
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.plot(train_intent_acc, label='train_intent_acc')
plt.plot(train_slot_acc, label='train_slot_acc')
plt.legend()

Model's Performance on the training dataset:

In [470]:
index = random.choice(range(len(train_processed)))

sample = train_raw[index][0]
train_in = prepare_sequence(sample,word2index)

train_mask = Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, train_in.data)))).view(1,-1)
start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)

output, hidden_c = encoder(train_in.unsqueeze(0),train_mask.unsqueeze(0))

slot_score, intent_score = decoder(start_decode,hidden_c,output,train_mask)

v,i = torch.max(slot_score,1)

print("Input Sentence:   ",*train_raw[index][0])
print()
print("Slot Targets:   ",*train_raw[index][1])
print("Slot Predictions:   ",*list(map(lambda ii:index2slot[ii],i.data.tolist())))

print()
v,i = torch.max(intent_score,1)
print("Intent Targets:   ",train_raw[index][2])
print("Intent Predictions:   ",index2intent[i.data.tolist()[0]])

Input Sentence:    which airlines fly from boston to washington dc via other cities

Slot Targets:    O O O O B-fromloc.city_name O B-toloc.city_name B-toloc.state_code O O O
Slot Predictions:    O O O O B-fromloc.city_name O B-toloc.city_name B-toloc.state_code O O O

Intent Targets:    atis_airline
Intent Predictions:    atis_airline


Save Model's Parameters:

In [447]:
torch.save(decoder.state_dict(),'model/jointnlu-decoder.pkl')
torch.save(encoder.state_dict(),'model/jointnlu-encoder.pkl')

Load model's parameters:

In [448]:
encoder2 = Encoder(len(word2index),EMBEDDING_SIZE,HIDDEN_SIZE)
decoder2 = Decoder(len(slot2index),len(intent2index),len(slot2index)//3,HIDDEN_SIZE*2)

encoder2.load_state_dict(torch.load('model/jointnlu-encoder.pkl'))
decoder2.load_state_dict(torch.load('model/jointnlu-decoder.pkl'))

<All keys matched successfully>

# Load Testing Data:

In [450]:
test_raw = load_data("dataset/atis-2.dev.w-intent.iob")
test_processed = test_process(test_raw, word2index, slot2index, intent2index, LENGTH)

In [451]:
intent_truly_labeled = 0
intent_mislabeled = 0
slot_truly_labeled = 0
slot_mislabeled = 0

for i, batch in enumerate(getBatch(BATCH_SIZE,test_processed, Shuffle = False)):
    x,y_1,y_2 = zip(*batch)
    x = torch.cat(x)
    slot_target = torch.cat(y_1)
    intent_target = torch.cat(y_2)
    x_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, t.data)))) for t in x]).view(BATCH_SIZE,-1)
    y_1_mask = torch.cat([Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, t.data)))) for t in slot_target]).view(BATCH_SIZE,-1)
    
    start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).transpose(1,0)
    
    output, hidden_c = encoder2(x,x_mask)

    slot_score, intent_score = decoder2(start_decode,hidden_c,output,x_mask)
    
    _,intent_predicted = torch.max(intent_score,1)
    
    intent_truly_labeled += sum(intent_target == intent_predicted).item()
    intent_mislabeled += sum(intent_target != intent_predicted).item()
    
    _,slot_predicted = torch.max(slot_score,1)

    #print(slot_target.size())
    true = 0
    false = 0
    for j in range(len(slot_target.view(-1))):
        if slot_target.view(-1)[j] != 0:
            if slot_target.view(-1)[j] == slot_predicted[j].item():
                true += 1
            else:
                false += 1

    slot_truly_labeled += true
    slot_mislabeled += false

    
    #print(slot_predicted)

    #print(slot_target.view(-1))
    #print()
        
intent_acc = intent_truly_labeled / (intent_truly_labeled + intent_mislabeled)
slot_acc = slot_truly_labeled / (slot_truly_labeled + slot_mislabeled)

In [452]:
intent_acc, slot_acc

(0.9415322580645161, 0.9514958399716764)

In [53]:
test_intent_acc = intent_acc
test_slot_acc  = slot_acc 

Model's Performance on the training dataset:

In [473]:
index = random.choice(range(len(test_processed)))

#index = 0
sample = test_raw[index][0]
test_in = prepare_sequence(sample,word2index)

test_mask = Variable(torch.BoolTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)

output, hidden_c = encoder2(test_in.unsqueeze(0),test_mask.unsqueeze(0))

slot_score, intent_score = decoder2(start_decode,hidden_c,output,test_mask)

v,i = torch.max(slot_score,1)

print("Input Sentence:   ",*train_raw[index][0])
print()
print("Slot Targets:   ",*train_raw[index][1])
print("Slot Predictions:   ",*list(map(lambda ii:index2slot[ii],i.data.tolist())))

print()
v,i = torch.max(intent_score,1)
print("Intent Targets:   ",train_raw[index][2])
print("Intent Predictions:   ",index2intent[i.data.tolist()[0]])

Input Sentence:    what is the earliest flight arriving in charlotte from st. louis on friday

Slot Targets:    O O O B-flight_mod O O O B-toloc.city_name O B-fromloc.city_name I-fromloc.city_name O B-arrive_date.day_name
Slot Predictions:    O O O B-flight_mod O O B-fromloc.city_name O B-toloc.city_name B-toloc.state_name O B-toloc.city_name

Intent Targets:    atis_flight
Intent Predictions:    atis_flight
