# Intent Classification and Slot Filling

## Cloning the repository

In [1]:
!git clone https://github.com/DSKSD/RNN-for-Joint-NLU.git

Cloning into 'RNN-for-Joint-NLU'...
remote: Enumerating objects: 57, done.[K
remote: Total 57 (delta 0), reused 0 (delta 0), pack-reused 57[K
Unpacking objects: 100% (57/57), done.


In [2]:
import json
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import random
import numpy as np
from collections import Counter
import pickle
%matplotlib inline

In [3]:
%cd /content/RNN-for-Joint-NLU

/content/RNN-for-Joint-NLU


In [4]:
!pwd

/content/RNN-for-Joint-NLU


## Downloading the dataset

Instructions:
1. Download the data (*atis-2.train.w-intent.iob* and *atis-2.dev.w-intent.iob*) from [this repo](https://github.com/yvchen/JointSLU/tree/master/data) or from the repo.
2. Move data (*atis-2.train.w-intent.iob* and *atis-2.dev.w-intent.iob*) to the cloned repository. Such as -> `/content/RNN-for-Joint-NLU/atis-2.train.w-intent.iob`and `/content/RNN-for-Joint-NLU/atis-2.dev.w-intent.iob`.

Note:
1. "The `IOB` format (short for inside, outside, beginning) is a common tagging format for tagging tokens in a chunking task in computational linguistics (ex. named-entity recognition)." Source: [Wikipedia](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging\)#:~:text=The%20IOB%20format%20(short%20for,named%2Dentity%20recognition).))

## Pre-processing and encodings

In [5]:
USE_CUDA = torch.cuda.is_available()
USE_CUDA

True

In [6]:
def prepare_sequence(seq, to_ix):
    idxs = list(map(lambda w: to_ix[w] if w in to_ix.keys() else to_ix["<UNK>"], seq))
    tensor = Variable(torch.LongTensor(idxs)).cuda() if USE_CUDA else Variable(torch.LongTensor(idxs))
    return tensor


flatten = lambda l: [item for sublist in l for item in sublist]

In [33]:
train = open("/content/RNN-for-Joint-NLU/atis-2.train.w-intent.iob","r").readlines()
train = [t[:-1] for t in train]
train = [[t.split("\t")[0].split(" "),t.split("\t")[1].split(" ")[:-1],t.split("\t")[1].split(" ")[-1]] for t in train]
train = [[t[0][1:-1],t[1][1:],t[2]] for t in train]
train_copy = train

In [10]:
seq_in,seq_out, intent = list(zip(*train))
vocab = set(flatten(seq_in))
slot_tag = set(flatten(seq_out))
intent_tag = set(intent)
LENGTH=50
sin=[]
sout=[]

In [11]:
for i in range(len(seq_in)):
    temp = seq_in[i]
    if len(temp)<LENGTH:
        temp.append('<EOS>')
        while len(temp)<LENGTH:
            temp.append('<PAD>')
    else:
        temp = temp[:LENGTH]
        temp[-1]='<EOS>'
    sin.append(temp)
    
    temp = seq_out[i]
    if len(temp)<LENGTH:
        while len(temp)<LENGTH:
            temp.append('<PAD>')
    else:
        temp = temp[:LENGTH]
        temp[-1]='<EOS>'
    sout.append(temp)
word2index = {'<PAD>': 0, '<UNK>':1,'<SOS>':2,'<EOS>':3}
for token in vocab:
    if token not in word2index.keys():
        word2index[token]=len(word2index)

index2word = {v:k for k,v in word2index.items()}

tag2index = {'<PAD>' : 0}
for tag in slot_tag:
    if tag not in tag2index.keys():
        tag2index[tag] = len(tag2index)
index2tag = {v:k for k,v in tag2index.items()}

intent2index={}
for ii in intent_tag:
    if ii not in intent2index.keys():
        intent2index[ii] = len(intent2index)
index2intent = {v:k for k,v in intent2index.items()}
train = list(zip(sin,sout,intent))
train[0][2]

'atis_flight'

In [12]:
train_data=[]

for tr in train:
    
    temp = prepare_sequence(tr[0],word2index)
    temp = temp.view(1,-1)
    
    temp2 = prepare_sequence(tr[1],tag2index)
    temp2 = temp2.view(1,-1)
    
    temp3 = Variable(torch.LongTensor([intent2index[tr[2]]])).cuda() if USE_CUDA else Variable(torch.LongTensor([intent2index[tr[2]]]))
    
    train_data.append((temp,temp2,temp3))
def getBatch(batch_size,train_data):
    random.shuffle(train_data)
    sindex=0
    eindex=batch_size
    while eindex < len(train_data):
        batch = train_data[sindex:eindex]
        temp = eindex
        eindex = eindex+batch_size
        sindex = temp
        
        yield batch

## Black-box model (RNN with attention)

In [13]:
class Encoder(nn.Module):
    def __init__(self, input_size,embedding_size, hidden_size,batch_size=16 ,n_layers=1):
        super(Encoder, self).__init__()
        
        self.input_size = input_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.batch_size=batch_size
        
        self.embedding = nn.Embedding(input_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers, batch_first=True,bidirectional=True)
    
    def init_weights(self):
        self.embedding.weight.data.uniform_(-0.1, 0.1)
    
    def init_hidden(self,input):
        hidden = Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))
        context = Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))
        return (hidden,context)
     
    def forward(self, input,input_masking):
        self.hidden = self.init_hidden(input)
        
        embedded = self.embedding(input)
        output, self.hidden = self.lstm(embedded, self.hidden)
        
        real_context=[]
        
        for i,o in enumerate(output): 
            real_length = input_masking[i].data.tolist().count(0) 
            real_context.append(o[real_length-1])
            
        return output, torch.cat(real_context).view(input.size(0),-1).unsqueeze(1)
class Decoder(nn.Module):
    
    def __init__(self,slot_size,intent_size,embedding_size,hidden_size,batch_size=16,n_layers=1,dropout_p=0.1):
        super(Decoder, self).__init__()
        
        self.hidden_size = hidden_size
        self.slot_size = slot_size
        self.intent_size = intent_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.embedding_size = embedding_size
        self.batch_size = batch_size
        self.embedding = nn.Embedding(self.slot_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size+self.hidden_size*2, self.hidden_size, self.n_layers, batch_first=True)
        self.attn = nn.Linear(self.hidden_size,self.hidden_size) 
        self.slot_out = nn.Linear(self.hidden_size*2, self.slot_size)
        self.intent_out = nn.Linear(self.hidden_size*2,self.intent_size)
    
    def init_weights(self):
        self.embedding.weight.data.uniform_(-0.1, 0.1)
    
    def Attention(self, hidden, encoder_outputs, encoder_maskings):
        """
        hidden : 1,B,D
        encoder_outputs : B,T,D
        encoder_maskings : B,T # ByteTensor
        """
        
        hidden = hidden.squeeze(0).unsqueeze(2)  
        
        batch_size = encoder_outputs.size(0) 
        max_len = encoder_outputs.size(1) 
        energies = self.attn(encoder_outputs.contiguous().view(batch_size*max_len,-1))
        energies = energies.view(batch_size,max_len,-1) 
        attn_energies = energies.bmm(hidden).transpose(1,2)
        attn_energies = attn_energies.squeeze(1).masked_fill(encoder_maskings,-1e12)
        
        alpha = F.softmax(attn_energies) 
        alpha = alpha.unsqueeze(1) 
        context = alpha.bmm(encoder_outputs) 
        
        return context 
    
    def init_hidden(self,input):
        hidden = Variable(torch.zeros(self.n_layers*1, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2,input.size(0), self.hidden_size))
        context = Variable(torch.zeros(self.n_layers*1, input.size(0), self.hidden_size)).cuda() if USE_CUDA else Variable(torch.zeros(self.n_layers*2, input.size(0), self.hidden_size))
        return (hidden,context)
    
    def forward(self, input,context,encoder_outputs,encoder_maskings,training=True):
        embedded = self.embedding(input)
        hidden = self.init_hidden(input)
        decode=[]
        aligns = encoder_outputs.transpose(0,1)
        length = encoder_outputs.size(1)
        for i in range(length):
            aligned = aligns[i].unsqueeze(1)# B,1,D
            _, hidden = self.lstm(torch.cat((embedded,context,aligned),2), hidden) 
            if i==0: 
                intent_hidden = hidden[0].clone() 
                intent_context = self.Attention(intent_hidden, encoder_outputs,encoder_maskings) 
                concated = torch.cat((intent_hidden,intent_context.transpose(0,1)),2)
                intent_score = self.intent_out(concated.squeeze(0)) # B,D

            concated = torch.cat((hidden[0],context.transpose(0,1)),2)
            score = self.slot_out(concated.squeeze(0))
            softmaxed = F.log_softmax(score)
            decode.append(softmaxed)
            _,input = torch.max(softmaxed,1)
            embedded = self.embedding(input.unsqueeze(1))
            context = self.Attention(hidden[0], encoder_outputs,encoder_maskings) 
        slot_scores = torch.cat(decode,1)
        return slot_scores.view(input.size(0)*length,-1), intent_score

## Training

In [14]:
LEARNING_RATE=0.001
EMBEDDING_SIZE=64
HIDDEN_SIZE=64
BATCH_SIZE=16
LENGTH=50
STEP_SIZE=10
encoder = Encoder(len(word2index),EMBEDDING_SIZE,HIDDEN_SIZE)
decoder = Decoder(len(tag2index),len(intent2index),len(tag2index)//3,HIDDEN_SIZE*2)
if USE_CUDA:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    
encoder.init_weights()
decoder.init_weights()

loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
loss_function_2 = nn.CrossEntropyLoss()
enc_optim= optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
dec_optim = optim.Adam(decoder.parameters(),lr=LEARNING_RATE)
for step in range(STEP_SIZE):
    losses=[]
    for i, batch in enumerate(getBatch(BATCH_SIZE,train_data)):
        x,y_1,y_2 = zip(*batch)
        x = torch.cat(x)
        tag_target = torch.cat(y_1)
        intent_target = torch.cat(y_2)
        x_mask = torch.cat([Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))) for t in x]).view(BATCH_SIZE,-1)
        y_1_mask = torch.cat([Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, t.data)))) for t in tag_target]).view(BATCH_SIZE,-1)
 
        encoder.zero_grad()
        decoder.zero_grad()

        output, hidden_c = encoder(x,x_mask)
        start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*BATCH_SIZE])).transpose(1,0)

        tag_score, intent_score = decoder(start_decode,hidden_c,output,x_mask)

        loss_1 = loss_function_1(tag_score,tag_target.view(-1))
        loss_2 = loss_function_2(intent_score,intent_target)

        loss = loss_1+loss_2
        losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy()[0])
        loss.backward()

        torch.nn.utils.clip_grad_norm(encoder.parameters(), 5.0)
        torch.nn.utils.clip_grad_norm(decoder.parameters(), 5.0)

        enc_optim.step()
        dec_optim.step()

        if i % 100==0:
            print("Step",step," epoch",i," : ",np.mean(losses))
            losses=[]

  allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass


Step 0  epoch 0  :  7.898047
Step 0  epoch 100  :  3.6044898
Step 0  epoch 200  :  2.6005187
Step 1  epoch 0  :  1.9219376
Step 1  epoch 100  :  2.076663
Step 1  epoch 200  :  1.81909
Step 2  epoch 0  :  2.1891344
Step 2  epoch 100  :  1.3031497
Step 2  epoch 200  :  1.0028843
Step 3  epoch 0  :  0.87785685
Step 3  epoch 100  :  0.83476037
Step 3  epoch 200  :  0.74223405
Step 4  epoch 0  :  0.51988137
Step 4  epoch 100  :  0.6814476
Step 4  epoch 200  :  0.6018121
Step 5  epoch 0  :  0.3834911
Step 5  epoch 100  :  0.55336744
Step 5  epoch 200  :  0.5272619
Step 6  epoch 0  :  0.4536983
Step 6  epoch 100  :  0.4360807
Step 6  epoch 200  :  0.42489213
Step 7  epoch 0  :  0.12765232
Step 7  epoch 100  :  0.3659593
Step 7  epoch 200  :  0.31658638
Step 8  epoch 0  :  0.17985414
Step 8  epoch 100  :  0.3044225
Step 8  epoch 200  :  0.25390166
Step 9  epoch 0  :  0.55665755
Step 9  epoch 100  :  0.24930415
Step 9  epoch 200  :  0.21158278


### Saving the models

In [15]:
import os
model_dir = './models/'
if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    
torch.save(decoder.state_dict(),os.path.join(model_dir,'decoder.pkl'))
torch.save(encoder.state_dict(),os.path.join(model_dir, 'encoder.pkl'))
print("Train Complete!")

Train Complete!


## Inferencing

In [16]:
from data import *
from model import Encoder,Decoder
_,word2index,tag2index,intent2index = preprocessing('/content/RNN-for-Joint-NLU/atis-2.train.w-intent.iob',60)
index2tag = {v:k for k,v in tag2index.items()}
index2intent = {v:k for k,v in intent2index.items()}
encoder = Encoder(len(word2index),64,64)
decoder = Decoder(len(tag2index),len(intent2index),len(tag2index)//3,64*2)

encoder.load_state_dict(torch.load('/content/RNN-for-Joint-NLU/models/encoder.pkl'))
decoder.load_state_dict(torch.load('/content/RNN-for-Joint-NLU/models/decoder.pkl'))
if USE_CUDA:
    encoder = encoder.cuda()
    decoder = decoder.cuda()
test = open("/content/RNN-for-Joint-NLU/atis-2.dev.w-intent.iob","r").readlines()
test = [t[:-1] for t in test]
test = [[t.split("\t")[0].split(" "),t.split("\t")[1].split(" ")[:-1],t.split("\t")[1].split(" ")[-1]] for t in test]
test = [[t[0][1:-1],t[1][1:],t[2]] for t in test]

processed_data_path : /content/RNN-for-Joint-NLU/data/
Successfully load data. # of set : 4478 
# of vocab : 867, # of slot_tag : 120, # of intent_tag : 22
Preprocessing complete!


### Inference #1

In [17]:
index = random.choice(range(len(test)))
test_raw = test[index][0]
test_in = prepare_sequence(test_raw,word2index)
test_mask = Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)

output, hidden_c = encoder(test_in.unsqueeze(0),test_mask.unsqueeze(0))
tag_score, intent_score = decoder(start_decode,hidden_c,output,test_mask)

v,i = torch.max(tag_score,1)
print("Input Sentence : ",*test[index][0])
print("Truth        : ",*test[index][1])
print("Prediction : ",*list(map(lambda ii:index2tag[ii],i.data.tolist())))
v,i = torch.max(intent_score,1)
print("Truth        : ",test[index][2])
print("Prediction : ",index2intent[i.data.tolist()[0]])

Input Sentence :  thanks and what's the last flight back from washington to boston
Truth        :  O O O O B-flight_mod O O O B-fromloc.city_name O B-toloc.city_name
Prediction :  O O O O B-flight_mod O O O B-fromloc.city_name O B-toloc.city_name
Truth        :  atis_flight
Prediction :  atis_flight


  attn_energies = attn_energies.squeeze(1).masked_fill(encoder_maskings,-1e12) # PAD masking
  alpha = F.softmax(attn_energies) # B,T
  softmaxed = F.log_softmax(score)


### Inference #2

In [19]:
index = random.choice(range(len(test)))
test_raw = test[index][0]
test_in = prepare_sequence(test_raw,word2index)
test_mask = Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)

output, hidden_c = encoder(test_in.unsqueeze(0),test_mask.unsqueeze(0))
tag_score, intent_score = decoder(start_decode,hidden_c,output,test_mask)

v,i = torch.max(tag_score,1)
print("Input Sentence : ",*test[index][0])
print("Truth        : ",*test[index][1])
print("Prediction : ",*list(map(lambda ii:index2tag[ii],i.data.tolist())))
v,i = torch.max(intent_score,1)
print("Truth        : ",test[index][2])
print("Prediction : ",index2intent[i.data.tolist()[0]])

Input Sentence :  does flight dl 1083 from philadelphia to denver fly on saturdays
Truth        :  O O B-airline_code B-flight_number O B-fromloc.city_name O B-toloc.city_name O O B-depart_date.day_name
Prediction :  O O B-airline_code I-airline_name O B-fromloc.city_name O B-toloc.city_name O O B-depart_date.day_name
Truth        :  atis_flight
Prediction :  atis_flight


  attn_energies = attn_energies.squeeze(1).masked_fill(encoder_maskings,-1e12) # PAD masking
  alpha = F.softmax(attn_energies) # B,T
  softmaxed = F.log_softmax(score)


### Inference #3

In [64]:
index = random.choice(range(len(test)))
test_raw = test[index][0]
test_in = prepare_sequence(test_raw,word2index)
test_mask = Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).cuda() if USE_CUDA else Variable(torch.ByteTensor(tuple(map(lambda s: s ==0, test_in.data)))).view(1,-1)
start_decode = Variable(torch.LongTensor([[word2index['<SOS>']]*1])).cuda().transpose(1,0) if USE_CUDA else Variable(torch.LongTensor([[word2index['<SOS>']]*1])).transpose(1,0)

output, hidden_c = encoder(test_in.unsqueeze(0),test_mask.unsqueeze(0))
tag_score, intent_score = decoder(start_decode,hidden_c,output,test_mask)

v,i = torch.max(tag_score,1)
print("Input Sentence : ",*test[index][0])
print("Truth        : ",*test[index][1])
print("Prediction : ",*list(map(lambda ii:index2tag[ii],i.data.tolist())))
v,i = torch.max(intent_score,1)
print("Truth        : ",test[index][2])
print("Prediction : ",index2intent[i.data.tolist()[0]])

Input Sentence :  show me all flights from san francisco to boston philadelphia or baltimore
Truth        :  O O O O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name B-toloc.city_name O B-toloc.city_name
Prediction :  O O O O O B-fromloc.city_name I-fromloc.city_name O B-toloc.city_name B-toloc.city_name I-toloc.city_name B-toloc.city_name
Truth        :  atis_flight
Prediction :  atis_flight


  attn_energies = attn_energies.squeeze(1).masked_fill(encoder_maskings,-1e12) # PAD masking
  alpha = F.softmax(attn_energies) # B,T
  softmaxed = F.log_softmax(score)


# Training SVM

## Downloading spacy

In [29]:
!python -m spacy download en_core_web_md

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en-core-web-md==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.3.0/en_core_web_md-3.3.0-py3-none-any.whl (33.5 MB)
[K     |████████████████████████████████| 33.5 MB 1.4 MB/s 
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.3.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [30]:
import spacy
import numpy as np

nlp = spacy.load("en_core_web_md")
print("Number of vectors: {}".format(nlp.vocab.vectors_length))

Number of vectors: 300


## Data preparation

In [55]:
import pandas as pd
df_train = pd.DataFrame(train_copy, columns = ['text', 'slots', 'label'])
df_train['text'] = [" ".join(i) for i in df_train['text'].values]
df_train['slots'] = [" ".join(i) for i in df_train['slots'].values]
df_train

Unnamed: 0,text,slots,label
0,i want to fly from baltimore to dallas round trip,O O O O O B-fromloc.city_name O B-toloc.city_n...,atis_flight
1,round trip fares from baltimore to philadelphi...,B-round_trip I-round_trip O O B-fromloc.city_n...,atis_airfare
2,show me the flights arriving on baltimore on j...,O O O O O O B-toloc.city_name O B-arrive_date....,atis_flight
3,what are the flights which depart from san fra...,O O O O O O O B-fromloc.city_name I-fromloc.ci...,atis_flight
4,which airlines fly from boston to washington d...,O O O O B-fromloc.city_name O B-toloc.city_nam...,atis_airline
...,...,...,...
4473,what's the first flight after 1 pm leaving was...,O O B-flight_mod O B-depart_time.time_relative...,atis_flight
4474,what are the nonstop flights on america west o...,O O O B-flight_stop O O B-airline_name I-airli...,atis_flight
4475,tell me about ground transportation between or...,O O O O O O B-fromloc.airport_name I-fromloc.a...,atis_ground_service
4476,i'd like a twa flight from las vegas to new yo...,O O O B-airline_code O O B-fromloc.city_name I...,atis_flight


In [56]:
import pandas as pd
df_test = pd.DataFrame(test, columns = ['text', 'slots', 'label'])
df_test['text'] = [" ".join(i) for i in df_test['text'].values]
df_test['slots'] = [" ".join(i) for i in df_test['slots'].values]
df_test

Unnamed: 0,text,slots,label
0,i want to fly from boston at 838 am and arrive...,O O O O O B-fromloc.city_name O B-depart_time....,atis_flight
1,show me all round trip flights between houston...,O O O B-round_trip I-round_trip O O B-fromloc....,atis_flight
2,i would like some information on a flight from...,O O O O O O O O O B-fromloc.city_name O B-tolo...,atis_flight
3,what are the coach flights between dallas and ...,O O O B-class_type O O B-fromloc.city_name O B...,atis_flight
4,i'm flying from boston to the bay area,O O O B-fromloc.city_name O O B-toloc.city_nam...,atis_flight
...,...,...,...
495,pm flights dallas to atlanta,B-depart_time.period_of_day O B-fromloc.city_n...,atis_flight
496,information on flights from baltimore to phila...,O O O O B-fromloc.city_name O B-toloc.city_name,atis_flight
497,what flights from atlanta to st. louis on tues...,O O O B-fromloc.city_name O B-toloc.city_name ...,atis_flight
498,show me ground transportation in san francisco,O O O O O B-city_name I-city_name,atis_ground_service


In [57]:
sen_train = df_train['text'].tolist()
labels_train = df_train['label'].tolist()
sen_test = df_test['text'].tolist()
labels_test = df_test['label'].tolist()
print(len(sen_train),len(labels_train),len(sen_test),len(labels_test))

4478 4478 500 500


## Encoding the sentences

In [58]:
def encode_sentences(sentences):
    n_sentences = len(sentences)
    X = np.zeros((n_sentences, 300))
    for idx, sentence in enumerate(sentences):
        doc = nlp(sentence)
        X[idx, :] = doc.vector
    return X

train_X = encode_sentences(sen_train)
test_X = encode_sentences(sen_test)

In [59]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

labels_test = le.fit_transform(labels_test)
labels_train = le.fit_transform(labels_train)

In [69]:
max(labels_train)

21

In [75]:

from sklearn.svm import SVC

clf = SVC(C=1)
clf.fit(train_X, labels_train)

SVC(C=1)

In [76]:
def validate_clf(X,y):
    y_pred = clf.predict(X)
    n_correct = 0
    for i in range(len(y)):
        if y_pred[i] == y[i]:
            n_correct += 1

    print("Predicted {} correctly out of {}".format(n_correct, len(y)))
    print("Model accuracy: {}%".format(round(n_correct/len(y)*100),2))
    
print('Validation on the train set results:')
validate_clf(train_X, labels_train)

Validation on the train set results:
Predicted 4063 correctly out of 4478
Model accuracy: 91%


In [77]:

print('Validation on the test set results:')
validate_clf(test_X, labels_test)

Validation on the test set results:
Predicted 44 correctly out of 500
Model accuracy: 9%


Q: Why is accuracy so low? <br>
A: `Here`.

#### References:
1. [Dataset](https://github.com/yvchen/JointSLU/tree/master/data)
2. [Attention-Based Recurrent Neural Network Models for Joint Intent Detection and Slot Filling](https://arxiv.org/pdf/1609.01454.pdf)
3. [Github - RNN-for-Joint-NLU](https://github.com/DSKSD/RNN-for-Joint-NLU)
4. [Intent Classification with SVM](https://www.kaggle.com/code/oleksandrarsentiev/intent-classification-with-svm)
5. [Airline-Travel-Information-System-ATIS-Text-Analysis](https://github.com/nawaz-kmr/Airline-Travel-Information-System-ATIS-Text-Analysis#airline-travel-information-system-atis-text-analysis)