In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset   
from torch.optim import SGD
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from collections import Counter

In [2]:
# Creating a ner 2 label dictionary
ner2Labels = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}

In [3]:
#  function to read data and create list of indices, words and tags
def readData(filename):
  indices, words, ners = [], [], []
  with open(filename, "rt") as file:
    for line in file:
      td = line.split()
      if len(td) != 0 and td[1] != "-DOCSTART-":
        indices.append(int(td[0]))
        words.append(td[1])
        ners.append(td[2])

  return indices, words, ners

In [4]:
#  function to get the sample sentences
def getlistofSentences(*args):
  listofSent, sent = [], []
  for i, (idx, word) in enumerate(zip(args[0], args[1])):
    if idx == 1 and i > 0:
      listofSent.append(sent)
      sent = []
      sent.append(word)
    else: 
      sent.append(word)
  if sent:
    listofSent.append(sent)
  
  return listofSent

In [5]:
trainIndices, trainWords, trainNERs  = readData("data/train")
devIndices, devWords, devNERs = readData("data/dev")

In [6]:
listOfTrainSent = getlistofSentences(trainIndices, trainWords)
listOfTrainNER = getlistofSentences(trainIndices, trainNERs)

listOfDevSent = getlistofSentences(devIndices, devWords) 
listOfDevNER = getlistofSentences(devIndices, devNERs)

In [7]:
wordFrequencies = Counter()
for word in trainWords:
  wordFrequencies[word] += 1

In [8]:
len(wordFrequencies)

23623

In [9]:
len(listOfTrainSent)

14041

In [10]:
# creating vocab
vocab, ners = {}, []

for i, word in enumerate(wordFrequencies.keys(), start=2):
  if word not in vocab:
    vocab[word] = i

for ner in trainNERs:
  if ner not in ners:
    ners.append(ner)

In [11]:
vocab['<pad>'] = 0
vocab['<unk>'] = 1

In [12]:
len(vocab)

23625

In [13]:
labels2ner = {}
for i, ner in enumerate(ners):
  labels2ner[i] = ner

In [15]:
# creating the list of labels from tags using ner2Labels
listOfTrainLabels, listOfDevLabels, = [], []
temp1, temp2 = [], []
for nerSent in listOfTrainNER:
  for ner in nerSent:
    temp1.append(ner2Labels[ner])
  listOfTrainLabels.append(temp1)
  temp1 = []

for nerSent in listOfDevNER:
  for ner in nerSent:
    temp2.append(ner2Labels[ner])
  listOfDevLabels.append(temp2)
  temp2 = []

In [17]:
# function to get the list of word Indices
def getListOfWordIndices(dt):
  res = []
  for sent in dt:
    temp = [vocab[word] if word in vocab else vocab['<unk>'] for word in sent]
    res.append(temp)
  return res

In [18]:
trainWordIndices = getListOfWordIndices(listOfTrainSent)

In [20]:
print(len(trainWordIndices))
print(len(listOfTrainLabels))

14041
14041


In [31]:
class BiLSTM(nn.Module):
  def __init__(self, embedding_dim, hidden_dim , vocab_size, lstm_layers, dropout, output_dim, tagset_size):
    super(BiLSTM, self).__init__()
    # self.hidden_dim = hidden_dim
    # self.embedding_dim = embedding_dim
    self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=lstm_layers, bidirectional=True, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.hidden2tag = nn.Linear(2*hidden_dim, output_dim)
    self.elu = nn.ELU()
    self.tag_output = nn.Linear(output_dim, tagset_size)

  def forward(self, sentence):
    embeds = self.word_embeddings(sentence)
    lstm_out, _ = self.lstm(embeds)
    lstmout = self.dropout(lstm_out)
    linear_out = self.hidden2tag(lstmout)
    elu_out = self.elu(linear_out)
    tag_space = self.tag_output(elu_out)
    # tag_scores = nn.functional.log_softmax(tag_space, dim=1)
    return tag_space

In [32]:
def padding(sentences, tags):
  paddedSentences = pad_sequence([torch.LongTensor(sent) for sent in sentences], batch_first=True, padding_value=0)
  paddedtags = pad_sequence([torch.LongTensor(label) for label in tags], batch_first=True, padding_value=9)
  return paddedSentences, paddedtags

In [33]:
paddedTrainSents, paddedTrainLabels = padding(trainWordIndices, listOfTrainLabels)

In [34]:
print(len(paddedTrainSents))
print(len(paddedTrainLabels))

14041
14041


In [38]:
# Hyperparameters
embDim = 100
epochs = 20
batch_size = 64

model = BiLSTM(
               embedding_dim=100,
               hidden_dim=256,
               vocab_size=len(vocab),
               tagset_size=len(ners),
               lstm_layers=1, 
               dropout=0.33,
               output_dim = 128
              )

optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
loss_function = nn.CrossEntropyLoss(ignore_index=9)

In [39]:
train_dataset = TensorDataset(torch.LongTensor(paddedTrainSents), torch.LongTensor(paddedTrainLabels))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [None]:
for epoch in range(epochs):
  model.train()
  total_loss = 0
  for sentences, tags in train_loader:
    optimizer.zero_grad()
    tag_scores = model(sentences)
    loss = loss_function(tag_scores.view(-1, len(ners)), tags.view(-1))
    loss.backward()
    optimizer.step()
    total_loss += loss.item()

In [76]:
torch.save(model.state_dict(), 'model/blstm1.pt')

In [137]:
model = BiLSTM(
               embedding_dim=100,
               hidden_dim=256,
               vocab_size=len(vocab),
               tagset_size=len(ners),
               lstm_layers=1, 
               dropout=0.33,
               output_dim = 128
              )
model.load_state_dict(torch.load('model/blstm1.pt'))

<All keys matched successfully>

In [41]:
trainTrue = []
trainPred = []

model.eval()
with torch.no_grad():
  for batch in train_loader:
    inputs, labels = batch
    logits = model(inputs)
    predictions = torch.argmax(logits, dim=2)
    validIndex = []
    for i in range(len(labels)):
      k=0
      for j in labels[i]:
        if j != 9:
          k+=1
      validIndex.append(k)               
    for i in range(len(labels)):
      trainTrue.extend(labels[i][:validIndex[i]].tolist())
      trainPred.extend(predictions[i][:validIndex[i]].tolist())

In [42]:
len(trainTrue)


203621

In [43]:
len(trainPred)

203621

In [44]:
trainTrue = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in trainTrue]
trainPred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in trainPred]

In [45]:
precison, recall, f1, _ = precision_recall_fscore_support(trainTrue, trainPred, average="macro")
print("Precision: ",precison)
print("Recall: ", recall)
print("F1: ", f1)
print("accuracy:", accuracy_score(trainTrue, trainPred))

Precision:  0.9992525446640764
Recall:  0.9993710271996932
F1:  0.9993117188062439
accuracy: 0.9998919561341905


In [46]:
#  function to write predictions to a file so that we can use conll03eval
def writePredictionstoFile(opfilename, indices, words, preds):
  with open(opfilename, "w") as file:
    for i, (idx, word) in enumerate(zip(indices, words)):
      if idx == 1 and i > 0:
        file.write("\n")
      file.write("{} {} {}\n".format(idx, word, preds[i]))

In [47]:
writePredictionstoFile("predictions/task1/trainTrue.txt", trainIndices, trainWords, trainTrue)
writePredictionstoFile("predictions/task1/trainPred.txt", trainIndices, trainWords, trainPred)

In [48]:
!python eval.py -p "predictions/task1/trainPred.txt" -g "predictions/task1/trainTrue.txt" 

processed 203621 tokens with 24239 phrases; found: 24245 phrases; correct: 24221.
accuracy:  99.99%; precision:  99.90%; recall:  99.93%; FB1:  99.91
              LOC: precision:  99.99%; recall:  99.94%; FB1:  99.97  7216
             MISC: precision:  99.69%; recall:  99.69%; FB1:  99.69  3512
              ORG: precision:  99.86%; recall:  99.97%; FB1:  99.92  6593
              PER: precision:  99.96%; recall:  99.99%; FB1:  99.97  6924


Evaluating Dev Data using Task 1 Model

In [49]:
devWordIndices = getListOfWordIndices(listOfDevSent)

In [51]:
print(len(devWordIndices))
print(len(listOfDevLabels))

3250
3250


In [89]:
maxSeqLen = max(len(sent) for sent in devWordIndices)
paddedDevSent, paddedDevLabels = devWordIndices, listOfDevLabels
for seq in paddedDevSent:   
    if len(seq) < maxSeqLen:
        seq += [0] * (maxSeqLen - len(seq))
    elif len(seq) > maxSeqLen:
        seq = seq[:maxSeqLen]

for tags in paddedDevLabels:   
    if len(tags) < maxSeqLen:
        tags+= [9] * (maxSeqLen - len(tags))
    elif len(tags) > maxSeqLen:
        tags = tags[:maxSeqLen]

In [90]:
print(len(paddedDevSent))
print(len(paddedDevLabels))

3250
3250


In [91]:
dev_dataset = TensorDataset(torch.LongTensor(paddedDevSent), torch.LongTensor(paddedDevLabels))
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=True)

In [92]:
devTrue = []
devPred = []

model.eval()
with torch.no_grad():
  for batch in dev_loader:
    inputs, labels = batch
    logits = model(inputs)
    predictions = torch.argmax(logits, dim=2)
    validIndex = []
    for i in range(len(labels)):
      k=0
      for j in labels[i]:
        if j != 9:
          k+=1
      validIndex.append(k)               
    for i in range(len(labels)):
      devTrue.extend(labels[i][:validIndex[i]].tolist())
      devPred.extend(predictions[i][:validIndex[i]].tolist())

In [93]:
print(len(devTrue))
print(len(devPred))

51362
51362


In [94]:
devTrue = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in devTrue]
devPred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in devPred]

In [95]:
precison, recall, f1, _ = precision_recall_fscore_support(devTrue, devPred, average="macro")
print("Precision: ",precison)
print("Recall: ", recall)
print("F1: ", f1)
print("accuracy:", accuracy_score(devTrue, devPred))

Precision:  0.8982587510690817
Recall:  0.7766839458615533
F1:  0.8310114054116308
accuracy: 0.9563490518281998


In [96]:
writePredictionstoFile("predictions/task1/dev1.out", devIndices, devWords, devPred)
writePredictionstoFile("predictions/task1/devTrue.txt", devIndices, devWords, devTrue)

In [97]:
!python eval.py -p "predictions/task1/dev1.out" -g "predictions/task1/devTrue.txt"

processed 51362 tokens with 6109 phrases; found: 5665 phrases; correct: 4660.
accuracy:  95.63%; precision:  82.26%; recall:  76.28%; FB1:  79.16
              LOC: precision:  92.70%; recall:  83.42%; FB1:  87.82  1672
             MISC: precision:  85.84%; recall:  75.11%; FB1:  80.11  826
              ORG: precision:  74.89%; recall:  71.30%; FB1:  73.05  1314
              PER: precision:  76.47%; recall:  73.53%; FB1:  74.97  1853


In [70]:
def readt(filename):
  tindices, twords = [], []
  with open(filename, "r") as file:
    for line in file:
      td = line.split()
      if len(td) != 0 and td[1] != "-DOCSTART-":
        tindices.append(int(td[0]))
        twords.append(td[1])
      
    return tindices, twords

In [71]:
testIndices, testWords = readt("data/test")

In [72]:
print(len(testIndices))
print(len(testWords))

46435
46435


In [73]:
listOfTestSent = getlistofSentences(testIndices, testWords)

In [74]:
len(listOfTestSent)

3453

In [75]:
testWordIndices = getListOfWordIndices(listOfTestSent)

In [84]:
maxSeqLen = max(len(sent) for sent in testWordIndices)
paddedTestSent = testWordIndices
for seq in paddedTestSent:   
  if len(seq) < maxSeqLen:
    seq += [0] * (maxSeqLen - len(seq))
  elif len(seq) > maxSeqLen:
    seq = seq[:maxSeqLen]

In [98]:
paddedTestSent

[[1892,
  677,
  1,
  1,
  1,
  3396,
  72,
  21355,
  2338,
  1,
  10616,
  10,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0],
 [1,
  1,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0

In [147]:
test_dataset =  torch.LongTensor(paddedTestSent)
test_Loader = DataLoader(test_dataset, batch_size=32, shuffle=True)
print(type(test_Loader))

<class 'torch.utils.data.dataloader.DataLoader'>


In [148]:
testPred = []

model.eval()
with torch.no_grad():
  for inputs in test_Loader:
    
    logits = model(inputs)
    predictions = torch.argmax(logits, dim=2)
    validIndex=[]
    for i in range(len(inputs)):
      k = 0
      for j in inputs[i]:
        if j != 0:
          k+=1
      validIndex.append(k)
    for i in range(len(inputs)):
      testPred.extend(predictions[i][:validIndex[i]].tolist())    

In [151]:
testPred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in testPred]

In [152]:
writePredictionstoFile("predictions/task1/test1.out", testIndices, testWords, testPred)

## Task 2: Using GloVe word embeddings

In [103]:
#  creating glove vocab and embeddings
gloVocab, embeddings = [], []
with open('glove.6B.100d/glove.6B.100d.txt', encoding="utf8", mode = 'rt') as file:
    line = file.read().strip().split('\n')
for i in range(len(line)):
    word = line[i].split(' ')[0]
    embedding = [float(val) for val in line[i].split(' ')[1:]]
    gloVocab.append(word)
    embeddings.append(embedding)

In [104]:
gloVocab, embeddings = np.array(gloVocab), np.array(embeddings)
gloVocab = np.insert(gloVocab, 0, '<pad>')
gloVocab = np.insert(gloVocab, 1, '<unk>')

In [105]:
padEmbedding = np.zeros((1, embeddings.shape[1]))
unkEmbedding = np.mean(embeddings, axis=0, keepdims=True)
embeddings = np.vstack((padEmbedding, unkEmbedding, embeddings))

In [106]:
word2idx = {val: idx[0] for idx, val in np.ndenumerate(gloVocab)}

In [107]:
word2idx


{'<pad>': 0,
 '<unk>': 1,
 'the': 2,
 ',': 3,
 '.': 4,
 'of': 5,
 'to': 6,
 'and': 7,
 'in': 8,
 'a': 9,
 '"': 10,
 "'s": 11,
 'for': 12,
 '-': 13,
 'that': 14,
 'on': 15,
 'is': 16,
 'was': 17,
 'said': 18,
 'with': 19,
 'he': 20,
 'as': 21,
 'it': 22,
 'by': 23,
 'at': 24,
 '(': 25,
 ')': 26,
 'from': 27,
 'his': 28,
 "''": 29,
 '``': 30,
 'an': 31,
 'be': 32,
 'has': 33,
 'are': 34,
 'have': 35,
 'but': 36,
 'were': 37,
 'not': 38,
 'this': 39,
 'who': 40,
 'they': 41,
 'had': 42,
 'i': 43,
 'which': 44,
 'will': 45,
 'their': 46,
 ':': 47,
 'or': 48,
 'its': 49,
 'one': 50,
 'after': 51,
 'new': 52,
 'been': 53,
 'also': 54,
 'we': 55,
 'would': 56,
 'two': 57,
 'more': 58,
 "'": 59,
 'first': 60,
 'about': 61,
 'up': 62,
 'when': 63,
 'year': 64,
 'there': 65,
 'all': 66,
 '--': 67,
 'out': 68,
 'she': 69,
 'other': 70,
 'people': 71,
 "n't": 72,
 'her': 73,
 'percent': 74,
 'than': 75,
 'over': 76,
 'into': 77,
 'last': 78,
 'some': 79,
 'government': 80,
 'time': 81,
 '$': 82,
 

In [108]:
# getting golve Indices 
trainWordGloveIndices = []

for sent in listOfTrainSent:
  temp = [word2idx.get(word.lower(), 1) for word in sent]
  trainWordGloveIndices.append(temp)
  temp = []

In [109]:
len(trainWordGloveIndices)

14041

In [110]:
# getting glove Features
gloveFeatures = [] 

for sent in listOfTrainSent:
  temp = [[1.0 if word.isupper() else 0.0, 1.0 if word.islower() else 0.0, 1.0 if word.istitle() else 0.0] for word in sent]
  gloveFeatures.append(temp)
  temp = []

In [129]:
len(gloveFeatures[0])

9

In [125]:
class BiLSTM2(nn.Module):
  def __init__(self, embedding_dim, hidden_dim , lstm_layers, bidirectional, dropout, output_dim, num_classes):
    super(BiLSTM2, self).__init__()
    self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(embeddings).float(), freeze=True)
    self.bilstm = nn.LSTM(embedding_dim+3, hidden_size=hidden_dim, num_layers=lstm_layers, bidirectional=bidirectional, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear(2*hidden_dim, output_dim)
    self.elu = nn.ELU()
    self.classifier = nn.Linear(output_dim,num_classes)

  def forward(self, text, additional_features):
    embedded = self.embedding(text)
    concatenated_features = torch.cat((embedded, additional_features), dim=2)
    #packed_features = pack_padded_sequence(concatenated_features, input_lengths, batch_first=True, enforce_sorted=False)
    lstmout, _ = self.bilstm(concatenated_features)
    #lstmout, _ = pad_packed_sequence(lstmout, batch_first=True)
    lstmout = self.dropout(lstmout)
    linearout = self.fc(lstmout)
    eluout = self.elu(linearout)
    logits = self.classifier(eluout)

    return logits

In [168]:
embedding_dim = 100
hidden_dim = 256
lstm_layers = 1
bidirectional = True
droput = 0.33
output_dim = 128
num_classes = 9
model = BiLSTM2(embedding_dim=embedding_dim, hidden_dim=hidden_dim, lstm_layers=lstm_layers, bidirectional=bidirectional, dropout=droput, output_dim=output_dim, num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
print(model)


BiLSTM2(
  (embedding): Embedding(400002, 100)
  (bilstm): LSTM(103, 256, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.33, inplace=False)
  (fc): Linear(in_features=512, out_features=128, bias=True)
  (elu): ELU(alpha=1.0)
  (classifier): Linear(in_features=128, out_features=9, bias=True)
)


In [193]:
def addPadding(indices, labels, features):
  paddedIndices = pad_sequence([torch.LongTensor(inp) for inp in indices], batch_first=True, padding_value=0)
  paddedLabels = pad_sequence([torch.LongTensor(label) for label in labels], batch_first=True, padding_value=9)
  paddedFeatures = pad_sequence([torch.LongTensor(feat) for feat in features], batch_first=True, padding_value=9)
  return paddedIndices, paddedLabels, paddedFeatures

In [171]:
paddedGloveIndices, paddedGlovelabels, paddedGloveFeatures = addPadding(trainWordGloveIndices, listOfTrainLabels, gloveFeatures)

In [None]:
num_epochs = 10
train_data = TensorDataset(torch.LongTensor(paddedGloveIndices), torch.LongTensor(paddedGlovelabels), torch.LongTensor(paddedGloveFeatures))
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for inputs, labels, feat in train_loader:
        optimizer.zero_grad()
        logits = model(text=inputs, additional_features=feat)
        loss = nn.CrossEntropyLoss(ignore_index=9)(logits.view(-1, num_classes), labels.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

In [173]:
torch.save(model.state_dict(), 'model/blstm2.pt')

In [174]:
allGloveTrainTrue = []
allGloveTrainPred = []
model.eval()
with torch.no_grad():
  for batch in train_loader:
    inputs, labels, feat = batch
    logits = model(text=inputs, additional_features=feat)
    predictions = torch.argmax(logits, dim=2)
    validIndex = []
    for i in range(len(labels)):
      k=0
      for j in labels[i]:
          if j != 9:
              k+=1
      validIndex.append(k)
    for i in range(len(labels)):
      allGloveTrainTrue.extend(labels[i][:validIndex[i]].tolist())
      allGloveTrainPred.extend(predictions[i][:validIndex[i]].tolist())

In [176]:
allGloveTrainPred

[3,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 0,
 1,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 3,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 3,
 4,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 8,
 0,
 1,
 0,
 0,
 1,
 2,
 0,
 1,
 0,
 1,
 0,
 5,
 6,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 0,
 5,
 0,
 0,
 1,
 2,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [177]:
print(len(allGloveTrainTrue))
print(len(allGloveTrainPred))

203621
203621


In [178]:
allGloveTrainTrue = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in allGloveTrainTrue]
allGloveTrainPred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in allGloveTrainPred]

In [133]:
precison, recall, f1, _ = precision_recall_fscore_support(allGloveTrainTrue, allGloveTrainPred, average="macro")
print("Precision: ",precison)
print("Recall: ", recall)
print("F1: ", f1)
print("accuracy_score", accuracy_score(allGloveTrainTrue, allGloveTrainPred))

Precision:  0.9955150391664197
Recall:  0.9930777623731883
F1:  0.9942778099948407
accuracy_score 0.9988999170026667


In [179]:
writePredictionstoFile("predictions/task2/trainGlovePred.txt", trainIndices, trainWords, allGloveTrainPred)
writePredictionstoFile("predictions/task2/trainGloveTrue.txt", trainIndices, trainWords, allGloveTrainTrue)

In [180]:
!python eval.py -p "predictions/task2/trainGlovePred.txt" -g "predictions/task2/trainGloveTrue.txt"

processed 203621 tokens with 24242 phrases; found: 24276 phrases; correct: 24035.
accuracy:  99.86%; precision:  99.01%; recall:  99.15%; FB1:  99.08
              LOC: precision:  99.31%; recall:  99.60%; FB1:  99.45  7242
             MISC: precision:  97.88%; recall:  98.01%; FB1:  97.95  3531
              ORG: precision:  98.81%; recall:  98.77%; FB1:  98.79  6580
              PER: precision:  99.45%; recall:  99.61%; FB1:  99.53  6923


Evaluating Dev Data

In [194]:
devWordGloveIndices = []

for sent in listOfDevSent:
  temp = [word2idx.get(word.lower(), 1) for word in sent]
  devWordGloveIndices.append(temp)
  temp = []

In [195]:
# getting glove Features
gloveDevFeatures = [] 

for sent in listOfDevSent:
  temp = [[1.0 if word.isupper() else 0.0, 1.0 if word.islower() else 0.0, 1.0 if word.istitle() else 0.0] for word in sent]
  gloveDevFeatures.append(temp)
  temp = []

In [196]:
paddedDevGloveIndices, paddedDevGlovelabels, paddedDevGloveFeatures = addPadding(devWordGloveIndices, listOfDevLabels, gloveDevFeatures)

In [198]:
dev_dataset = TensorDataset(torch.LongTensor(paddedDevGloveIndices), torch.LongTensor(paddedDevGlovelabels), torch.LongTensor(paddedDevGloveFeatures))
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=True)

In [200]:
devGloveTrue = []
devGlovePred = []

model.eval()
with torch.no_grad():
  for batch in dev_loader:
    inputs, labels, feat = batch
    logits = model(text=inputs, additional_features=feat)
    predictions = torch.argmax(logits, dim=2)
    validIndex = []
    for i in range(len(labels)):
      k=0
      for j in labels[i]:
        if j != 9:
          k+=1
      validIndex.append(k)               
    for i in range(len(labels)):
      devGloveTrue.extend(labels[i][:validIndex[i]].tolist())
      devGlovePred.extend(predictions[i][:validIndex[i]].tolist())

In [201]:
devGloveTrue = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in devGloveTrue]
devGlovePred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in devGlovePred]

In [None]:
precison, recall, f1, _ = precision_recall_fscore_support(devGloveTrue, devGlovePred, average="macro")
print("Precision: ",precison)
print("Recall: ", recall)
print("F1: ", f1)
print("accuracy:", accuracy_score(devGloveTrue, devGlovePred))

In [204]:
writePredictionstoFile("predictions/task2/dev2.out", devIndices, devWords, devGlovePred)
writePredictionstoFile("predictions/task2/devGloveTrue.txt", devIndices, devWords, devGloveTrue)

In [205]:
!python eval.py -p "predictions/task2/dev2.out" -g "predictions/task2/devGloveTrue.txt"

processed 51362 tokens with 6109 phrases; found: 6216 phrases; correct: 5671.
accuracy:  98.61%; precision:  91.23%; recall:  92.83%; FB1:  92.02
              LOC: precision:  94.48%; recall:  96.11%; FB1:  95.29  1885
             MISC: precision:  84.96%; recall:  86.05%; FB1:  85.50  951
              ORG: precision:  86.66%; recall:  88.35%; FB1:  87.50  1417
              PER: precision:  94.45%; recall:  96.21%; FB1:  95.32  1963


Predicting Test Data

In [232]:
testWordGloveIndices = []

for sent in listOfTestSent:
  temp = [word2idx.get(word.lower(), 1) for word in sent]
  testWordGloveIndices.append(temp)
  temp = []

In [233]:
len(testWordGloveIndices)

3453

In [234]:
# getting glove Features
gloveTestFeatures = [] 

for sent in listOfTestSent:
  temp = [[1.0 if word.isupper() else 0.0, 1.0 if word.islower() else 0.0, 1.0 if word.istitle() else 0.0] for word in sent]
  gloveTestFeatures.append(temp)
  temp = []

In [235]:
len(gloveTestFeatures)

3453

In [236]:
paddedTestIndices = pad_sequence([torch.LongTensor(inp) for inp in testWordGloveIndices], batch_first=True, padding_value=0)
paddedTestGloveFeatures = pad_sequence([torch.LongTensor(feat) for feat in gloveTestFeatures], batch_first=True, padding_value=9)

In [237]:
len(paddedTestGloveFeatures)

3453

In [238]:
test_dataset = TensorDataset(torch.LongTensor(paddedDevGloveIndices), torch.LongTensor(paddedDevGloveFeatures))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [239]:
testGloveTrue = []
testGlovePred = []

model.eval()
with torch.no_grad():
  for batch in test_loader:
    inputs, feat = batch
    logits = model(text=inputs, additional_features=feat)
    predictions = torch.argmax(logits, dim=2)
    validIndex = []
    for i in range(len(inputs)):
      k = 0
      for j in inputs[i]:
        if j != 0:
          k+=1
      validIndex.append(k)
    for i in range(len(inputs)):
      testGlovePred.extend(predictions[i][:validIndex[i]].tolist()) 

In [229]:
len(testGlovePred)

51362

In [240]:
testGlovePred = [list(ner2Labels.keys())[list(ner2Labels.values()).index(label)] for label in testGlovePred]

In [215]:
len(testGlovePred)

32538

In [216]:
len(testIndices)

46435

In [241]:
writePredictionstoFile("predictions/task2/test2.out", testIndices, testWords, testGlovePred)