In [1]:
import numpy as np
import pandas as pd
import re
import nltk
import sklearn
import warnings
from platform import python_version
print(python_version())

3.9.5


In [2]:
### Read in data:
tr_headers = ["index", "word", "ner_tag"]
train_df = pd.read_csv("./data/train", sep=' ', header=None, quoting=3)
train_df.columns = tr_headers

dev_df = pd.read_csv("./data/dev", sep=' ', header=None, quoting=3)
dev_df.columns = tr_headers

test_headers = ["index", "word"]
test_df = pd.read_csv("./data/test", sep=' ', header=None, engine='python', error_bad_lines=False, quoting=3)
test_df.columns = test_headers

In [3]:
train_df.iloc[70:76]

Unnamed: 0,index,word,ner_tag
70,28,advice,O
71,29,was,O
72,30,clearer,O
73,31,.,O
74,1,"""",O
75,2,We,O


In [4]:
train_df.head(15)

Unnamed: 0,index,word,ner_tag
0,1,EU,B-ORG
1,2,rejects,O
2,3,German,B-MISC
3,4,call,O
4,5,to,O
5,6,boycott,O
6,7,British,B-MISC
7,8,lamb,O
8,9,.,O
9,1,Peter,B-PER


In [5]:
# Slight cleaning on num:
train_df["word"] = train_df["word"].str.replace(r'^\d+|.\d+$', "<num>", regex=True)
dev_df["word"] = dev_df["word"].str.replace(r'^\d+|.\d+$', "<num>", regex=True)
test_df["word"] = test_df["word"].str.replace(r'^\d+|.\d+$', "<num>", regex=True)

In [6]:
# Get the count of each word:
#word-type = word
cnt_d = {}
for row in train_df.iterrows():
    if row[1]["word"] in cnt_d:
        cnt_d[row[1]["word"]] += 1
    else:
        cnt_d[row[1]["word"]] = 1

In [7]:
threshold = 2 #No threshold = 1
#unknown_cnt = 0
unknown_word_lst = []   #We want to keep track of unknown words but group together
for k, v in cnt_d.items():
    if v < threshold:
        #unknown_cnt += v
        unknown_word_lst.append(k)
    else:
        continue

In [8]:
def replace_unk_train(word):
    if word in unknown_word_lst:
        return "<unk>"
    else:
        return word

In [9]:
def replace_unk_dev(word):
    if word in unknown_word_lst:
        return "<unk>"
    elif word not in train_words:
        return "<unk>"
    else:
        return word

In [10]:
#Replace with <unk> Train:
train_df["word"] = train_df["word"].apply(replace_unk_train)

In [11]:
train_df["word"] = train_df["word"].astype(str)
train_vocab_size = len(np.unique(train_df["word"]))
train_words = np.unique(train_df["word"])

In [12]:
#Replace with <unk> Dev:
dev_df["word"] = dev_df["word"].apply(replace_unk_dev)
dev_df["word"] = dev_df["word"].astype(str)

In [13]:
dev_df

Unnamed: 0,index,word,ner_tag
0,1,CRICKET,O
1,2,-,O
2,3,<unk>,B-ORG
3,4,TAKE,O
4,5,OVER,O
...,...,...,...
51573,1,--,O
51574,2,Dhaka,B-ORG
51575,3,Newsroom,I-ORG
51576,4,<num>-2<num>,O


In [14]:
#Format the data by sentences TRAIN:
def format_data(df):
    train_formatted = []
    #init beginning:
    first_word = df.iloc[0]
    sentence_x = [first_word["word"]]
    sentence_y = [first_word["ner_tag"]]
    
    for row in df.iloc[1:].iterrows():
        #print(row)
        if row[1]["index"] == 1:
            #print(row[1]["word"])
            train_formatted.append([sentence_x, sentence_y])

            sentence_x, sentence_y = [], []
            sentence_x.append(row[1]["word"])
            sentence_y.append(row[1]["ner_tag"])
            if row[0] == (df.shape[0]-1):
                train_formatted.append([sentence_x, sentence_y])
        else:
            sentence_x.append(row[1]["word"])
            sentence_y.append(row[1]["ner_tag"])
    return train_formatted

In [15]:
#Format the data by sentences TEST:
def format_data_test(df):
    test_formatted = []
    #init beginning:
    first_word = df.iloc[0]
    sentence_x = [first_word["word_formatted"]]
    
    for row in df.iloc[1:].iterrows():
        if row[1]["index"] == 1:
            test_formatted.append(sentence_x)

            sentence_x = []
            sentence_x.append(row[1]["word_formatted"])
            if row[0] == (df.shape[0]-1):
                train_formatted.append([sentence_x, sentence_y])
        else:
            sentence_x.append(row[1]["word_formatted"])
    
    return test_formatted

In [16]:
train_formatted = format_data(train_df)
dev_formatted = format_data(dev_df)

In [17]:
#Create Word Map for vocab:
word_map = {"<pad>":0}
for i, word in enumerate(set(train_df["word"])):
    word_map[word] = i+1
word_map

{'<pad>': 0,
 'Presidential': 1,
 'Cuba': 2,
 'Harrison': 3,
 'Petah': 4,
 'ineligible': 5,
 'Ginebra': 6,
 'Winner': 7,
 'Kafelnikov': 8,
 'Maynard': 9,
 'Munton': 10,
 'Leander': 11,
 'offering': 12,
 'nationalists': 13,
 'Molenbeek': 14,
 'Hollywood': 15,
 'overcome': 16,
 'Note': 17,
 'devoted': 18,
 'blow': 19,
 'Perhaps': 20,
 'Polish': 21,
 'extortion': 22,
 'Gopal': 23,
 'ON': 24,
 'background': 25,
 'liberal': 26,
 'northern': 27,
 'Das': 28,
 'Bystrica': 29,
 'furlongs': 30,
 '.': 31,
 'billions': 32,
 'say': 33,
 'contenders': 34,
 'Hinckley': 35,
 'continue': 36,
 'buys': 37,
 'two-goal': 38,
 'depressed': 39,
 'TWA': 40,
 'combatants': 41,
 'DIV': 42,
 'carrying': 43,
 'protects': 44,
 'storming': 45,
 'slammed': 46,
 'some': 47,
 'Mullally': 48,
 'Israel': 49,
 'resumed': 50,
 'ANKARA': 51,
 'Soyoil': 52,
 'bushland': 53,
 'claimed': 54,
 'mouth': 55,
 'focusing': 56,
 'secretary-general': 57,
 'run-scoring': 58,
 'Elect': 59,
 'Adam': 60,
 'weathered': 61,
 'quell': 62,


In [18]:
#Create Word Map for ner_tag:
ner_map = {"<pad>":-1}
for i, word in enumerate(set(train_df["ner_tag"])):
    ner_map[word] = i
ner_map

{'<pad>': -1,
 'B-PER': 0,
 'I-ORG': 1,
 'O': 2,
 'I-MISC': 3,
 'B-MISC': 4,
 'I-PER': 5,
 'I-LOC': 6,
 'B-LOC': 7,
 'B-ORG': 8}

In [19]:
ner_map_without_pad = {}
for i, word in enumerate(set(train_df["ner_tag"])):
    ner_map_without_pad[word] = i
ner_map_without_pad

{'B-PER': 0,
 'I-ORG': 1,
 'O': 2,
 'I-MISC': 3,
 'B-MISC': 4,
 'I-PER': 5,
 'I-LOC': 6,
 'B-LOC': 7,
 'B-ORG': 8}

In [20]:
longest_train_sent = 0
for sentence in train_formatted:
    sentence_len = len(sentence[0])
    if sentence_len > longest_train_sent:
        longest_train_sent = sentence_len

In [21]:
longest_train_sent

113

In [22]:
#Map words in sentences to corresponding values:
def pad_sentences(sentences_formatted):
    train_padded = []
    cnt = 0
    for sentence in sentences_formatted:
        word_lst = sentence[0]
        ner_lst = sentence[1]
        mapped_word_lst, mapped_ner_lst = [], []
        cnt += len(word_lst)
        for word in word_lst:
            mapped_word_lst.append(word_map[word])
        for ner in ner_lst:
            mapped_ner_lst.append(ner_map[ner])

        word_cnt = len(mapped_word_lst)
        diff_ = longest_train_sent - word_cnt
        mapped_word_lst = mapped_word_lst + [0] * diff_
        mapped_ner_lst = mapped_ner_lst + [-1] * diff_

        train_padded.append([mapped_word_lst, mapped_ner_lst])
    print(cnt)
    return train_padded

In [23]:
train_padded = pad_sentences(train_formatted)
dev_padded = pad_sentences(dev_formatted)

204567
51578


In [24]:
#Map words in sentences to corresponding values:
def pad_test_sentences(sentences_formatted):
    test_padded = []
    for sentence in sentences_formatted:
        mapped_word_lst = []
        for word in sentence:
            mapped_word_lst.append(word_map[word])

        word_cnt = len(mapped_word_lst)
        diff_ = longest_train_sent - word_cnt
        mapped_word_lst = mapped_word_lst + [0] * diff_

        test_padded.append(mapped_word_lst)
    return test_padded

### Task 1: Simple Bidirectional LSTM Model:

In [25]:
len(dev_padded)

3466

In [26]:
len(train_padded)

14987

In [27]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch import nn

In [28]:
class BLSTM(nn.Module):
    
    def __init__(self, vocab_size):
        super().__init__()
        
        lstm_hidden_dim = 256
        lstm_num_layers = 1
        linear_output_dim =128
        output_dim = 10
        
        self.embeddings = nn.Embedding(vocab_size, 100)
        self.lstm = nn.LSTM(input_size=100, hidden_size=256,
                          num_layers=1, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.33)
        self.linear1 = nn.Linear(512, 128)
        self.linear2 = nn.Linear(128, 9)
        self.elu = nn.ELU()
        
    def forward(self, inputs):
        #print(inputs.shape)
        embeds = self.embeddings(inputs)
        #print(embeds.shape)
        lstm_out, self.hidden = self.lstm(embeds.view(len(inputs), 1, -1))
        lstm_out_dropped = self.dropout(lstm_out)
        out = self.linear1(lstm_out_dropped.view(len(inputs), -1))
        linear_out_dropped = self.dropout(out)
        #l2_out = self.linear2(linear_out_dropped)
        elu_out = self.elu(linear_out_dropped)
        l2_out = self.linear2(elu_out)
        log_probs = F.log_softmax(l2_out, dim=1)
        return log_probs


In [82]:
EMBEDDING_DIM = 100
VOCAB_SIZE = train_vocab_size+1 #added <pad> word
n_epochs = 20
trainloader = torch.utils.data.DataLoader(train_padded, batch_size=12, num_workers=1)
devloader = torch.utils.data.DataLoader(dev_padded, batch_size=12, num_workers=1)
blstm = BLSTM(VOCAB_SIZE)
criterion = nn.CrossEntropyLoss(ignore_index=-1, size_average=True) #therefore no need for softmax
#criterion = nn.NLLLoss()
# optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
optimizer = torch.optim.SGD(blstm.parameters(), lr=0.25, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

test_loss_min = 10000

for epoch in range(n_epochs):
    scheduler.step()
    print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))
    train_loss = 0
    test_loss = 0
    
    blstm.train()
    for data, target in trainloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
#         print(data)
#         print(target)
        #print(torch.cat(data,dim=0).reshape(1,400,316))
        #print(torch.cat(data,dim=0).size(0)) I think the problem is here.
        output = blstm(torch.cat(data,dim=0))

        # Compute loss
        loss = criterion(output, torch.cat(target,dim=0))

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

                # Print statistics
        #train_loss += loss.item()*torch.cat(data,dim=0).size(0)
        train_loss += loss
        
    with torch.no_grad():
        for data, target in devloader:
            output = blstm(torch.cat(data,dim=0))
            loss = criterion(output, torch.cat(target,dim=0))
#             test_loss += loss.item()*torch.cat(data,dim=0).size(0)
            test_loss += loss
    train_loss = train_loss/len(trainloader.dataset)
    test_loss = test_loss/len(devloader.dataset)
    
    print('Epoch: {} \tTraining Loss: {:.6f} \tTest Loss: {:.6f}'.format(
        epoch+1, 
        train_loss,
        test_loss
        ))
    
    if test_loss <= test_loss_min:
        torch.save(blstm.state_dict(), 'blstm1.pt')
        test_loss_min = test_loss

  # Process is complete.
print('All done.')

Epoch: 1 	Training Loss: 0.061716 	Test Loss: 0.053083
Epoch: 2 	Training Loss: 0.048122 	Test Loss: 0.046673
Epoch: 3 	Training Loss: 0.039629 	Test Loss: 0.039134
Epoch: 4 	Training Loss: 0.033555 	Test Loss: 0.033525
Epoch: 5 	Training Loss: 0.024118 	Test Loss: 0.028855
Epoch: 6 	Training Loss: 0.020382 	Test Loss: 0.027672
Epoch: 7 	Training Loss: 0.019041 	Test Loss: 0.027528
Epoch: 8 	Training Loss: 0.018215 	Test Loss: 0.026919
Epoch: 9 	Training Loss: 0.017680 	Test Loss: 0.026281
Epoch: 10 	Training Loss: 0.016589 	Test Loss: 0.026506
Epoch: 11 	Training Loss: 0.016186 	Test Loss: 0.026321
Epoch: 12 	Training Loss: 0.015937 	Test Loss: 0.025970


KeyboardInterrupt: 

In [30]:
#Format the test_data:
test_df["word_formatted"] = test_df["word"].apply(replace_unk_dev)
test_df["word_formatted"] = test_df["word_formatted"].astype(str)

test_formatted = format_data_test(test_df)
test_padded = pad_test_sentences(test_formatted)

NameError: name 'sentence_y' is not defined

In [76]:
blstm.load_state_dict(torch.load('blstm1.pt'))
#testloader = torch.utils.data.DataLoader(test_padded[0:100], batch_size=1, num_workers=1)

<All keys matched successfully>

In [32]:
# Calculate Accuracy from trained model:
def predict_test(model, dataloader):
    prediction_list = []
    with torch.no_grad():
        for data in dataloader:
            output = model(torch.cat(data,dim=0))
            _, predicted = torch.max(output.data, 1) 
            prediction_list.append(predicted)
    return prediction_list

In [33]:
# Calculate Accuracy from trained model:
def predict(model, dataloader):
    prediction_list = []
    with torch.no_grad():
        for data, target in dataloader:
            output = model(torch.cat(data,dim=0))
            _, predicted = torch.max(output.data, 1) 
            prediction_list.append(predicted)
    return prediction_list

In [34]:
def unravel_predictions_test(data, pred):
    overall_pred = []
    for i, sentence in enumerate(data):
        non_padded_pred = len(np.nonzero(sentence)[0])
        pred_i = pred[i].tolist()[0:non_padded_pred]
        overall_pred.append(pred_i)
    return overall_pred

In [38]:
def unravel_predictions(data, pred):
    overall_pred = []
    for i, sentence in enumerate(data):
        actual_sentence = sentence[0]
        non_padded_pred = len(np.nonzero(actual_sentence)[0])
        pred_i = pred[i].tolist()[0:non_padded_pred]
        overall_pred.append(pred_i)
    return overall_pred

In [35]:
def convert_predictions(pred):
    overall_pred = []
    for sentence in pred:
        for idx in sentence:
            overall_pred.append(list(ner_map_without_pad.keys())[idx])
    return overall_pred

In [36]:
def accuracy(y_true, y_pred):
    score = sum(y_true == y_pred)/len(y_pred)
    return score

In [77]:
#predict on dev:
devloader = torch.utils.data.DataLoader(dev_padded, batch_size=1, num_workers=1) #need to do 1 at a time:
predictions_dev = predict(blstm, devloader)
predictions_dev = unravel_predictions(dev_padded, predictions_dev)
predictions_dev = convert_predictions(predictions_dev)

In [669]:
predictions_dev

['I-LOC',
 'O',
 'I-ORG',
 'I-LOC',
 'O',
 'O',
 'O',
 'O',
 'B-PER',
 'B-LOC',
 'O',
 'B-LOC',
 'O',
 'B-MISC',
 'O',
 'O',
 'B-LOC',
 'I-ORG',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-PER',
 'O',
 'B-MISC',
 'I-PER',
 'B-ORG',
 'O',
 'O',
 'O',
 'O',
 'O',
 'O',
 'B-ORG',
 'O',
 'B-LOC',
 'O',
 'B-ORG',
 'B-LOC',
 'O',
 'B-MISC',
 'O',
 'O',
 'B-MISC',
 'I-LOC',
 'O',
 'O',
 'B-LOC',
 'O',
 'O',
 'I-LOC',
 'B-LOC',
 'B-LOC',
 'B-LOC',
 'B-ORG',
 'O',
 'I-ORG',
 'B-PER',
 'O',
 'B-ORG',
 'B-ORG',
 'O',
 'O',
 'O',
 'B-PER',
 'B-ORG',
 'B-LOC',
 'O',
 'O',
 'O',
 'I-LOC',
 'B-LOC',
 'O',
 'B-LOC',
 'I-LOC',
 'O',
 'O',
 'O',
 'B-MISC',
 'B-ORG',
 'B-MISC',
 'B-LOC',
 'B-ORG',
 'O',
 'O',
 'O',
 'B-ORG',
 'O',
 'O',
 'O',
 'B-PER',
 'O',
 'B-MISC',
 'B-LOC',
 'O',
 'B-PER',
 'B-ORG',
 'O',
 'B-MISC',
 'B-PER',
 'B-LOC',
 'I-PER',
 'O',
 'B-ORG',
 'O',
 'B-ORG',
 'O',
 'B-LOC',
 'O',
 'O',
 'O',
 'O',
 'B-MISC',
 'B-LOC',
 'B-LOC',
 'O',
 'B-PER',
 'O',
 'O',
 'I-LOC',
 'O',
 'O',
 'B-MISC',
 '

In [78]:
y_true = np.array(dev_df["ner_tag"])
print(len(y_true))
print(len(predictions_dev))
print("Dev Accuracy:", accuracy(y_true, predictions_dev))

51578
51578
Dev Accuracy: 0.9178331846911474


In [None]:
###Best score: lr=.1, momentum=.9, epoch=30, batch_size-64 - 91.77% acc, 55.89 F1, ~2hrs

#Most promising: lr=.25, momentum=.9, epoch=10, batch_size=212 - 91.78%, 55.07 F1 ~45min

In [79]:
def write_results(name, y_true, y_pred, df):
    with open(name, 'w') as f:
        for row in df.iloc[0:].iterrows():
            f.write(str(row[1]["index"]))
            f.write(" ")
            f.write(row[1]["word"])
            f.write(" ")
            f.write(y_true[row[0]])
            f.write(" ")
            f.write(y_pred[row[0]])
            f.write("\n")

In [80]:
write_results("dev1.out", y_true, predictions_dev, dev_df)

In [504]:
#predict on test:
predictions = predict_test(blstm, testloader)
predictions = unravel_predictions_test(test_padded[0:100], predictions)
predictions = convert_predictions(predictions)

### Task 2: Using GloVe Word Embeddings:

In [85]:
with open("./glove.6B.100d","r",encoding="UTF-8") as f:
    word2vec={}
    for word_embedding in f:
        word_split = word_embedding.split()
        word = word_split[0]
        word2vec[word] = np.array(word_split[1:], dtype=np.float64)

In [87]:
EMBEDDING_DIM = 100
VOCAB_SIZE = train_vocab_size+1

In [90]:
embedding_matrix = np.zeros((VOCAB_SIZE, EMBEDDING_DIM))
for word, idx in word_map.items():
    if word in word2vec:
        word_embedding = word2vec[word]
        embedding_matrix[idx,:] = word_embedding
    else:
        continue

embedding_blstm2 = nn.Embedding(VOCAB_SIZE, EMBEDDING_DIM)
embedding_blstm2.load_state_dict({"weight": torch.tensor(embedding_matrix)})

<All keys matched successfully>

In [91]:
class BLSTM_2(nn.Module):
    
    def __init__(self, embeddings):
        super().__init__()
        
        lstm_hidden_dim = 256
        lstm_num_layers = 1
        linear_output_dim =128
        output_dim = 10
        
        self.embeddings = embeddings
        self.lstm = nn.LSTM(input_size=100, hidden_size=256,
                          num_layers=1, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(0.33)
        self.linear1 = nn.Linear(512, 128)
        self.linear2 = nn.Linear(128, 9)
        self.elu = nn.ELU()
        
    def forward(self, inputs):
        #print(inputs.shape)
        embeds = self.embeddings(inputs)
        #print(embeds.shape)
        lstm_out, self.hidden = self.lstm(embeds.view(len(inputs), 1, -1))
        lstm_out_dropped = self.dropout(lstm_out)
        out = self.linear1(lstm_out_dropped.view(len(inputs), -1))
        linear_out_dropped = self.dropout(out)
        #l2_out = self.linear2(linear_out_dropped)
        elu_out = self.elu(linear_out_dropped)
        l2_out = self.linear2(elu_out)
        log_probs = F.log_softmax(l2_out, dim=1)
        return log_probs


In [93]:
EMBEDDING_DIM = 100
VOCAB_SIZE = train_vocab_size+1 #added <pad> word
n_epochs = 20
trainloader = torch.utils.data.DataLoader(train_padded, batch_size=12, num_workers=1)
devloader = torch.utils.data.DataLoader(dev_padded, batch_size=12, num_workers=1)
blstm2 = BLSTM_2(embedding_blstm2)
criterion = nn.CrossEntropyLoss(ignore_index=-1, size_average=True) #therefore no need for softmax
#criterion = nn.NLLLoss()
# optimizer = torch.optim.Adam(mlp.parameters(), lr=1e-4)
optimizer = torch.optim.SGD(blstm2.parameters(), lr=0.25, momentum=0.9)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

test_loss_min = 10000

for epoch in range(n_epochs):
    scheduler.step()
    print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))
    train_loss = 0
    test_loss = 0
    
    blstm2.train()
    for data, target in trainloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
#         print(data)
#         print(target)
        #print(torch.cat(data,dim=0).reshape(1,400,316))
        #print(torch.cat(data,dim=0).size(0)) I think the problem is here.
        output = blstm2(torch.cat(data,dim=0))

        # Compute loss
        loss = criterion(output, torch.cat(target,dim=0))

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

                # Print statistics
        #train_loss += loss.item()*torch.cat(data,dim=0).size(0)
        train_loss += loss
        
    with torch.no_grad():
        for data, target in devloader:
            output = blstm2(torch.cat(data,dim=0))
            loss = criterion(output, torch.cat(target,dim=0))
#             test_loss += loss.item()*torch.cat(data,dim=0).size(0)
            test_loss += loss
    train_loss = train_loss/len(trainloader.dataset)
    test_loss = test_loss/len(devloader.dataset)
    
    print('Epoch: {} \tTraining Loss: {:.6f} \tTest Loss: {:.6f}'.format(
        epoch+1, 
        train_loss,
        test_loss
        ))
    
    if test_loss <= test_loss_min:
        torch.save(blstm2.state_dict(), 'blstm2.pt')
        test_loss_min = test_loss

  # Process is complete.
print('All done.')

Epoch-0 lr: 0.25
Epoch: 1 	Training Loss: 0.045912 	Test Loss: 0.037665
Epoch-1 lr: 0.25
Epoch: 2 	Training Loss: 0.036792 	Test Loss: 0.036313
Epoch-2 lr: 0.25
Epoch: 3 	Training Loss: 0.028793 	Test Loss: 0.029757
Epoch-3 lr: 0.25
Epoch: 4 	Training Loss: 0.021845 	Test Loss: 0.025563
Epoch-4 lr: 0.25
Epoch: 5 	Training Loss: 0.019020 	Test Loss: 0.024352
Epoch-5 lr: 0.25
Epoch: 6 	Training Loss: 0.016951 	Test Loss: 0.023817
Epoch-6 lr: 0.25
Epoch: 7 	Training Loss: 0.016222 	Test Loss: 0.023508
Epoch-7 lr: 0.25
Epoch: 8 	Training Loss: 0.015854 	Test Loss: 0.023464
Epoch-8 lr: 0.25
Epoch: 9 	Training Loss: 0.015519 	Test Loss: 0.023307
Epoch-9 lr: 0.125
Epoch: 10 	Training Loss: 0.014464 	Test Loss: 0.024054
Epoch-10 lr: 0.125
Epoch: 11 	Training Loss: 0.014135 	Test Loss: 0.023769
Epoch-11 lr: 0.125
Epoch: 12 	Training Loss: 0.014027 	Test Loss: 0.023911
Epoch-12 lr: 0.125
Epoch: 13 	Training Loss: 0.013873 	Test Loss: 0.023952
Epoch-13 lr: 0.125


KeyboardInterrupt: 

In [94]:
# Load in the best model from the given run:
blstm2.load_state_dict(torch.load('blstm2.pt'))

<All keys matched successfully>

In [96]:
#predict on dev:
devloader = torch.utils.data.DataLoader(dev_padded, batch_size=1, num_workers=1) #need to do 1 at a time:
predictions_dev = predict(blstm2, devloader)
predictions_dev = unravel_predictions(dev_padded, predictions_dev)
predictions_dev = convert_predictions(predictions_dev)

In [97]:
#Dev accuracy:
y_true = np.array(dev_df["ner_tag"])
print(len(y_true))
print(len(predictions_dev))
print("Dev Accuracy:", accuracy(y_true, predictions_dev))

51578
51578
Dev Accuracy: 0.925045562061344


In [98]:
write_results("dev2.out", y_true, predictions_dev, dev_df)

In [None]:
#RUN Command line: