**POS-Tagging**

Welcome to the third lab! In this excercise you will build a simple pos-tagger.
The excercise is inspired from Pytorch tutorial site: https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x1e1c68ed150>

In [2]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5

# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)

# alternatively, we can do the entire sequence all at once.
# the first value returned by LSTM is all of the hidden states throughout
# the sequence. the second is just the most recent hidden state
# (compare the last slice of "out" with "hidden" below, they are the same)
# The reason for this is that:
# "out" will give you access to all hidden states in the sequence
# "hidden" will allow you to continue the sequence and backpropagate,
# by passing it as an argument  to the lstm at a later time
# Add the extra 2nd dimension
inputs = torch.cat(inputs).view(len(inputs), 1, -1)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # clean out hidden state
out, hidden = lstm(inputs, hidden)
print(out)
print(hidden)




tensor([[[-0.0187,  0.1713, -0.2944]],

        [[-0.3521,  0.1026, -0.2971]],

        [[-0.3191,  0.0781, -0.1957]],

        [[-0.1634,  0.0941, -0.1637]],

        [[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>)
(tensor([[[-0.3368,  0.0959, -0.0538]]], grad_fn=<StackBackward>), tensor([[[-0.9825,  0.4715, -0.0633]]], grad_fn=<StackBackward>))


**Task:**

Load the `training_data` from `corpus-small.train`, and modify the `tag_to_ix` dictionary to have all different tags.
Build `ix_to_tag` dictionay, you will need it at the last task. 

In [3]:
def prepare_sequence(sent, to_ix):
    answer = []
    
    for word in sent:
        if word in to_ix:
            answer.append(to_ix[word])
        else:
            print(word)
            key, _ = random.choice(list(to_ix.items()))
            answer.append(to_ix[key])
    return  torch.tensor(answer)


training_data = []

train_file = open("corpus-small.train")
all_tags = set()

for line in train_file:
    sentence = line.split()
    words = [word.split("/")[0] for word in sentence]
    tags = [word.split("/")[1] for word in sentence]
    for word in sentence:
        all_tags.add(word.split("/")[1]) 
    training_data.append((words,tags))
    

word_to_ix = {}
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
#print(word_to_ix)
tag_to_ix = dict([(v,k)for k,v in enumerate(all_tags)])
ix_to_tag = dict([(v,k) for k,v in tag_to_ix.items()])
# print(training_data)
# print(tag_to_ix)
# print(ix_to_tag)

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

In [4]:
class LSTMTagger(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim

        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [10]:
model = LSTMTagger(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    print(tag_scores)

### Reduce number of epochs, if training data is big
for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    print(epoch)
    losses =[]
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)


        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)
       
        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
    print(np.mean(losses))

# See what the scores are after training
import numpy as np
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)
    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

tensor([[-3.3549, -3.8517, -3.9639,  ..., -4.0011, -3.4143, -3.9687],
        [-3.3294, -3.8163, -4.0067,  ..., -3.9499, -3.4615, -3.9072],
        [-3.3552, -3.8519, -3.9976,  ..., -3.8255, -3.5273, -3.7978],
        ...,
        [-3.6812, -3.9090, -3.9405,  ..., -3.6726, -3.7509, -3.8590],
        [-3.4608, -3.7896, -3.9431,  ..., -3.8852, -3.5621, -3.9167],
        [-3.4595, -3.8127, -4.0111,  ..., -3.7979, -3.5942, -3.8593]])
0
3.5960139727592466
1
3.2824771356582643
2
3.139972198009491
3
3.0689300894737244
4
3.01742858171463
5
2.9660538458824157
6
2.9084667491912843
7
2.845524559020996
8
2.7799229979515077
9
2.713484070301056
10
2.6480898690223693
11
2.5853187012672425
12
2.5264183473587036
13
2.4727591061592102
14
2.4250386786460876
15
2.383109860420227
16
2.346331046819687
17
2.3139590442180635
18
2.2852754151821135
19
2.259475265741348
20
2.23583904504776
21
2.213909590244293
22
2.1934573686122896
23
2.174330139160156
24
2.1563374829292297
25
2.139257422685623
26
2.122967178821

**Task:**

Read the test data `corpus-small.test` and process it then get the predicitions.Write down the output tagged predicitons in file `corpus-small.out` in the same form as `corpus-small.answer`.

Note: in-case of unseen word in the testing dataset, replace it with a random seen one! (There's a better solution).

At the end, run the last cell, to get the accuracy of your model.

In [11]:
import numpy as np
import random


def idx_to_tag(idx):
    if  idx in ix_to_tag:
        return ix_to_tag[idx]
    else:
        return ix_to_tag[0]


test = open("corpus-small.test")
out = open("corpus-small.out","w")
sentences = []

for line in test:
    sentence = line.split()
    sentence_in = prepare_sequence(sentence,word_to_ix)
    print(model(sentence_in).data.numpy())
    print(np.argmax(model(sentence_in).data.numpy(),axis=1))
    tags =  np.argmax(model(sentence_in).data.numpy(),axis=1)
    #print(tags)
    answer = ""
    for i in range(len(sentence_in)):
        answer+=sentence[i]+"/"+str(idx_to_tag(tags[i]))+" "
    answer+="\n"
    out.write(answer)
test.close()
out.close()
#print(answer)


six
T.
Marshall
Hahn
Jr.
made
acquisitions
George
mode
kind
gentle
[[-1.16512976e+01 -4.67790079e+00 -1.45515699e+01 -5.42698316e-02
  -1.76723366e+01 -1.06070900e+01 -1.28986521e+01 -5.50959682e+00
  -9.61270809e+00 -8.68315792e+00 -8.70837688e+00 -8.71749973e+00
  -1.32046337e+01 -6.53897524e+00 -6.04128265e+00 -1.13365536e+01
  -4.45241022e+00 -1.26791725e+01 -9.64540005e+00 -1.05561457e+01
  -1.16144304e+01 -6.96325111e+00 -1.20965805e+01 -1.32658558e+01
  -7.64565277e+00 -5.92842579e+00 -1.00854568e+01 -7.80759764e+00
  -8.10045719e+00 -1.02211580e+01 -7.37632036e+00 -9.51389790e+00
  -6.65416241e+00 -1.04370832e+01 -1.09325590e+01 -1.07464943e+01
  -1.00897474e+01 -6.94047546e+00 -9.71828365e+00 -4.42580414e+00
  -1.40602016e+01 -5.71087694e+00]
 [-9.17197514e+00 -8.67732430e+00 -3.54215741e+00 -1.05450773e+01
  -8.92144299e+00 -8.59213924e+00 -8.80959225e+00 -9.72224045e+00
  -6.31919003e+00 -5.23461628e+00 -5.88164902e+00 -6.01651096e+00
  -3.59002328e+00 -2.39140248e+00 -8.477

  -1.36322384e+01 -7.34990025e+00]]
[ 3 37 29 17 29 15 41  3  7 12 37 29  3  2 41 37 37 40 41 25 12 32]
question
Can
act
hard-charging
Teddy
Roosevelt
[[-1.32128687e+01 -1.10376730e+01 -5.70810772e-03 -1.71910019e+01
  -6.50405931e+00 -1.24738169e+01 -1.72887402e+01 -1.80980415e+01
  -1.28813543e+01 -9.77110291e+00 -1.21457567e+01 -1.39470911e+01
  -9.08359814e+00 -1.45035734e+01 -1.31975679e+01 -1.26201344e+01
  -1.33333654e+01 -1.84077549e+01 -7.27850342e+00 -1.30182877e+01
  -1.30294409e+01 -8.30816555e+00 -1.73428020e+01 -1.13013668e+01
  -6.24471712e+00 -1.77162628e+01 -1.28421850e+01 -7.95026875e+00
  -1.22325516e+01 -1.00342474e+01 -1.06178751e+01 -1.48556566e+01
  -9.36816883e+00 -1.34534359e+01 -1.25918512e+01 -1.03452024e+01
  -1.28110094e+01 -1.01778440e+01 -1.16710443e+01 -1.69944229e+01
  -7.99367476e+00 -8.78034210e+00]
 [-9.24078560e+00 -7.37262821e+00 -8.09400463e+00 -1.87427115e+00
  -1.43688536e+01 -8.42692566e+00 -7.69327021e+00 -4.72610092e+00
  -6.23774338e+00 -5.6

  -9.02071285e+00 -6.76801300e+00]]
[ 2 39 37 12 39 34 39 21  3 39 41 39 32]
Hahn
62-year-old
chairman
Georgia-Pacific
Corp.
leading
forest-product
unsolicited
3.19
bid
Great
Nekoosa
Corp
[[ -9.16388     -7.402696   -13.06083    ...  -0.39250094  -7.9498
   -5.093212  ]
 [ -8.608678    -9.395795    -6.8931246  ...  -2.2308142   -5.0554943
   -1.0528839 ]
 [-13.854734   -15.094054   -19.867525   ...  -6.658178   -11.268307
   -9.056038  ]
 ...
 [ -9.453927   -13.285619   -11.746086   ...  -3.7578182   -6.4544873
   -1.1337738 ]
 [ -9.060516   -10.578652    -7.7089725  ...  -2.0492964   -5.193154
   -1.110272  ]
 [-15.090471   -10.126562   -16.505219   ... -11.028799   -15.01694
   -7.916395  ]]
[39 41 17  2 41 29 25 37 41 41  3 41  3  7  7  2 24 37 28 37 37 37 39 41
  3 41 39 29 41 32]
Nekoosa
given
public
cold
shoulder
reaction
Hahn
faced
18
earlier
acquisitions
negotiated
behind
scenes
[[-8.0223875e+00 -1.0467997e+01 -1.4240031e+00 ... -6.1210046e+00
  -2.8573918e+00 -3.2947803e+00]
 

  -1.48294773e+01 -7.86876488e+00]]
[29 41 17 39 41  7 29  4 12 22  3 29  2 41 41 41 39 39 32]
We
prepared
pursue
aggressively
completion
says
[[-1.02544899e+01 -3.03066134e+00 -4.38373089e+00 -8.77101421e+00
  -1.33779249e+01 -9.40238380e+00 -1.36988144e+01 -1.08248930e+01
  -8.20930099e+00 -7.02900934e+00 -8.23016453e+00 -1.02703257e+01
  -7.51761246e+00 -3.42511010e+00 -7.44403267e+00 -8.11842918e+00
  -6.53231096e+00 -1.82088795e+01 -9.28904057e+00 -1.25577059e+01
  -1.18620729e+01 -3.96994901e+00 -4.64755583e+00 -1.75939350e+01
  -4.69994068e+00 -1.34118242e+01 -1.01147261e+01 -4.37916994e+00
  -9.43308926e+00 -1.11932364e+01 -5.45338964e+00 -8.85934353e+00
  -1.16635246e+01 -1.05223818e+01 -7.98827600e+00 -1.72959521e-01
  -9.76836967e+00 -5.26350975e+00 -8.89164352e+00 -8.10522842e+00
  -1.11439753e+01 -9.61170292e+00]
 [-1.14060268e+01 -1.28152866e+01 -1.57776880e+01 -5.75905943e+00
  -1.54340343e+01 -1.13989344e+01 -7.42577267e+00 -6.50690460e+00
  -8.58514214e+00 -7.96392488e

  -1.46908321e+01 -7.79194546e+00]]
[35 41 37  3  4 34 39 37  3  2 41 17 23 34 37 32]
battle
opens
possibility
bidding
war
implies
[[-9.36292171e+00 -7.02192497e+00 -1.43826065e+01 -3.29217339e+00
  -1.16324224e+01 -8.71307945e+00 -7.71316910e+00 -2.28763008e+00
  -7.18620872e+00 -6.45694351e+00 -6.37793446e+00 -4.95262051e+00
  -5.77489758e+00 -5.06174994e+00 -5.83943796e+00 -5.96310139e+00
  -6.46427965e+00 -5.64312077e+00 -6.10473394e+00 -6.81556702e+00
  -8.73800564e+00 -8.18838024e+00 -8.78996372e+00 -7.08105516e+00
  -6.27094793e+00 -7.07890391e-01 -8.57052708e+00 -8.37138271e+00
  -1.06743021e+01 -6.78438663e+00 -7.29868555e+00 -6.79600286e+00
  -8.64687634e+00 -7.38684893e+00 -5.75405884e+00 -1.46051264e+01
  -8.19199753e+00 -5.89658117e+00 -6.64831114e+00 -1.15801275e+00
  -8.78623295e+00 -5.69665527e+00]
 [-1.45442381e+01 -1.35115089e+01 -3.24283191e-03 -1.71673183e+01
  -9.74234962e+00 -1.40030909e+01 -1.78588600e+01 -2.02679958e+01
  -1.40007391e+01 -1.09198074e+01 -1.34186

  -1.47436638e+01 -7.88697624e+00]]
[25  2 41 29 37  3  2 41  3  2 41 29 17  3 13  3 37 32]
If
competitor
enters
game
Hahn
face
dilemma
paying
premium
Nekoosa
seeing
fall
arms
rival
[[-6.4686685e+00 -9.0702152e+00 -9.3415956e+00 ... -6.7616277e+00
  -8.2000256e+00 -4.4674048e+00]
 [-1.2807113e+01 -1.1477259e+01 -4.1359845e-03 ... -1.8135605e+01
  -8.7015820e+00 -9.8196421e+00]
 [-8.4684343e+00 -7.8837838e+00 -2.2058916e+00 ... -6.0993433e+00
  -4.9853349e+00 -8.2318872e-01]
 ...
 [-1.4557577e+01 -1.4761405e+01 -5.5680916e-02 ... -1.7859007e+01
  -9.1557732e+00 -3.3032396e+00]
 [-1.3000325e+01 -1.3655271e+01 -5.3425312e+00 ... -9.3021517e+00
  -9.1070576e+00 -8.4425581e-01]
 [-1.4410124e+01 -9.4294081e+00 -1.5401391e+01 ... -1.0284744e+01
  -1.4327571e+01 -6.8520823e+00]]
[29  2 41 37  2 41 17  3 41 17 39 41 41 13  2 41  3 41  2 37  3 41 25 39
  2 41 13  3  2 41  3  2 37 32]
Given
associates
Hahn
industry
observers
former
university
developed
reputation
overpaying
anything
fold
[[ -9.21

  -1.39739380e+01 -7.20939827e+00]]
[34 13  7  2 41 37 17 29 17 32]
bid
Nekoosa
made
six
Georgia-Pacific
management
committee
signed
onto
deal
Hahn
knew
wanted
go
says
Correll
[[-1.32128687e+01 -1.10376730e+01 -5.70810772e-03 ... -1.69944229e+01
  -7.99367476e+00 -8.78034210e+00]
 [-1.22643280e+01 -1.34900656e+01 -8.92562199e+00 ... -1.13176346e+01
  -1.05649176e+01 -4.26970273e-02]
 [-1.46992712e+01 -1.79858379e+01 -6.33670950e+00 ... -1.56555252e+01
  -6.66984081e+00 -1.22697878e+01]
 ...
 [-1.07940187e+01 -7.65466404e+00 -1.29960575e+01 ... -1.87394172e-01
  -8.74325848e+00 -5.06911755e+00]
 [-1.04318953e+01 -8.22438812e+00 -8.92875481e+00 ... -8.30388129e-01
  -7.42025328e+00 -3.24992037e+00]
 [-1.50744343e+01 -1.00671158e+01 -1.62163754e+01 ... -1.11244431e+01
  -1.49316273e+01 -7.89651442e+00]]
[ 2 41  4  7  2 37  3 41 17  3 41 17 22 29 39 39 13 13 39  3 39 28 13 41
 41 37  2 41  2 13 39 39 39 29 34 39  4 12 39  2 41 13  3 17  7 39 39 32]
Associates
Hahn
picked
careful
management

  -1.47272558e+01 -7.60925674e+00]]
[37 25 39 37 41  7  3 13 39  4 15 39 41  3 41 37 29 32]
Assuming
post
age
35
universities
says
Warren
H.
Strother
university
official
researching
book
Hahn
[[ -8.54129    -7.762334   -9.074556  ...  -1.6796613  -7.190757
   -2.6458535]
 [ -9.128885   -4.7403307 -10.883323  ...  -7.329331  -12.420378
   -6.948624 ]
 [ -7.737363   -1.8935393  -9.265485  ...  -1.2040018  -8.269708
   -6.6676126]
 ...
 [-11.310054  -10.367579  -16.682907  ...  -0.4784997 -10.692769
   -2.658553 ]
 [ -9.513903  -13.095602  -10.1379795 ...  -4.333945   -6.0318274
   -0.8985076]
 [-15.004377  -10.135673  -16.455408  ... -10.94428   -14.888194
   -7.8515253]]
[37  3 39  3  2 41  3 41 17 34 37  3 41 17  3  7  2 41  3 12 17 41 25  2
 41 17  2 41 41 19  7 35  2 41  3 39 29 32]
showed
willingness
strong
stand
[[-9.36292171e+00 -7.02192497e+00 -1.43826065e+01 -3.29217339e+00
  -1.16324224e+01 -8.71307945e+00 -7.71316910e+00 -2.28763008e+00
  -7.18620872e+00 -6.45694351e+00 -6.377

  -1.49370289e+01 -7.93472767e+00]]
[25 34 13 37  2 37  4 12  2 41 41 32]
1970
Hahn
called
state
police
arrest
student
protesters
occupying
university
[[-9.28283691e+00 -9.07416725e+00 -1.39815350e+01 -1.96065331e+00
  -1.49453354e+01 -9.07947350e+00 -5.67427158e+00 -2.59906101e+00
  -6.65483570e+00 -6.26234770e+00 -6.30395842e+00 -3.60295606e+00
  -8.79967117e+00 -2.91325665e+00 -5.69336319e+00 -5.72151423e+00
  -5.54159594e+00 -8.66260147e+00 -5.38063002e+00 -6.50279570e+00
  -9.52325630e+00 -7.57318544e+00 -6.00195169e+00 -1.05679226e+01
  -5.58299398e+00 -4.42863131e+00 -8.88554001e+00 -8.26035881e+00
  -1.05829945e+01 -3.76185942e+00 -6.98256445e+00 -6.17922449e+00
  -1.35792351e+01 -7.51752615e+00 -7.87917423e+00 -1.35923729e+01
  -8.33157730e+00 -3.38848829e+00 -6.57456493e+00 -6.45452976e-01
  -8.80037498e+00 -2.64169741e+00]
 [-9.54654312e+00 -1.29277420e+01 -2.31128860e+00 -1.59846888e+01
  -6.04975986e+00 -9.56195450e+00 -7.85250568e+00 -1.23749390e+01
  -7.17564678e+00 -5.6

  -1.54382992e+01 -7.89628363e+00]]
[39 37 17 39 37 18  3 39 41  4 12 41 29  7 13  2  2 41 41 32]
impressed
B.
Pamplin
Georgia-Pacific
whom
Hahn
met
fundraising
institute
[[-9.82348156e+00 -3.81680679e+00 -1.20053978e+01 ... -2.46801853e+00
  -1.19933052e+01 -1.39026892e+00]
 [-1.14180298e+01 -1.35844936e+01 -1.43357906e+01 ... -4.41193008e+00
  -1.04377394e+01 -2.82997936e-01]
 [-9.93116570e+00 -8.73470688e+00 -1.12076435e+01 ... -4.06207353e-01
  -7.60227108e+00 -2.58165288e+00]
 ...
 [-1.43353195e+01 -1.50163221e+01 -6.44770311e-03 ... -1.73288879e+01
  -7.70078611e+00 -6.21242666e+00]
 [-1.19239035e+01 -1.48992224e+01 -6.77163935e+00 ... -8.77393723e+00
  -8.00001335e+00 -3.73696625e-01]
 [-1.45969954e+01 -9.45715904e+00 -1.55662231e+01 ... -1.04314680e+01
  -1.44563885e+01 -7.30848360e+00]]
[ 3 41 39 37 37 17 13 28 41 41  3  2 37 17 39 39 39 22 15 41 13  3  2 41
 32]
1975
Pamplin
enticed
Hahn
charge
chemicals
;
move
befuddled
Georgia-Pacific
university
administrator
transition
[[ 

  -1.47740479e+01 -7.84032631e+00]]
[25 39 18 28 19 39  2  2 17 39  2 41 39  3 34  1 34 39 34 37  1  3 32]
son
physicist
Hahn
skipped
grade
reading
far
above
classmates
[[-1.32128687e+01 -1.10376730e+01 -5.70810772e-03 -1.71910019e+01
  -6.50405931e+00 -1.24738169e+01 -1.72887402e+01 -1.80980415e+01
  -1.28813543e+01 -9.77110291e+00 -1.21457567e+01 -1.39470911e+01
  -9.08359814e+00 -1.45035734e+01 -1.31975679e+01 -1.26201344e+01
  -1.33333654e+01 -1.84077549e+01 -7.27850342e+00 -1.30182877e+01
  -1.30294409e+01 -8.30816555e+00 -1.73428020e+01 -1.13013668e+01
  -6.24471712e+00 -1.77162628e+01 -1.28421850e+01 -7.95026875e+00
  -1.22325516e+01 -1.00342474e+01 -1.06178751e+01 -1.48556566e+01
  -9.36816883e+00 -1.34534359e+01 -1.25918512e+01 -1.03452024e+01
  -1.28110094e+01 -1.01778440e+01 -1.16710443e+01 -1.69944229e+01
  -7.99367476e+00 -8.78034210e+00]
 [-6.75549889e+00 -8.25161934e+00 -7.42307854e+00 -3.56838298e+00
  -1.06870613e+01 -7.15709114e+00 -3.36848664e+00 -2.76212358e+00
  -5

  -1.52298317e+01 -7.95891047e+00]]
[ 2 29  3  2 37 17 39 25 39 25  7  3  1 41  3 22 13 37 29  1 41 32]


In [12]:
%run tagger_eval.py corpus-small.out corpus-small.answer

Accuracy= 0.5592885375494071
