In [1]:
from datasetUtil import loadNoisyInputLabel, saveNoisyInputLabel, saveNoisyInput, readVocab
from tokenization import FullTokenizer
import numpy as np
from sklearn.metrics import confusion_matrix
from copy import copy
from nltk.metrics.distance import edit_distance
from time import time

In [2]:
# Locations of the character-level and word-level vocabulary files.
char_path = "data/charVocab.txt"
word_path = "data/wordVocab.txt"
# readVocab is a project helper (datasetUtil). wordVocab is used further down for
# membership tests (`token in wordVocab`) to separate real-word from non-word tokens.
charVocab, wordVocab = readVocab(char_path, word_path)

In [3]:
def adjust(prediction, gold, i):
    """Return a copy of `prediction` whose length is one step closer to `gold`.

    When `prediction` has fewer tokens than `gold`, an empty-string placeholder
    is inserted at index `i`; otherwise the token at index `i` is removed.
    The list passed in is never modified.
    """
    adjusted = copy(prediction)
    needs_padding = len(adjusted) < len(gold)
    if needs_padding:
        adjusted.insert(i, "")
        return adjusted
    adjusted.pop(i)
    return adjusted

In [4]:
def adjust1token(prediction, gold, start, time_threshold):
    """Apply the single best one-token length adjustment to `prediction`.

    Every index is tried with `adjust`; the candidate whose space-joined text has
    the smallest edit distance to the space-joined `gold` wins (the earliest index
    wins ties, since `np.argmin` returns the first minimum).

    Returns `(adjusted_prediction, gold)`, or `(None, None)` as soon as more than
    `time_threshold` seconds have elapsed since `start` (a `time()` timestamp).
    """
    candidates = []
    distances = []
    for position in range(max(len(prediction), len(gold))):
        # Abandon this sentence once the time budget is exhausted.
        if time() - start > time_threshold:
            return None, None
        candidate = adjust(prediction, gold, position)
        candidates.append(candidate)
        distances.append(edit_distance(" ".join(candidate), " ".join(gold)))
    return candidates[np.argmin(distances)], gold

In [5]:
def adjustTokens(prediction, gold, time_threshold=1):
    """Pad or trim `prediction` one token at a time until it is as long as `gold`.

    Each step delegates to `adjust1token`, which picks the insertion/removal that
    keeps the prediction closest to `gold` in edit distance.

    Args:
        prediction: list of predicted tokens.
        gold: list of reference tokens to align against.
        time_threshold: overall time budget in seconds for the whole alignment
            (defaults to 1, the previously hard-coded value).

    Returns:
        The aligned prediction list, or None if the time budget ran out first.
        When the lengths already match, `prediction` is returned unchanged.
    """
    start = time()
    # adjust1token signals a timeout by returning None as the prediction.
    while prediction is not None and len(prediction) != len(gold):
        prediction, _ = adjust1token(prediction, gold, start, time_threshold)
    return prediction

In [15]:
# Inputs for the evaluation below. The alignment cells read the three sources in
# lock-step (zip), one sentence per entry.
prediction_file = "predictions/vanilla_mix_test.0.final"  # tab-separated: tokens, predictions, scores
input_file = "data/test.txt"  # whitespace-tokenised input sentences
label_file = "data/test_label.jsonl"  # gold corrections, loaded via loadNoisyInputLabel

In [16]:
# One entry per sentence; each entry is used below as a mapping from token index
# to gold token (`for k, v in label.items(): input_tokens[k] = v`).
labels = loadNoisyInputLabel(label_file)

In [17]:
# Align by prediction
#
# Rebuild one predicted token per input token from the prediction file, driven by
# the predicted tag of each piece. Each prediction line holds three tab-separated
# fields (tokens, predictions, scores). Sentences whose cleaned prediction length
# differs from the input are re-aligned with adjustTokens; sentences that cannot
# be aligned within its time budget are dropped.

predictions = []
all_cleaned_tokens = []
all_original_tokens = []
all_labels = []
count = 0  # sentences skipped because alignment timed out
# Explicit UTF-8: the data contains non-ASCII characters, so relying on the
# platform default encoding is not portable.
with open(prediction_file, encoding="utf-8") as f, open(input_file, encoding="utf-8") as fi:
    for j, (line, input_line, label) in enumerate(zip(f, fi, labels)):
        if j % 100 == 0:
            print(j)
        input_tokens = input_line.strip().split()
        # Untouched copy: input_tokens may be overwritten with gold tokens below.
        input_original_tokens = list(input_tokens)
        # Unpacking enforces exactly three tab-separated fields; scores are unused here.
        raw_text, raw_prediction, raw_score = line.strip().split("\t")
        raw_tokens = raw_text.split()
        raw_prediction = raw_prediction.split()
        # Never filled in this cell; appended only to keep all_cleaned_tokens parallel.
        cleaned_tokens = []
        cleaned_prediction = []
        for token, prediction in zip(raw_tokens, raw_prediction):
            if prediction[:2] == "B-":
                # Strip the "B-" prefix to get the predicted token.
                cleaned_prediction.append(prediction[2:])
            elif prediction == "O":
                if token[:2] == "##":
                    # NOTE(review): prediction is "O" here, so prediction[2:] is the
                    # empty string -- confirm an empty placeholder is intended for
                    # "##" pieces (presumably WordPiece continuations).
                    cleaned_prediction.append(prediction[2:])
                else:
                    cleaned_prediction.append(prediction)
            # Any other tag contributes nothing to the cleaned prediction.

        if len(cleaned_prediction) != len(input_tokens):
            # Align against the gold sentence: label maps token index -> gold token.
            for k, v in label.items():
                input_tokens[k] = v
            cleaned_prediction = adjustTokens(cleaned_prediction, input_tokens)
            if cleaned_prediction is None:
                print("Skipping.")
                count += 1
                continue
            print("Aligned!")
        predictions.append(cleaned_prediction)
        all_cleaned_tokens.append(cleaned_tokens)
        all_original_tokens.append(input_original_tokens)
        all_labels.append(label)

0
100
200
Aligned!
Aligned!
300
Aligned!
400
Skipping.
Aligned!
Aligned!
500
Skipping.
Aligned!
600
700
Aligned!
Aligned!
Skipping.
800
Aligned!
900
Aligned!
1000
Aligned!
Aligned!
Aligned!
1100
Aligned!
Aligned!
1200
Aligned!
Skipping.
Skipping.
Aligned!
Aligned!
1300
1400
Skipping.
Aligned!
Aligned!
1500
1600
1700
Aligned!
1800
1900
2000
Aligned!
Aligned!
2100
2200
2300
Aligned!
2400
Skipping.
Skipping.
2500
Aligned!
Aligned!
Aligned!
Skipping.
2600
Aligned!
2700
Aligned!
2800
Aligned!
2900
3000
Aligned!
Aligned!
3100
3200
3300
Aligned!
3400
Aligned!
3500
3600
Skipping.
3700
3800
Aligned!
Aligned!
3900
4000
Aligned!
Aligned!
4100
4200
Aligned!
Aligned!
4300
Aligned!
Aligned!
Aligned!
4400
Aligned!
4500
4600
Aligned!
4700
Aligned!
4800
Aligned!
Aligned!
Aligned!
Skipping.
4900
Aligned!
Skipping.
5000
Aligned!
5100
Aligned!
5200
Aligned!
Aligned!
5300
Aligned!
Aligned!
5400
Aligned!
5500
Skipping.
5600
Aligned!
5700
5800
Aligned!


In [18]:
# Number of sentences skipped because alignment timed out.
count

13

In [19]:
# Total number of labelled sentences loaded from label_file.
len(labels)

5857

In [20]:
# Sentences kept after alignment; should equal len(labels) minus the skipped count.
len(predictions)

5844

In [21]:
def scores(TP, FP, FN, TN, beta = 0.5):
    """Compute precision, recall, accuracy and F-beta from confusion counts.

    Args:
        TP, FP, FN, TN: true/false positive/negative counts.
        beta: weight of recall relative to precision in the F-score
            (0.5 favours precision).

    Returns:
        Tuple `(precision, recall, accuracy, F_score)`. A ratio whose denominator
        is zero (e.g. no positive predictions, or an empty subset) is reported as
        0.0 instead of raising ZeroDivisionError.
    """
    def _ratio(numerator, denominator):
        # Undefined ratios are mapped to 0.0 so empty subsets do not crash the cell.
        return numerator / denominator if denominator else 0.0

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    accuracy = _ratio(TP + TN, TP + FP + FN + TN)
    F_score = _ratio((1 + beta**2) * (precision * recall), beta**2 * precision + recall)
    return precision, recall, accuracy, F_score

In [22]:
# Token-level evaluation of the corrections.
#   positive = the model changed the input token (token != pred)
#   correct  = the prediction equals the gold token (pred == gold)
# Counts are kept overall and split by whether the *input* token is in wordVocab
# ("real-word" tokens) or not ("non-word" tokens).
TP = FP = FN = TN = 0
real_TP = real_FP = real_FN = real_TN = 0
non_TP = non_FP = non_FN = non_TN = 0

for tokens, prediction, ground_truth in zip(all_original_tokens, predictions, all_labels):
    # Gold sentence = input sentence with the labelled positions replaced.
    correct_tokens = copy(tokens)
    for i, gold in ground_truth.items():
        correct_tokens[i] = gold
    # Show input / gold / prediction for manual inspection, then a blank line.
    for row in (tokens, correct_tokens, prediction):
        print(" ".join(row))
    print()
    for token, gold, pred in zip(tokens, correct_tokens, prediction):
        changed = token != pred
        matches_gold = pred == gold
        is_real_word = token in wordVocab
        if changed and matches_gold:
            TP += 1
            if is_real_word:
                real_TP += 1
            else:
                non_TP += 1
        elif changed:
            FP += 1
            if is_real_word:
                real_FP += 1
            else:
                non_FP += 1
        elif matches_gold:
            TN += 1
            if is_real_word:
                real_TN += 1
            else:
                non_TN += 1
        else:
            FN += 1
            if is_real_word:
                real_FN += 1
            else:
                non_FN += 1

# Overall metrics.
precision, recall, accuracy, F_score = scores(TP, FP, FN, TN, beta=0.5)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F 0.5:", F_score)

# Metrics restricted to input tokens found in wordVocab.
real_precision, real_recall, real_accuracy, real_F_score = scores(
    real_TP, real_FP, real_FN, real_TN, beta=0.5
)
print("Real-word Accuracy:", real_accuracy)
print("Real-word Precision:", real_precision)
print("Real-word Recall:", real_recall)
print("Real-word F 0.5:", real_F_score)

# Metrics restricted to input tokens not found in wordVocab.
non_precision, non_recall, non_accuracy, non_F_score = scores(
    non_TP, non_FP, non_FN, non_TN, beta=0.5
)
print("Non-word Accuracy:", non_accuracy)
print("Non-word Precision:", non_precision)
print("Non-word Recall:", non_recall)
print("Non-word F 0.5:", non_F_score)

Peers fear the judgment of their peers and try to talk sense .
Peers fear the judgement of their peers and try to talk sense .
Peers fear the judgment of their peers and try to make sense .

Many friends in London sim puzzled by our decision .
Many friends in London seem puzzled by our decision .
Many friends in London seem puzzled by our decision .

The largest category of athletic footware revenue in 2007 was " walking shoes , " which accounted for $ 4.2 bilion in retail salses .
The largest category of athletic footwear revenue in 2007 was " walking shoes , " which accounted for $ 4.2 billion in retail sales .
The largest category of athletic footwear revenue in 2007 was " walking shoes , " which accounted for $ 4.2 billion in retail sales .

Former employees confirm that their bosses where very resistant to paying anyone back .
Former employees confirm that their bosses were very resistant to paying anyone back .
Former employees confirm that their bosses where very resistant to pa

I 've haved houas nand hours of therapy on this very point .
I 've had hours and hours of therapy on this very point .
I 've had hours and hours of therapy on this very point .

Keller , born and rise in Indiana , was originally recruited by the Boilermakers to play wide receiver .
Keller , born and raised in Indiana , was originally recruited by the Boilermakers to play wide receiver .
Keller , born and raised in Indiana , was originally recruited by the Boilermakers to play wide receiver .

N'º rest for trhe wery .
No rest for the weary .
No rest for The weary .

I made the average Joe ror Mary salling stuff froom their garage , home or trailler look like a respectable large merchent .
It made the average Joe or Mary selling stuff from their garage , home or trailer look like a respectable large merchant .
It made the average Joe or Mary selling stuff from their garage , home or trailer look like a respectable large machine .

He also said Bay would help the defense in lost field , a


You can 't hely but love her and her music !
You can 't help but love her and her music !
You can 't help but love her and her music !

Unlike churches that draw crowds only on Sunndays , Magnus said , the buddhist centre will create traffic and parking problems and disrupt Athe Massachusetts Avenue Hights neighborhood sereval days of the waek .
Unlike churches that draw crowds only on Sundays , Magnus said , the Buddhist center will create traffic and parking problems and disrupt the Massachusetts Avenue Heights neighborhood several days of the week .
Unlike churches that draw crowds only on Sundays , Magnus said , the Buddhist center will create traffic and parking problems and disrupt the Massachusetts Avenue Heights neighborhood several days of the week .

25 ( UPI ) -- Authorities say de black Connecticut judge encharged with drunken driving allegedly used racial slurs during here arrest by a black state police sergant .
25 ( UPI ) -- Authorities say a black Connecticut judge cha

BUENOS AIRES , Argentina ( CNN ) -- Argentina 's President Cristina Fernandez de Kirchner has held power since 2003 , and Sunday 's midterm elections will prove pivotal to her hold on power .
BUENOS AIRES , Argentina ( CNN ) -- Argentina 's President Cristina Fernandez de Kirchner has held power since 2003 , and Sunday 's midterm elections will prove pivotal to her hold on power .

Both have alsl pointed to their remarks , made befroe the inavassion , if which they said they would like to see weapons inspectors finish thei work in Iraq bef_for launching and attack - a distinction that has allowed both Clintons tu claim consistency and Iraq .
Both have also pointed to their remarks , made before the invasion , in which they said they would like to see weapons inspectors finish their work in Iraq before launching an attack - a distinction that has allowed both Clintons to claim consistency on Iraq .
Both have also pointed to their remarks , made before the invasion , in which they said t

Newspaper lifestyle supplements are full of advice oon " lookking good for less " and Haw to cook cheap cuts of meat .
Newspaper lifestyle supplements are full of advice on " looking good for less " and how to cook cheap cuts of meat .
Newspaper lifestyle supplements are full of advice on " looking good for less " and how to cook cheap cuts of meat .

We simplx won 't realize it for six more months .
We simply won 't realize it for six more months .
We simply won 't realize it for six more months .

I 'm guessing hes did .
I 'm guessing he did .
I 'm guessing he did .

Q Im are recent column aout listing agreements , you wrote tkat na never advice sellers to sing in agreement loger tnan 90 days .
Q In a recent column about listing agreements , you wrote that you never advise sellers to sign an agreement longer than 90 days .
Q In a recent column about listing agreements , you wrote that you never advise sellers to sign an agreement longer than 90 days .

Aer Lingus , the loss-making Ir

He was also a skilful painter and illustrator .

He had better get yourst to the limelight .
He had better get used to the limelight .
He had better get used to the limelight .

That air oft uncertainty specifically surrounded two key members of the franchise -- the thirty baseman Mike Lowell and the pitcher Curt Schilling .
That air of uncertainty specifically surrounded two key members of the franchise -- the third baseman Mike Lowell and the pitcher Curt Schilling .
That air of uncertainty specifically surrounded two key members of the franchise -- the third baseman Mike Lowell and the pitcher Curt Schilling .

Iff it 's too thick add a little hot water .
If it 's too thick add a little hot water .
If it 's too thick add a little hot water .

He 's spent more wich $ 23th million soooo far o his re-election effert almost all of it his own money .
He 's spent more than $ 23 million so far on his re-election effort almost all of it his own money .
He 's spent more than $ 23 million so 

The then Met commissioner , Sir John Stevens , was investigating allegations of collusion between loyalist paramilitaries and elements of d security forces .
The then Met commissioner , Sir John Stevens , was investigating allegations of collusion between loyalist paramilitaries and elements of the security forces .
The then Met commissioner , Sir John Stevens , was investigating allegations of collusion between loyalist paramilitaries and elements of the security forces .

howeve , manufacturers ere startying TO eliminate some toxic chemicals from thear productos , Greenpeace said .
However , manufacturers are starting to eliminate some toxic chemicals from their products , Greenpeace said .
However , manufacturers are starting to eliminate some toxic chemicals from their products , Greenpeace said .

It was my wire , then a former student , who uggested tath we incorporate .
It was my wife , then a former student , who suggested that we incorporate .
It was my wire , then a former st

Theo Walcott threatened when the winger exchanged passes with Denilson before curling a shot past the far post .
Theo Walcott threatened when the winger exchanged passes with Denilson before curling a shot past the far post .

US dolare 2.5 million .
US $ 2.5 million .
US $ 2.5 million .

This netry was posted be Gwen Robinson on Tuesday , January 26th , 2010 at 4 : 44 and is fild under Briefings , Capital markets .
This entry was posted by Gwen Robinson on Tuesday , January 26th , 2010 at 4 : 44 and is filed under Briefings , Capital markets .
This entry was posted by Gwen Robinson on Tuesday , January 26th , 2010 at 4 : 44 and is filed under Briefings , Capital markets .

Ward 's conditions were unsual .
Ward 's conditions were unusual .
Ward 's conditions were unusual .

Know those countries should help solve it .
Now those countries should help solve it .
Now those countries should help solve it .

When yau were asked to direct that you certainly didn 't have aene experience at ein

The first of the Starz titles -- about 1,000 -- are available immediately at the Netflix Web site and more titles will be added in coming weeks , the companies said in a release .
The first of the Starz titles -- about 1,000 -- are available immediately at the Netflix Web site and more titles will be added in coming weeks , the companies said in a release .

Nkunda called 0n government forces to follow suit , that BBC ded .
Nkunda called on government forces to follow suit , the BBC said .
Nkunda called on government forces to follow suit , the BBC said .

" To lose one would be disasterous .
" To lose one would be disastrous .
" To lose one would be disastrous .

Jail records do not schow if Reid has a attorney .
Jail records do not show if Reid has an attorney .
Jail records do not show if Reid has an attorney .

So how is it that pharmacies where I go for the large corpotations drugs can be anywhere ... yet this dispensary needs to meet different conditions ?
So how is it that pharm

For Hispanics , insurance reform is crucial : One out of three Hispanics under 65 is uninsured , and many mooe lack adequate coverage .
For Hispanics , insurance reform is crucial : One out of three Hispanics under 65 is uninsured , and many more lack adequate coverage .
For Hispanics , insurance reform is crucial : One out of three Hispanics under 65 is uninsured , and many more lack adequate coverage .

Firefighters are battling a hosre fire after a report of a plane crash near the North Las Vegas Airport in southern Nevada .
Firefighters are battling a house fire after a report of a plane crash near the North Las Vegas Airport in southern Nevada .
Firefighters are battling a house fire after a report of a plane crash near the North Las Vegas airport in southern Nevada .

Asked wheter Pressly may have been targated because ot her job , Davis z officers were talking to station imployees to determine whether Pressly " has aud any problems . "
Asked whether Pressly may have been targete

The FBI tentatively identified the pilot as Joseph Stack .
The FBI tentatively identified the pilot as Joseph Stack .

He said he was initially skeptical of the claim until Mayweather , a farmer world chanpion , chatted with him via the game 's headset .
He said he was initially skeptical of the claim until Mayweather , a former world champion , chatted with him via the game 's headset .
He said he was initially skeptical of the claim until Mayweather , a former world champion , chatted with him via the game 's headset .

As such , the wholesale invetory figures dos out generally sway investor activity one way or another .
As such , the wholesale inventory figures do not generally sway investor activity one way or another .
As such , the wholesale inventory figures do not generally sway investor activity one way or another .

" People desided if they need help , than appy fror assistance and , ih appoved , hthe government andr mi HOME Program supports then in getting la new mortgage th

Bud of thousands fo women , that 's knot true .
But for thousands of women , that 's not true .
But for thousands of women , that 's not true .

My endest post was born purely out of greed .
My last post was born purely out of greed .
My last post was born purely out of greed .

Things didn 't get much better once I was of of school and living in the adult world .
Things didn 't get much better once I was out of school and living in the adult world .
Things didn 't get much better once I was out of school and living in the adult world .

Orders fomr Washington ?
Orders from Washington ?
Orders from Washington ?

And I don 't think dat 's go to disappear bery any finding of de liminted rewiev of at relatively smalll noumber o ballots , " Clinton toled reporters with Washigton .
And I don 't think that 's going to disappear by any finding of a limited review of a relatively small number of ballots , " Clinton told reporters in Washington .
And I don 't think that 's going to disappear by

In [None]:
# Real-word tokens (input token in wordVocab) counted as TP, FP or FN, i.e. all
# real-word decisions except true negatives.
real_TP+real_FP+real_FN

In [None]:
# Non-word tokens (input token not in wordVocab) counted as TP, FP or FN, i.e. all
# non-word decisions except true negatives.
non_TP+non_FP+non_FN

In [35]:
# F0.5 by relative position in the sentence: tokens are bucketed into five equal
# slices (first 20% of the sentence, next 20%, ...) and each slice keeps its own
# confusion counts. Note that TP/FP/FN/TN are rebound to per-bucket lists here.
TP = [0] * 5
FP = [0] * 5
FN = [0] * 5
TN = [0] * 5

for tokens, prediction, ground_truth in zip(all_original_tokens, predictions, all_labels):
    # Gold sentence = input sentence with the labelled positions replaced.
    correct_tokens = copy(tokens)
    for i, gold in ground_truth.items():
        correct_tokens[i] = gold

    sentence_length = len(correct_tokens)
    for i, (token, gold, pred) in enumerate(zip(tokens, correct_tokens, prediction)):
        # Relative position in [0, 1) mapped onto bucket 0..4.
        pos = i / sentence_length
        index = int(pos * 10 // 2)
        changed = token != pred
        if pred == gold:
            bucket_counts = TP if changed else TN
        else:
            bucket_counts = FP if changed else FN
        bucket_counts[index] += 1

# One F0.5 value per position bucket, first slice to last.
for i in range(5):
    precision, recall, accuracy, F_score = scores(TP[i], FP[i], FN[i], TN[i], beta=0.5)
    print(F_score)

0.9134338390628001
0.9084647175876321
0.8971886000868013
0.9156906398218954
0.9114824031283327


In [None]:
# Flat gold tag sequence over all kept sentences: "M" at every token index that
# has a gold correction, "O" everywhere else.
all_gold_MO = []
# The loop variable must not be called `labels`: that would rebind the global
# `labels` list, which the alignment cells iterate over. Iterating the dict
# directly also avoids assigning `_`, IPython's last-output variable.
for sentence, sentence_labels in zip(all_original_tokens, all_labels):
    gold_MO = ["O"] * len(sentence)
    for i in sentence_labels:
        gold_MO[i] = "M"
    all_gold_MO.extend(gold_MO)

In [None]:
# Concatenate the per-sentence prediction lists into one flat token-level list.
flattened_predictions = [tag for sentence_tags in predictions for tag in sentence_tags]

In [None]:
# 2x2 confusion matrix with label order ["O", "M"], so ravel() yields
# (tn, fp, fn, tp) with "M" as the positive class.
# NOTE(review): this assumes flattened_predictions holds "O"/"M" tags and has the
# same length as all_gold_MO -- confirm which alignment cell produced `predictions`.
tn, fp, fn, tp = confusion_matrix(all_gold_MO,flattened_predictions,labels=["O","M"]).ravel()
print(tn,fp,fn,tp)

In [None]:
# Precision for the "M" class from the confusion-matrix counts above.
print("Precision:",tp / (tp+fp))

In [None]:
# Recall for the "M" class from the confusion-matrix counts above.
print("Recall:",tp / (tp+fn))

In [None]:
# Align by token
#
# Rebuild one predicted token per input token from the prediction file, driven by
# the token column: pieces starting with "##" are skipped and every other piece
# contributes exactly one prediction. Each prediction line holds three
# tab-separated fields (tokens, predictions, scores). Sentences whose cleaned
# prediction length differs from the input are re-aligned with adjustTokens;
# sentences that cannot be aligned within its time budget are dropped.

predictions = []
all_cleaned_tokens = []
all_original_tokens = []
all_labels = []
skipped = 0  # sentences dropped because alignment timed out
aligned = 0  # sentences that needed, and got, a length alignment
# Explicit UTF-8: the data contains non-ASCII characters, so relying on the
# platform default encoding is not portable.
with open(prediction_file, encoding="utf-8") as f, open(input_file, encoding="utf-8") as fi:
    for j, (line, input_line, label) in enumerate(zip(f, fi, labels)):
        if j % 100 == 0:
            print(j)
        input_tokens = input_line.strip().split()
        # Untouched copy: input_tokens may be overwritten with gold tokens below.
        input_original_tokens = list(input_tokens)
        # Unpacking enforces exactly three tab-separated fields; scores are unused here.
        raw_text, raw_prediction, raw_score = line.strip().split("\t")
        raw_tokens = raw_text.split()
        raw_prediction = raw_prediction.split()
        # Never filled in this cell; appended only to keep all_cleaned_tokens parallel.
        cleaned_tokens = []
        cleaned_prediction = []
        for token, prediction in zip(raw_tokens, raw_prediction):
            if token[:2] == "##":
                # Pieces prefixed with "##" never produce a prediction of their own.
                continue
            if len(prediction) > 2:
                # Longer than two characters: drop the two-character prefix (e.g. "B-").
                cleaned_prediction.append(prediction[2:])
            else:
                cleaned_prediction.append(prediction)

        if len(cleaned_prediction) != len(input_tokens):
            # Align against the gold sentence: label maps token index -> gold token.
            for k, v in label.items():
                input_tokens[k] = v
            cleaned_prediction = adjustTokens(cleaned_prediction, input_tokens)
            if cleaned_prediction is None:
                skipped += 1
                continue
            aligned += 1
        predictions.append(cleaned_prediction)
        all_cleaned_tokens.append(cleaned_tokens)
        all_original_tokens.append(input_original_tokens)
        all_labels.append(label)