Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
data, metrics from is13 repo converted to python3
- Loading branch information
0 parents
commit 9e9ec55
Showing
4 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from __future__ import print_function | ||
import gzip | ||
try: #python2 | ||
import cPickle as pickle | ||
from urllib import urlretrieve | ||
except ImportError: #python3 | ||
import pickle | ||
from urllib.request import urlretrieve | ||
|
||
import os | ||
import random | ||
from os.path import isfile | ||
|
||
PREFIX = os.getenv('ATISDATA', '') | ||
|
||
def download(origin, destination=None):
    '''
    Download the file at URL ``origin`` to a local path.

    Historically used for the ATIS files hosted at
    http://www-etud.iro.umontreal.ca/~mesnilgr/atis/

    origin      :: URL of the file to retrieve
    destination :: local path to write to. When omitted, the last
                   '/'-separated component of ``origin`` is used, i.e. the
                   file lands in the current working directory (the only
                   behaviour available before this parameter existed).

    Returns the path the file was written to.
    '''
    print('Downloading data from %s' % origin)
    if destination is None:
        destination = origin.split('/')[-1]
    urlretrieve(origin, destination)
    return destination
|
||
def download_dropbox():
    '''
    Fetch atis.pkl from Dropbox with ``wget``.

    The file is written to ``PREFIX + 'atis.pkl'``, the same path that
    atisfull() asks load_dropbox() to open.

    Raises subprocess.CalledProcessError if wget exits with a non-zero
    status, or OSError if wget cannot be started.
    '''
    # Local import: the import header of this module does not pull in subprocess.
    import subprocess

    url = 'https://www.dropbox.com/s/3lxl9jsbw0j7h8a/atis.pkl?dl=0'
    target = PREFIX + 'atis.pkl'
    print('Downloading data from %s' % url)
    try:
        # Argument list instead of a shell string: the '?' in the URL is a
        # glob character and must not be interpreted by a shell.
        subprocess.check_call(['wget', '-O', target, url])
    except (OSError, subprocess.CalledProcessError):
        # "wget -O" creates the output file before transferring anything;
        # remove the leftover so a later isfile() check does not mistake it
        # for a finished download.
        if isfile(target):
            os.remove(target)
        raise
|
||
def load_dropbox(filename):
    '''
    Open ``filename`` for binary reading, calling download_dropbox()
    first when the file does not exist yet.

    filename :: path of the (uncompressed) pickle file

    Returns an open binary file object; the caller is responsible for
    closing it.
    '''
    if not isfile(filename):
        download_dropbox()
    return open(filename, 'rb')
|
||
def load_udem(filename):
    '''
    Open the gzip-compressed file ``filename`` for binary reading,
    downloading it from the UdeM server first when it does not exist.

    filename :: local path of the .gz file. Only its final path component
                is appended to the server URL, so a directory prefix on
                the local path does not leak into the URL.
                (assumes any prefix is a directory path -- TODO confirm)

    Returns an open gzip file object; the caller is responsible for
    closing it.
    '''
    if not isfile(filename):
        url = ('http://lisaweb.iro.umontreal.ca/transfert/lisa/users/'
               'mesnilgr/atis/' + os.path.basename(filename))
        print('Downloading data from %s' % url)
        # Save straight to the path that is opened below rather than to
        # the current working directory.
        urlretrieve(url, filename)
    return gzip.open(filename, 'rb')
|
||
|
||
def atisfull():
    '''
    Load the full ATIS dataset from ``PREFIX + 'atis.pkl'``.

    Returns the tuple (train_set, test_set, dicts) exactly as stored in
    the pickle.
    '''
    # NOTE(security): the pickle comes from a download; unpickling can run
    # arbitrary code, so only use this with a source you trust.
    with load_dropbox(PREFIX + 'atis.pkl') as f:
        try:
            train_set, test_set, dicts = pickle.load(f)
        except UnicodeDecodeError:
            # The failed attempt already consumed part of the stream, so
            # rewind before retrying with the latin1 decoding.
            f.seek(0)
            train_set, test_set, dicts = pickle.load(f, encoding='latin1')
    return train_set, test_set, dicts
|
||
def atisfold(fold):
    '''
    Load one cross-validation fold of the ATIS dataset from
    ``PREFIX + 'atis.fold<fold>.pkl.gz'``.

    fold :: fold number, must be one of 0, 1, 2, 3, 4

    Returns the tuple (train_set, valid_set, test_set, dicts) exactly as
    stored in the pickle.

    Raises ValueError when ``fold`` is not a valid fold number.
    '''
    # Explicit raise instead of assert: asserts are stripped under "python -O".
    if fold not in range(5):
        raise ValueError('fold must be in range(5), got %r' % (fold,))

    # NOTE(security): the pickle comes from a download; unpickling can run
    # arbitrary code, so only use this with a source you trust.
    with load_udem(PREFIX + 'atis.fold' + str(fold) + '.pkl.gz') as f:
        try:
            train_set, valid_set, test_set, dicts = pickle.load(f)
        except UnicodeDecodeError:
            # The failed attempt already consumed part of the stream, so
            # rewind before retrying with the latin1 decoding.
            f.seek(0)
            train_set, valid_set, test_set, dicts = pickle.load(f, encoding='latin1')

    return train_set, valid_set, test_set, dicts
|
||
if __name__ == '__main__':
    # Visualize a few sentences: print every word of the train and test
    # sentences of fold 1 next to its label.
    import pdb

    # The full dataset is loaded as well, but only fold 1 is displayed.
    atisfull()
    train, _, test, dic = atisfold(1)

    w2idx, labels2idx = dic['words2idx'], dic['labels2idx']

    # Invert the vocabularies so indices can be printed as strings.
    idx2w = {w2idx[k]: k for k in w2idx}
    idx2la = {labels2idx[k]: k for k in labels2idx}

    test_x, test_ne, test_label = test
    train_x, train_ne, train_label = train
    wlength = 35

    splits = [
        (train_x, train_ne, train_label),
        (test_x, test_ne, test_label),
    ]
    for split_x, split_ne, split_label in splits:
        for sw, se, sl in zip(split_x, split_ne, split_label):
            print('WORD'.rjust(wlength), 'LABEL'.rjust(wlength))
            for wx, la in zip(sw, sl):
                print(idx2w[wx].rjust(wlength), idx2la[la].rjust(wlength))
            print('\n' + '**' * 30 + '\n')
            # NOTE(review): presumably meant to pause after every sentence;
            # the original indentation of this call was lost -- confirm.
            pdb.set_trace()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import numpy | ||
import random | ||
import os | ||
import stat | ||
import subprocess | ||
from os.path import isfile, join | ||
from os import chmod | ||
|
||
PREFIX = os.getenv('ATISDATA', '') | ||
|
||
def conlleval(p, g, w, filename):
    '''
    Write predictions in the format read by the conlleval.pl script and
    score them with get_perf().

    INPUT:
    p :: predictions, one sequence of label strings per sentence
    g :: groundtruth, one sequence of label strings per sentence
    w :: corresponding words, one sequence of strings per sentence
    filename :: name of the file where the predictions
    are written. it will be the input of conlleval.pl script
    for computing the performance in terms of precision
    recall and f1 score

    OUTPUT:
    whatever get_perf(filename) returns
    '''
    lines = []
    for sl, sp, sw in zip(g, p, w):
        lines.append('BOS O O\n')
        # One "word groundtruth prediction" line per token.
        lines.extend(
            word + ' ' + gold + ' ' + pred + '\n'
            for gold, pred, word in zip(sl, sp, sw)
        )
        lines.append('EOS O O\n\n')

    with open(filename, 'w') as f:
        f.writelines(lines)

    return get_perf(filename)
|
||
def get_perf(filename):
    '''
    Run the conlleval.pl perl script on ``filename`` to obtain
    precision/recall and F1 score.

    The script is expected at ``PREFIX + 'conlleval.pl'`` and is
    downloaded there with wget when missing.

    filename :: file in the format written by conlleval()

    Returns {'p': precision, 'r': recall, 'f1': f1score} as floats.

    Raises ValueError when the script output has no usable summary line,
    and subprocess.CalledProcessError / OSError when the download fails.
    '''
    _conlleval = PREFIX + 'conlleval.pl'
    if not isfile(_conlleval):
        url = ('https://www.comp.nus.edu.sg/%7Ekanmy/courses/'
               'practicalNLP_2008/packages/conlleval.pl')
        try:
            # Download to the path that is checked and executed, not to the
            # current working directory.
            subprocess.check_call(['wget', '-O', _conlleval, url])
        except (OSError, subprocess.CalledProcessError):
            # "wget -O" creates the file up front; do not leave a stub behind.
            if isfile(_conlleval):
                os.remove(_conlleval)
            raise
        chmod(_conlleval, stat.S_IRWXU)  # give the execute permissions

    with open(filename, 'rb') as predictions:
        data = predictions.read()
    proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate(data)

    out = None
    for line in stdout.decode("utf-8").split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break
    if out is None:
        raise ValueError('no accuracy line in conlleval.pl output for %s' % filename)

    # out = ['accuracy:', '16.26%;', 'precision:', '0.00%;', 'recall:', '0.00%;', 'FB1:', '0.00']
    # Each value is the token right after its label; percentages carry a
    # trailing '%;' that is cut off before conversion.
    precision = float(out[out.index('precision:') + 1][:-2])
    recall = float(out[out.index('recall:') + 1][:-2])
    f1score = float(out[out.index('FB1:') + 1])

    return {'p': precision, 'r': recall, 'f1': f1score}
|
||
def get_perfo(filename):
    '''
    Alternative to get_perf(): executes conlleval.pl directly (relying on
    the script being executable) instead of passing it to ``perl``.

    The script is expected at ``PREFIX + 'conlleval.pl'`` and is
    downloaded there with wget when missing.

    filename :: file in the format written by conlleval()

    Returns {'p': precision, 'r': recall, 'f1': f1score} as floats.

    Raises ValueError when the script output has no usable summary line,
    and subprocess.CalledProcessError / OSError when the download fails.
    '''
    script = PREFIX + 'conlleval.pl'
    if not isfile(script):
        url = ('https://www.comp.nus.edu.sg/%7Ekanmy/courses/'
               'practicalNLP_2008/packages/conlleval.pl')
        try:
            # Download to the path that is checked and executed, not to the
            # current working directory.
            subprocess.check_call(['wget', '-O', script, url])
        except (OSError, subprocess.CalledProcessError):
            # "wget -O" creates the file up front; do not leave a stub behind.
            if isfile(script):
                os.remove(script)
            raise
    if not os.access(script, os.X_OK):
        chmod(script, stat.S_IRWXU)  # give the execute permissions

    # Without a prefix the bare name would be looked up on PATH, so run it
    # explicitly from the current directory.
    executable = script if len(PREFIX) > 0 else './conlleval.pl'
    print('%s < %s' % (executable, filename))

    # Feed the file on stdin and read stdout directly: no shell string, no
    # temporary file, no external grep/rm.
    with open(filename, 'rb') as predictions:
        proc = subprocess.Popen([executable], stdin=predictions, stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()

    out = None
    for line in stdout.decode("utf-8").split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break
    if out is None:
        raise ValueError('no accuracy line in conlleval.pl output for %s' % filename)

    # Values are located by label rather than by fixed position, so the
    # 8-token summary line of the downloaded script is parsed correctly.
    precision = float(out[out.index('precision:') + 1][:-2])
    recall = float(out[out.index('recall:') + 1][:-2])
    f1score = float(out[out.index('FB1:') + 1])
    return {'p': precision, 'r': recall, 'f1': f1score}
|
||
if __name__ == '__main__':
    # Manual check: score the prediction file 'valid.txt' and print the
    # resulting precision/recall/F1 dictionary.
    print(get_perf('valid.txt'))