In [19]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import csv
import logging
log = logging.getLogger()
log.setLevel(logging.DEBUG)

pd.options.display.max_rows = 1000

files = ['jupiter', 'obama', 'paris', '500reverb']
config = {
    'reverb': [
        'default'
    ],
    'exemplar': [
        'stanford',
        'malt'
    ],
    'openie': [
        'binary',
        'nary'
    ]
}

In [20]:
def output_parser(program, c):
    if program == 'reverb':
        return reverb_parse(c)
    elif program == 'exemplar':
        return exemplar_parse(c)
    elif program == 'openie':
        return openie_parse(c)
    else:
        raise ValueError("program must be of valid entry")

def reverb_parse(c):
    data = {}
    for file in files:
        log.info("parsing file: {}-reverb-{}.txt".format(file, c))
        values = pd.read_csv("{}-reverb-{}.txt".format(file, c), sep='\t', header=None, quoting=csv.QUOTE_NONE)
        values.drop(values.columns[[0, 5, 6, 7, 8, 9, 10, 13, 14]], axis=1, inplace=True)
        values.rename(columns={1: 'sentence_id', 2: 'arg1', 3: 'rel', 4: 'arg2', 11: 'confidence', 12: 'sentence', 15: 'arg1_norm', 16: 'rel_norm', 17: 'arg2_norm'}, inplace=True)
        data[file] = values
    return data

def exemplar_parse(c):
    data = {}
    for file in files:
        log.info("parsing file: {}-exemplar-{}.txt".format(file, c))
        values = pd.read_csv("{}-exemplar-{}.txt".format(file, c), sep='\t', header=0, quoting=csv.QUOTE_NONE)
        values.rename(columns={'Subjects': 'arg1', 'Relation': 'rel', 'Objects': 'arg2', 'Normalized Relation': 'rel_norm', 'Sentence': 'sentence'}, inplace=True)
        data[file] = values
    return data

def openie_parse(c):
    data = {}
    for file in files:
        log.info("parsing file: {}-openie-{}.txt".format(file, c))
        values = pd.read_csv("{}-openie-{}.txt".format(file, c), sep='\t', header=None, quoting=csv.QUOTE_NONE)
        values.drop(values.columns[[0, 1]], axis=1, inplace=True)
        values.rename(columns={2: 'arg1', 3: 'rel', 4: 'arg2', 5: 'sentence_id', 6: 'sentence'}, inplace=True)
        patterns = [
            r'^SimpleArgument\(([0-9]*\s+)?',
            r',List\(.*\)$',
            r'^Relation\(',
            r'^Context\(([0-9]*\s+)?',
            r'^TemporalArgument\(',
            r'^SpatialArgument\('
        ]
        cols_to_clean = ['arg1', 'rel', 'arg2']
        for col in cols_to_clean:
            for pat in patterns:
                values[col].replace(to_replace=pat, value='', regex=True, inplace=True)
        data[file] = values
    return data

In [21]:
data = {}
for program in config.keys():
    if program not in data:
        data[program] = {}
    for c in config[program]:
        data[program][c] = output_parser(program, c)

INFO:root:parsing file: jupiter-reverb-default.txt
INFO:root:parsing file: obama-reverb-default.txt
INFO:root:parsing file: paris-reverb-default.txt
INFO:root:parsing file: 500reverb-reverb-default.txt
INFO:root:parsing file: jupiter-openie-binary.txt
INFO:root:parsing file: obama-openie-binary.txt
INFO:root:parsing file: paris-openie-binary.txt
INFO:root:parsing file: 500reverb-openie-binary.txt
INFO:root:parsing file: jupiter-openie-nary.txt
INFO:root:parsing file: obama-openie-nary.txt
INFO:root:parsing file: paris-openie-nary.txt
INFO:root:parsing file: 500reverb-openie-nary.txt
INFO:root:parsing file: jupiter-exemplar-stanford.txt
INFO:root:parsing file: obama-exemplar-stanford.txt
INFO:root:parsing file: paris-exemplar-stanford.txt
INFO:root:parsing file: 500reverb-exemplar-stanford.txt
INFO:root:parsing file: jupiter-exemplar-malt.txt
INFO:root:parsing file: obama-exemplar-malt.txt
INFO:root:parsing file: paris-exemplar-malt.txt
INFO:root:parsing file: 500reverb-exemplar-malt.tx

In [22]:
for f in files:
    for program in config.keys():
        for c in config[program]:
            print("{}-{}-{}: {}".format(f, program, c, len(data[program][c][f])))

jupiter-reverb-default: 375
jupiter-openie-binary: 779
jupiter-openie-nary: 620
jupiter-exemplar-stanford: 158
jupiter-exemplar-malt: 155
obama-reverb-default: 527
obama-openie-binary: 1265
obama-openie-nary: 829
obama-exemplar-stanford: 377
obama-exemplar-malt: 382
paris-reverb-default: 730
paris-openie-binary: 1457
paris-openie-nary: 1054
paris-exemplar-stanford: 390
paris-exemplar-malt: 369
500reverb-reverb-default: 727
500reverb-openie-binary: 1493
500reverb-openie-nary: 1156
500reverb-exemplar-stanford: 325
500reverb-exemplar-malt: 334


In [23]:
data['exemplar']['malt']['500reverb'].head()

Unnamed: 0,arg1,rel,arg2,rel_norm,sentence
0,El Nino,is weather event,,be event,"The current El Nino , a complicated and vaguel..."
1,El Nino,series phenomena,,be phenomenon,"The current El Nino , a complicated and vaguel..."
2,Jake,decides,,decide,Jake decides to give Melanie a bit of a hard t...
3,Sather,said,,say,"Sather , whose box sits high above the Garden ..."
4,Sather,sits,,sit,"Sather , whose box sits high above the Garden ..."


In [24]:
data['openie']['nary']['obama'].tail(15)

Unnamed: 0,arg1,rel,arg2,sentence_id
814,"SimpleArgument(I,List({3}))","Relation(came,List([5, 9)))","SimpleArgument(to my Christian faith,List([10,...","So I came to my Christian faith later in life,..."
815,"SimpleArgument(it,List([51, 53)))","Relation(was,List([54, 57)))",SimpleArgument(because the precepts of Jesus C...,"So I came to my Christian faith later in life,..."
816,"SimpleArgument(the precepts of Jesus Christ,Li...","Relation(spoke,List([95, 100)))","SimpleArgument(to me,List([101, 106))); Simple...","So I came to my Christian faith later in life,..."
817,"SimpleArgument(they,List([221, 225)))","Relation(would treat,List([226, 237)))","SimpleArgument(me,List([238, 240)))","So I came to my Christian faith later in life,..."
818,"SimpleArgument(I,List({141}))","Relation(would want,List([143, 153)))",SimpleArgument(to lead-being my brothers' and ...,"So I came to my Christian faith later in life,..."
819,"SimpleArgument(the kind of life,List([119, 135)))","Relation(to lead,List([154, 161)))","SimpleArgument(I,List({141}))","So I came to my Christian faith later in life,..."
820,"SimpleArgument(Obama,List([0, 5)))","Relation(met,List([6, 9)))",SimpleArgument(Trinity United Church of Christ...,Obama met Trinity United Church of Christ past...
821,"SimpleArgument(Obama,List([0, 5)))","Relation(became,List([91, 97)))","SimpleArgument(a member of Trinity,List([98, 1...",Obama met Trinity United Church of Christ past...
822,"SimpleArgument(Rev. Jeremiah Wright,List([49, ...","Relation([is] pastor [of],List([42, 48)))","SimpleArgument(Christ,List([35, 41)))",Obama met Trinity United Church of Christ past...
823,"SimpleArgument(He,List([0, 2)))","Relation(resigned,List([3, 11)))","SimpleArgument(from Trinity,List([12, 24))); T...",He resigned from Trinity in May 2008 during hi...
