In [1]:
import pickle
from itertools import islice

from decomp import UDSCorpus
# Load the three corpus splits, in train/dev/test order.
uds_train, uds_dev, uds_test = (
    UDSCorpus(split=split_name) for split_name in ('train', 'dev', 'test')
)

In [2]:
def build_dataset(results, base):
    """Build one labelled row per protorole-annotated semantics edge.

    Parameters
    ----------
    results : mapping
        Maps a graph id to the collection of edges matched for that graph.
        Any container supporting ``in`` works; in this notebook it is the
        value returned by ``graph.query(..., query_type='edge')``.
    base : mapping
        Maps graph ids to graph objects exposing ``semantics_edges()``,
        ``head(node, attrs)`` and ``syntax_nodes``.

    Returns
    -------
    list of list
        One row per edge whose attributes contain the key ``'protoroles'``.
        Each row is the ``'form'`` of every syntax node of the graph, followed
        by the value ``head`` gives for the edge's first node, the value it
        gives for the edge's second node, and a label: ``1`` if the edge is in
        ``results[graph_id]``, else ``0``.

    Raises
    ------
    KeyError
        If a graph with at least one protorole edge has no entry in
        ``results``.
    """
    dataset = []
    for sentence, graph in base.items():
        # The token forms are identical for every edge of a graph, so they are
        # extracted once per graph instead of once per edge.
        sentence_tokens = [node['form'] for node in graph.syntax_nodes.values()]

        for edge, attrs in graph.semantics_edges().items():
            if 'protoroles' not in attrs:
                continue

            pred, arg = edge[0], edge[1]
            # Only the first element of the pair returned by head() is kept.
            # NOTE(review): confirm whether it is 0- or 1-based relative to
            # the token list that precedes it in the row.
            pred_position, _ = graph.head(pred, ['form', 'lemma'])
            arg_position, _ = graph.head(arg, ['form', 'lemma'])

            # An empty result collection contains no edge, so a membership
            # test alone yields 0 for it; no separate emptiness check needed.
            label = 1 if edge in results[sentence] else 0

            # Unpacking creates a fresh list, so rows never share storage.
            dataset.append([*sentence_tokens, pred_position, arg_position, label])
    return dataset

In [3]:
# Query whose matches become the positive "agent" examples: <existed_before>
# above zero together with either <volition> or <instigation> above zero.
agentstr = """
           SELECT ?edge
           WHERE { ?edge <existed_before> ?existedbefore
                           FILTER (?existedbefore > 0 ) .
                   { ?edge <volition> ?volition
                           FILTER ( ?volition > 0 ) .
                   } UNION
                   { ?edge <instigation> ?instigation
                           FILTER ( ?instigation > 0 ) .
                   }
                 }
           """

# Run the query on every graph of each split (train, dev, test in that order);
# each result maps a graph id to what graph.query returned for that graph.
agent_train_results, agent_dev_results, agent_test_results = (
    {
        graph_id: uds_graph.query(agentstr, query_type='edge', cache_rdf=False)
        for graph_id, uds_graph in corpus.items()
    }
    for corpus in (uds_train, uds_dev, uds_test)
)

In [4]:
# Build and pickle the agent dataset for each split, in train/dev/test order.
# `d` is rebound on every pass and ends up holding the test-split dataset.
for split_results, split_corpus, out_path in (
    (agent_train_results, uds_train, "agent_train.data"),
    (agent_dev_results, uds_dev, "agent_dev.data"),
    (agent_test_results, uds_test, "agent_test.data"),
):
    d = build_dataset(split_results, split_corpus)
    with open(out_path, 'wb') as out_file:
        pickle.dump(d, out_file)

In [23]:
# Query whose matches become the positive "patient" examples: <existed_before>
# above zero together with either <volition> or <instigation> below zero.
patientstr = """
           SELECT ?edge
           WHERE { ?edge <existed_before> ?existedbefore
                            FILTER ( ?existedbefore > 0 ) .                     
                    { ?edge <volition> ?volition
                            FILTER ( ?volition < 0 ) .
                    } UNION
                    { ?edge <instigation> ?instigation
                            FILTER ( ?instigation < 0 ) .
                    }
                 }
           """

# Run the query on every graph of each split (train, dev, test in that order);
# each result maps a graph id to what graph.query returned for that graph.
patient_train_results, patient_dev_results, patient_test_results = (
    {
        graph_id: uds_graph.query(patientstr, query_type='edge', cache_rdf=False)
        for graph_id, uds_graph in corpus.items()
    }
    for corpus in (uds_train, uds_dev, uds_test)
)

In [24]:
# Build and pickle the patient dataset for each split, in train/dev/test order.
# `d` is rebound on every pass and ends up holding the test-split dataset.
for split_results, split_corpus, out_path in (
    (patient_train_results, uds_train, "patient_train.data"),
    (patient_dev_results, uds_dev, "patient_dev.data"),
    (patient_test_results, uds_test, "patient_test.data"),
):
    d = build_dataset(split_results, split_corpus)
    with open(out_path, 'wb') as out_file:
        pickle.dump(d, out_file)

In [19]:
# Query whose matches become the positive "instrument" examples: <was_used>
# and <existed_during> above zero, together with either <sentient> or
# <awareness> below zero.
instrumentstr = """
           SELECT ?edge
           WHERE { ?edge <was_used> ?wasused
                            FILTER ( ?wasused > 0 ) .                     
                   ?edge <existed_during> ?existedduring
                            FILTER ( ?existedduring > 0 ) .
                    { ?edge <sentient> ?sentient
                            FILTER ( ?sentient < 0 ) . 
                    } UNION
                    { ?edge <awareness> ?awareness
                            FILTER ( ?awareness < 0 ) .
                    }
                 }
           """

# Run the query on every graph of each split (train, dev, test in that order);
# each result maps a graph id to what graph.query returned for that graph.
instrument_train_results, instrument_dev_results, instrument_test_results = (
    {
        graph_id: uds_graph.query(instrumentstr, query_type='edge', cache_rdf=False)
        for graph_id, uds_graph in corpus.items()
    }
    for corpus in (uds_train, uds_dev, uds_test)
)

In [20]:
# Build and pickle the instrument dataset for each split, in train/dev/test
# order. `d` is rebound on every pass and ends up holding the test-split
# dataset.
for split_results, split_corpus, out_path in (
    (instrument_train_results, uds_train, "instrument_train.data"),
    (instrument_dev_results, uds_dev, "instrument_dev.data"),
    (instrument_test_results, uds_test, "instrument_test.data"),
):
    d = build_dataset(split_results, split_corpus)
    with open(out_path, 'wb') as out_file:
        pickle.dump(d, out_file)

In [27]:
# Query whose matches become the positive "result" examples: <existed_before>
# below zero, with <existed_after> and <change_of_state> both above zero.
resultstr = """
           SELECT ?edge
           WHERE { ?edge <existed_before> ?existedbefore
                            FILTER ( ?existedbefore < 0 ) .
                   ?edge <existed_after> ?existedafter
                            FILTER ( ?existedafter > 0 ) .
                   ?edge <change_of_state> ?changeofstate
                            FILTER ( ?changeofstate > 0 ) .
                 }
           """

# Run the query on every graph of each split (train, dev, test in that order);
# each result maps a graph id to what graph.query returned for that graph.
result_train_results, result_dev_results, result_test_results = (
    {
        graph_id: uds_graph.query(resultstr, query_type='edge', cache_rdf=False)
        for graph_id, uds_graph in corpus.items()
    }
    for corpus in (uds_train, uds_dev, uds_test)
)

In [28]:
# Build and pickle the result dataset for each split, in train/dev/test order.
# `d` is rebound on every pass and ends up holding the test-split dataset.
for split_results, split_corpus, out_path in (
    (result_train_results, uds_train, "result_train.data"),
    (result_dev_results, uds_dev, "result_dev.data"),
    (result_test_results, uds_test, "result_test.data"),
):
    d = build_dataset(split_results, split_corpus)
    with open(out_path, 'wb') as out_file:
        pickle.dump(d, out_file)

In [25]:
# Query whose matches become the positive "destination" examples: <location>,
# <change_of_location> and <existed_before> all above zero.
destinationstr = """
           SELECT ?edge
           WHERE { ?edge <location> ?location
                            FILTER ( ?location > 0 ) .
                   ?edge <change_of_location> ?changeoflocation
                            FILTER ( ?changeoflocation > 0 ) .
                   ?edge <existed_before> ?existedbefore
                            FILTER ( ?existedbefore > 0 ) .
                 }
           """

# Run the query on every graph of each split (train, dev, test in that order);
# each result maps a graph id to what graph.query returned for that graph.
dest_train_results, dest_dev_results, dest_test_results = (
    {
        graph_id: uds_graph.query(destinationstr, query_type='edge', cache_rdf=False)
        for graph_id, uds_graph in corpus.items()
    }
    for corpus in (uds_train, uds_dev, uds_test)
)

In [26]:
# Build and pickle the destination dataset for each split, in train/dev/test
# order. `d` is rebound on every pass and ends up holding the test-split
# dataset.
for split_results, split_corpus, out_path in (
    (dest_train_results, uds_train, "dest_train.data"),
    (dest_dev_results, uds_dev, "dest_dev.data"),
    (dest_test_results, uds_test, "dest_test.data"),
):
    d = build_dataset(split_results, split_corpus)
    with open(out_path, 'wb') as out_file:
        pickle.dump(d, out_file)

In [30]:
# Count the rows of `d` whose last element (the 0/1 label) equals 1.
# `d` is whichever dataset the most recently executed build cell left bound,
# so the number printed depends on cell execution order.
count = sum(1 for instance in d if instance[-1] == 1)

print(count)

19
