In [None]:
import json
import os
import numpy as np
import pandas as pd

## Collect maps from directories

In [None]:
def collect_maps(root):
    """Load every argument map stored one level below ``root``.

      Each immediate subdirectory of ``root`` is scanned for files whose name
      ends with ".json"; every such file is parsed as one argument map
      (folder = json files -> 1 file - 1 map). Files located directly in
      ``root`` and anything nested deeper than one level are not read.

      Args:
          - root - directory with subdirectories with maps

      Returns:
          - maps - dictionary with file-name as a key and json map as a value.
            Keys are bare file names (no directory part), so if two
            subdirectories contain a file with the same name, the one read
            later replaces the earlier one.
    """
    maps = {}
    # os.listdir returns entries in arbitrary order; sorting makes the key
    # order (and the winner of a file-name collision) reproducible.
    for subdir in sorted(os.listdir(root)):
        subdir_path = os.path.join(root, subdir)
        if not os.path.isdir(subdir_path):
            continue

        for filename in sorted(os.listdir(subdir_path)):
            if not filename.endswith(".json"):
                continue
            # Decode as UTF-8 explicitly instead of relying on the platform's
            # locale encoding, which would garble non-ASCII text on some systems.
            with open(os.path.join(subdir_path, filename), encoding="utf-8") as f:
                maps[filename] = json.load(f)
    return maps

In [None]:
def create_pairs(premises, conclusions):
    """Create dictionary of pairs with an identifier.
    With the following form:
    {id: {"conclusion": <SINGLE_CONCLUSION>, "premises":[<LIST_OF_PREMISES>]}}

    A premise edge belongs to a conclusion edge when the premise's
    ['to']['id'] equals the conclusion's ['from']['id'].

    Args:
        - premises - list of edge dictionaries, each with a ['to']['id'] entry
        - conclusions - list of edge dictionaries, each with a ['from']['id']
          entry

    Returns:
        - pairs - dictionary keyed by the position of the conclusion in
          `conclusions`; premises keep their order from `premises`. A
          conclusion without matching premises gets an empty list.
    """
    # Index the premises by target id once, instead of rescanning the whole
    # premise list for every conclusion.
    premises_by_target = {}
    for premise in premises:
        premises_by_target.setdefault(premise['to']['id'], []).append(premise)

    pairs = {}
    for i, conclusion in enumerate(conclusions):
        source_id = conclusion['from']['id']
        pairs[i] = {
            'conclusion': conclusion,
            # Copy so that conclusions sharing a source id do not share one
            # mutable list object.
            'premises': list(premises_by_target.get(source_id, ())),
        }

    return pairs

In [None]:
def collect_pairs(data):
    """Build the premise/conclusion pairs of one argumentation map.

       Both kinds of edge are read from data['edges']:
        - a conclusion edge goes from a node of type 'RA' to a node of
          type 'I' (information node);
        - a premise edge goes from a node of type 'I' to a node of type 'RA'.

       Args:
        - data - single argumentation map

       Returns:
        - dictionary of pairs as produced by create_pairs
          ({id: {"conclusion": ..., "premises": [...]}})
    """
    edges = data['edges']

    # Edges leaving an 'RA' node towards an 'I' node carry the conclusion.
    conclusion_edges = []
    for edge in edges:
        if edge['from']['type'] == 'RA' and edge['to']['type'] == 'I':
            conclusion_edges.append(edge)

    # Edges entering an 'RA' node from an 'I' node carry a premise.
    premise_edges = []
    for edge in edges:
        if edge['to']['type'] == 'RA' and edge['from']['type'] == 'I':
            premise_edges.append(edge)

    return create_pairs(premise_edges, conclusion_edges)

In [None]:
def print_pairs(pairs):
    """Print pairs of premises and conclusions

     For every pair one 'Conclusion: ...' line is printed, followed by one
     'Premise: ...' line per premise and a '-----' separator line.

     Args: 
         - pairs - dictionary of premises and conclusions, in the form
           {id: {"conclusion": <edge>, "premises": [<edge>, ...]}}; the
           conclusion text is read from ['to']['text'] and each premise text
           from ['from']['text']
    """
    for pair in pairs.values():
        # The format string has a single placeholder, so only the text is
        # passed; a second argument would be silently ignored by str.format.
        print('Conclusion: {}'.format(pair['conclusion']['to']['text']))
        for premise in pair['premises']:
            print('Premise: {}'.format(premise['from']['text']))
        print("-----")

In [None]:
def corpus_pairs(maps):
    """Run collect_pairs on every map of the corpus.

    Args:
      - maps - dictionary with maps (key: file name, value: map)
    Returns:
      - corpus - dictionary with the same file names as keys and, for each,
        the dictionary of pairs returned by collect_pairs as a value
    """
    return {
        file_name: collect_pairs(argument_map)
        for file_name, argument_map in maps.items()
    }

In [None]:
def collect_pairs_from_maps(maps):
    """Reduce full argument maps to flat lists of (conclusion, premise) texts.

    Args:
        - maps - dictionary with argument maps with full data
    Returns:
        - maps_compressed - dictionary with the same keys as `maps`; each
          value is a list of (conclusion text, premise text) tuples, one
          tuple per premise of every pair found by collect_pairs
    """
    maps_compressed = {}
    for file_name, argument_map in maps.items():
        text_pairs = []
        for entry in collect_pairs(argument_map).values():
            conclusion_text = entry['conclusion']['to']['text']
            # One tuple per premise; a conclusion without premises adds nothing.
            for premise_edge in entry['premises']:
                text_pairs.append((conclusion_text, premise_edge['from']['text']))
        maps_compressed[file_name] = text_pairs
    return maps_compressed

## **Usage Example**

In [None]:
root= "./" # root directory whose subdirectories contain the argument-map JSON files
maps = collect_maps(root)  # {file name: parsed JSON map}
corpus = corpus_pairs(maps)  # {file name: dictionary of pairs from collect_pairs}

In [None]:
# Display the pairs collected for every file of the corpus.
corpus

In [None]:
# '<FILE-NAME>.json' is a placeholder: replace it with a key that exists in
# `maps` (keys are bare file names), otherwise this lookup raises KeyError.
data = maps['<FILE-NAME>.json']
pairs = collect_pairs(data)
print_pairs(pairs)

In [None]:
maps_comp = collect_pairs_from_maps(maps)
# Flatten the per-file lists into one list of (conclusion, premise) tuples.
X = [text_pair for file_pairs in maps_comp.values() for text_pair in file_pairs]