In [30]:
import numpy as np
import pandas as pd
import os

In [31]:
def load_from_csv(folder_name, file_name, sep='\t', header=None):
    r"""Load a knowledge graph serialized as a delimited text file.

        The file is expected to hold one statement per row, e.g.:

        .. code-block:: text

           subj1    relationX   obj1
           subj1    relationY   obj2
           subj3    relationZ   obj2
           subj4    relationY   obj2
           ...


        .. note::
            Duplicate rows are filtered.

    Parameters
    ----------
    folder_name : str
        Base folder within ``XAI_DATA_HOME`` where the file is stored.
    file_name : str
        File name.
    sep : str
        The column separator (default ``\t``).
    header : int, None
        The row of the header of the csv file. Same as the ``header``
        parameter of ``pandas.read_csv``.

    Returns
    -------
        triples : ndarray, shape [n, m]
            The de-duplicated rows of the file, every cell read as a string.
            ``m`` is the number of columns in the file: 3 for plain
            subject/predicate/object triples, more if the file carries extra
            columns (e.g. a label column).

    Examples
    --------
    >>> from xai_lp.datasets import load_from_csv
    >>> X = load_from_csv('folder', 'dataset.csv', sep=',')
    >>> X[:3]
    array([['a', 'y', 'b'],
           ['b', 'y', 'a'],
           ['a', 'y', 'c']], dtype=object)

    """
    # XAI_DATA_HOME is a module-level setting resolved at call time.
    file_path = os.path.join(XAI_DATA_HOME, folder_name, file_name)
    df = pd.read_csv(file_path,
                     sep=sep,
                     header=header,
                     names=None,
                     dtype=str)

    # `DataFrame.as_matrix()` was removed in pandas 1.0; `.values` is
    # available on both old and current pandas releases.
    return df.drop_duplicates().values

In [32]:
# Datasets are resolved relative to the directory the notebook runs from.
XAI_DATA_HOME = path = os.getcwd()

In [33]:
# Load the WN11 training rows from <XAI_DATA_HOME>/wn11/train.txt.
train = load_from_csv('wn11', 'train.txt')

# `shape` is a tuple; only its first entry (the row count) is needed.
size = train.shape
n = int(size[0])

# One-character string column of '1', one entry per training row.
ones = np.full((n, 1), '1', dtype=str)




In [34]:
# Sanity check: show the (n, 1) column of '1' strings built for the training rows.
ones

array([['1'],
       ['1'],
       ['1'],
       ...,
       ['1'],
       ['1'],
       ['1']], dtype='<U1')

In [35]:
# Append the constant '1' column to the training rows as a 4th column.
# Slicing to the first three columns keeps this cell idempotent: re-running it
# replaces the appended column instead of stacking another one each time.
train = np.concatenate((train[:, :3], ones), axis=1)

In [36]:
# Display the training array, which now carries the constant '1' column as its 4th column.
train

array([['__spiritual_bouquet_1', '_type_of', '__sympathy_card_1', '1'],
       ['__spiritual_bouquet_1', '_synset_domain_topic',
        '__church_of_rome_1', '1'],
       ['__absorption_5', '_type_of', '__attention_4', '1'],
       ...,
       ['__frontier_1', '_type_of', '__bound_6', '1'],
       ['__eurasia_1', '_part_of', '__eastern_hemisphere_1', '1'],
       ['__electronics_1', '_domain_region', '__frequency_response_1',
        '1']], dtype=object)

In [37]:
# The validation and test rows come from the same 'wn11' folder as train.txt.
val, test = (
    load_from_csv('wn11', split_file)
    for split_file in ('dev.txt', 'test.txt')
)



In [38]:
# Display the validation rows; the file itself provides a 4th column holding '1' or '-1'.
val

array([['__genus_xylomelum_1', '_type_of', '__dicot_genus_1', '1'],
       ['__genus_xylomelum_1', '_type_of', '__clostridium_perfringens_1',
        '-1'],
       ['__house_of_god_1', '_has_instance', '__church_2', '1'],
       ...,
       ['__tracked_vehicle_1', '_has_instance',
        '__potassium_acid_carbonate_1', '-1'],
       ['__hearing_disorder_1', '_type_of', '__disability_1', '1'],
       ['__hearing_disorder_1', '_type_of', '__buy_off_1', '-1']],
      dtype=object)

In [39]:
# Display the test rows; as with `val`, the 4th column holds '1' or '-1'.
test

array([['__chamaecyparis_lawsoniana_1', '_type_of', '__cedar_1', '1'],
       ['__chamaecyparis_lawsoniana_1', '_type_of',
        '__order_synentognathi_1', '-1'],
       ['__sway_2', '_has_instance', '__brachiate_1', '1'],
       ...,
       ['__genus_isurus_1', '_member_meronym', '__portrait_3', '-1'],
       ['__baseball_diamond_1', '_has_part', '__short_3', '1'],
       ['__baseball_diamond_1', '_has_part', '__haste_2', '-1']],
      dtype=object)

In [40]:
# Keep only the validation rows whose 4th column equals the string '1'
# (presumably the positive examples; confirm the label semantics of the dataset).
val[val[:, 3] == '1']

array([['__genus_xylomelum_1', '_type_of', '__dicot_genus_1', '1'],
       ['__house_of_god_1', '_has_instance', '__church_2', '1'],
       ['__family_graminaceae_1', '_member_meronym', '__bromus_1', '1'],
       ...,
       ['__court_3', '_has_instance', '__criminal_court_1', '1'],
       ['__tracked_vehicle_1', '_has_instance', '__half_track_1', '1'],
       ['__hearing_disorder_1', '_type_of', '__disability_1', '1']],
      dtype=object)

In [42]:
# Rows of `val` whose 4th column is '1', restricted to the first three columns.
val[val[:, 3] == '1', :3]

array([['__genus_xylomelum_1', '_type_of', '__dicot_genus_1'],
       ['__house_of_god_1', '_has_instance', '__church_2'],
       ['__family_graminaceae_1', '_member_meronym', '__bromus_1'],
       ...,
       ['__court_3', '_has_instance', '__criminal_court_1'],
       ['__tracked_vehicle_1', '_has_instance', '__half_track_1'],
       ['__hearing_disorder_1', '_type_of', '__disability_1']],
      dtype=object)