# PyLPR demo
The demo covers running the model on the whole dataset and on a single relation. The next block of code handles imports and the default arguments for the model.
- **rules_file** : file where selected rules are saved after training
- **rules_file_temp** : file where all rules are saved
- **cores**: number of CPU cores used for multiprocessing (None means all)

In [3]:
from argparse import Namespace
from pylpr.model import LPR_model
from pylpr.data import Graph_names
import pandas as pd

# Default arguments handed to the model (see the list at the top of the notebook).
args = Namespace(
    # File where the selected rules are saved after training.
    rules_file='results/umls.npy',
    # File where all rules are saved.
    rules_file_temp='demo_temp.npy',
    solver='PULP_CBC_CMD',
    iterations=20,
    rules_load=False,
    skip_writing=True,
    skip_neg=True,
    skip_weight=True,
    # Number of CPU cores used for multiprocessing (None means all).
    cores=3,
    seed=12345,
    max_length=4,
    column_generation=False,
)

# Build the model from the UMLS dataset directory, a list of numeric values, and the arguments above.
model = LPR_model(
    "datasets/UMLS/",
    # NOTE(review): 0.0055 sits between 0.05 and 0.06 in an otherwise increasing
    # list -- confirm it is intended and not a typo (e.g. for 0.055).
    [0.02, 0.03, 0.04, 0.05, 0.0055, 0.06, 0.07, 0.08, 0.09, 0.1],
    args,
)

In [38]:
# For every relation id, count the facts found in the train, validation and
# test graphs (in that order) by summing the sizes of the `ends` collections.
splits = (Graph_names.Train, Graph_names.Validate, Graph_names.Test)
counts = {
    rel: tuple(
        sum(len(ends) for _, ends in model.data.get_facts_for_rel(split, rel))
        for split in splits
    )
    for rel in model.data.relation_to_num.values()
}

# One row per relation id, one column per split.
fact_counts = pd.DataFrame(
    data=counts.values(),
    index=counts.keys(),
    columns=['Train', 'Validate', 'Test'],
)

## Full dataset

In [None]:
# Fit the model, then collect its predictions as a mapping.
model.fit()
result = model.predict()

# Tabulate the mapping: its keys become the row index, its values the rows.
pd.DataFrame(index=result.keys(), data=result.values())

## Single relation
Checking the number of facts using the variable **fact_counts** before trying other relations is recommended. The validation dataset is used to calculate the entity rank, so relations with no facts in the validation dataset always yield an MRR of 0.

In [39]:
# Relation id to fit and evaluate; `fact_counts` is indexed by the same ids.
relation = 37

# fit: each step overwrites model.rules with the result of the next stage
model.rules = model.generate_rules_for_rel(Graph_names.Train, relation)
model.rules = model.get_rules_with_updated_neg_freq(relation)
model.rules = model.solve_for_rel(relation, model.get_solver())

# predict
# NOTE(review): the surrounding text says the validation dataset is used for the
# rank computation, while this call passes Graph_names.Test -- confirm which
# split is intended.
_, mrr, ranks = model.get_mrr_and_ranks(relation, Graph_names.Test)

hits_1 = model.hits_k(ranks, 1)
hits_3 = model.hits_k(ranks, 3)
hits_10 = model.hits_k(ranks, 10)

# Metric names share one naming scheme ('hits_<k>'); the last key was
# previously spelled 'hits:10', which did not match 'hits_1' / 'hits_3'.
result = {
    relation: {'mrr': mrr, 'hits_1': hits_1, 'hits_3': hits_3, 'hits_10': hits_10},
}

# Nested dict -> one column for the relation, one row per metric.
pd.DataFrame(result)

- relation '37': done
- neg for relation '37': done


## Other

A helper method that converts a rule to a more readable format.

In [1]:
def rule_to_txt(rule):
    """Convert a rule into a more readable list.

    Relation ids are replaced by their names via ``model.data.num_to_relation``.
    An antecedent relation that ``model.data.is_inverse`` reports as inverse is
    mapped back with ``model.data.inv_to_rel`` and its name is prefixed with
    ``'R_'``.

    Args:
        rule: mapping with the keys ``'consequent'``, ``'weight'``, ``'freq'``
            and ``'neg'``; it is also passed to ``model.get_rule_antecedent``.

    Returns:
        ``[consequent_name, antecedent_names, weight, freq, neg]``.
    """
    names = model.data.num_to_relation
    head = names[rule['consequent']]

    body = []
    for rel_id in model.get_rule_antecedent(rule):
        if model.data.is_inverse(rel_id):
            # Inverse relations are shown as the base relation's name with an 'R_' prefix.
            label = 'R_' + names[model.data.inv_to_rel(rel_id)]
        else:
            label = names[rel_id]
        body.append(label)

    return [head, body, rule['weight'], rule['freq'], rule['neg']]