In [3]:
import logging
# Apply logging's default configuration so records emitted below are shown.
logging.basicConfig()
# Lower the "stlarm" logger threshold to DEBUG; the DEBUG/INFO lines printed
# under the following cells come from loggers in that namespace.
logger = logging.getLogger("stlarm")
logger.setLevel(logging.DEBUG)

from stlarm import analyzer, metarule_viz

DEBUG:stlarm.analyzer:Using AMIE_JAR_PATH "/home/jovyan/work/amie/amie3.jar"


In [6]:
import pandas as pd
# Never truncate cell text when rendering frames: the rule strings are long.
pd.options.display.max_colwidth = None

In [7]:
# Rules file handed to RulesAnalyzer below (tab-separated, going by its extension).
RULES_FILE = "../mining/mined_rules.tsv"
# Training knowledge base handed to RulesAnalyzer alongside the rules
# (a triples file in .nt form, going by its name).
TRAIN_KB_FILE = "../resources/converted_nyc_foursquare_period_2_2_None_filtered_triples.nt"

In [64]:
# Build the analyzer over the full mined rule set.
# Per the log output of this cell, remove_reflexive_duplicates=True discards
# rules with unsorted reflexive relations and metarules=True adds a 'Metarule'
# column by calling AMIE's MetaruleBuilder (the slow step, ~55s in this run).
ra = analyzer.RulesAnalyzer(
    RULES_FILE,
    TRAIN_KB_FILE,
    remove_reflexive_duplicates=True,
    metarules=True,
)

DEBUG:stlarm.analyzer:Using AMIE_JAR_PATH /home/jovyan/work/amie/amie3.jar
INFO:stlarm.analyzer:Loaded 459256 rules.
INFO:stlarm.analyzer:Improving rules...
INFO:stlarm.analyzer:Removing rules with unsorted reflexive relations...
INFO:stlarm.analyzer:Removed 148294 rules with unsorted reflexive relations. 310962 rules will be used.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmp6yny3x3x
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 54.8s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 1329 metarules.


In [262]:
def get_rules_without_relation(df, relation):
    """Return the rows of `df` whose 'Rule' text does not mention `relation`.

    The match is a literal, case-sensitive substring test (``regex=False``), so
    `relation` is never interpreted as a regular expression. Any rule that
    contains `relation` inside a longer token is dropped as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string 'Rule' column.
    relation : str
        Text to look for in each rule.

    Returns
    -------
    pandas.DataFrame
        The subset of `df`, original index preserved, without matching rules.
    """
    # na=False: a missing rule cannot mention the relation, so it is kept. It
    # also guarantees a purely boolean mask, which `~` and row selection need.
    mentions_relation = df['Rule'].str.contains(relation, regex=False, na=False)
    return df[~mentions_relation]

# Drop every rule that mentions 'before', then derive a second analyzer (with
# its own metarules) from what is left.
# NOTE(review): `_raw_df` is a private attribute of RulesAnalyzer — confirm
# there is no public accessor for the unprocessed rules.
ra_no_before_df = get_rules_without_relation(ra._raw_df, 'before')
ra_no_before = analyzer.RulesAnalyzer.from_analyzer(
    ra,
    ra_no_before_df,
    metarules_class_file='labeled_metarules.csv',
    metarules=True,
)

# Distinct metarule counts, then distinct rule counts; on each line the full
# analyzer comes first and the filtered one second.
print(*(current.df['Metarule'].nunique() for current in (ra, ra_no_before)))
print(*(current.df['Rule'].nunique() for current in (ra, ra_no_before)))

INFO:stlarm.analyzer:Loaded 211908 rules.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmpn323x702
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 16.46s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 370 metarules.


1329 370
310962 211908


In [266]:
import functools
# Notebook-local variant of metarule_viz.to_datalog_rule with
# should_colorize=False pre-bound; applied to the 'Metarule' and 'Rule'
# columns when the example table is built further down.
to_datalog_rule = functools.partial(metarule_viz.to_datalog_rule, should_colorize=False)

In [280]:
# Pick one metarule per classification label. The draw is made over the rule
# rows of each label; the fixed seed keeps the pick reproducible.
chosen_metarules = ra_no_before.df.groupby('classification', sort=False)['Metarule'] \
                    .apply(pd.Series.sample, n=1, random_state=42)

# For each chosen metarule, pick up to two of its rules. Capping the sample size
# at the group size avoids the ValueError that sample(n=2) raises when a
# metarule has a single rule; groups with two or more rules are sampled exactly
# as before (same seed, same n).
chosen_rules = ra_no_before.df[ra_no_before.df['Metarule'].isin(chosen_metarules)] \
                    .groupby('Metarule')['Rule'] \
                    .apply(lambda rules: rules.sample(n=min(2, len(rules)), random_state=42))
chosen_examples = ra_no_before.df[ra_no_before.df['Rule'].isin(chosen_rules)]

# Keep only the quality metrics, render rule and metarule as datalog text, and
# index the table by (classification, Metarule) for display.
chosen_examples = chosen_examples.drop(columns=['body', 'head', 'Std. Lower Bound', 'PCA Lower Bound',
                                   'PCA Conf estimation', 'head relation', 'length', 'Functional variable']) \
                    .assign(Metarule=chosen_examples['Metarule'].apply(to_datalog_rule)) \
                    .assign(Rule=chosen_examples['Rule'].apply(to_datalog_rule)) \
                    .set_index(['classification', 'Metarule'])

In [281]:
# Show the sampled example rules, indexed by (classification, Metarule).
chosen_examples

Unnamed: 0_level_0,Unnamed: 1_level_0,Rule,Head Coverage,Std Confidence,PCA Confidence,Positive Examples,Body size,PCA Body size
classification,Metarule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
interesting,"hasCheckin(?a, ?f), hasPOI(?f, ?z0), hasTime(?f, ?z1) => hasTrajectoryCategory(?a, ?z2)","hasCheckin(?a, ?f), hasPOI(?f, Poi_MtaSubwayManhattanBridgeBdnq_4a737bf8f964a52091dc1fe3), hasTime(?f, Morning) => hasTrajectoryCategory(?a, Weekday)",0.015829,1.0,1.0,74,74,74
interesting,"hasCheckin(?a, ?f), hasPOI(?f, ?z0), hasTime(?f, ?z1) => hasTrajectoryCategory(?a, ?z2)","hasCheckin(?a, ?f), hasPOI(?f, Poi_NewYorkPennStation_42911d00f964a520f5231fe3), hasTime(?f, Afternoon) => hasTrajectoryCategory(?a, Weekday)",0.01262,0.880597,0.880597,59,67,67
should not happen,"hasCheckin(?k, ?f), hasTrajectory(?z0, ?k), withinRadius(?a, ?f) => hasTime(?a, ?z1)","hasCheckin(?k, ?f), hasTrajectory(User_951, ?k), withinRadius(?a, ?f) => hasTime(?a, Afternoon)",0.002905,0.358885,0.358885,103,287,287
should not happen,"hasCheckin(?k, ?f), hasTrajectory(?z0, ?k), withinRadius(?a, ?f) => hasTime(?a, ?z1)","hasCheckin(?k, ?f), hasTrajectory(User_880, ?k), withinRadius(?a, ?f) => hasTime(?a, Night)",0.00251,0.354582,0.354582,89,251,251
uninteresting,"hasPOI(?i, ?z0), hasPOI(?a, ?z1), hasTime(?i, ?b) => hasTime(?a, ?b)","hasPOI(?i, Poi_FairleighDickinsonUniversity_4c4f5881eaa1be9abdf1c345), hasPOI(?a, Poi_PortAuthorityBusTerminal_4840fe6bf964a52030501fe3), hasTime(?i, ?b) => hasTime(?a, ?b)",0.003356,0.333333,0.333333,119,357,357
uninteresting,"hasPOI(?i, ?z0), hasPOI(?a, ?z1), hasTime(?i, ?b) => hasTime(?a, ?b)","hasPOI(?i, Poi_CypressHillsNycha_4c5c46329b28d13a664e5670), hasPOI(?a, Poi_MtaSubway14ThStunionSquare456Lnqrw_4ad8add6f964a520a61321e3), hasTime(?i, ?b) => hasTime(?a, ?b)",0.002228,0.333333,0.333333,79,237,237
more or less,"hasCheckin(?i, ?a), hasCheckin(?i, ?b), hasPOI(?a, ?z0) => withinRadius(?a, ?b)","hasCheckin(?i, ?a), hasCheckin(?i, ?b), hasPOI(?a, Poi_DukeEllingtonBlvd_4dc72f55d164eb9ca01ac3ba) => withinRadius(?a, ?b)",0.000572,0.855556,0.855556,77,90,90
more or less,"hasCheckin(?i, ?a), hasCheckin(?i, ?b), hasPOI(?a, ?z0) => withinRadius(?a, ?b)","hasCheckin(?i, ?a), hasCheckin(?i, ?b), hasPOI(?a, Poi_StLukesRomanCatholicChurch_4c02c91d0d0e0f47c9a6019a) => withinRadius(?a, ?b)",0.000586,0.686957,0.686957,79,115,115
too general,"hasPOI(?b, ?z0), withinRadius(?b, ?j), withinTimeWindow(?a, ?j) => withinTimeWindow(?a, ?b)","hasPOI(?b, Poi_Yankeemania_4e8dc55bf790e886ae0fd1c6), withinRadius(?b, ?j), withinTimeWindow(?a, ?j) => withinTimeWindow(?a, ?b)",0.00059,0.635514,0.635514,68,107,107
too general,"hasPOI(?b, ?z0), withinRadius(?b, ?j), withinTimeWindow(?a, ?j) => withinTimeWindow(?a, ?b)","hasPOI(?b, Poi_UnitedCigarsInc_4d61b919865a224bef72ba85), withinRadius(?b, ?j), withinTimeWindow(?a, ?j) => withinTimeWindow(?a, ?b)",0.003,0.534776,0.534776,346,647,647
