In [1]:
import logging
# Raise the "stlarm" package logger to DEBUG *before* the package is imported
# below, so that messages emitted at import time are not filtered out.
logger = logging.getLogger("stlarm")
logger.setLevel(logging.DEBUG)
# Install the default handler on the root logger so records are actually shown.
logging.basicConfig()

from stlarm import analyzer

DEBUG:stlarm.analyzer:Using AMIE_JAR_PATH /home/jovyan/work/amie/bin/amie3.jar


In [2]:
import pandas as pd
# Do not truncate long cell contents when DataFrames are displayed.
pd.options.display.max_colwidth = None

In [3]:
# Input paths, relative to the notebook's working directory; both are handed to
# analyzer.RulesAnalyzer below.
# NOTE(review): presumably the mined rules in TSV form -- confirm.
RULES_FILE = "../mining/mined_rules.tsv"
# NOTE(review): presumably the training knowledge base as N-Triples (.nt) -- confirm.
TRAIN_KB_FILE = "../resources/converted_nyc_foursquare_period_2_2_None_filtered_triples.nt"

In [4]:
# Base analyzer over the full rules file and the training KB.
# Both options are switched on: dropping reflexive duplicates and building the
# 'Metarule' column (the recorded run logs ~94s for the metarule step).
ra = analyzer.RulesAnalyzer(
    RULES_FILE,
    TRAIN_KB_FILE,
    remove_reflexive_duplicates=True,
    metarules=True,
)

INFO:stlarm.analyzer:Loaded 459256 rules.
INFO:stlarm.analyzer:Improving rules...
INFO:stlarm.analyzer:Removing rules with unsorted reflexive relations...
INFO:stlarm.analyzer:Removed 148294 rules with unsorted reflexive relations. 310962 rules will be used.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/bin/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmptvvganh8
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 94.18s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 1329 metarules.


In [5]:
def get_rules_without_relation(df, relation):
    """Return the rows of ``df`` whose 'Rule' text does not mention ``relation``.

    The match is a literal, case-sensitive substring test (``regex=False``), so
    ``relation`` is never interpreted as a regular expression. Being a
    substring test, it also matches longer names that merely contain
    ``relation``.

    Rows whose 'Rule' value is missing are treated as not mentioning the
    relation and are kept. Without ``na=False`` the mask would contain missing
    values and negating it with ``~`` would raise ``TypeError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column named 'Rule'.
    relation : str
        Text to look for inside each rule.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that do not contain ``relation``, with their
        original index labels.
    """
    mentions_relation = df['Rule'].str.contains(relation, regex=False, na=False)
    return df[~mentions_relation]

# Ablation: for each relation, drop every rule that mentions it and build a new
# analyzer (with metarules) from the remaining rules.
def _analyzer_without(relation):
    """Return (filtered rules frame, analyzer built from it) for one relation."""
    remaining_df = get_rules_without_relation(ra._raw_df, relation)
    return remaining_df, analyzer.RulesAnalyzer.from_analyzer(ra, remaining_df, metarules=True)

ra_no_before_df, ra_no_before = _analyzer_without('before')
ra_no_withinTimeWindow_df, ra_no_withinTimeWindow = _analyzer_without('withinTimeWindow')
ra_no_withinRadius_df, ra_no_withinRadius = _analyzer_without('withinRadius')

# Distinct metarule counts, then distinct rule counts: the full rule set first,
# followed by the before / withinTimeWindow / withinRadius ablations.
for counted_column in ('Metarule', 'Rule'):
    print(*(
        variant.df[counted_column].nunique()
        for variant in (ra, ra_no_before, ra_no_withinTimeWindow, ra_no_withinRadius)
    ))

INFO:stlarm.analyzer:Loaded 211908 rules.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/bin/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmpcv9xkfwk
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 27.21s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 370 metarules.
INFO:stlarm.analyzer:Loaded 232438 rules.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/bin/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmpj0_yxffj
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 46.36s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 571 metarules.
INFO:stlarm.analyzer:Loaded 231981 rules.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/bin/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmplpgg6kcj
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuild

1329 370 571 559
310962 211908 232438 231981


In [6]:
# Rebuild the analyzer for the rules without 'before', this time also passing
# the labeled metarules file.
# NOTE(review): presumably this is what yields the 'classification' column
# seen in the preview -- confirm in stlarm.analyzer.
ra_no_before = analyzer.RulesAnalyzer.from_analyzer(
    ra,
    ra_no_before_df,
    metarules_class_file='labeled_metarules.csv',
    metarules=True,
)

INFO:stlarm.analyzer:Loaded 211908 rules.
DEBUG:stlarm.analyzer:Calling java -cp /home/jovyan/work/amie/bin/amie3.jar amie.rules.eval.MetaruleBuilder /tmp/tmpiszaufrp
INFO:stlarm.analyzer:[Done] amie.rules.eval.MetaruleBuilder
DEBUG:stlarm.analyzer:Building meta-rules lattice using amie.MetaruleBuilder took 26.16s.
INFO:stlarm.analyzer:Added 'Metarule' column. There are 370 metarules.


In [9]:
# Persist the analyzed rules table as CSV in the current working directory;
# index=False leaves the DataFrame index out of the file.
ra_no_before.df.to_csv("analyzed_rules_df.csv", index=False)

In [8]:
# Preview the first rows of the analyzed rules table (bare last expression, so
# the notebook renders it).
ra_no_before.df.head()

Unnamed: 0,body,head,Rule,Head Coverage,Std Confidence,PCA Confidence,Positive Examples,Body size,PCA Body size,Functional variable,Std. Lower Bound,PCA Lower Bound,PCA Conf estimation,Metarule,head relation,length,classification
0,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasDayOfTheWeek> ?b,?a <hasDayOfTheWeek> ?b,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasDayOfTheWeek> ?b => ?a <hasDayOfTheWeek> ?b,1.0,1.0,1.0,4557,4557,4557,?a,0.0,0.0,651.0,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasDayOfTheWeek> ?b => ?a <hasDayOfTheWeek> ?b,<hasDayOfTheWeek>,4,should not happen
1,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasTrajectoryCategory> ?b,?a <hasTrajectoryCategory> ?b,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasTrajectoryCategory> ?b => ?a <hasTrajectoryCategory> ?b,1.0,1.0,1.0,4675,4675,4675,?a,0.0,0.0,1558.333333,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasTrajectoryCategory> ?b => ?a <hasTrajectoryCategory> ?b,<hasTrajectoryCategory>,4,should not happen
2,?b <hasCheckin> ?n ?h <hasCheckin> ?n ?a <hasTrajectory> ?h,?a <hasTrajectory> ?b,?b <hasCheckin> ?n ?h <hasCheckin> ?n ?a <hasTrajectory> ?h => ?a <hasTrajectory> ?b,1.0,1.0,1.0,4557,4557,4557,?b,0.0,0.0,28.841772,?b <hasCheckin> ?n ?h <hasCheckin> ?n ?a <hasTrajectory> ?h => ?a <hasTrajectory> ?b,<hasTrajectory>,4,should not happen
3,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasMonth> ?b,?a <hasMonth> ?b,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasMonth> ?b => ?a <hasMonth> ?b,1.0,1.0,1.0,4557,4557,4557,?a,0.0,0.0,414.272727,?a <hasCheckin> ?n ?g <hasCheckin> ?n ?g <hasMonth> ?b => ?a <hasMonth> ?b,<hasMonth>,4,should not happen
4,?m <hasPOI> ?a ?m <hasPOI> ?g ?g <hasPOICategory> ?b,?a <hasPOICategory> ?b,?m <hasPOI> ?a ?m <hasPOI> ?g ?g <hasPOICategory> ?b => ?a <hasPOICategory> ?b,1.0,1.0,1.0,4387,4387,4387,?a,0.0,0.0,487.444444,?m <hasPOI> ?a ?m <hasPOI> ?g ?g <hasPOICategory> ?b => ?a <hasPOICategory> ?b,<hasPOICategory>,4,should not happen
