In [6]:
%%init_spark
# Jars handed to the Spark launcher: the BabelNet 4.0.1 API jar plus everything
# in the lucene-index lib/ directory next to it.
launcher.jars = ["/nfs/knowledge-glue/notebooks/3rdparty/lucene-index/babelnet-api-4.0.1.jar", "/nfs/knowledge-glue/notebooks/3rdparty/lucene-index/lib/*"]

# On the nature of BabelNet

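BabelNet follows a structure very similar to WordNet. Lexemes are mapped to synsets in an N-to-M (many-to-many) fashion: one lexeme can have several meanings, and one synset can be expressed by several lexemes. The mapping is therefore *not injective*, but it is *surjective in both directions*: every synset corresponds to at least one lexeme, and every lexeme has at least one meaning (it would be weird to have a synset that corresponds to no lexeme, or a lexeme that has no meanings).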

In [7]:
import com.babelscape.util.UniversalPOS
import scala.collection.JavaConverters._
import it.uniroma1.lcl.babelnet._
import it.uniroma1.lcl.jlt.util.Language



// Obtain the BabelNet instance and fetch a single synset by its ID.
val bn = BabelNet.getInstance
val by = bn.getSynset(new BabelSynsetID("bn:00044492n"))

// getMainSense returns a java.util.Optional, so check it before unwrapping
// instead of calling .get blindly. The lemma does not depend on the edge,
// so it is resolved once here rather than on every loop iteration.
val byMainSense = by.getMainSense(Language.EN)
val byLemma = if (byMainSense.isPresent) byMainSense.get.getFullLemma else "<no EN sense>"

// One line per outgoing edge: synset ID, English lemma, relation short name, target synset ID.
for (edge <- by.getOutgoingEdges.asScala) {
  println(s"${by.getID}\t$byLemma - ${edge.getPointer.getShortName} - ${edge.getBabelSynsetIDTarget}")
}

// Query for the English verb synsets of the lemma "home".
val query = new BabelNetQuery.Builder("home")
  .from(Language.EN)
  .POS(UniversalPOS.VERB)
  .build

val byl = bn.getSynsets(query)

bn:00044492n	home - has-kind - bn:01345697n
bn:00044492n	home - has-kind - bn:02478548n
bn:00044492n	home - has-kind - bn:15245321n
bn:00044492n	home - has-kind - bn:06589708n
bn:00044492n	home - has-kind - bn:10480707n
bn:00044492n	home - has-kind - bn:14590490n
bn:00044492n	home - has-kind - bn:17908438n
bn:00044492n	home - deriv - bn:00089431v
bn:00044492n	home - is-a - bn:00046953n
bn:00044492n	home - gloss-related - bn:00046953n
bn:00044492n	home - gloss-related - bn:00061450n
bn:00044492n	home - gloss-related - bn:00084525v
bn:00044492n	home - subclass_of - bn:00044896n
bn:00044492n	home - is-a - bn:00058336n
bn:00044492n	home - related - bn:00000752n
bn:00044492n	home - related - bn:00001426n
bn:00044492n	home - related - bn:00001940n
bn:00044492n	home - related - bn:00003193n
bn:00044492n	home - related - bn:00009011n
bn:00044492n	home - related - bn:00013173n
bn:00044492n	home - related - bn:00014982n
bn:00044492n	home - related - bn:00017724n
bn:00044492n	home - related - bn:

import com.babelscape.util.UniversalPOS
import scala.collection.JavaConverters._
import it.uniroma1.lcl.babelnet._
import it.uniroma1.lcl.jlt.util.Language
bn: it.uniroma1.lcl.babelnet.BabelNet = it.uniroma1.lcl.babelnet.OfflineBabelNet@6ca55bf
by: it.uniroma1.lcl.babelnet.BabelSynset = home#n#9
query: it.uniroma1.lcl.babelnet.BabelNetQuery = it.uniroma1.lcl.babelnet.BabelNetQuery@7a108c2b
byl: java.util.List[it.uniroma1.lcl.babelnet.BabelSynset] = [home#v#1, home#v#2]


In [10]:
// Scratch check: construct a query builder for the lemma "test" without calling build.
val x = new BabelNetQuery.Builder("test")

x: it.uniroma1.lcl.babelnet.BabelNetQuery.Builder = it.uniroma1.lcl.babelnet.BabelNetQuery$Builder@6d025b18


In [13]:
import it.uniroma1.lcl.babelnet.data.BabelSenseSource
import it.uniroma1.lcl.babelnet.data.BabelPointer

/**
  * Tells whether a sense source is one of the resources this notebook treats
  * as "linguistic".
  *
  * @param source the sense source to classify
  * @return true when the source reports being from any WordNet, from BabelNet,
  *         from Wiktionary or from WordAtlas; false otherwise
  */
def linguistic(source: BabelSenseSource): Boolean = {
    // The last expression is the result; no explicit `return` needed.
    source.isFromAnyWordnet ||
        source.isFromBabelNet ||
        source.isFromWiktionary ||
        source.isFromWordAtlas
}

import it.uniroma1.lcl.babelnet.resources._

// For every synset in `byl` that has at least one linguistic sense source,
// print its ID, its English glosses, and all of its outgoing edges.
for (home <- byl.asScala if home.getSenseSources.asScala.exists(linguistic)) {
    println("Found a linguistic definition")
    println(home.getID)
    println(home.getGlosses().asScala.filter(_.getLanguage == Language.EN)) // English only
    println("=======")

    // getMainSense returns a java.util.Optional, so check it before unwrapping
    // instead of calling .get blindly. The lemma is the same for every edge of
    // this synset, so it is resolved once, outside the edge loop.
    val mainSense = home.getMainSense(Language.EN)
    val lemma = if (mainSense.isPresent) mainSense.get.getFullLemma else "<no EN sense>"

    // get all the possible wordnet attributes we may need
    for (edge <- home.getOutgoingEdges.asScala) {
        println(s"${home.getID}\t$lemma - ${edge.getPointer} - ${edge.getBabelSynsetIDTarget}")
    }
}

<console>: 68: error: not found: value pointer

poss: List[com.babelscape.util.UniversalPOS] = List(NOUN)


In [12]:
// Look up every English synset for the lemma "and", with no part-of-speech filter.
val andQuery = new BabelNetQuery.Builder("and").from(Language.EN).build()

val results = bn.getSynsets(andQuery)

// For each hit, print its part of speech followed by its main sense.
results.asScala.foreach { synset =>
    println(synset.getPOS)
    println(synset.getMainSense)
}

NOUN
Optional[GEONM:EN:Anderson_Regional_Airport]
VERB
Optional[WIKT:EN:AND]
NOUN
Optional[WIKIDATA:EN:Maknae_de_Exo,_el_marido_de_Baek,_Sehunnie,_Hunnie,_Sebootie_y_Lord_Oh_Sehun]
NOUN
Optional[WIKI:EN:Logical_conjunction]
NOUN
Optional[WIKI:EN:And_(John_Martyn_album)]
NOUN
Optional[WN:EN:logic_gate]
NOUN
Optional[WIKI:EN:Armed_and_Dangerous_(video_game)]
VERB
Optional[WIKT:EN:and]
NOUN
Optional[WIKIDATA:EN:AND]


andQuery: it.uniroma1.lcl.babelnet.BabelNetQuery = it.uniroma1.lcl.babelnet.BabelNetQuery@175e1b15
results: java.util.List[it.uniroma1.lcl.babelnet.BabelSynset] = [GEONM:EN:Anderson_Regional_Airport, WIKT:EN:AND, WIKIDATA:EN:Maknae_de_Exo,_el_marido_de_Baek,_Sehunnie,_Hunnie,_Sebootie_y_Lord_Oh_Sehun, WIKI:EN:Logical_conjunction, WIKI:EN:And_(John_Martyn_album), logic_gate#n#1, WIKI:EN:Armed_and_Dangerous_(video_game), WIKT:EN:and, WIKIDATA:EN:AND]
