# Download Dependencies

In [None]:
%AddDeps com.lucidworks.spark spark-solr 3.6.0 --transitive
%AddDeps edu.stanford.nlp stanford-corenlp 3.9.2 --transitive
%AddDeps edu.stanford.nlp stanford-corenlp 3.9.2 --classifier models

# Fetch Documents from Solr

In [None]:
import com.lucidworks.spark.rdd.SelectSolrRDD

// host:port string passed as the first constructor argument of SelectSolrRDD
// (presumably the ZooKeeper address of the Solr cluster -- confirm for your setup).
val SOLR: String = "localhost:9990"

// Index to search, the field to match against, and the search term.
val INDEX: String = "core17"
val FIELD: String = "contents"
val QUERY: String = "Obama"

// Issue the "<FIELD>:<QUERY>" query and count the matches.
// NOTE: despite its name, `rdd` ends up bound to whatever count() returns,
// not to the RDD itself.
val rdd = new SelectSolrRDD(SOLR, INDEX, sc)
    .rows(1000)
    .query(s"$FIELD:$QUERY")
    .count()

# CoreNLP Example

In [None]:
import collection.JavaConversions._
import edu.stanford.nlp.simple._

// Explicit Java -> Scala collection conversions via .asScala. The implicit
// scala.collection.JavaConversions are deprecated and hide where a conversion
// happens, so the loops below do not rely on them.
import scala.collection.JavaConverters._

// Small in-memory sample document to run through the CoreNLP "simple" API.
val doc = new Document("Barrack Obama was born in Hawaii. He is our president.")

// Walk every sentence and print each OpenIE triple as
// "(subject, relation, object)", using the lemma gloss of each part.
for {
    sent   <- doc.sentences().asScala
    triple <- sent.openieTriples().asScala
} println(s"(${triple.subjectLemmaGloss()}, ${triple.relationLemmaGloss()}, ${triple.objectLemmaGloss()})")

# Solr + Spark + CoreNLP

In [None]:
import collection.mutable.ListBuffer
import collection.JavaConversions._

import com.lucidworks.spark.rdd.SelectSolrRDD
import edu.stanford.nlp.simple.Document

// host:port string passed as the first constructor argument of SelectSolrRDD
// (presumably the ZooKeeper address of the Solr cluster -- confirm for your setup).
val SOLR: String = "localhost:9990"

// Index to search, the field to match against, and the search term.
val INDEX: String = "core17"
val FIELD: String = "contents"
val QUERY: String = "Obama"

// For every Solr document matching "<FIELD>:<QUERY>", run CoreNLP OpenIE over the
// document's "raw" field and pair the document's "id" value with the list of
// extracted (subject, relation, object) lemma-gloss triples.
val rdd = new SelectSolrRDD(SOLR, INDEX, sc)
    .rows(1000)
    .query(s"$FIELD:$QUERY")
    .map { d =>
        // Explicit .asScala conversions; the implicit JavaConversions are deprecated.
        import scala.collection.JavaConverters._

        // d.get("raw") is null when the field is absent and may not be a String
        // (e.g. a multi-valued field). Previously a null was handed straight to
        // `new Document(...)` and a non-String value failed the cast, either of
        // which aborts the Spark task. Such documents now yield an empty list.
        val triples: List[(String, String, String)] = d.get("raw") match {
            case text: String =>
                (for {
                    sent   <- new Document(text).sentences().asScala
                    triple <- sent.openieTriples().asScala
                } yield (triple.subjectLemmaGloss(), triple.relationLemmaGloss(), triple.objectLemmaGloss())).toList
            case _ => Nil
        }

        (d.get("id"), triples)
    }

// Bring a single (id, triples) pair back to the driver for inspection.
val doc = rdd.take(1)

// take(1) yields an empty array when the query matched nothing, in which case
// calling .head would throw NoSuchElementException; match on headOption instead.
doc.headOption match {
    case Some((id, triples)) =>
        println(id)
        triples.foreach(println)
    case None =>
        println("No documents matched the query.")
}