# Search personal name IDs in HMT *scholia*


### How to use this notebook

1. First, run step 1 (e.g., by selecting the cell labelled **Step 1: load everything** and choosing "Run all below" from the "Cell" menu). This will be slow: how slow depends on how well your connections to the various internet resources it downloads from are performing at that moment.
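2. In the code cell just below the heading **Step 2: search**, replace the text between the quotation marks with the ID of the person you want to search for; this is the argument to the function `passages` (e.g., `passages("pers1")`).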

Then run the cell (e.g., by selecting it, and choosing "Run cells" from the "Cell" menu).



# Step 2: search

In [None]:
// Replace "pers1" with the ID of the person to search for.
// `passageRefs` prepends the `urn:cite2:hmt:pers.v1:` base to this value.
passages("pers1")

# Step 1: load everything


The most recent release of the archive is always available from [this directory](https://github.com/homermultitext/hmt-archive/tree/master/releases-cex):  you can check there to update the release version in the following cell.

In [None]:
// Check for most recent release at
// https://github.com/homermultitext/hmt-archive/tree/master/releases-cex
// and change this value if needed.
// The value is interpolated into the CEX download URLs built in later cells.
val releaseId = "2020i"


## Configure Jupyter notebook

In [None]:
// 1. Add maven repository where we can find our libraries.
// NOTE(review): JFrog shut down Bintray in 2021, so this URL may no longer
// resolve. Confirm the current repository location before relying on it.
val myBT = coursierapi.MavenRepository.of("https://dl.bintray.com/neelsmith/maven")
// Append the repository to the interpreter's list of repositories.
interp.repositories() ++= Seq(myBT)

In [None]:
// 2. Make libraries available with `$ivy` imports:
import $ivy.`edu.holycross.shot::scm:7.4.0`
import $ivy.`edu.holycross.shot::ohco2:10.20.4`
import $ivy.`edu.holycross.shot.cite::xcite:4.3.0`
import $ivy.`edu.holycross.shot::dse:7.1.3`
import $ivy.`edu.holycross.shot::greek:9.0.0`

## Load HMT data

Data releases of the Homer Multitext project archive are published as CITE libraries, and committed to the `hmt-archive` GitHub repository in CEX format.



In [None]:
import edu.holycross.shot.scm._

// URL of the CEX file for the release selected in `releaseId`.
val url = s"https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/hmt-${releaseId}.cex"
// Build the CITE library from that URL (this is a network download, so it can be slow).
val lib = CiteLibrarySource.fromUrl(url)

In [None]:
import edu.holycross.shot.ohco2._
import edu.holycross.shot.dse._
import edu.holycross.shot.greek._

// Text corpus of the loaded release. If the library has no text repository,
// fail with an explicit message instead of an opaque `None.get` error.
val corpus = lib.textRepository.getOrElse(
  throw new NoSuchElementException("The loaded CITE library contains no text repository")
).corpus
// `dsev` is built once, in the cell that follows this one; constructing it
// here as well only repeated the same (slow) work.

// Nodes of the corpus whose URN belongs to text group tlg5026 (the scholia).
val scholia = corpus.nodes.filter(_.urn.textGroup == "tlg5026")

In [None]:
// DSE records built from the loaded library. `passages` queries this
// (via `tbsForText`) to find the page on which each scholion appears.
val dsev = DseVector.fromCiteLibrary(lib)

## Load xml of scholia and index persons to scholia

In [None]:
// URL of the XML edition of the scholia for the release selected in `releaseId`.
val xmlUrl = s"https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/hmt-${releaseId}-scholia-xml.cex"
// Load the XML edition as a corpus; each node's text is parsed as XML below.
// NOTE(review): `cexHeader=false` presumably means the file has no CEX header block; verify.
val scholiaXml = CorpusSource.fromUrl(xmlUrl, cexHeader=false)

import scala.xml._

// For every node of the XML edition, pair the node's URN with the value of
// the @n attribute of each `persName` element found in the node's text.
// The result holds one Vector of (URN, person ID) pairs per node.
val pnIndex = scholiaXml.nodes.map { node =>
  val persNames = XML.loadString(node.text) \\ "persName"
  persNames.toVector.map { persName =>
    // A `persName` without @n is indexed under a descriptive placeholder
    // so that it can still be found and corrected.
    val personId = persName.attributes.asAttrMap
      .getOrElse("n", "No @n attribute on " + persName.text)
    (node.urn, personId)
  }
}
// Map from person ID to all (URN, person ID) pairs that mention that person.
val personMap = pnIndex.flatten.groupBy { case (_, personId) => personId }


In [None]:
import edu.holycross.shot.cite._
// Prefix shared by the personal-name URNs; `passageRefs` prepends it to a bare ID.
val baseUrn = "urn:cite2:hmt:pers.v1:"

/** Collect the passages where a personal name occurs.
  *
  * @param pers ID of the person without the `baseUrn` prefix, e.g. "pers1".
  * @return URNs of all indexed passages tagged with that person. The result is
  *         empty when the ID does not occur in `personMap`, so an unknown or
  *         mistyped ID no longer aborts with `NoSuchElementException`.
  */
def passageRefs(pers: String): Vector[CtsUrn] = {
  personMap
    .get(baseUrn + pers)
    .map(hits => hits.map(_._1))
    .getOrElse(Vector.empty[CtsUrn])
}

## Search and format passages

In [None]:

/** Search for a personal-name ID and format the matching scholia as HTML.
  *
  * For each passage returned by `passageRefs`, the enclosing scholion is
  * looked up in `corpus`, its text is quoted, and, when `dsev` has a page
  * for it, a link to the Venetus A facsimile page is added.
  *
  * @param pers ID of the person without the URN prefix, e.g. "pers1".
  * @return an `Html` display value: a header with the number of hits,
  *         followed by an unordered list with one item per hit.
  */
def passages(pers: String) = {
  val pageBaseUrl = "http://www.homermultitext.org/facsimiles/venetus-a/"

  val urns = passageRefs(pers)
  // Plural suffix for the hit count ("1 passage" / "2 passages").
  val s = if (urns.size == 1) "" else "s"
  val hdr = s"<h2>Search for ID <code>${pers}</code></h2>" +
    s"<p>Found ${urns.size} passage${s} for <code>${pers}</code></p>"

  val results = for ((urn, idx) <- urns.zipWithIndex) yield {
    // Collapse the passage reference by one level to address the whole scholion.
    val scholion = urn.collapsePassageBy(1)
    // All corpus nodes matched by the scholion URN via the URN `>` comparison.
    val nodes = corpus.nodes.filter(n => scholion > n.urn)
    val text = nodes.map(n => s"<blockquote>${n.text}</blockquote>").mkString("\n")
    val counter = s"<strong>${idx + 1}/${urns.size}</strong>"

    // Match on the Option directly instead of testing for None and calling `.get`.
    dsev.tbsForText(scholion) match {
      case Some(surface) =>
        val pg = surface.objectComponent
        val url = pageBaseUrl + pg + "/"
        val link = "<a href=\"" + url + "\">facsimile</a>"
        s"<li> ${counter} ${scholion}, page ${pg} (${link})" + text + "</li>"

      case None =>
        s"<li> ${counter} ${scholion} (Sadly, no page indexed in DSE record)" + text + "</li>"
    }
  }
  // <li> elements need a parent list element to be valid HTML, so wrap them in <ul>.
  Html(hdr + results.mkString("<ul>\n", "\n", "\n</ul>"))
}