# Search diplomatic text of HMT *scholia*


### How to use this notebook

1. First, run step 1 (e.g., by selecting the cell labelled **Step 1: load everything** and choosing "Run all below" from the "Cell" menu).  This will be slow; how slow depends on how well your connections to the various internet resources it downloads from happen to be performing at the time.
2. In the cell just below the heading **Step 2: search**, type your search term between the quotation marks, as the argument to the function `search`.

Then run the cell (e.g., by selecting it, and choosing "Run cells" from the "Cell" menu).



# Step 2: search

In [None]:
// Type your search term between the quotation marks, then run this cell.
search("χαρις")

# Step 1: load everything


The most recent release of the archive is always available from [this directory](https://github.com/homermultitext/hmt-archive/tree/master/releases-cex):  you can check there to update the release version in the following cell.

In [1]:
// Identifier of the HMT archive release to load.
// Published releases are listed at
// https://github.com/homermultitext/hmt-archive/tree/master/releases-cex
// -- update the value below if a more recent one is available.
val releaseId: String = "2020i"

[36mreleaseId[39m: [32mString[39m = [32m"2020i"[39m


## Configure Jupyter notebook

In [2]:
// 1. Register the extra maven repository that hosts our libraries, so that
//    the `$ivy` imports in the next cell can be resolved.
// NOTE(review): this is a Bintray URL; Bintray has reportedly been retired,
// so confirm the repository is still reachable.
val myBT = {
  val repoUrl = "https://dl.bintray.com/neelsmith/maven"
  coursierapi.MavenRepository.of(repoUrl)
}
interp.repositories() ++= Seq(myBT)

[36mmyBT[39m: [32mcoursierapi[39m.[32mMavenRepository[39m = MavenRepository(https://dl.bintray.com/neelsmith/maven)

In [3]:
// 2. Make libraries available with `$ivy` imports:
import $ivy.`edu.holycross.shot::scm:7.4.0`
import $ivy.`edu.holycross.shot::ohco2:10.20.4`
import $ivy.`edu.holycross.shot.cite::xcite:4.3.0`
import $ivy.`edu.holycross.shot::dse:7.1.3`
import $ivy.`edu.holycross.shot::greek:9.0.0`

Downloading https://repo1.maven.org/maven2/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom
Downloaded https://repo1.maven.org/maven2/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom
Downloading https://repo1.maven.org/maven2/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom.sha1
Downloaded https://repo1.maven.org/maven2/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom.sha1
Downloading https://dl.bintray.com/neelsmith/maven/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom
Downloaded https://dl.bintray.com/neelsmith/maven/edu/holycross/shot/scm_2.12/7.4.0/scm_2.12-7.4.0.pom
Downloading https://repo1.maven.org/maven2/edu/holycross/shot/cex_2.12/6.5.0/cex_2.12-6.5.0.pom
Downloading https://repo1.maven.org/maven2/edu/holycross/shot/citeobj_2.12/7.5.1/citeobj_2.12-7.5.1.pom
Downloading https://repo1.maven.org/maven2/edu/holycross/shot/cite/xcite_2.12/4.3.0/xcite_2.12-4.3.0.pom
Downloading https://repo1.maven.org/maven2/edu/holycross/shot/citerelations_2.12/2.7.0/citerelat

[32mimport [39m[36m$ivy.$                              
[39m
[32mimport [39m[36m$ivy.$                                  
[39m
[32mimport [39m[36m$ivy.$                                     
[39m
[32mimport [39m[36m$ivy.$                              
[39m
[32mimport [39m[36m$ivy.$                                [39m

## Load HMT data

Data releases of the Homer Multitext project archive are published as CITE libraries, and committed to the `hmt-archive` GitHub repository in CEX format.



In [4]:
import edu.holycross.shot.scm._

// Location of the CEX file for the selected release in the hmt-archive repository.
val url = "https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/" +
  s"hmt-${releaseId}.cex"
// Download the CEX file and build a CiteLibrary from it.
val lib = CiteLibrarySource.fromUrl(url)

Sep 12, 2020 6:28:27 PM wvlet.log.Logger log
INFO: Building text repo from cex ...
Sep 12, 2020 6:28:30 PM wvlet.log.Logger log
INFO: Building collection repo from cex ...
Sep 12, 2020 6:29:09 PM wvlet.log.Logger log
INFO: Building relations from cex ...
Sep 12, 2020 6:29:17 PM wvlet.log.Logger log
INFO: All library components built.


[32mimport [39m[36medu.holycross.shot.scm._

[39m
[36murl[39m: [32mString[39m = [32m"https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/hmt-2020i.cex"[39m
[36mlib[39m: [32mCiteLibrary[39m = [33mCiteLibrary[39m(
  [32m"Homer Multitext project, release 2020h"[39m,
  [33mCite2Urn[39m([32m"urn:cite2:hmt:publications.cex.2020h:all"[39m),
  [32m"Creative Commons Attribution, Non-Commercial 4.0 License <https://creativecommons.org/licenses/by-nc/4.0/>."[39m,
  [33mVector[39m(
    [33mCiteNamespace[39m([32m"hmt"[39m, http://www.homermultitext.org/citens/hmt),
    [33mCiteNamespace[39m([32m"greekLit"[39m, http://chs.harvard.edu/ctsns/greekLit)
  ),
  [33mSome[39m(
    [33mTextRepository[39m(
      [33mCorpus[39m(
        [33mVector[39m(
          [33mCitableNode[39m(
            [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.due_ebbott:10.1"[39m),
            [32m"Alongside the ships the other best men of the Pa

In [5]:
import edu.holycross.shot.ohco2._
import edu.holycross.shot.dse._
import edu.holycross.shot.greek._

// Corpus of all citable text nodes in the library.  Fail with a readable
// message, rather than a bare `None.get`, if the library has no text repository.
val corpus = lib.textRepository.map(_.corpus).getOrElse(
  throw new IllegalStateException("The CITE library loaded from " + url + " has no text repository")
)
// `dsev` is deliberately not built here: the following cell builds it, and
// constructing the DSE vector twice only repeats the same work.

// Keep only the nodes whose URN has the text group tlg5026.
val scholia = corpus.nodes.filter(_.urn.textGroup == "tlg5026")

[32mimport [39m[36medu.holycross.shot.ohco2._
[39m
[32mimport [39m[36medu.holycross.shot.dse._
[39m
[32mimport [39m[36medu.holycross.shot.greek._

[39m
[36mcorpus[39m: [32mCorpus[39m = [33mCorpus[39m(
  [33mVector[39m(
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.due_ebbott:10.1"[39m),
      [32m"Alongside the ships the other best men of the Panachaeans"[39m
    ),
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.due_ebbott:10.2"[39m),
      [32m"slept all night long, subdued by gentle sleep,"[39m
    ),
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.due_ebbott:10.3"[39m),
      [32m"but not the son of Atreus, Agamemnon, the shepherd of the warriors\u2014"[39m
    ),
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.due_ebbott:10.4"[39m),
      [32m"sweet sleep did not hold him, as he pondered man

In [6]:
// Build the vector of DSE records from the loaded CITE library.
val dsev: DseVector = DseVector.fromCiteLibrary(lib)

[36mdsev[39m: [32mDseVector[39m = [33mDseVector[39m(
  [33mVector[39m(
    [33mDsePassage[39m(
      [33mCite2Urn[39m([32m"urn:cite2:hmt:va_dse.v1:il2168"[39m),
      [32m"DSE record for Iliad 4.217"[39m,
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.msA:4.217"[39m),
      [33mCite2Urn[39m(
        [32m"urn:cite2:hmt:vaimg.2017a:VA055VN_0557@0.4865,0.3644,0.3954,0.0391"[39m
      ),
      [33mCite2Urn[39m([32m"urn:cite2:hmt:msA.v1:55v"[39m)
    ),
    [33mDsePassage[39m(
      [33mCite2Urn[39m([32m"urn:cite2:hmt:va_dse.v1:il11826"[39m),
      [32m"DSE record for Iliad 18.529"[39m,
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg0012.tlg001.msA:18.529"[39m),
      [33mCite2Urn[39m(
        [32m"urn:cite2:hmt:vaimg.2017a:VA249RN_0420@0.19,0.6589,0.427,0.0331"[39m
      ),
      [33mCite2Urn[39m([32m"urn:cite2:hmt:msA.v1:249r"[39m)
    ),
    [33mDsePassage[39m(
      [33mCite2Urn[39m([32m"urn:cite2:hmt:va_dse.v1:il6005"[39m)

In [7]:
// CEX file with the XML source of the scholia for the selected release.
val xmlUrl = "https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/" +
  s"hmt-${releaseId}-scholia-xml.cex"
// The file is read without a CEX header.
val scholiaXml = CorpusSource.fromUrl(xmlUrl, cexHeader=false)

import scala.xml._

// For every node, parse its text as XML and pair the node's URN with the
// value of the `n` attribute of each `persName` element found in it.
val pnIndex = scholiaXml.nodes.map { node =>
  val root = XML.loadString(node.text)
  (root \\ "persName").toVector.map { persName =>
    val personId = persName.attributes.asAttrMap.getOrElse("n", "No @n attribute on " + persName.text)
    (node.urn, personId)
  }
}
// Flatten the per-node lists and group the (URN, ID) pairs by ID.
val personMap = pnIndex.flatten.groupBy(_._2)


[36mxmlUrl[39m: [32mString[39m = [32m"https://raw.githubusercontent.com/homermultitext/hmt-archive/master/releases-cex/hmt-2020i-scholia-xml.cex"[39m
[36mscholiaXml[39m: [32mCorpus[39m = [33mCorpus[39m(
  [33mVector[39m(
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg5026.msAint.hmt:1.27.lemma"[39m),
      [32m"<div n=\"lemma\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xd=\"http://www.oxygenxml.com/ns/doc/xsl\"/>"[39m
    ),
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg5026.msAint.hmt:1.27.comment"[39m),
      [32m"<div n=\"comment\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xd=\"http://www.oxygenxml.com/ns/doc/xsl\"> <p>\u03b4\u03b1\u03c3\u03cd\u03bd\u03b5\u03c4\u03b1\u03b9 \u03c4\u1f78 \u03be\u03c5\u03bd\u1f15\u03b7\u03ba\u03b5\u2051</p> </div>"[39m
    ),
    [33mCitableNode[39m(
      [33mCtsUrn[39m([32m"urn:cts:greekLit:tlg5026.msAint.hmt:1.28.lemma"[39m),
      [32m"<div n=\"lemma\" xmlns=\

In [8]:
import edu.holycross.shot.cite._
// Prefix prepended to a person ID to form the key used in `personMap`.
val baseUrn = "urn:cite2:hmt:pers.v1:"

/** Collect the passages where a name occurs.
 *
 * @param pers ID of a person; it is appended to `baseUrn` to form the
 * lookup key in `personMap`.
 * @return URNs of the nodes indexed under that key, or an empty Vector
 * when the key is not in `personMap` (instead of throwing
 * `NoSuchElementException`).
 */
def passageRefs(pers: String): Vector[CtsUrn] = {
  personMap
    .getOrElse(baseUrn + pers, Vector.empty[(CtsUrn, String)])
    .map(_._1)
}

[32mimport [39m[36medu.holycross.shot.cite._
[39m
[36mbaseUrn[39m: [32mString[39m = [32m"urn:cite2:hmt:pers.v1:"[39m
defined [32mfunction[39m [36mpassageRefs[39m

In [9]:

/** Display, as HTML, the passages found by `passageRefs` for a person ID.
 *
 * Each URN returned by `passageRefs` is collapsed by one passage level
 * (`collapsePassageBy(1)`); the text of every corpus node selected by the
 * `>` comparison against that collapsed URN is listed under it
 * (presumably URN containment -- confirm against the xcite library).
 * When `dsev.tbsForText` yields a page, a link to the facsimile of that
 * page is included.
 *
 * @param pers ID of a person, as accepted by `passageRefs`.
 * @return The result of `Html(...)` applied to a heading, a count of
 * passages found, and one list item per passage.
 */
def passages(pers: String) = {
  val pageBaseUrl = "http://www.homermultitext.org/facsimiles/venetus-a/"

  val urns = passageRefs(pers)
  val s = if (urns.size == 1) { "" } else  { "s" }
  val hdr = s"<h2>Search for ID <code>${pers}</code></h2>" +
    s"<p>Found ${urns.size} passage${s} for <code>${pers}</code></p>"

  val results = for ((urn, idx) <- urns.zipWithIndex) yield {
    val scholion = urn.collapsePassageBy(1)
    val text = corpus.nodes
      .filter(nd => scholion > nd.urn)
      .map(n => s"<p>${n.text}</p>")
      .mkString("\n")
    // Position of this hit among all hits, e.g. "3/12".
    val counter = s"<strong>${idx + 1}/${urns.size}</strong>"

    // Bind the page URN directly in the pattern instead of calling `.get`.
    dsev.tbsForText(scholion) match {
      case None =>
        s"<li> ${counter} ${scholion} (Sadly, no page indexed in DSE record)${text}</li>"
      case Some(pgUrn) =>
        val pg = pgUrn.objectComponent
        val url = pageBaseUrl + pg + "/"
        val link = "<a href=\"" + url + "\">facsimile</a>"
        s"<li> ${counter} ${scholion}, page ${pg} (${link})${text}</li>"
    }
  }
  // Items are closed and wrapped in a list element so the markup is well formed.
  Html(hdr + "<ul>\n" + results.mkString("\n") + "\n</ul>")
}

defined [32mfunction[39m [36mpassages[39m

## Search and format results 