Skip to content

Commit

Permalink
simplify scoring mechanism
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Jun 25, 2023
1 parent ed7cd1b commit bcd65e0
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion galoisenne
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ val P_seq2parse: MarkovChain<Σᐩ> by lazy {
}.let { println("Trained Markov chain on ${it.value.counter.total.get()} tokens StackOverflow in ${it.duration.inWholeMilliseconds}ms"); it.value }
}

- val P_stackoverflow: MarkovChain<Σᐩ> by lazy {
+ val P_BIFI: MarkovChain<Σᐩ> by lazy {
measureTimedValue {
readBIFIContents().take(100_000).asStream().parallel()
.map { "\n$it\n".lexToStrTypesAsPython().asSequence().toMarkovChain(4) }
Expand Down Expand Up @@ -225,7 +225,7 @@ class MultiRankStats {

fun evaluateTidyparseOnStackoverflow() {
// val errDeck = pythonErrProbs.expandByFrequency(10)
- val topTokens = P_stackoverflow.topK(200).map { it.first } + "ε" // + errDeck
+ val topTokens = P_BIFI.topK(200).map { it.first } + "ε" // + errDeck
println("Top tokens: $topTokens")

val multiRankStats = MultiRankStats()
Expand Down Expand Up @@ -260,7 +260,7 @@ fun evaluateTidyparseOnStackoverflow() {
admissibilityFilter = { map { pythonVocabBindex.getUnsafe(it) ?: it.toInt() }.isValidPython() },
// TODO: incorporate parseable segmentations into scoring mechanism to prioritize chokepoint repairs
// TODO: only score the locations that are actually being modified to avoid redundant work
- scoreEdit = { P_stackoverflow.score(it) }
+ scoreEdit = { P_BIFI.score(it) }
).also { repairs ->
repairs.take(20).apply { println("\nTop $size repairs:\n") }.forEach {
println("Δ=${it.scoreStr()} repair (${it.elapsed()}): ${prettyDiffNoFrills(coarseBrokeStr, it.resToStr())}")
Expand Down

0 comments on commit bcd65e0

Please sign in to comment.