From 84ec91e063c03030b20e21bcd940e9c18fb02a3f Mon Sep 17 00:00:00 2001 From: breandan Date: Wed, 10 May 2023 18:33:20 -0400 Subject: [PATCH] first kotlin example --- galoisenne | 2 +- .../repair/KotlinStatementRepair.kt | 8 ++++-- .../repair/PythonStatementRepair.kt | 2 +- .../edu/mcgill/cstk/utils/StringUtils.kt | 28 +++++++++++++++---- 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/galoisenne b/galoisenne index c3043a35..8e95a049 160000 --- a/galoisenne +++ b/galoisenne @@ -1 +1 @@ -Subproject commit c3043a35cc4666147a8f9cc09a2fe72d956549bd +Subproject commit 8e95a049cd4c82b0baac940c019f58d0c30fb365 diff --git a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt index 80ac51b5..df0539b8 100644 --- a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt +++ b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt @@ -10,6 +10,7 @@ import java.io.File import kotlin.time.* /* +./gradlew kotlinStatementRepair 2>&1 | grep -v "Parser error:" | grep -v "LATEX" ./gradlew kotlinStatementRepair 2>&1 | grep -v "Parser error:" */ @@ -41,10 +42,12 @@ fun main() { println("\nTop 100 repairs:\n") it.take(100).forEach { - println("Δ=${levenshtein(prompt, it) - 1} repair: ${prettyDiffNoFrills(prompt, it)}") + println("Δ=${levenshtein(prompt, it)} repair: ${prettyDiffNoFrills(prompt, it)}") + println("(LATEX) Δ=${levenshtein(prompt, it)} repair: ${latexDiffSingleLOC(prompt, it)}") } - println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly ${it.size / (elapsed/1000.0)} repairs per second.") + println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly " + + "${(it.size / (elapsed/1000.0)).toString().take(5)} repairs per second.") println("Original string was ${if (contained) "#${it.indexOf(original)}" else "NOT"} in repair proposals!\n") } } @@ -109,6 +112,7 @@ fun parallelRepairKotlinStatement( val levDiff = levenshtein(prompt, it) - 1 if (levDiff < bestRepair) { println("Δ=$levDiff repair: ${prettyDiffNoFrills(prompt, it)}") + println("(LATEX) Δ=$levDiff repair: ${latexDiffSingleLOC(prompt, it)}") bestRepair = levDiff } }, diff --git a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/PythonStatementRepair.kt b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/PythonStatementRepair.kt index 481429ed..3cf97ad0 100644 --- a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/PythonStatementRepair.kt +++ b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/PythonStatementRepair.kt @@ -103,7 +103,7 @@ fun repairPythonStatement( coarsen = String::coarsenAsPython, uncoarsen = String::uncoarsenAsPython, synthesizer = satRepair(clock), // Enumerative search - diagnostic = { println("Δ=${ levenshtein( prompt, it ) - 1 } repair: ${prettyDiffNoFrills(prompt, it)}") }, + diagnostic = { println("Δ=${levenshtein(prompt, it) - 1} repair: ${prettyDiffNoFrills(prompt, it)}") }, filter = { isValidPython() }, ) diff --git a/src/main/kotlin/edu/mcgill/cstk/utils/StringUtils.kt b/src/main/kotlin/edu/mcgill/cstk/utils/StringUtils.kt index 7224b37c..8ce2c4f7 100644 --- a/src/main/kotlin/edu/mcgill/cstk/utils/StringUtils.kt +++ b/src/main/kotlin/edu/mcgill/cstk/utils/StringUtils.kt @@ -1,11 +1,12 @@ package edu.mcgill.cstk.utils +import ai.hypergraph.kaliningraph.parsing.tokenizeByWhitespace import ai.hypergraph.kaliningraph.types.cc import com.github.difflib.text.* import com.github.difflib.text.DiffRow.Tag.* import edu.mcgill.cstk.disk.* import edu.mcgill.cstk.experiments.probing.embeddingServer -import edu.mcgill.cstk.experiments.repair.isValidKotlin +import edu.mcgill.cstk.experiments.repair.defaultTokenizer import info.debatty.java.stringsimilarity.interfaces.MetricStringDistance import me.vovak.antlr.parser.* import net.sf.extjwnl.data.PointerUtils.* @@ -13,7 +14,6 @@ import net.sf.extjwnl.dictionary.Dictionary import org.antlr.v4.runtime.* import org.apache.commons.lang3.StringUtils import org.jetbrains.kotlin.lexer.* -import org.jetbrains.kotlin.spec.grammar.tools.tokenizeKotlinCode import spoon.Launcher import java.io.File import java.net.* @@ -339,18 +339,18 @@ $summary \subsection{Original} \begin{lstlisting}[language=java] -${diffString(original, synthetic).first} +${latexDiffMultilineStrings(original, synthetic).first} \end{lstlisting} \subsection{Synthetic} \begin{lstlisting}[language=java] -${diffString(original, synthetic).second} +${latexDiffMultilineStrings(original, synthetic).second} \end{lstlisting} \subsection{Variant} \begin{lstlisting}[language=java] -${diffString(original, variant).second} +${latexDiffMultilineStrings(original, variant).second} \end{lstlisting} \subsection{Comment} @@ -366,7 +366,7 @@ $discrepancy %-------- """.trimIndent().also { println(it) } -fun diffString(old: String, new: String) = +fun latexDiffMultilineStrings(old: String, new: String) = DiffRowGenerator.create() .showInlineDiffs(true) .ignoreWhiteSpaces(true) @@ -415,6 +415,22 @@ fun String.visibleLen() = .replace(ANSI_GREEN_BACKGROUND,"") .replace(ANSI_RESET,"").length +fun latexDiffSingleLOC(original: String, new: String) = + DiffRowGenerator.create() + .showInlineDiffs(true) + .inlineDiffByWord(true) + .newTag { l -> if(l) "(*@" else "@*)" } + .build() + .generateDiffRows(original.tokenizeByWhitespace(), new.tokenizeByWhitespace()) + .joinToString(" ") { + when (it.tag) { + INSERT -> it.newLine.replace("", "\\hlgreen{").replace("", "}") + CHANGE -> it.newLine.replace("", "\\hlorange{").replace("", "}") + DELETE -> "\\hlred{${List(it.oldLine.length){ " " }.joinToString("")}}" + else -> it.newLine.replace("", "").replace("", "") + } + }.replace("<", "<").replace(">", ">") + // Just print the new line with ASCII colors but no border fun prettyDiffNoFrills(original: String, new: String) = DiffRowGenerator.create()