Skip to content

Commit

Permalink
prototype semantic Kotlin repair
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed May 25, 2023
1 parent c90187b commit 3b7e5e8
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 36 deletions.
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ tasks {
"localizedSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.LocalizedSyntaxRepairKt",
"syntheticSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.SyntheticSyntaxRepairKt",
"organicSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.OrganicSyntaxRepairKt",
"kotlinFunctionRepair" to "edu.mcgill.cstk.experiments.repair.KotlinFunctionRepairKt",
"kotlinStatementRepair" to "edu.mcgill.cstk.experiments.repair.KotlinStatementRepairKt",
"pythonStatementRepair" to "edu.mcgill.cstk.experiments.repair.PythonStatementRepairKt",
"extractRepairSamples" to "edu.mcgill.cstk.experiments.repair.ExtractRepairSamplesKt",
Expand Down
2 changes: 1 addition & 1 deletion galoisenne
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package edu.mcgill.cstk.experiments.repair

import javax.tools.*
import org.jetbrains.kotlin.cli.common.arguments.K2JVMCompilerArguments
import org.jetbrains.kotlin.cli.common.messages.*
import org.jetbrains.kotlin.cli.jvm.K2JVMCompiler
import org.jetbrains.kotlin.config.*
import java.io.*
import kotlin.system.measureTimeMillis

/**
 * The platform's system Java compiler, resolved on first access.
 *
 * [ToolProvider.getSystemJavaCompiler] returns `null` when the running Java platform ships
 * no compiler. Resolving lazily keeps the rest of this file (including [main]) usable in
 * that situation, and a failed lookup is reported with an explicit message instead of a
 * bare null-check failure during file initialisation.
 */
val javaCompiler: JavaCompiler by lazy {
  ToolProvider.getSystemJavaCompiler()
    ?: error("No system Java compiler is available; run on a JDK rather than a JRE")
}

/*
./gradlew kotlinFunctionRepair
*/

/**
 * Micro-benchmark for [isCompilableKotlin]: runs the check 101 times on the same snippet,
 * printing each verdict followed by the wall-clock time that check took.
 */
fun main() {
  repeat(101) {
    // The snippet applies type arguments to `String`, so it is expected not to compile.
    val elapsed = measureTimeMillis {
      println("typealias KWIndex = List<String<String>>".isCompilableKotlin())
    }
    println("Millis: $elapsed")
  }
}

/**
 * Checks whether this string compiles as a standalone Kotlin source file.
 *
 * The receiver is written to a temporary `.kt` file which is handed to an in-process
 * [K2JVMCompiler], together with every readable entry of this JVM's `java.class.path`.
 * Compiler diagnostics are printed to [System.out]. The temporary source file is removed
 * again before returning, whether or not compilation succeeded or threw.
 *
 * NOTE(review): no output destination is configured, so class files from a successful
 * compilation land wherever the compiler defaults to; confirm that is acceptable.
 *
 * @return `true` iff the compiler's exit code is 0.
 */
fun String.isCompilableKotlin(): Boolean {
  val source = File.createTempFile("tmp", ".kt").apply { writeText(this@isCompilableKotlin) }
  return try {
    val args = K2JVMCompilerArguments().apply {
      freeArgs = listOf(source.absolutePath)
      // Split and re-join with the platform separator; a hard-coded ":" breaks on Windows.
      classpath = System.getProperty("java.class.path")
        .split(File.pathSeparator)
        .filter { File(it).exists() && File(it).canRead() }
        .joinToString(File.pathSeparator)
      noStdlib = true
      noReflect = true
      reportPerf = true
    }
    // output.deleteOnExit()
    K2JVMCompiler().execImpl(
      PrintingMessageCollector(System.out, MessageRenderer.WITHOUT_PATHS, true),
      Services.EMPTY,
      args
    ).code == 0
  } finally {
    // One temp file per call would otherwise pile up in the temp directory.
    source.delete()
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import ai.hypergraph.kaliningraph.parsing.*
import ai.hypergraph.markovian.mcmc.*
import bijectiveRepair
import edu.mcgill.cstk.utils.*
import org.apache.commons.io.output.NullOutputStream
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlin.spec.grammar.tools.*
import java.io.*
Expand Down Expand Up @@ -36,8 +35,9 @@ class FilteredOutputStream(private val out: OutputStream) : PrintStream(out) {
/**
 * Prints [x] unless it is `null` or starts with "Parser error:" or "Lexer error:";
 * such lines are silently dropped.
 */
override fun println(x: String?) {
  if (x == null) return
  // it.startsWith("logging: ") ||
  val suppressed = x.startsWith("Parser error:") || x.startsWith("Lexer error:")
  if (suppressed) return
  super.println(x)
}
Expand Down Expand Up @@ -80,27 +80,32 @@ fun main() {
.forEach { (original, prompt) ->
println("Original: $original\nCorrupted: ${prettyDiffNoFrills(original, prompt)}")
val startTime = System.currentTimeMillis()
parallelRepairKotlinStatement(prompt, deck, scoreEdit).also {
// repairKotlinStatement(prompt).also {
val contained = original in it
val elapsed = System.currentTimeMillis() - startTime
parallelRepairKotlinStatement(prompt, deck, scoreEdit)
// .filter { it.isCompilableKotlin() }
.also {
// repairKotlinStatement(prompt).also {
val contained = original in it
val elapsed = System.currentTimeMillis() - startTime

val toTake = 20
println("\nTop $toTake repairs:\n")
it.take(toTake).forEach {
println("Δ=${levenshtein(prompt, it)} repair: ${prettyDiffNoFrills(prompt, it)}")
// println("(LATEX) Δ=${levenshtein(prompt, it)} repair: ${latexDiffSingleLOC(prompt, it)}")
}
val toTake = 20
println("\nTop $toTake repairs:\n")
it.take(toTake).forEach {
println("Δ=${levenshtein(prompt, it)} repair: ${prettyDiffNoFrills(prompt, it)}")
// println("(LATEX) Δ=${levenshtein(prompt, it)} repair: ${latexDiffSingleLOC(prompt, it)}")
}

println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly " +
"${(it.size / (elapsed/1000.0)).toString().take(5)} repairs per second.")
println("Original string was ${if (contained) "#${it.indexOf(original)}" else "NOT"} in repair proposals!\n")
println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly " +
"${(it.size / (elapsed/1000.0)).toString().take(5)} repairs per second.")
println("Original string was ${if (contained) "#${it.indexOf(original)}" else "NOT"} in repair proposals!\n")
}
}
}
}

/** Absolute form of the empty path, i.e. the directory the JVM resolves relative paths against. */
val projectDir: File = File("").absoluteFile

/** Parent directory of [projectDir]. */
val allProjectsDir = projectDir.parentFile

fun collectMostCommonKeywords() {
File(File("").absolutePath).parentFile.also { println("Working directory: $it") }
projectDir.also { println("Working directory: $it") }
.walkTopDown().filter { it.extension == "kt" }
.flatMap { it.readLines() }
.filter { it.isValidKotlin() }
Expand All @@ -119,15 +124,17 @@ private fun constructScoringFunction(): (Σᐩ) -> Double =

/**
 * Lazily collects example Kotlin lines from every `.kt` file found under [allProjectsDir].
 *
 * Prints the directory being walked, then keeps only lines that pass [isValidKotlin] and
 * contain none of the [ignoredKeywords]. Surviving lines are coarsened with
 * [coarsenAsKotlin], trimmed and de-duplicated.
 */
fun fetchKotlinExamples() =
  allProjectsDir.also { println("Working directory: $it") }
    .walkTopDown().asSequence()
    .filter { it.extension == "kt" }
    .flatMap { it.readLines() }
    .filter { it.isValidKotlin() }
    // Keyword filtering runs on the raw line, before coarsening rewrites its tokens.
    .filter { str -> ignoredKeywords.none { it in str } }
//    .filter { str -> str.lexAsKotlin().filter { it.isNotBlank() }.all { it in allNames } }
//    .filter { it.isCompilableKotlin() }
    .map { it.coarsenAsKotlin() }
    .map { it.trim() }.distinct()
//    .take(10)

fun Σᐩ.coarsenAsKotlin(lex: Boolean = true): Σᐩ =
(if(lex) lexAsKotlin() else tokenizeByWhitespace()).joinToString(" ") {
Expand Down Expand Up @@ -231,20 +238,6 @@ private fun bruteForceKotlinRepair(clock: TimeMark): CFG.(List<Σᐩ>) -> Sequen
} catch (e: Exception) { e.printStackTrace(); emptySequence()}
}

val ignoredKeywords =
setOf("import", "package", "//", "/*", "\"", "\'", "\\`", "data", "_")

val officialKotlinKeywords = setOf(
"as", "as?", "break", "class", "continue", "do", "else", "false", "for", "fun", "if", "in",
"!in", "interface", "is", "!is", "null", "object", "package", "return", "super", "this",
"throw", "true", "try", "typealias", "val", "var", "when", "while", "by", "catch", "constructor",
"delegate", "dynamic", "field", "file", "finally", "get", "import", "init", "param", "property",
"receiver", "set", "setparam", "where", "actual", "abstract", "annotation", "companion",
"const", "crossinline", "data", "enum", "expect", "external", "final", "infix", "inline",
"inner", "internal", "lateinit", "noinline", "open", "operator", "out", "override", "private",
"protected", "public", "reified", "sealed", "suspend", "tailrec", "vararg", "field", "it"
)

/**
 * Reports whether this string tokenizes and parses as Kotlin.
 *
 * Any [Throwable] raised by the tokenizer or parser is treated as "not valid" rather
 * than propagated.
 */
fun Σᐩ.isValidKotlin(): Boolean {
  return try {
    val tokens = tokenizeKotlinCode(this)
    parseKotlinCode(tokens)
    true
  } catch (_: Throwable) {
    false
  }
}
Expand Down Expand Up @@ -1081,4 +1074,36 @@ val commonKotlinKeywords: Set<Σᐩ> = coarsenedKotlinLines
/**
 * A grammar whose START symbol derives any non-empty sequence of [commonKotlinKeywords]:
 * one production concatenates two STARTs, the other lists every keyword as an alternative.
 *
 * NOTE(review): the purpose of adding "w" to `blocked` is not visible here; confirm against
 * the CFG library before changing it.
 */
val permissiveKotlinCFG = """
START -> START START
START -> ${commonKotlinKeywords.joinToString(" | ")}
""".parseCFG().apply { blocked.add("w") }


/**
 * Substrings that disqualify a source line from being used as an example.
 *
 * NOTE(review): the entry `"\\`"` is the two-character string backslash + backtick, so a
 * line containing only a plain backtick is not matched; confirm that is intended.
 */
val ignoredKeywords: Set<String> = setOf(
  "import", "package",
  "//", "/*",
  "\"", "\'", "\\`",
  "data", "_",
)

/**
 * Kotlin keyword tokens, grouped below in the order they are listed.
 * "field" used to appear twice; the set already collapsed the duplicate, so listing it
 * once changes neither the contents nor the iteration order.
 */
val officialKotlinKeywords = setOf(
  // hard keywords
  "as", "as?", "break", "class", "continue", "do", "else", "false", "for", "fun", "if", "in",
  "!in", "interface", "is", "!is", "null", "object", "package", "return", "super", "this",
  "throw", "true", "try", "typealias", "val", "var", "when", "while",
  // soft keywords
  "by", "catch", "constructor", "delegate", "dynamic", "field", "file", "finally", "get",
  "import", "init", "param", "property", "receiver", "set", "setparam", "where",
  // modifier keywords
  "actual", "abstract", "annotation", "companion", "const", "crossinline", "data", "enum",
  "expect", "external", "final", "infix", "inline", "inner", "internal", "lateinit",
  "noinline", "open", "operator", "out", "override", "private", "protected", "public",
  "reified", "sealed", "suspend", "tailrec", "vararg",
  // special identifiers ("field" is already listed above)
  "it",
)

/** Names of commonly used built-in and standard-library types. */
val allBuiltinTypes: Set<String> = setOf(
  // basic types
  "Any", "Boolean", "Byte", "Char", "Double", "Float", "Int", "Long", "Nothing", "Short",
  "String", "Unit",
  // arrays
  "Array", "BooleanArray", "ByteArray", "CharArray", "DoubleArray", "FloatArray", "IntArray",
  "LongArray", "ShortArray",
  // collections and sequences
  "List", "Map", "MutableList", "MutableMap", "MutableSet", "Set", "Sequence",
  // text builders, tuples, throwables
  "StringBuffer", "StringBuilder", "Triple", "Pair", "Exception", "Throwable",
  // regular expressions
  "Regex", "RegexOption", "MatchGroup", "MatchGroupCollection", "MatchResult",
  "MatchResult.Destructured",
)

/**
 * Names of commonly used top-level functions.
 *
 * NOTE(review): `mapOfNotNull` does not appear to be a kotlin-stdlib function and
 * `assertNotNull` comes from kotlin.test; confirm these entries are intended.
 */
val allBuiltinNames: Set<String> = setOf(
  // console and text I/O
  "println", "print", "readLine", "readText",
  // collection factories
  "mutableMapOf", "mapOf", "mutableListOf", "listOf", "mutableSetOf", "setOf", "arrayOf",
  "arrayOfNulls", "sequenceOf", "emptySequence", "emptyList", "listOfNotNull", "emptyMap",
  "mapOfNotNull", "emptySet", "setOfNotNull",
  // preconditions and assertions
  "error", "require", "requireNotNull", "check", "checkNotNull", "assert", "assertNotNull",
  // generators
  "generateSequence",
)

/** Union of every known keyword, built-in function name, built-in type name and common keyword. */
val allNames =
  listOf(officialKotlinKeywords, allBuiltinNames, allBuiltinTypes, commonKotlinKeywords)
    .flatten()
    .toSet()

0 comments on commit 3b7e5e8

Please sign in to comment.