diff --git a/build.gradle.kts b/build.gradle.kts index aab1944b..8065d2f9 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -202,6 +202,7 @@ tasks { "localizedSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.LocalizedSyntaxRepairKt", "syntheticSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.SyntheticSyntaxRepairKt", "organicSyntaxRepair" to "edu.mcgill.cstk.experiments.repair.OrganicSyntaxRepairKt", + "kotlinFunctionRepair" to "edu.mcgill.cstk.experiments.repair.KotlinFunctionRepairKt", "kotlinStatementRepair" to "edu.mcgill.cstk.experiments.repair.KotlinStatementRepairKt", "pythonStatementRepair" to "edu.mcgill.cstk.experiments.repair.PythonStatementRepairKt", "extractRepairSamples" to "edu.mcgill.cstk.experiments.repair.ExtractRepairSamplesKt", diff --git a/galoisenne b/galoisenne index 7d859a75..9e8e1a3e 160000 --- a/galoisenne +++ b/galoisenne @@ -1 +1 @@ -Subproject commit 7d859a757419e7b5a13ad15bcd765fac80b23287 +Subproject commit 9e8e1a3e1e0cef4491354e62bf9dcddc4ac69ead diff --git a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinFunctionRepair.kt b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinFunctionRepair.kt new file mode 100644 index 00000000..94e3604a --- /dev/null +++ b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinFunctionRepair.kt @@ -0,0 +1,45 @@ +package edu.mcgill.cstk.experiments.repair + +import javax.tools.* +import org.jetbrains.kotlin.cli.common.arguments.K2JVMCompilerArguments +import org.jetbrains.kotlin.cli.common.messages.* +import org.jetbrains.kotlin.cli.jvm.K2JVMCompiler +import org.jetbrains.kotlin.config.* +import java.io.* +import kotlin.system.measureTimeMillis + +val javaCompiler: JavaCompiler = ToolProvider.getSystemJavaCompiler() + +/* +./gradlew kotlinFunctionRepair + */ + +fun main() { + // Write simple file: + for (i in 0..100) { + // Don't actually create the file on disk, but a virtual file + measureTimeMillis { + println("typealias KWIndex = List>".isCompilableKotlin()) + }.also { println("Millis: $it") } + } +} + +fun String.isCompilableKotlin(): Boolean = K2JVMCompiler().run { + val args = K2JVMCompilerArguments().apply { + val file = createTempFile(suffix = ".kt").apply { writeText(this@isCompilableKotlin) } + freeArgs = listOf(file.absolutePath) + classpath = System.getProperty("java.class.path") + .split(System.getProperty("path.separator")) + .filter { File(it).exists() && File(it).canRead() }.joinToString(":") + noStdlib = true + noReflect = true + reportPerf = true + } +// output.deleteOnExit() + execImpl( + PrintingMessageCollector( + System.out, + MessageRenderer.WITHOUT_PATHS, true), + Services.EMPTY, + args) +}.code == 0 \ No newline at end of file diff --git a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt index 55e89d15..746f3663 100644 --- a/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt +++ b/src/main/kotlin/edu/mcgill/cstk/experiments/repair/KotlinStatementRepair.kt @@ -5,7 +5,6 @@ import ai.hypergraph.kaliningraph.parsing.* import ai.hypergraph.markovian.mcmc.* import bijectiveRepair import edu.mcgill.cstk.utils.* -import org.apache.commons.io.output.NullOutputStream import org.intellij.lang.annotations.Language import org.jetbrains.kotlin.spec.grammar.tools.* import java.io.* @@ -36,8 +35,9 @@ class FilteredOutputStream(private val out: OutputStream) : PrintStream(out) { override fun println(x: String?) { if (x == null) return if (x.toString().let { +// it.startsWith("logging: ") || it.startsWith("Parser error:") || - it.startsWith("Lexer error:") + it.startsWith("Lexer error:") }) return super.println(x) } @@ -80,27 +80,32 @@ fun main() { .forEach { (original, prompt) -> println("Original: $original\nCorrupted: ${prettyDiffNoFrills(original, prompt)}") val startTime = System.currentTimeMillis() - parallelRepairKotlinStatement(prompt, deck, scoreEdit).also { -// repairKotlinStatement(prompt).also { - val contained = original in it - val elapsed = System.currentTimeMillis() - startTime + parallelRepairKotlinStatement(prompt, deck, scoreEdit) +// .filter { it.isCompilableKotlin() } + .also { + // repairKotlinStatement(prompt).also { + val contained = original in it + val elapsed = System.currentTimeMillis() - startTime - val toTake = 20 - println("\nTop $toTake repairs:\n") - it.take(toTake).forEach { - println("Δ=${levenshtein(prompt, it)} repair: ${prettyDiffNoFrills(prompt, it)}") -// println("(LATEX) Δ=${levenshtein(prompt, it)} repair: ${latexDiffSingleLOC(prompt, it)}") - } + val toTake = 20 + println("\nTop $toTake repairs:\n") + it.take(toTake).forEach { + println("Δ=${levenshtein(prompt, it)} repair: ${prettyDiffNoFrills(prompt, it)}") + // println("(LATEX) Δ=${levenshtein(prompt, it)} repair: ${latexDiffSingleLOC(prompt, it)}") + } - println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly " + - "${(it.size / (elapsed/1000.0)).toString().take(5)} repairs per second.") - println("Original string was ${if (contained) "#${it.indexOf(original)}" else "NOT"} in repair proposals!\n") + println("Found ${it.size} valid repairs in ${elapsed}ms, or roughly " + + "${(it.size / (elapsed/1000.0)).toString().take(5)} repairs per second.") + println("Original string was ${if (contained) "#${it.indexOf(original)}" else "NOT"} in repair proposals!\n") + } } - } } +val projectDir = File(File("").absolutePath) +val allProjectsDir = projectDir.parentFile + fun collectMostCommonKeywords() { - File(File("").absolutePath).parentFile.also { println("Working directory: $it") } + projectDir.also { println("Working directory: $it") } .walkTopDown().filter { it.extension == "kt" } .flatMap { it.readLines() } .filter { it.isValidKotlin() } @@ -119,15 +124,17 @@ private fun constructScoringFunction(): (Σᐩ) -> Double = // Get top level directory and all Kotlin files in all subdirectories fun fetchKotlinExamples() = - File(File("").absolutePath).parentFile - .also { println("Working directory: $it") } + allProjectsDir.also { println("Working directory: $it") } .walkTopDown().asSequence() .filter { it.extension == "kt" } .flatMap { it.readLines() } .filter { it.isValidKotlin() } - .map { it.coarsenAsKotlin() } .filter { str -> ignoredKeywords.none { it in str } } +// .filter { str -> str.lexAsKotlin().filter { it.isNotBlank() }.all { it in allNames } } +// .filter { it.isCompilableKotlin() } + .map { it.coarsenAsKotlin() } .map { it.trim() }.distinct() +// .take(10) fun Σᐩ.coarsenAsKotlin(lex: Boolean = true): Σᐩ = (if(lex) lexAsKotlin() else tokenizeByWhitespace()).joinToString(" ") { @@ -231,20 +238,6 @@ private fun bruteForceKotlinRepair(clock: TimeMark): CFG.(List<Σᐩ>) -> Sequen } catch (e: Exception) { e.printStackTrace(); emptySequence()} } -val ignoredKeywords = - setOf("import", "package", "//", "/*", "\"", "\'", "\\`", "data", "_") - -val officialKotlinKeywords = setOf( - "as", "as?", "break", "class", "continue", "do", "else", "false", "for", "fun", "if", "in", - "!in", "interface", "is", "!is", "null", "object", "package", "return", "super", "this", - "throw", "true", "try", "typealias", "val", "var", "when", "while", "by", "catch", "constructor", - "delegate", "dynamic", "field", "file", "finally", "get", "import", "init", "param", "property", - "receiver", "set", "setparam", "where", "actual", "abstract", "annotation", "companion", - "const", "crossinline", "data", "enum", "expect", "external", "final", "infix", "inline", - "inner", "internal", "lateinit", "noinline", "open", "operator", "out", "override", "private", - "protected", "public", "reified", "sealed", "suspend", "tailrec", "vararg", "field", "it" -) - fun Σᐩ.isValidKotlin(): Boolean = try { parseKotlinCode(tokenizeKotlinCode(this)).let { true } } catch (_: Throwable) { false } @@ -1081,4 +1074,36 @@ val commonKotlinKeywords: Set<Σᐩ> = coarsenedKotlinLines val permissiveKotlinCFG = """ START -> START START START -> ${commonKotlinKeywords.joinToString(" | ") { it } } -""".parseCFG().apply { blocked.add("w") } \ No newline at end of file +""".parseCFG().apply { blocked.add("w") } + + +val ignoredKeywords = + setOf("import", "package", "//", "/*", "\"", "\'", "\\`", "data", "_") + +val officialKotlinKeywords = setOf( + "as", "as?", "break", "class", "continue", "do", "else", "false", "for", "fun", "if", "in", + "!in", "interface", "is", "!is", "null", "object", "package", "return", "super", "this", + "throw", "true", "try", "typealias", "val", "var", "when", "while", "by", "catch", "constructor", + "delegate", "dynamic", "field", "file", "finally", "get", "import", "init", "param", "property", + "receiver", "set", "setparam", "where", "actual", "abstract", "annotation", "companion", + "const", "crossinline", "data", "enum", "expect", "external", "final", "infix", "inline", + "inner", "internal", "lateinit", "noinline", "open", "operator", "out", "override", "private", + "protected", "public", "reified", "sealed", "suspend", "tailrec", "vararg", "field", "it" +) + +val allBuiltinTypes = setOf( + "Any", "Boolean", "Byte", "Char", "Double", "Float", "Int", "Long", "Nothing", "Short", "String", + "Unit", "Array", "BooleanArray", "ByteArray", "CharArray", "DoubleArray", "FloatArray", + "IntArray", "LongArray", "ShortArray", "List", "Map", "MutableList", "MutableMap", "MutableSet", + "Set", "Sequence", "StringBuffer", "StringBuilder", "Triple", "Pair", "Exception", "Throwable", + "Regex", "RegexOption", "MatchGroup", "MatchGroupCollection", "MatchResult", "MatchResult.Destructured", +) + +val allBuiltinNames = setOf( + "println", "print", "readLine", "readText", "mutableMapOf", "mapOf", "mutableListOf", "listOf", + "mutableSetOf", "setOf", "arrayOf", "arrayOfNulls", "sequenceOf", "emptySequence", "emptyList", + "listOfNotNull", "emptyMap", "mapOfNotNull", "emptySet", "setOfNotNull", "error", "require", + "requireNotNull", "check", "checkNotNull", "assert", "assertNotNull", "generateSequence", +) + +val allNames = officialKotlinKeywords + allBuiltinNames + allBuiltinTypes + commonKotlinKeywords \ No newline at end of file