1. Adding dependencies:

In [32]:
@file:DependsOn("dev.langchain4j:langchain4j:1.6.0")
@file:DependsOn("dev.langchain4j:langchain4j-open-ai:1.6.0")
@file:DependsOn("dev.langchain4j:langchain4j-http-client-jdk:1.6.0")

2. Setup networking layer

In [33]:
import java.time.Duration
import java.net.http.HttpClient
import dev.langchain4j.model.openai.OpenAiChatModel
import dev.langchain4j.http.client.jdk.JdkHttpClient

val durationLimit = Duration.ofMinutes(20)
val httpClientBuilder = HttpClient.newBuilder()
    .version(HttpClient.Version.HTTP_1_1)

val jdkHttpClientBuilder = JdkHttpClient.builder()
    .httpClientBuilder(httpClientBuilder)

// Connect to LM Studio Server
val modelBuilder = OpenAiChatModel.builder()
    .baseUrl("http://127.0.0.1:1234/v1")
    .httpClientBuilder(jdkHttpClientBuilder)
    .timeout(durationLimit)
    .temperature(0.0)
    .returnThinking(false)


3. Setup AI layer

In [34]:
import dev.langchain4j.service.Result

val modelList = listOf(
    "microsoft/phi-4",
    "openai/gpt-oss-20b",
    "mistralai/devstral-small-2-2512",
    "google/gemma-3-27b",
    "qwen/qwen3-coder-30b",
)

interface CodeGenAiService {
    fun generateCode(prompt: String): Result<String>
}

data class Task(
    val systemPromptPath: String = "",
    val promptPath: String = "",
    val outputDirectory: String = "",
    val extension: String = "",
)

val basePromptPath = "../../resources/prompts"
val baseBuildPath = "../../build"

val taskList = listOf<Task>(
    Task("$basePromptPath/system-prompt-kotlin.md", "$basePromptPath/test1-preview.md", "$baseBuildPath/test1-preview", "kt"),
    Task("$basePromptPath/system-prompt-kotlin.md", "$basePromptPath/test2-unit-test.md", "$baseBuildPath/test2-unit-test", "kt"),
    Task("$basePromptPath/system-prompt-kotlin.md", "$basePromptPath/test3-instrumentation-test.md", "$baseBuildPath/test3-instrumentation-test", "kt"),
    Task("$basePromptPath/system-prompt-diff.md", "$basePromptPath/test4-deprecated-material.md", "$baseBuildPath/test4-deprecated-material", "diff"),
    Task("$basePromptPath/system-prompt-diff.md", "$basePromptPath/test5-deprecated-plugin.md", "$baseBuildPath/test5-deprecated-plugin", "diff"),
)

4. Setup resource monitor and utilities

In [35]:
import java.io.File
import kotlin.concurrent.thread
import kotlin.io.path.Path
import kotlin.io.path.absolutePathString
import kotlin.io.path.createDirectories
import kotlin.io.path.writeText

data class PeakStats<T>(
    val result: T,
    val durationSeconds: UInt,
    val startRamGb: Double,
    val peakRamGb: Double,
    val startVramGb: Double,
    val peakVramGb: Double
)

class ResourceMonitor {

    private val vramFile: File? by lazy {
        File("/sys/class/drm").listFiles()
            ?.filter { it.name.startsWith("card") && !it.name.contains("-") }
            ?.maxByOrNull { card ->
                File(card, "device/mem_info_vram_total").let {
                    if (it.exists()) it.readText().trim().toLongOrNull() ?: 0L else 0L
                }
            }?.let { File(it, "device/mem_info_vram_used") }
    }

    fun <T> measurePeakDelta(block: () -> T): PeakStats<T> {
        val startRam = getUsedRamGb()
        val startVram = getUsedVramGb()

        var peakRam = startRam
        var peakVram = startVram
        var running = true

        val monitorThread = thread {
            while (running) {
                peakRam = max(peakRam, getUsedRamGb())
                peakVram = max(peakVram, getUsedVramGb())
                Thread.sleep(100)
            }
        }

        val startTime = System.currentTimeMillis()
        val result = try {
            block()
        } finally {
            running = false
            monitorThread.join()
        }
        val durationSeconds = ((System.currentTimeMillis() - startTime) / 1000).toUInt()

        return PeakStats(
            result = result,
            durationSeconds = durationSeconds,
            startRamGb = startRam,
            peakRamGb = peakRam,
            startVramGb = startVram,
            peakVramGb = peakVram
        )
    }

    /**
     * Returns actual RAM used by applications (excluding buffers/cache).
     * Uses MemTotal - MemAvailable from /proc/meminfo for accurate measurement.
     */
    private fun getUsedRamGb(): Double {
        val memInfo = File("/proc/meminfo").readLines()
            .mapNotNull { line ->
                val parts = line.split(":", limit = 2)
                if (parts.size == 2) {
                    val key = parts[0].trim()
                    val value = parts[1].trim().split(" ")[0].toLongOrNull()
                    if (value != null) key to value else null
                } else null
            }
            .toMap()

        val total = memInfo["MemTotal"] ?: return 0.0
        val available = memInfo["MemAvailable"] ?: return 0.0

        // Convert from KB to GB
        return (total - available) / (1024.0 * 1024.0)
    }

    /**
     * Returns VRAM used by AMD GPU in GB.
     * Reads from /sys/class/drm/cardX/device/mem_info_vram_used
     */
    private fun getUsedVramGb(): Double {
        val bytes = vramFile?.readText()?.trim()?.toLongOrNull() ?: 0L
        return bytes / (1024.0 * 1024.0 * 1024.0)
    }
}

fun String.saveToFile(folderName: String, outputName: String) {
    val folderPath = Path(folderName)
    folderPath.createDirectories()

    val filePath = Path("$folderPath/$outputName")
    filePath.writeText(this)

    println("Saved to: ${filePath.absolutePathString()}")
}

fun String.sanitizeForFilename(): String = replace("/", "_")


5. Execute and store the results

In [36]:
import dev.langchain4j.service.AiServices

data class ModelExecutionResult(
    val modelName: String,
    val durationSeconds: UInt,
    val inputTokenCount: UInt,
    val outputTokenCount: UInt,
    val totalTokenCount: UInt,
    val startRamGb: Double,
    val peakRamGb: Double,
    val startVramGb: Double,
    val peakVramGb: Double,
    val resultPath: String,
) {
    fun toCsvRow(): String =
        "$modelName,$durationSeconds,$inputTokenCount,$outputTokenCount,$totalTokenCount," +
                "${"%.2f".format(startRamGb)},${"%.2f".format(peakRamGb)}," +
                "${"%.2f".format(startVramGb)},${"%.2f".format(peakVramGb)},$resultPath"

    companion object {
        const val CSV_HEADER = "modelName,durationSeconds,inputTokenCount,outputTokenCount,totalTokenCount,startRamGb,peakRamGb,startVramGb,peakVramGb,resultPath"
    }
}

val monitor = ResourceMonitor()

fun createService(
    modelName: String,
    systemPromptPath: String,
): CodeGenAiService {
    val model = modelBuilder.modelName(modelName).build()
    val systemPrompt = File(systemPromptPath).readText().trimIndent()
    return AiServices.builder(CodeGenAiService::class.java)
        .systemMessageProvider { systemPrompt }
        .chatModel(model)
        .build()
}

taskList.forEach { task ->
    modelList.mapIndexed { index, modelName ->
        val service = createService(modelName, task.systemPromptPath)
        val userPrompt = File(task.promptPath).readText().trimIndent()
        val stats: PeakStats<Result<String>> = monitor.measurePeakDelta { service.generateCode(userPrompt) }

        val path = "result${index + 1}-${modelName.sanitizeForFilename()}.${task.extension}"
        stats.result.content().saveToFile(task.outputDirectory, path)

        ModelExecutionResult(
            modelName = modelName,
            inputTokenCount = stats.result.tokenUsage().inputTokenCount().toUInt(),
            outputTokenCount = stats.result.tokenUsage().outputTokenCount().toUInt(),
            totalTokenCount = stats.result.tokenUsage().totalTokenCount().toUInt(),
            durationSeconds = stats.durationSeconds,
            startRamGb = stats.startRamGb,
            peakRamGb = stats.peakRamGb,
            startVramGb = stats.startVramGb,
            peakVramGb = stats.peakVramGb,
            resultPath = path
        ).also {
            // Allow VRAM to goes back to normal
            Thread.sleep(80_000)
        }
    }.joinToString(
        separator = "\n",
        prefix = "${ModelExecutionResult.CSV_HEADER}\n",
        transform = ModelExecutionResult::toCsvRow
    ).also { it.saveToFile(task.outputDirectory, "execution-results.csv") }
}

Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result1-mellum-4b-sft-kotlin.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result2-microsoft_phi-4.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result3-openai_gpt-oss-20b.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result4-mistralai_devstral-small-2-2512.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result5-google_gemma-3-27b.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result6-qwen_qwen3-coder-30b.kt
Saved to: /home/bazzite/IdeaProjects/notebooks/local-llm-for-android/notebooks/kotlin/../../build/test1-preview/result7-nvidia_nemotro

dev.langchain4j.exception.InvalidRequestException: {"error":"Model unloaded."}