In [1]:
%useLatestDescriptors
%use langchain4j

In [2]:
// API key for the OpenAI calls below: taken from the OPENAI_API_KEY environment variable,
// falling back to a placeholder string when the variable is not set.
val openAiApiKey: String = System.getenv("OPENAI_API_KEY") ?: "YOUR-OPENAI-API-KEY"

/**
 * Splits text into chunks made of whole sentences.
 *
 * The text is cut on every '.' character; each fragment is trimmed and blank fragments are
 * dropped. Fragments are then greedily re-joined with ". " for as long as the joined chunk
 * stays strictly shorter than [maxTokensPerChunk].
 *
 * Notes:
 * - Despite the parameter name, the limit is measured in characters, not tokens.
 * - A single sentence that is itself at or over the limit is emitted as its own chunk, so a
 *   chunk can exceed the limit; sentences are never cut in the middle.
 * - Because the split is on every '.', abbreviations and decimals (e.g. "e.g.", "3.14") are
 *   treated as sentence boundaries, and the '.' after the last sentence of a chunk is dropped.
 *
 * @param text the text to split
 * @param maxTokensPerChunk exclusive upper bound on the character length of a joined chunk
 * @return the chunks in document order; never contains an empty string, and is empty when
 *   [text] holds no non-blank sentence
 */
fun splitIntoChunks(text: String, maxTokensPerChunk: Int = 300): List<String> {
    val separator = ". "

    // Trim each fragment and drop blank ones, so the re-joined text has no doubled spaces
    // and a trailing '.' in the input does not leave a dangling separator.
    val sentences = text.split(".")
        .map { it.trim() }
        .filter { it.isNotEmpty() }

    val chunks = mutableListOf<String>()
    val currentChunk = StringBuilder()

    for (sentence in sentences) {
        // Length the chunk would have with this sentence added; computed arithmetically
        // instead of building a throwaway string on every iteration.
        val potentialLength =
            if (currentChunk.isEmpty()) sentence.length
            else currentChunk.length + separator.length + sentence.length

        // Only flush when there is something to flush: an oversized first sentence must not
        // produce an empty chunk.
        if (currentChunk.isNotEmpty() && potentialLength >= maxTokensPerChunk) {
            chunks.add(currentChunk.toString())
            currentChunk.clear()
        }

        if (currentChunk.isNotEmpty()) {
            currentChunk.append(separator)
        }
        currentChunk.append(sentence)
    }

    // Add the last chunk if it is not empty
    if (currentChunk.isNotEmpty()) {
        chunks.add(currentChunk.toString())
    }
    return chunks
}


In [3]:
import dev.langchain4j.data.message.UserMessage.userMessage
import dev.langchain4j.model.chat.request.ChatRequest
import dev.langchain4j.model.openai.OpenAiChatModel

/**
 * Asks an OpenAI chat model to summarize one chunk of text.
 *
 * Each call builds a new [OpenAiChatModel] (model "gpt-4", temperature 0.7), sends a single
 * user message asking for a summary of [chunk], and returns the text of the AI message in
 * the response.
 *
 * @param chunk the text to summarize
 * @param openAiApiKey the API key handed to the model builder
 * @return the text of the model's reply
 */
fun summarizeChunkWithOpenAI(chunk: String, openAiApiKey: String): String {
    val chatModel = OpenAiChatModel.builder()
        .apiKey(openAiApiKey)
        .modelName("gpt-4")
        .temperature(0.7)
        .build()

    // One user message carrying the instruction followed by the chunk itself.
    val request = userMessage("Please summarize the following text:\n\n$chunk")

    return chatModel.chat(request).aiMessage().text()
}


In [4]:
/**
 * Summarizes a large document by splitting it into chunks, summarizing each chunk,
 * and combining the results.
 *
 * The target number of chunks is interpolated linearly between 1 (`detail = 0.0`) and the
 * number of chunks produced by an initial split at [maxTokensPerChunk] (`detail = 1.0`).
 * The text is then re-split with a chunk size of `text.length / targetChunks`, never below
 * 200, and every non-blank chunk is summarized with one call to [summarizeChunkWithOpenAI]
 * using the top-level `openAiApiKey`.
 *
 * @param text the document to summarize
 * @param detail how fine-grained the summary should be, from 0.0 (fewest chunks) to 1.0
 *   (as many chunks as the initial split produces)
 * @param maxTokensPerChunk chunk size limit passed to [splitIntoChunks] for the initial split
 * @return the chunk summaries in document order, separated by a blank line; an empty string
 *   when there is no non-blank chunk to summarize
 * @throws IllegalArgumentException if [detail] is outside `0.0..1.0`
 */
fun summarizeDocument(
    text: String,
    detail: Double = 0.0,
    maxTokensPerChunk: Int = 500
): String {
    require(detail in 0.0..1.0) { "Detail must be between 0.0 and 1.0" }

    // The initial split only serves to find the upper bound on the number of chunks.
    val maxChunks = splitIntoChunks(text, maxTokensPerChunk).size
    val minChunks = 1
    val targetChunksCount = (minChunks + detail * (maxChunks - minChunks)).toInt().coerceAtLeast(1)

    // Recalculate the chunk size to approximate the target number of chunks.
    val adjustedChunkSize = (text.length / targetChunksCount).coerceAtLeast(200)

    return splitIntoChunks(text, adjustedChunkSize)
        // Skip blank chunks so no request is spent asking the model to summarize nothing.
        .filter { it.isNotBlank() }
        .joinToString(separator = "\n\n") { chunk ->
            summarizeChunkWithOpenAI(chunk, openAiApiKey)
        }
}


In [5]:
import java.io.File

// Load the whole sample document into memory as UTF-8 text.
val artificialIntelligenceWikipediaText: String =
    File("data/artificial_intelligence_wikipedia.txt").readText(charset = Charsets.UTF_8)


In [6]:
// Summarize the sample document at the highest level of detail.
val summaryDetail: String = summarizeDocument(
    text = artificialIntelligenceWikipediaText,
    detail = 1.0,
)

In [7]:
// Print the detail=1.0 summary computed in the previous cell.
println("Summary with detail=1.0:\n$summaryDetail")

Summary with detail=1.0:
Artificial intelligence (AI) is the display of intelligence by machines, especially computer systems. It is a research field within computer science that focuses on creating and studying methods and software that allow machines to understand their environment and use learning and intelligence to perform actions that increase their likelihood of achieving set goals.

. , self-driving cars); and applications in healthcare, defense, and financial services. These applications are often referred to as AI technology.

The text discusses the use of artificial intelligence (AI) in different fields such as autonomous vehicles, creative tools, and strategy games. However, it also points out that many AI applications are not recognized as such. This is because once AI becomes extremely useful and common in a particular application, it is no longer labeled as AI.

Alan Turing was the pioneer of substantial research in what he labeled as machine intelligence. The academic d