Skip to content

Kotlin: Add LighterAST support to numlines extraction #14887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ open class KotlinFileExtractor(
val lighterAstCommentsExtracted = CommentExtractorLighterAST(this, file, tw.fileId).extract()
if (psiCommentsExtracted == lighterAstCommentsExtracted) {
if (psiCommentsExtracted) {
logger.warnElement("Found both PSI and LightAST comments in ${file.path}.", file)
logger.warnElement("Found both PSI and LighterAST comments in ${file.path}.", file)
} else {
logger.warnElement("Comments could not be processed in ${file.path}.", file)
}
Expand Down
130 changes: 10 additions & 120 deletions java/kotlin-extractor/src/main/kotlin/LinesOfCode.kt
Original file line number Diff line number Diff line change
@@ -1,138 +1,28 @@
package com.github.codeql

import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
import org.jetbrains.kotlin.psi.KtCodeFragment
import org.jetbrains.kotlin.psi.KtVisitor

// Records line counts (total, code, comment) for a file or a declaration
// via tw.writeNumlines.
// NOTE(review): this listing appears to interleave removed and added lines
// of a diff; the braces do not balance (e.g. the `if (psi2Ir == null) {`
// guards below are followed by new statements before being closed), so it
// is not compilable as shown. The comments describe each visible fragment.
class LinesOfCode(
val logger: FileLogger,
val tw: FileTrapWriter,
val file: IrFile
) {
// Null when getPsi2Ir() returns null; in that case a warning naming the
// Kotlin compiler version is logged.
val psi2Ir = getPsi2Ir().also {
if (it == null) {
logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
}
}
// Delegates that attempt the extraction from PSI and from LighterAST.
val linesOfCodePSI = LinesOfCodePSI(logger, tw, file)
val linesOfCodeLighterAST = LinesOfCodeLighterAST(logger, tw, file)

// Line counts for the whole file, attached to the file label `id`.
fun linesOfCodeInFile(id: Label<DbFile>) {
if (psi2Ir == null) {
return
val psiExtracted = linesOfCodePSI.linesOfCodeInFile(id)
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInFile(id)
// Warn if both delegates report that they extracted information.
if (psiExtracted && lighterASTExtracted) {
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for ${file.path}.", file)
}
val ktFile = psi2Ir.getKtFile(file)
if (ktFile == null) {
return
}
linesOfCodeInPsi(id, ktFile, file)
}

// Line counts for the single declaration `d`, attached to the label `id`.
fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>) {
if (psi2Ir == null) {
return
}
val p = psi2Ir.findPsiElement(d, file)
if (p == null) {
return
}
linesOfCodeInPsi(id, p, d)
}

// Counts the lines spanned by `root` and classifies each as containing
// code and/or a comment; `e` is only used as the element attached to
// error reports. Called from both the file and declaration paths above.
private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
val document = root.getContainingFile().getViewProvider().getDocument()
if (document == null) {
logger.errorElement("Cannot find document for PSI", e)
tw.writeNumlines(id, 0, 0, 0)
return
val psiExtracted = linesOfCodePSI.linesOfCodeInDeclaration(d, id)
val lighterASTExtracted = linesOfCodeLighterAST.linesOfCodeInDeclaration(d, id)
// Warn if both delegates report that they extracted information.
if (psiExtracted && lighterASTExtracted) {
logger.warnElement("Both PSI and LighterAST number-of-lines-in-file information for declaration.", d)
}

val rootRange = root.getTextRange()
val rootFirstLine = document.getLineNumber(rootRange.getStartOffset())
val rootLastLine = document.getLineNumber(rootRange.getEndOffset())
if (rootLastLine < rootFirstLine) {
logger.errorElement("PSI ends before it starts", e)
tw.writeNumlines(id, 0, 0, 0)
return
}
// One LineContent per line from rootFirstLine to rootLastLine inclusive.
val numLines = 1 + rootLastLine - rootFirstLine
val lineContents = Array(numLines) { LineContent() }

val visitor =
object : KtVisitor<Unit, Unit>() {
override fun visitElement(element: PsiElement) {
val isComment = element is PsiComment
// Comments may include nodes that aren't PsiComments,
// so we don't want to visit them or we'll think they
// are code.
if (!isComment) {
element.acceptChildren(this)
}

if (element is PsiWhiteSpace) {
return
}
// Leaf nodes are assumed to be tokens, and
// therefore we count any lines that they are on.
// For comments, we actually need to look at the
// outermost node, as the leaves of KDocs don't
// necessarily cover all lines.
if (isComment || element.getChildren().size == 0) {
val range = element.getTextRange()
val startOffset = range.getStartOffset()
val endOffset = range.getEndOffset()
// The PSI doesn't seem to have anything like
// the IR's UNDEFINED_OFFSET and SYNTHETIC_OFFSET,
// but < 0 still seem to represent bad/unknown
// locations.
if (startOffset < 0 || endOffset < 0) {
logger.errorElement("PSI has negative offset", e)
return
}
if (startOffset > endOffset) {
return
}
// We might get e.g. an import list for a file
// with no imports, which claims to have start
// and end offsets of 0. Anything of 0 width
// we therefore just skip.
if (startOffset == endOffset) {
return
}
val firstLine = document.getLineNumber(startOffset)
val lastLine = document.getLineNumber(endOffset)
// Elements reaching outside the root's line range are reported
// and ignored, which keeps the array index below in bounds.
if (firstLine < rootFirstLine) {
logger.errorElement("PSI element starts before root", e)
return
} else if (lastLine > rootLastLine) {
logger.errorElement("PSI element ends after root", e)
return
}
for (line in firstLine..lastLine) {
val lineContent = lineContents[line - rootFirstLine]
if (isComment) {
lineContent.containsComment = true
} else {
lineContent.containsCode = true
}
}
}
}
}
root.accept(visitor)
// A line holding both code and a comment is counted in both totals.
val total = lineContents.size
val code = lineContents.count { it.containsCode }
val comment = lineContents.count { it.containsComment }
tw.writeNumlines(id, total, code, comment)
}

// Per-line flags set by the visitor in linesOfCodeInPsi.
private class LineContent {
var containsComment = false
var containsCode = false
}
}
153 changes: 153 additions & 0 deletions java/kotlin-extractor/src/main/kotlin/LinesOfCodePSI.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package com.github.codeql

import com.github.codeql.utils.versions.getPsi2Ir
import com.intellij.psi.PsiComment
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import org.jetbrains.kotlin.config.KotlinCompilerVersion
import org.jetbrains.kotlin.ir.IrElement
import org.jetbrains.kotlin.ir.declarations.*
import org.jetbrains.kotlin.kdoc.psi.api.KDocElement
import org.jetbrains.kotlin.psi.KtCodeFragment
import org.jetbrains.kotlin.psi.KtVisitor

/**
 * Computes line counts (total lines, lines containing code, lines containing
 * comments) from the PSI of [file] and records them with `tw.writeNumlines`.
 *
 * Both public entry points return `true` when PSI was available for the
 * requested element, and `false` when it was not and nothing was written.
 */
class LinesOfCodePSI(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    /** PSI lookup facade; `null` when [getPsi2Ir] provides none, which is logged once here. */
    val psi2Ir = getPsi2Ir().also { facade ->
        if (facade == null) {
            logger.warn("Lines of code will not be populated as Kotlin version is too old (${KotlinCompilerVersion.getVersion()})")
        }
    }

    /**
     * Writes the line counts of the whole file against [id].
     *
     * @return `false` if there is no PSI facade or no `KtFile` for [file];
     *   otherwise `true`, even when the counts could only be written as zeros.
     */
    fun linesOfCodeInFile(id: Label<DbFile>): Boolean {
        val ktFile = psi2Ir?.getKtFile(file) ?: return false
        // Reaching this point means PSI exists for the file, whatever
        // linesOfCodeInPsi manages to get out of it.
        linesOfCodeInPsi(id, ktFile, file)
        return true
    }

    /**
     * Writes the line counts of declaration [d] against [id].
     *
     * @return `false` if there is no PSI facade or no PSI element for [d];
     *   otherwise `true`, even when the counts could only be written as zeros.
     */
    fun linesOfCodeInDeclaration(d: IrDeclaration, id: Label<out DbSourceline>): Boolean {
        val psiElement = psi2Ir?.findPsiElement(d, file) ?: return false
        // Reaching this point means PSI exists for the declaration, whatever
        // linesOfCodeInPsi manages to get out of it.
        linesOfCodeInPsi(id, psiElement, d)
        return true
    }

    /**
     * Classifies every line spanned by [root] and writes the totals for [id].
     * [e] is only used as the element attached to error reports. Whenever the
     * span of [root] cannot be determined, zeros are written instead.
     */
    private fun linesOfCodeInPsi(id: Label<out DbSourceline>, root: PsiElement, e: IrElement) {
        val document = root.containingFile.viewProvider.document
        if (document == null) {
            logger.errorElement("Cannot find document for PSI", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootRange = root.textRange
        val rootStartOffset = rootRange.startOffset
        val rootEndOffset = rootRange.endOffset
        if (rootStartOffset < 0 || rootEndOffset < 0) {
            // Synthetic element, or one with an invalid location.
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        val rootFirstLine = document.getLineNumber(rootStartOffset)
        val rootLastLine = document.getLineNumber(rootEndOffset)
        if (rootLastLine < rootFirstLine) {
            logger.errorElement("PSI ends before it starts", e)
            tw.writeNumlines(id, 0, 0, 0)
            return
        }

        // One entry per line from rootFirstLine to rootLastLine inclusive.
        val numLines = rootLastLine - rootFirstLine + 1
        val lines = Array(numLines) { LineContent() }

        val visitor = object : KtVisitor<Unit, Unit>() {
            override fun visitElement(element: PsiElement) {
                val isComment = element is PsiComment
                // A comment can contain nodes that are not PsiComments;
                // descending into it would make them look like code.
                if (!isComment) {
                    element.acceptChildren(this)
                }
                if (element is PsiWhiteSpace) {
                    return
                }
                // Only leaves (assumed to be tokens) and whole comments mark
                // lines. Comments are taken at their outermost node because
                // the leaves of a KDoc need not cover all of its lines.
                if (!isComment && element.children.isNotEmpty()) {
                    return
                }

                val range = element.textRange
                val startOffset = range.startOffset
                val endOffset = range.endOffset
                when {
                    // PSI seems to have no counterpart of the IR's
                    // UNDEFINED_OFFSET / SYNTHETIC_OFFSET, but negative
                    // values still seem to mean a bad or unknown location.
                    startOffset < 0 || endOffset < 0 -> {
                        logger.errorElement("PSI element has negative offset", e)
                        return
                    }
                    startOffset > endOffset -> {
                        logger.errorElement("PSI element has negative size", e)
                        return
                    }
                    // Zero-width elements (e.g. the import list of a file
                    // without imports, reported at 0..0) are skipped.
                    startOffset == endOffset -> return
                }

                val firstLine = document.getLineNumber(startOffset)
                val lastLine = document.getLineNumber(endOffset)
                if (firstLine < rootFirstLine) {
                    logger.errorElement("PSI element starts before root", e)
                    return
                }
                if (lastLine > rootLastLine) {
                    logger.errorElement("PSI element ends after root", e)
                    return
                }

                for (index in (firstLine - rootFirstLine)..(lastLine - rootFirstLine)) {
                    val content = lines[index]
                    if (isComment) {
                        content.containsComment = true
                    } else {
                        content.containsCode = true
                    }
                }
            }
        }
        root.accept(visitor)

        // A line holding both code and a comment counts towards both totals.
        val code = lines.count { it.containsCode }
        val comment = lines.count { it.containsComment }
        tw.writeNumlines(id, numLines, code, comment)
    }

    /** What has been seen on a single line of the root element. */
    private class LineContent {
        var containsComment = false
        var containsCode = false
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.github.codeql

import org.jetbrains.kotlin.ir.declarations.*

/**
 * LighterAST counterpart of the PSI lines-of-code extractor for old Kotlin
 * versions, where LighterAST is not supported: both entry points write
 * nothing and report `false`.
 */
class LinesOfCodeLighterAST(
    val logger: FileLogger,
    val tw: FileTrapWriter,
    val file: IrFile
) {
    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    fun linesOfCodeInFile(
        @Suppress("UNUSED_PARAMETER") id: Label<DbFile>
    ): Boolean = false

    /** Always `false`: LighterAST is not supported with old Kotlin versions. */
    fun linesOfCodeInDeclaration(
        @Suppress("UNUSED_PARAMETER") d: IrDeclaration,
        @Suppress("UNUSED_PARAMETER") id: Label<out DbSourceline>
    ): Boolean = false
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import org.jetbrains.kotlin.ir.visitors.acceptVoid
import org.jetbrains.kotlin.ir.visitors.IrElementVisitorVoid
import org.jetbrains.kotlin.kdoc.lexer.KDocTokens
import org.jetbrains.kotlin.lexer.KtTokens
import org.jetbrains.kotlin.psi.psiUtil.endOffset
import org.jetbrains.kotlin.psi.psiUtil.startOffset
import org.jetbrains.kotlin.util.getChildren

class CommentExtractorLighterAST(fileExtractor: KotlinFileExtractor, file: IrFile, fileLabel: Label<out DbFile>): CommentExtractor(fileExtractor, file, fileLabel) {
Expand Down
Loading