In [13]:
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.runBlocking
import com.google.common.hash.Hashing

In [14]:
import io.github.cdimascio.dotenv.Dotenv
import java.nio.file.Paths

// Load settings from the .env file and make sure the directory named by
// DATA_DIR exists on disk.
val dotenv = Dotenv.load()
val dataDir = Paths.get(dotenv.get("DATA_DIR")).toFile().apply { mkdirs() }
// Trailing expression so the notebook displays the resolved directory.
dataDir

/home/j.zeidler@GDAAG.DE/private/ba3/bachelor-thesis/data

In [15]:
import org.bson.BsonDocument
import org.litote.kmongo.*

// Client for the MongoDB instance listening on localhost:42692.
val client = KMongo.createClient("mongodb://localhost:42692/")
// Database that holds the collection used throughout this notebook.
val db = client.getDatabase("s5_snyk_libio")
// Documents are read as raw BsonDocument, so fields are accessed by key
// (e.g. "vuln_gav", "vuln_classes") rather than through a mapped data class.
val mvnEcoVuln = db.getCollection<BsonDocument>("mvnEcoVuln")

In [16]:
import com.mongodb.client.*

/**
 * Adapter that exposes a [MongoCursor] as an [Iterable] while still forwarding
 * every [MongoCursor] member to the wrapped [cursor].
 *
 * The iterable is single-pass: [iterator] hands back the wrapped cursor itself,
 * so a second traversal continues from wherever the first one stopped.
 */
private class MongoCursorIterable<T>(private val cursor: MongoCursor<T>) : MongoCursor<T> by cursor, Iterable<T> {

    // Always returns the same underlying cursor; no new cursor is opened.
    override fun iterator(): Iterator<T> = cursor
}

/**
 * Opens a cursor over this [MongoIterable] and wraps it in a
 * [MongoCursorIterable]. The caller is responsible for closing the result.
 */
private fun <T> MongoIterable<T>.kCursor(): MongoCursorIterable<T> {
    val openedCursor = iterator()
    return MongoCursorIterable(openedCursor)
}

/**
 * Runs [block] against the results of this query and closes the underlying
 * cursor afterwards, whether [block] returns normally or throws.
 *
 * The [Iterable] passed to [block] can be traversed only once and must not be
 * used after [block] returns.
 *
 * @return whatever [block] returns
 */
fun <T, R> MongoIterable<T>.useCursor(block: (Iterable<T>) -> R): R =
    kCursor().use { openCursor -> block(openCursor) }

In [17]:
// Distinct "vuln_gav" strings across every document of the collection.
val vulnGavs = mvnEcoVuln.find().useCursor { docs ->
    docs.map { doc -> doc["vuln_gav"]!!.asString().value }.toSet()
}

vulnGavs.size

613

In [18]:
// Directory below the data dir where jars are looked up; created if absent.
val jarsDir = dataDir.resolve("interim/jars").apply { mkdirs() }

/**
 * Converts colon-separated Maven coordinates into the repository-relative path
 * of the artifact's jar.
 *
 * Example: `"org.example:lib:1.2"` becomes `"org/example/lib/1.2/lib-1.2.jar"`.
 *
 * Only the first three components are used (as group id, artifact id and
 * version); any further components are ignored.
 *
 * @param packageGAV coordinates in `groupId:artifactId:version` form
 * @return relative path `group/as/dirs/artifactId/version/artifactId-version.jar`
 * @throws IllegalArgumentException if [packageGAV] has fewer than three
 *   colon-separated components
 */
fun gavToJarUrl(packageGAV: String): String {
    val parts = packageGAV.split(":")
    // Fail with a readable message instead of an index error on malformed input.
    require(parts.size >= 3) { "expected groupId:artifactId:version but got: $packageGAV" }

    val (groupId, artifactId, version) = parts
    return "${groupId.replace('.', '/')}/$artifactId/$version/$artifactId-$version.jar"
}

In [19]:
// Map each vulnerable GAV to its jar below jarsDir, keeping only the GAVs
// whose jar is actually present as a regular file.
val gavToJar = vulnGavs
    .associateWith { gav -> jarsDir.resolve(gavToJarUrl(gav)) }
    .filterValues { jarFile -> jarFile.isFile() }
gavToJar.count()  // number of GAVs with a jar available locally

611

In [20]:
import java.io.FileNotFoundException
import java.util.zip.ZipFile

/**
 * Checks whether [jar] contains an entry named exactly [classFilePath].
 *
 * @param jar the jar (zip) archive to inspect
 * @param classFilePath entry name inside the archive, e.g. `com/example/Foo.class`
 * @return `true` if the archive has such an entry, `false` otherwise
 * @throws FileNotFoundException if [jar] is not an existing regular file
 */
fun classExistsInJar(jar: File, classFilePath: String): Boolean {
    if (!jar.isFile()) {
        throw FileNotFoundException("jar file not found: ${jar.absolutePath}")
    }

    // `use` closes the archive once the lookup is done; without it every call
    // would leave an open file handle behind.
    return ZipFile(jar).use { archive -> archive.getEntry(classFilePath) != null }
}

In [21]:
import org.bson.BsonArray

// Keep only the documents whose jar is available locally, and trim each
// document's "vuln_classes" down to the classes that really exist in that jar.
val filteredItems = mvnEcoVuln.find().useCursor { docs ->
    docs.mapNotNull { doc ->
        // Documents whose GAV has no local jar are skipped.
        val jar = gavToJar[doc["vuln_gav"]!!.asString().value] ?: return@mapNotNull null

        val classesPresent = doc["vuln_classes"]!!.asArray().values.filter { className ->
            // Fully qualified class name -> entry path inside the jar.
            val entryPath = className.asString().value.replace('.', '/') + ".class"
            classExistsInJar(jar, entryPath)
        }

        // Documents with none of their classes present in the jar are dropped.
        if (classesPresent.isEmpty()) {
            null
        } else {
            // The document is modified in place before being kept.
            doc["vuln_classes"] = BsonArray(classesPresent)
            doc
        }
    }
}

In [22]:
// Number of documents that survived the filtering.
filteredItems.size

829

In [23]:
// Number of distinct GAVs among the surviving documents.
filteredItems.map { doc -> doc["vuln_gav"]!!.asString().value }.distinct().size

611

In [24]:
// Replace the collection's contents with the filtered documents.
// WARNING: destructive. Guard before dropping: the driver rejects an empty
// document list, so without this check an empty result would wipe the
// collection and then fail on the insert.
check(filteredItems.isNotEmpty()) { "filteredItems is empty; refusing to drop mvnEcoVuln" }
mvnEcoVuln.drop()
mvnEcoVuln.insertMany(filteredItems)

AcknowledgedInsertManyResult{insertedIds={0=BsonObjectId{value=67fc3c9a6baaaadb2f3e0852}, 1=BsonObjectId{value=67fc3c9a6baaaadb2f3e0853}, 2=BsonObjectId{value=67fc3c9a6baaaadb2f3e0854}, 3=BsonObjectId{value=67fc3c9a6baaaadb2f3e0855}, 4=BsonObjectId{value=67fc3c9a6baaaadb2f3e0856}, 5=BsonObjectId{value=67fc3c9a6baaaadb2f3e0857}, 6=BsonObjectId{value=67fc3c9a6baaaadb2f3e0858}, 7=BsonObjectId{value=67fc3c9a6baaaadb2f3e0859}, 8=BsonObjectId{value=67fc3c9a6baaaadb2f3e085a}, 9=BsonObjectId{value=67fc3c9a6baaaadb2f3e085c}, 10=BsonObjectId{value=67fc3c9a6baaaadb2f3e085d}, 11=BsonObjectId{value=67fc3c9a6baaaadb2f3e085e}, 12=BsonObjectId{value=67fc3c9a6baaaadb2f3e085f}, 13=BsonObjectId{value=67fc3c9a6baaaadb2f3e0860}, 14=BsonObjectId{value=67fc3c9a6baaaadb2f3e0861}, 15=BsonObjectId{value=67fc3c9a6baaaadb2f3e0862}, 16=BsonObjectId{value=67fc3c9a6baaaadb2f3e0863}, 17=BsonObjectId{value=67fc3c9a6baaaadb2f3e0864}, 18=BsonObjectId{value=67fc3c9a6baaaadb2f3e0865}, 19=BsonObjectId{value=67fc3c9a6baaaad