In [14]:
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.runBlocking
import com.google.common.hash.Hashing

In [15]:
import io.github.cdimascio.dotenv.Dotenv
import java.nio.file.Paths

// Load configuration through Dotenv and resolve the data directory named by DATA_DIR.
val dotenv = Dotenv.load()

// Fail fast with a descriptive message when DATA_DIR is absent, instead of letting a null
// reach Paths.get and surface as an anonymous NullPointerException.
val dataDir = requireNotNull(dotenv.get("DATA_DIR")) { "DATA_DIR is not configured (Dotenv returned null)" }
    .let { Paths.get(it).toFile() }
    .also { it.mkdirs() } // create the directory (and parents) if missing; the result is ignored
dataDir

/home/j.zeidler@GDAAG.DE/private/ba3/bachelor-thesis/data

In [16]:
import org.bson.BsonDocument
import org.litote.kmongo.*

// Open a KMongo client against the MongoDB instance listening on localhost:42692.
val client = KMongo.createClient("mongodb://localhost:42692/")
// Database that holds the collection used by the rest of this notebook.
val db = client.getDatabase("s5_snyk_libio")
// Handle on the "libioVuln" collection; documents are read and written as raw BsonDocument values.
val libioVuln = db.getCollection<BsonDocument>("libioVuln")

In [17]:
import com.mongodb.client.*

/**
 * Adapter that exposes a [MongoCursor] as an [Iterable] while forwarding the whole
 * [MongoCursor] API to the wrapped instance through delegation.
 *
 * Every call to [iterator] returns the same wrapped cursor rather than a fresh iterator.
 */
private class MongoCursorIterable<T>(private val cursor: MongoCursor<T>) :
    MongoCursor<T> by cursor,
    Iterable<T> {

    override fun iterator(): Iterator<T> {
        return cursor
    }
}

/** Obtains a cursor from this [MongoIterable] via `iterator()` and wraps it in a [MongoCursorIterable]. */
private fun <T> MongoIterable<T>.kCursor(): MongoCursorIterable<T> {
    val opened = iterator()
    return MongoCursorIterable(opened)
}

/**
 * Runs [block] over the results of this [MongoIterable] and returns whatever [block] returns.
 *
 * The cursor is wrapped by `kCursor()` and handed to `use`, so it is closed once [block]
 * finishes, whether it returns normally or throws.
 */
fun <T, R> MongoIterable<T>.useCursor(block: (Iterable<T>) -> R): R =
    kCursor().use { openCursor -> block(openCursor) }

In [18]:
// Distinct "vuln_gav" strings across every document in the collection.
// A document without that field aborts the cell through the `!!` assertion.
val vulnGavs: Set<String> = libioVuln.find().useCursor { docs ->
    docs.mapTo(linkedSetOf<String>()) { doc -> doc["vuln_gav"]!!.asString().value }
}

vulnGavs.size

3627

In [19]:
// Sub-directory "interim/jars" of the data directory; mkdirs() creates it (and any missing
// parents) when absent, and its boolean result is ignored.
val jarsDir = dataDir.resolve("interim/jars").also { it.mkdirs() }

/**
 * Builds the repository-layout relative path of the jar for a `group:artifact:version` coordinate.
 *
 * Dots in the group become path separators, giving
 * `<group/as/path>/<artifact>/<version>/<artifact>-<version>.jar`.
 * Only the first three colon-separated segments are used; any further segments are ignored.
 *
 * @param packageGAV coordinate string with at least three colon-separated segments
 * @return the relative jar path (no leading slash, no host)
 * @throws IllegalArgumentException if [packageGAV] has fewer than three segments
 */
fun gavToJarUrl(packageGAV: String): String {
    val parts = packageGAV.split(":")
    // Reject malformed coordinates up front with a message that names the offending value.
    require(parts.size >= 3) { "expected group:artifact:version but got '$packageGAV'" }

    val (group, artifact, version) = parts
    return "${group.replace('.', '/')}/$artifact/$version/$artifact-$version.jar"
}

In [20]:
// Map each GAV to its jar under jarsDir, keeping only GAVs whose jar exists as a regular file.
val gavToJar = vulnGavs
    .mapNotNull { gav ->
        jarsDir.resolve(gavToJarUrl(gav))
            .takeIf { candidate -> candidate.isFile() }
            ?.let { jar -> gav to jar }
    }
    .toMap()
gavToJar.size

3626

In [21]:
import java.io.FileNotFoundException
import java.util.zip.ZipFile

/**
 * Tells whether the archive [jar] contains an entry named exactly [classFilePath].
 *
 * The archive is opened only for the duration of the lookup and is always closed afterwards,
 * so repeated calls do not accumulate open file handles.
 *
 * @param jar the jar (zip) file to inspect
 * @param classFilePath entry name inside the archive, e.g. `com/example/Foo.class`
 * @return `true` if the entry is present, `false` otherwise
 * @throws FileNotFoundException if [jar] is not an existing regular file
 */
fun classExistsInJar(jar: File, classFilePath: String): Boolean {
    if (!jar.isFile()) {
        throw FileNotFoundException("jar file not found: ${jar.absolutePath}")
    }

    // `use` closes the ZipFile even when getEntry throws.
    return ZipFile(jar).use { archive -> archive.getEntry(classFilePath) != null }
}

In [22]:
import org.bson.BsonArray

// Keep only documents whose GAV has a jar in gavToJar, and within each of those keep only the
// "vuln_classes" entries whose .class file is present in that jar. Documents left with no
// classes are discarded. Surviving documents are modified in place: their "vuln_classes"
// array is replaced by the reduced one.
val filteredItems = libioVuln.find().useCursor { docs ->
    docs
        // First pass reads every document from the cursor before any jar is opened.
        .filter { doc -> doc["vuln_gav"]!!.asString().value in gavToJar }
        .mapNotNull { doc ->
            val jarFile = gavToJar[doc["vuln_gav"]!!.asString().value]!!
            val classesPresent = doc["vuln_classes"]!!.asArray().values.filter { cls ->
                // Fully qualified class name -> entry path inside the jar.
                val entryPath = cls.asString().value.replace('.', '/') + ".class"
                classExistsInJar(jarFile, entryPath)
            }

            if (classesPresent.isEmpty()) {
                null
            } else {
                doc["vuln_classes"] = BsonArray(classesPresent)
                doc
            }
        }
}

In [23]:
// Number of documents that survived the filtering.
filteredItems.size

6028

In [24]:
// Number of distinct "vuln_gav" values among the surviving documents.
filteredItems
    .distinctBy { doc -> doc["vuln_gav"]!!.asString().value }
    .size

3098

In [25]:
// Replace the contents of the collection with the filtered documents.
// Guard first: insertMany rejects an empty list, and by then the collection would already
// have been dropped, so an empty result must abort before anything is deleted.
check(filteredItems.isNotEmpty()) { "filteredItems is empty; refusing to drop the libioVuln collection" }
libioVuln.drop()
libioVuln.insertMany(filteredItems)

AcknowledgedInsertManyResult{insertedIds={0=BsonObjectId{value=67f919b35878e3c56f2efb85}, 1=BsonObjectId{value=67f919b35878e3c56f2efb86}, 2=BsonObjectId{value=67f919b35878e3c56f2efb8c}, 3=BsonObjectId{value=67f919b35878e3c56f2efb8d}, 4=BsonObjectId{value=67f919b35878e3c56f2efb8e}, 5=BsonObjectId{value=67f919b35878e3c56f2efb8f}, 6=BsonObjectId{value=67f919b35878e3c56f2efb90}, 7=BsonObjectId{value=67f919b35878e3c56f2efb91}, 8=BsonObjectId{value=67f919b35878e3c56f2efb92}, 9=BsonObjectId{value=67f919b35878e3c56f2efb93}, 10=BsonObjectId{value=67f919b35878e3c56f2efb94}, 11=BsonObjectId{value=67f919b35878e3c56f2efb95}, 12=BsonObjectId{value=67f919b35878e3c56f2efb96}, 13=BsonObjectId{value=67f919b35878e3c56f2efb97}, 14=BsonObjectId{value=67f919b35878e3c56f2efba8}, 15=BsonObjectId{value=67f919b35878e3c56f2efbae}, 16=BsonObjectId{value=67f919b35878e3c56f2efbb6}, 17=BsonObjectId{value=67f919b35878e3c56f2efbba}, 18=BsonObjectId{value=67f919b35878e3c56f2efbbb}, 19=BsonObjectId{value=67f919b35878e3c