In [None]:
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.runBlocking
import com.google.common.hash.Hashing
import io.github.cdimascio.dotenv.Dotenv
import java.nio.file.Paths

%useLatestDescriptors
%use lets-plot

// Empty class; nothing in the visible cells references it.
// NOTE(review): presumably a placeholder/anchor type for the notebook session — confirm before removing.
class Main

// Load the dotenv configuration and resolve the data directory named by DATA_DIR.
val dotenv = Dotenv.load()

// `get` yields null for a missing key; fail with a descriptive message instead of letting
// Paths.get(null) throw a bare NullPointerException.
val dataDir = (dotenv.get("DATA_DIR") ?: error("DATA_DIR is not set; cannot resolve the data directory"))
    .let { Paths.get(it).toFile() }
    .also { it.mkdirs() } // create the directory (including parents) when it does not exist yet
dataDir

In [None]:
import org.bson.BsonDocument
import org.litote.kmongo.*

// Connection to the local MongoDB instance (host and port are hard-coded here).
val client = KMongo.createClient("mongodb://localhost:42692/")
// Database holding the collections read by the cells below.
val db = client.getDatabase("s5_snyk_libio")
// Raw BSON documents; fields are read by name in later cells ("vuln_gav", "cve_ref", "vuln_classes", ...).
val vulnCollection = db.getCollection<BsonDocument>("mergedVuln")
// Raw BSON documents; fields read later include "dep_gav", "client_gav" and "cve".
val vulnClientCollection = db.getCollection<BsonDocument>("mergedVulnClients")

In [None]:
import com.mongodb.client.*

/**
 * Adapter that exposes an open [MongoCursor] as an [Iterable] while still offering every
 * cursor member (delegated to [cursor]).
 *
 * [iterator] returns the wrapped cursor itself, so the iterable can only be traversed once.
 */
private class MongoCursorIterable<T>(private val cursor: MongoCursor<T>) : Iterable<T>, MongoCursor<T> by cursor {

    override fun iterator(): Iterator<T> {
        return cursor
    }
}

/** Opens a cursor over this query result and wraps it in a [MongoCursorIterable]. */
private fun <T> MongoIterable<T>.kCursor(): MongoCursorIterable<T> {
    val openedCursor = this.iterator()
    return MongoCursorIterable(openedCursor)
}

/**
 * Runs [block] against the query results and closes the underlying cursor afterwards.
 *
 * The iterable handed to [block] is backed by the open cursor, so it must be fully consumed
 * inside [block]; the cursor is closed as soon as [block] returns or throws.
 *
 * @return whatever [block] returns
 */
fun <T, R> MongoIterable<T>.useCursor(block: (Iterable<T>) -> R): R =
    kCursor().use { results -> block(results) }

In [None]:
import org.nield.kotlinstatistics.median as kmdn

// Maps (identifier, vulnerable GAV) to the union of the vulnerable classes of every document
// in `mergedVuln` that shares that key.
val vulnCveGavToClasses = vulnCollection.find().useCursor { docs ->
    docs.map { doc ->
        val gav = doc["vuln_gav"]!!.asString().value
        // the Snyk URL stands in as the identifier when the CVE reference is blank
        val cveRef = doc["cve_ref"]!!.asString().value
        val cve = if (cveRef.isBlank()) doc["snyk_url"]!!.asString().value else cveRef
        val vulnClasses = doc["vuln_classes"]!!.asArray().map { cls -> cls.asString().value }.toSet()

        if (vulnClasses.isEmpty()) throw Exception("no vuln class (record should have been ommitted previously)")

        (cve to gav) to vulnClasses
    }
}.groupBy({ it.first }, { it.second }).mapValues { (_, classSets) -> classSets.flatten().toSet() }

// Summary: number of keys, then min / max / mean / median size of the class sets.
vulnCveGavToClasses.values.map { it.size }.let { sizes ->
    println(vulnCveGavToClasses.size)
    println(sizes.minOrNull())
    println(sizes.maxOrNull())
    println(sizes.average())
    println(sizes.kmdn())
}

In [None]:
// Maps (identifier, dependency GAV, client GAV) to the vulnerable classes recorded for the
// (identifier, dependency GAV) pair in `vulnCveGavToClasses`.
val vulnCveGavPairToClasses = vulnClientCollection.find().useCursor { docs ->
    docs.map { doc ->
        val depGav = doc["dep_gav"]!!.asString().value
        val clientGav = doc["client_gav"]!!.asString().value
        // the Snyk URL stands in as the identifier when the CVE field is blank
        val cveRef = doc["cve"]!!.asString().value
        val cve = if (cveRef.isBlank()) doc["snyk_url"]!!.asString().value else cveRef
        // fails with a NullPointerException when the pair is unknown to `vulnCveGavToClasses`
        val vulnClasses = vulnCveGavToClasses[cve to depGav]!!

        if (vulnClasses.isEmpty()) throw Exception("no vuln class (record should have been ommitted previously)")

        Triple(cve, depGav, clientGav) to vulnClasses
    }
}.groupBy({ it.first }, { it.second }).mapValues { (_, classSets) -> classSets.flatten().toSet() }

// Summary: number of keys, then min / max / mean / median size of the class sets.
vulnCveGavPairToClasses.values.map { it.size }.let { sizes ->
    println(vulnCveGavPairToClasses.size)
    println(sizes.minOrNull())
    println(sizes.maxOrNull())
    println(sizes.average())
    println(sizes.kmdn())
}
// Distinct dependency GAVs that have at least one client.
// NOTE(review): per the original remark, some libraries in the dataset apparently have no client — confirm.
println(vulnCveGavPairToClasses.keys.map { it.second }.toSet().size)

In [None]:
// Number of distinct client GAVs across all (identifier, dependency, client) keys.
println(vulnCveGavPairToClasses.keys.map { it.third }.toSet().size)

In [None]:
import scripts.partitionDepGraph.dagP.loadDagpPartitionInfo

// Partition info per vulnerable GAV. The same GAV can appear under several identifiers, so the
// GAVs are de-duplicated first and each one is loaded exactly once.
val partitionInfo = vulnCveGavToClasses.keys
    .map { (_, gav) -> gav }
    .distinct()
    .associateWith { gav -> loadDagpPartitionInfo(gav) }

partitionInfo.count()

In [None]:
import common.DefaultGraph
import io.github.classgraph.ClassInfoList
import java.io.File
import java.net.URLClassLoader
import org.jgrapht.Graph
import org.jgrapht.graph.DefaultEdge
import org.jgrapht.graph.builder.GraphTypeBuilder

/** Loads the cached dependency graph of the single artifact [gav]. */
fun loadDepGraph(gav: String): DefaultGraph {
    return scripts.exportDepGraphs.loadDepGraphFromCache(gav)
}

/** Loads the cached ClassGraph class list of the single artifact [gav]. */
fun loadClassListInfo(gav: String): ClassInfoList {
    return scripts.exportDepGraphs.loadClassInfoListFromCache(gav)
}

/**
 * Loads the per-vertex info of the single artifact [gav] by delegating to
 * `scripts.exportDepGraphs.loadVertexInfo`; the return type is inferred from that call.
 */
fun loadVertexInfo(gav: String) =
    scripts.exportDepGraphs.loadVertexInfo(gav)

/** Loads the cached dependency graph built for the pair of [depGav] and [clientGav]. */
fun loadDepGraph(depGav: String, clientGav: String): DefaultGraph {
    return scripts.exportPairDepGraphs.loadDepGraphFromCache(depGav, clientGav)
}

/** Loads the cached ClassGraph class list built for the pair of [depGav] and [clientGav]. */
fun loadClassListInfo(depGav: String, clientGav: String): ClassInfoList {
    return scripts.exportPairDepGraphs.loadClassInfoListFromCache(depGav, clientGav)
}

/**
 * Loads the per-vertex info for the pair of [depGav] and [clientGav] by delegating to
 * `scripts.exportPairDepGraphs.loadVertexInfo`; the return type is inferred from that call.
 */
fun loadVertexInfo(depGav: String, clientGav: String) =
    scripts.exportPairDepGraphs.loadVertexInfo(depGav, clientGav)

In [None]:
import com.google.common.collect.Queues

/**
 * Breadth-first traversal of [graph] along outgoing edges, starting at [startNode].
 *
 * Each vertex is enqueued only the first time it is discovered. Because the queue is FIFO,
 * that first discovery already carries the minimum depth, so no vertex is expanded twice and
 * no depth ever needs to be lowered afterwards.
 *
 * @param graph the graph to traverse; [startNode] must be one of its vertices
 * @param startNode the vertex the traversal starts from (depth 0)
 * @return the depth (number of edges on a shortest path from [startNode]) of every reachable
 *         vertex, including [startNode] itself, in discovery order
 */
fun bfsOnDepGraph(graph: DefaultGraph, startNode: String): Map<String, Int> {
    val depthMap = mutableMapOf(startNode to 0)
    val queue = Queues.newArrayDeque<String>()
    queue.add(startNode)
    while (!queue.isEmpty()) {
        // add() appends at the tail and removeFirst() takes from the head: FIFO order
        val node = queue.removeFirst()
        val childDepth = depthMap.getValue(node) + 1
        for (edge in graph.outgoingEdgesOf(node)) {
            val target = graph.getEdgeTarget(edge)
            if (target !in depthMap) {
                depthMap[target] = childDepth
                queue.add(target)
            }
        }
    }
    return depthMap
}

/**
 * Adds [fakeNode] to [graph] and connects it to each vertex in [startingNodes] with an edge
 * pointing from [fakeNode] to that vertex. The graph is modified in place.
 */
fun addFakeSourceToNodes(graph: DefaultGraph, startingNodes: Set<String>, fakeNode: String) {
    graph.addVertex(fakeNode)
    for (start in startingNodes) {
        graph.addEdge(fakeNode, start)
    }
}

/**
 * Builds a new directed, weighted graph whose vertices are the values of [atomToCondensedNode].
 *
 * Every edge of [originalGraph] is mapped to an edge between the condensed nodes of its
 * endpoints. Edges that collapse onto a single condensed node are dropped; when several
 * original edges map to the same condensed edge, that edge's weight is increased by one for
 * each additional original edge.
 *
 * @param originalGraph the graph to condense; it is only read
 * @param atomToCondensedNode condensed node for every vertex that is an endpoint of an edge
 * @return the condensed graph
 * @throws NullPointerException when an edge endpoint has no entry in [atomToCondensedNode]
 *         (the offending edge is printed first)
 */
fun createCondensedGraphFromMapping(
    originalGraph: DefaultGraph,
    atomToCondensedNode: Map<String, String>,
): DefaultGraph {
    val condensed = GraphTypeBuilder
        .directed<String, DefaultEdge>()
        .allowingMultipleEdges(false)
        .allowingSelfLoops(false)
        .edgeClass(DefaultEdge::class.java)
        .weighted(true)
        .buildGraph()

    for (condensedNode in atomToCondensedNode.values.toSet()) {
        condensed.addVertex(condensedNode)
    }

    for (edge in originalGraph.edgeSet()) {
        val atomSource = originalGraph.getEdgeSource(edge)
        val atomTarget = originalGraph.getEdgeTarget(edge)

        val source = atomToCondensedNode[atomSource]
        if (source == null) {
            println("source was null: ${atomSource} -> ${atomTarget}")
            throw NullPointerException()
        }
        val target = atomToCondensedNode[atomTarget]
        if (target == null) {
            println("target was null: ${atomSource} -> ${atomTarget}")
            throw NullPointerException()
        }

        // edges inside one condensed node are not represented
        if (source == target) continue

        if (condensed.containsEdge(source, target)) {
            // repeated condensed edge: count the extra original edge in the weight
            val existing = condensed.getEdge(source, target)
            condensed.setEdgeWeight(existing, condensed.getEdgeWeight(existing) + 1)
        } else {
            condensed.addEdge(source, target)
        }
    }

    return condensed
}

In [None]:
import io.exoquery.fansi.toStr
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking
import org.jetbrains.kotlinx.dataframe.math.median
import org.jgrapht.graph.EdgeReversedGraph
import java.lang.reflect.Modifier

/**
 * Counts collected for one (dependency, client) pair, plus ratios derived from them.
 *
 * The derived ratios are plain floating-point divisions, so a zero denominator yields
 * `NaN` or an infinity rather than an exception.
 */
data class VulnAnalysisInfo(
    val depClassCount: Int,
    val depVulnClassCount: Int,
    val clientClassCount: Int,
    val depPublicClassCount: Int,
    val clientPublicClassCount: Int,
    val clientVulnClassCount: Int,
    val clientVulnPublicClassCount: Int,
    val depVulnPublicClassCount: Int,
    val referenedPublicClassCount: Int,
    val vulnModulesUsedByClientCount: Int,
    val safeModulesUsedByClientCount: Int,
    val depModuleCount: Int,
    val vulnDepPublicClassesUsedByClientCount: Int,
    val metadataVulnModulesUsedByClientCount: Int,
    val metadataSafeModulesUsedByClientCount: Int,
    val clientAndDepPublicVulnClassCountBeforeModularization: Int,
    val clientAndDepPublicVulnClassCountAfterModularization: Int,
    val depPublicVulnClassCountBeforeModularization: Int,
    val depPublicVulnClassCountAfterModularization: Int,
    val depPartCount: Int,
    val depPartsUsedByClientCount: Int,
    val medianSizeOfModulesUsedByClient: Double,
    val meanSizeOfModulesUsedByClient: Double,
    val isClientUsingDep: Boolean,
) {
    // dependency-side ratios
    val depVulnRatio: Double = ratio(depVulnClassCount, depClassCount)
    val depApiSurfaceRatio: Double = ratio(depPublicClassCount, depClassCount)
    val depVulnApiSurfaceRatio: Double = ratio(depVulnPublicClassCount, depPublicClassCount)

    // client-side ratios
    val clientVulnRatio: Double = ratio(clientVulnClassCount, clientClassCount)
    val clientApiSurfaceRatio: Double = ratio(clientPublicClassCount, clientClassCount)
    val clientVulnApiSurfaceRatio: Double = ratio(clientVulnPublicClassCount, clientPublicClassCount)

    /** Floating-point quotient of two counts. */
    private fun ratio(part: Int, whole: Int): Double = part.toDouble() / whole
}

System.gc()

/**
 * Computes one [VulnAnalysisInfo] per `(identifier, depGav, clientGav)` entry of
 * `vulnCveGavPairToClasses`.
 *
 * Entries are handled in consecutive batches of up to 512. Inside a batch each entry runs as
 * its own coroutine on a view of `Dispatchers.IO` limited to 32 parallel workers, and the
 * batch is awaited as a whole before the next one starts. For every entry the function
 *  1. loads the pair graph and the per-vertex modifier info of the pair and of the dependency,
 *  2. propagates vulnerability at class level (reverse reachability from the vulnerable classes),
 *  3. propagates vulnerability at module level on a graph in which dependency classes are
 *     condensed into their partitions while client classes stay individual vertices, and
 *  4. counts public vulnerable classes before/after restricting the dependency to the
 *     vulnerable modules the client uses.
 *
 * An exception in any entry fails the whole call.
 *
 * @return the results in the iteration order of `vulnCveGavPairToClasses`
 */
@kotlinx.coroutines.ExperimentalCoroutinesApi fun _vulnInfo() = Dispatchers.IO.limitedParallelism(32).let { dispatcher ->
    // windowed(512, 512, true): non-overlapping batches of up to 512 entries, partial last batch included
    vulnCveGavPairToClasses.asSequence().windowed(512, 512, true).flatMapIndexed { batch, w ->
        runBlocking {
            System.gc()
            println("processing batch $batch")
            w.map { entry ->
                async(dispatcher) {
                    val depGav = entry.key.second
                    val clientGav = entry.key.third
                    val vulnClasses = entry.value
                    
                    val depGraph = loadDepGraph(depGav, clientGav)
                    
                    val vertexInfo = loadVertexInfo(depGav, clientGav)
                    val depVertexInfo = loadVertexInfo(depGav)
                    // every vertex of the pair that is not a dependency vertex is treated as a client vertex
                    val clientVertexInfo = vertexInfo.filterNot { depVertexInfo.containsKey(it.key) }.toMap()
                    
                    // sanity check if we get similar results with random vuln roots
                    // val vulnClasses = depVertexInfo.keys.shuffled().take(it.value.size).toSet()
                    
                    val depPublicClasses =
                        depGraph.vertexSet().filter { v -> depVertexInfo.containsKey(v) && Modifier.isPublic(vertexInfo[v]!!) }
                    
                    // edges that lead from a client class to a dependency class
                    val publicApiUsages = depGraph.edgeSet().mapNotNull { e ->
                        val s = depGraph.getEdgeSource(e)
                        val t = depGraph.getEdgeTarget(e)
                        if (clientVertexInfo.containsKey(s) && depVertexInfo.containsKey(t)) s to t else null
                    }
                    
                    // if (publicApiUsages.size == 0) {
                    //     // so many cases that the dep is in the pom of the client but actually is not using it at all! 
                    //     return@async Result.failure(Exception("no public api usage!"))
                    // }
                    
                    val depPublicClassesUsedByClient = publicApiUsages.map { it.second }.toSet()
                    
                    // class-level propagation: walk the reversed graph from a temporary source that
                    // points at every vulnerable class, so one BFS reaches every class that can
                    // reach a vulnerable class
                    val revDepGraph = EdgeReversedGraph(depGraph)  // shallow!
                    addFakeSourceToNodes(revDepGraph, vulnClasses, "<fake source>")
                    // the BFS result has unique keys, so it only needs the temporary source dropped
                    // and the extra hop through that source subtracted
                    val distToNearestVuln = bfsOnDepGraph(revDepGraph, "<fake source>")
                        .filterKeys { it != "<fake source>" }
                        .mapValues { (_, depth) -> depth - 1 }
                    revDepGraph.removeVertex("<fake source>")
                    
                    val allVulnClasses = distToNearestVuln.keys.toSet()
                    val allClientVulnClasses = allVulnClasses.intersect(clientVertexInfo.keys)
                    val clientPublicClasses = clientVertexInfo.keys.filter { v ->
                        Modifier.isPublic(clientVertexInfo[v]!!)
                    }
                    val publicClientVulnClasses = allClientVulnClasses.intersect(clientPublicClasses)
                    
                    val depVulnClasses = distToNearestVuln.keys.filter { v ->
                        depVertexInfo.containsKey(v)
                    }.toSet()
                    
                    val depVulnPublicClasses = distToNearestVuln.keys.filter { v ->
                        depVertexInfo.containsKey(v) && Modifier.isPublic(depVertexInfo[v]!!)
                    }.toSet()
                    
                    val vulnDepPublicClassesUsedByClient = depPublicClassesUsedByClient.intersect(depVulnPublicClasses)
                    
                    // val clientVulnRoots = publicApiUsages.filter { depVulnClasses.contains(it.second) }.map { it.first }
                    
                    // partitions that contain at least one class-level vulnerable class, and the rest
                    val depPartitionInfo = loadDagpPartitionInfo(depGav)
                    val depVulnParts = depPartitionInfo.partToVertices.filter { (_, vl) ->
                        vl.intersect(depVulnClasses).isNotEmpty()
                    }
                    
                    val depSafeParts = depPartitionInfo.partToVertices.filterNot { (p, _) ->
                        depVulnParts.keys.contains(p)
                    }
                    
                    val depVulnPartsUsedByClient =
                        // depVulnParts.filter { (p, vl) -> vl.intersect(vulnDepPublicClassesUsedByClient).isNotEmpty() }
                        depVulnParts.filter { (_, vl) -> vl.intersect(depPublicClassesUsedByClient).isNotEmpty() }
                    
                    val depSafePartsUsedByClient =
                        depSafeParts.filter { (_, vl) -> vl.intersect(depPublicClassesUsedByClient).isNotEmpty() }
                    
                    // meta-data/module level propagation: the roots are the partitions that contain
                    // one of the originally reported vulnerable classes
                    val metadataVulnRootParts = depPartitionInfo.partToVertices.filter { (_, vl) ->
                        vl.intersect(vulnClasses).isNotEmpty()
                    }
                    
                    // client classes + dep modules
                    val moduleDepGraph = createCondensedGraphFromMapping(
                        depGraph,
                        depPartitionInfo.vertexToPart.map { (v, p) -> v to p.toString() }.toMap() + clientVertexInfo
                            .map { it.key to it.key }
                            .toMap() // client nodes should not be condensed 
                    )
                    
                    // same reverse-BFS scheme as at class level, on the condensed graph
                    val revModuleDepGraph = EdgeReversedGraph(moduleDepGraph)
                    addFakeSourceToNodes(revModuleDepGraph, metadataVulnRootParts.map { it.key.toString() }.toSet(), "<fake source>")
                    val distToNearestMetadataVuln = bfsOnDepGraph(revModuleDepGraph, "<fake source>")
                        .filterKeys { it != "<fake source>" }
                        .mapValues { (_, depth) -> depth - 1 }
                    revModuleDepGraph.removeVertex("<fake source>")
                    
                    val depPartsUsedByClient = depPartitionInfo.partToVertices.filter { (_, vl) ->
                        vl.intersect(depPublicClassesUsedByClient).isNotEmpty()
                    }.keys
                    
                    val metadataVulnParts: Set<Int> =
                        depPartitionInfo.partToVertices.keys.filter { distToNearestMetadataVuln.containsKey(it.toString()) }.toSet()
                    
                    val depMetadataVulnPartsUsedByClient = metadataVulnParts.filter { p ->
                        depPartitionInfo.partToVertices[p]!!.intersect(depPublicClassesUsedByClient).isNotEmpty()
                    }.toSet()
                    
                    val depMetadataSafePartsUsedByClient = depPartsUsedByClient.filterNot { p ->
                        metadataVulnParts.contains(p)
                    }.toSet()
                    
                    // sanity check: the used partitions must split exactly into vulnerable and safe ones
                    if (depPartsUsedByClient.size != depMetadataVulnPartsUsedByClient.size + depMetadataSafePartsUsedByClient.size) {
                        error("depPartsUsedByClient.size != depMetadataVulnPartsUsedByClient.size + depMetadataSafePartsUsedByClient.size")
                    }
                    
                    if (depPartsUsedByClient.size != depVulnPartsUsedByClient.size + depSafePartsUsedByClient.size) {
                        error("depPartsUsedByClient.size != depVulnPartsUsedByClient.size + depSafePartsUsedByClient.size")
                    }
                    
                    // api surface before and after removing unused modules
                    
                    val publicUsedDepVulnClassesBeforeModularization = allVulnClasses.filter {
                        depPartitionInfo.vertexToPart.containsKey(it)
                    }.filter { Modifier.isPublic(depVertexInfo[it]!!) }.toSet()
                    
                    val publicUsedDepVulnClassesAfterModularization = allVulnClasses.filter {
                        val part = depPartitionInfo.vertexToPart[it]
                        return@filter if (part == null) false else depMetadataVulnPartsUsedByClient.contains(part)
                    }.filter { Modifier.isPublic(depVertexInfo[it]!!) }.toSet()
                    
                    // vuln public classes in the client and referenced modules (pre-modularization)
                    val publicVulnClassesBeforeModularization =
                        publicClientVulnClasses + publicUsedDepVulnClassesBeforeModularization
                    
                    // vuln public classes in the client and referenced modules (post-modularization)
                    val publicVulnClassesAfterModularization = 
                        publicClientVulnClasses + publicUsedDepVulnClassesAfterModularization
                    
                    val res = VulnAnalysisInfo(
                        depClassCount = depVertexInfo.keys.size,
                        depPublicClassCount = depPublicClasses.size,
                        depVulnPublicClassCount = depVulnPublicClasses.size,
                        depVulnClassCount = depVulnClasses.size,
                        clientClassCount = clientVertexInfo.keys.size,
                        clientPublicClassCount = clientPublicClasses.size,
                        clientVulnClassCount = allClientVulnClasses.size,
                        clientVulnPublicClassCount = publicClientVulnClasses.size,
                        referenedPublicClassCount = depPublicClassesUsedByClient.size,
                        vulnModulesUsedByClientCount = depVulnPartsUsedByClient.size,
                        safeModulesUsedByClientCount = depSafePartsUsedByClient.size,
                        depModuleCount = depPartitionInfo.partitionCount,
                        vulnDepPublicClassesUsedByClientCount = vulnDepPublicClassesUsedByClient.size,
                        
                        metadataVulnModulesUsedByClientCount = depMetadataVulnPartsUsedByClient.size,
                        metadataSafeModulesUsedByClientCount = depMetadataSafePartsUsedByClient.size,
                        
                        clientAndDepPublicVulnClassCountBeforeModularization = publicVulnClassesBeforeModularization.size,
                        clientAndDepPublicVulnClassCountAfterModularization = publicVulnClassesAfterModularization.size,
                        
                        depPublicVulnClassCountBeforeModularization = publicUsedDepVulnClassesBeforeModularization.size,
                        depPublicVulnClassCountAfterModularization = publicUsedDepVulnClassesAfterModularization.size,
                        
                        // the client uses the dependency when at least one client class references a dependency class
                        isClientUsingDep = publicApiUsages.isNotEmpty(),
                        
                        depPartCount = depPartitionInfo.partitionCount,
                        depPartsUsedByClientCount = depPartsUsedByClient.size,
                        
                        // both statistics are reported as 0.0 when the client uses no partition at all
                        medianSizeOfModulesUsedByClient = if (depPartsUsedByClient.size == 0) 0.0 else depPartsUsedByClient.map { p -> depPartitionInfo.partToVertices[p]!!.size.toDouble() }.median(),
                        meanSizeOfModulesUsedByClient = if (depPartsUsedByClient.size == 0) 0.0 else depPartsUsedByClient.map { p -> depPartitionInfo.partToVertices[p]!!.size }.average(),
                    )
                    
                    return@async Result.success(res)
                }
            }.toList().awaitAll().toList()
        }.toList().filter { it.isSuccess }.map { it.getOrNull()!! }.toList()
    }.toList()
}

// Run the analysis once; the cells below all read from this result list.
val vulnInfo = _vulnInfo()
vulnInfo.size

In [None]:
/** Formats this value with exactly [digits] decimal places, using the default locale. */
fun Double.format(digits: Int): String {
    return String.format("%.${digits}f", this)
}

/** Formats this value with exactly [digits] decimal places, using the default locale. */
fun Float.format(digits: Int): String {
    return String.format("%.${digits}f", this)
}

In [None]:
// Number, then share, of input entries for which no analysis result exists.
run {
    val total = vulnCveGavPairToClasses.size
    val missing = total - vulnInfo.size
    println(missing)
    println("${missing.toDouble() / total} ")
}

In [None]:
// Median share of a dependency's partitions that the client references.
println(vulnInfo.map { info -> info.depPartsUsedByClientCount.toDouble() / info.depPartCount }.kmdn())

In [None]:
// Count of results with `isClientUsingDep` set, then the share of such results among the
// clients that have no vulnerable class.
run {
    println(vulnInfo.count { it.isClientUsingDep })
    val safeClients = vulnInfo.filter { it.clientVulnClassCount == 0 }
    println(safeClients.count { it.isClientUsingDep }.toDouble() / safeClients.size.toDouble())
}

In [None]:
// Module-level usage counts compared with class-level safety of the client.
run {
    val noVulnModuleUsed = vulnInfo.count { it.vulnModulesUsedByClientCount == 0 }
    val safeClients = vulnInfo.filter { it.clientVulnClassCount == 0 }

    println(noVulnModuleUsed)
    println(safeClients.count { it.vulnModulesUsedByClientCount != 0 }.toDouble())
    println(noVulnModuleUsed.toDouble())
    println(safeClients.size.toDouble())
    // println("${vulnInfo.filter { it.vulnModulesUsedByClientCount == 0 }.size.toDouble() / vulnInfo.filter { it.clientVulnClassCount == 0 }.size.toDouble()}")

    // share of class-level safe clients that also use no metadata-vulnerable module
    println(safeClients.count { it.metadataVulnModulesUsedByClientCount == 0 }.toDouble() / safeClients.size.toDouble())
}

In [None]:
// Results that use no class-level vulnerable module but at least one metadata-vulnerable
// module, relative to the number of clients without any vulnerable class.
println(
    vulnInfo.count { it.vulnModulesUsedByClientCount == 0 && it.metadataVulnModulesUsedByClientCount > 0 }.toDouble() /
        vulnInfo.count { it.clientVulnClassCount == 0 }.toDouble()
)

In [None]:
// Mean relative change (after - before) / before of the public vulnerable class counts;
// results whose quotient is NaN (0 / 0) are left out of the mean.
run {
    fun meanRelativeChange(before: (VulnAnalysisInfo) -> Int, after: (VulnAnalysisInfo) -> Int): Double =
        vulnInfo
            .map { info -> (after(info) - before(info)).toDouble() / before(info) }
            .filterNot { it.isNaN() }
            .average()

    // client + dependency classes
    println(
        meanRelativeChange(
            before = { it.clientAndDepPublicVulnClassCountBeforeModularization },
            after = { it.clientAndDepPublicVulnClassCountAfterModularization },
        )
    )

    // dependency classes only
    println(
        meanRelativeChange(
            before = { it.depPublicVulnClassCountBeforeModularization },
            after = { it.depPublicVulnClassCountAfterModularization },
        )
    )
}

In [None]:
// Each question is followed by the absolute count and the share of all results.
run {
    val total = vulnInfo.size

    println("how many clients are meta-data safe? (they won't get the security alerts at meta-data level")
    val metadataSafe = vulnInfo.count { it.metadataVulnModulesUsedByClientCount == 0 }
    println(metadataSafe)
    println(metadataSafe.toDouble() / total)

    println("how many clients are not using any meta-data safe modules? (they don't get any benefit)")
    val noSafeModuleUsed = vulnInfo.count { it.metadataSafeModulesUsedByClientCount == 0 }
    println(noSafeModuleUsed)
    println(noSafeModuleUsed.toDouble() / total)

    println("how many client use all the modules?")
    val usingAllModules = vulnInfo.count {
        it.metadataVulnModulesUsedByClientCount + it.metadataSafeModulesUsedByClientCount == it.depModuleCount
    }
    println(usingAllModules)
    println(usingAllModules.toDouble() / total)
}

// println("${vulnInfo.filter { it.vulnModulesUsedByClientCount != 0 && it.clientVulnClassCount == 0 }.size.toDouble()}")
// println("${vulnInfo.filter { it.vulnModulesUsedByClientCount == 0 }.size.toDouble()}")
// println("${vulnInfo.filter { it.clientVulnClassCount == 0 }.size.toDouble()}")
// println("${vulnInfo.filter { it.vulnModulesUsedByClientCount == 0 }.size.toDouble() / vulnInfo.filter { it.clientVulnClassCount == 0 }.size.toDouble()}")

In [None]:
println("Out of meta-data vulnerable clients, how many are 'actually vuln'?")
run {
    // count and share of results that use at least one class-level vulnerable module
    val usingVulnModule = vulnInfo.count { it.vulnModulesUsedByClientCount > 0 }
    println(usingVulnModule)
    println(usingVulnModule / vulnInfo.size.toDouble())
    // ans:
    val metadataVuln = vulnInfo.filter { it.metadataVulnModulesUsedByClientCount > 0 }
    println(metadataVuln.count { it.clientVulnClassCount > 0 }.toDouble() / metadataVuln.size)
}


In [None]:
// Results with vulnerable client classes although no class-level vulnerable module is used.
vulnInfo.count { info -> info.vulnModulesUsedByClientCount == 0 && info.clientVulnClassCount != 0 }

In [None]:
// Results whose client has no vulnerable class.
vulnInfo.count { info -> info.clientVulnClassCount == 0 }

In [None]:
// Clients without / with at least one vulnerable class, with their share of all results.
val safeClientCount = vulnInfo.count { it.clientVulnClassCount == 0 }
val vulnClientCount = vulnInfo.count { it.clientVulnClassCount > 0 }
println("safe clients: $safeClientCount ratio: ${(safeClientCount.toDouble() / vulnInfo.size).format(2)}")
println("vuln clients: $vulnClientCount ratio: ${(vulnClientCount.toDouble() / vulnInfo.size).format(2)}")

In [None]:
// Results whose client has at least one vulnerable class and at least one public class.
val nonSafeClientsInfo = vulnInfo.filter { info ->
    info.clientVulnClassCount > 0 && info.clientPublicClassCount > 0
}
nonSafeClientsInfo.size

If (vulnerable) clients remove the vulnerable modules, how many of their references to external libraries are going to be removed?

In [None]:
import org.jetbrains.kotlinx.dataframe.math.median

// For clients with at least one vulnerable class: median and mean of the referenced vulnerable
// public dependency classes, first as absolute counts, then as a share of all referenced
// public dependency classes.
run {
    val vulnClients = vulnInfo.filter { it.clientVulnClassCount > 0 }
    val vulnRefCounts = vulnClients.map { it.vulnDepPublicClassesUsedByClientCount }
    val vulnRefShares = vulnClients.map {
        it.vulnDepPublicClassesUsedByClientCount / it.referenedPublicClassCount.toDouble()
    }

    println(vulnRefCounts.median())
    println(vulnRefCounts.average())
    println(vulnRefShares.median())
    println(vulnRefShares.average())
}

In [None]:
import org.nield.kotlinstatistics.median as kmdn
import org.jetbrains.kotlinx.dataframe.math.median as mdn
// Median share of vulnerable classes per dependency.
println(vulnInfo.map { info -> info.depVulnClassCount.toDouble() / info.depClassCount.toDouble() }.kmdn())

// Median and mean number of partitions per dependency.
run {
    val partCounts = vulnInfo.map { it.depPartCount }
    println(partCounts.mdn())
    println(partCounts.average())
}

In [None]:
// Histogram of the number of vulnerable client classes; clients without any are left out
// and counts above 500 are capped at 500.
val p = run {
    val cappedCounts = vulnInfo
        .filter { it.clientVulnClassCount > 0 }
        .map { it.clientVulnClassCount.coerceAtMost(500) }

    letsPlot(mapOf("clientVulnClassCount" to cappedCounts)) { x = "clientVulnClassCount" } +
        ggsize(1800, 800) +
        geomHistogram(alpha = .3, binWidth = 1, center = .5) +
        xlim(limits = Pair(0, 501))
}

p

In [None]:
// Histogram of the mean size of the modules a client uses, as a percentage of the dependency's
// class count. The mean is 0.0 exactly when the client uses no module, so those results are
// excluded; keeping only them (as an `== 0.0` filter does) would plot nothing but zeros.
val p =
    letsPlot(
        mapOf("avgModuleSizes" to vulnInfo.filter { it.meanSizeOfModulesUsedByClient > 0.0 }.map { it.meanSizeOfModulesUsedByClient / it.depClassCount * 100 })
    ) { x = "avgModuleSizes" } + ggsize(1200, 400) +
    geomHistogram(alpha = .3, binWidth = 1, center = .5) +
    xlim(limits = Pair(0, 100)) + themeGrey() + xlab("")

// also write the plot to disk
ggsave(p, "/tmp/graph.png")

p