 # MVCC cause finder
 *This notebook uses the Kotlin kernel (https://github.com/Kotlin/kotlin-jupyter) to process datasets with
 a large number of read conflicts.*
 Please export the dataframe from the Python notebook as a CSV file so that it can be read here.

In [1]:
%use dataframe
%use coroutines
import java.time.LocalDateTime

// Load every key access into a dataframe. The file is expected to be ordered
// ascending by txid and then by access type.
val inputCsvPath = "data/txsmvccs_pre.csv"
var df = DataFrame.readCSV(inputCsvPath)
// Display the inferred column names and types.
df.schema()

untitled: Int
txid: Int
blockid: Int
status: Double
creator_msp_id: String
validation_code: String
chaincode_proposal_input: Double
chaincode: String
key: String
access_type: String
version_block: Double?
version_tx: Double?
mvcc_cause: Int


In [2]:
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.Dispatchers.Default

// For every READ key access whose transaction failed with MVCC_READ_CONFLICT, search
// backwards for the closest earlier VALID WRITE to the same key (no further back than
// the block the read's version points at) and increment that write's `mvcc_cause`.
// The updated dataframe is written to disk and displayed at the end of the cell.

// Serialises the read-modify-write of the shared `df` variable across coroutines.
val mutex = Mutex()
// Background jobs, collected so the cell can wait for all of them before exporting.
val jobs = mutableListOf<Job>()
val start = System.currentTimeMillis()
// Immutable snapshot used for every read below. The workers only ever rewrite
// `mvcc_cause`, so all columns consulted here are identical in the snapshot and in the
// live `df`; reading the snapshot avoids racing with the reassignment of `df`.
val source = df
val rowCount = source.rowsCount()
// Progress is reported in steps of `percentile` percent.
val percentile = 10
val threshold = rowCount.toDouble()/percentile.toDouble()
runBlocking{
    var k = 1
    // iterate over all rows (key accesses)
    for(i in 0 until rowCount){
        // print iteration progress
        if(i>threshold*k){
            val done = (percentile*k)
            print("$done%...")
            k++
        }
        val row = source[i]
        // only READ accesses of transactions invalidated by an MVCC conflict are of interest
        if(row["validation_code"]=="MVCC_READ_CONFLICT" && row["access_type"]=="READ"){
            // remember the key and the block of the version that was read
            val key = row["key"]
            val versionB = row.version_block
            // this null-check is for kotlin null-safety, a read's version should not be null
            if(versionB!=null){
                // run the backwards search in the background
                val job = CoroutineScope(Default).launch{
                    // Start at the row directly before the read. The bounds check comes
                    // first and the block check is applied to the row that is about to be
                    // inspected, so index -1 is never accessed and no row from a block
                    // older than the read version is examined.
                    var j = i-1
                    while(j>=0 && source.blockid[j]>=versionB){
                        val rowj = source[j]
                        // the closest preceding valid write to the same key is the cause
                        if(rowj.key==key && rowj.validation_code=="VALID" && rowj.access_type=="WRITE"){
                            // only one coroutine may replace `df` at a time, otherwise
                            // concurrent increments would be lost
                            mutex.withLock{
                                df = df.update{ mvcc_cause }.at(j).with { it+1 }
                            }
                            // stop at the first match
                            break
                        }
                        j--
                    }
                }
                // save the job so it can be waited for
                jobs.add(job)
            }
        }
    }
    // The loop has visited every row; print the final step directly, because
    // k*percentile does not reach 100 for very small inputs.
    println("100%")
    print("waiting for background threads...")
    // wait for all background jobs to complete
    jobs.joinAll()
}
val end = System.currentTimeMillis()
val diff = end-start
print("Operation took: $diff ms")
df.writeCSV("data/txsmvccs_post_no_gb.csv")
df

10%...20%...30%...40%...50%...60%...70%...80%...90%...100%
waiting for background threads...Operation took: 469767 ms

In [3]:
// Total the mvcc_cause counters per key into "mvccs_caused", order the keys from the
// most to the fewest conflicts caused, and drop keys that caused none.
var processed = df
    .groupBy { key }
    .sum("mvccs_caused") { mvcc_cause }
    .sortByDesc("mvccs_caused")
    .filter { "mvccs_caused"<Int>() > 0 }
// Persist the ranking as CSV.
processed.writeCSV("data/txsmvccs_post.csv")
print("done!")

done!