 # MVCC cause finder
 *This notebook uses the Kotlin kernel (https://github.com/Kotlin/kotlin-jupyter) to process datasets with 
 a large number of read conflicts.*
 Please export the dataframe from the Python notebook as a CSV file first (this notebook reads `data/postfault0.csv`).

In [1]:
%use dataframe
%use coroutines
import java.time.LocalDateTime

// Load the exported transaction read/write data from CSV.
// `df` is declared as `var` because a later cell reassigns it with updated
// mvcc_cause values.
var df = DataFrame.readCSV("data/postfault0.csv")
// Show the inferred column names and types as the cell result.
df.schema()

untitled: Int
txid: Int
blockid: Int
status: Double
creator_msp_id: String
validation_code: String
chaincode_proposal_input: Double
chaincode: String
key: String
access_type: String
version_block: Double?
version_tx: Double?
mvcc_cause: Int


In [2]:
import kotlinx.coroutines.sync.Mutex
import kotlinx.coroutines.sync.withLock
import kotlinx.coroutines.Dispatchers.Default

// For every READ row that failed with MVCC_READ_CONFLICT, walk backwards through
// the table looking for the nearest VALID WRITE to the same key and credit that
// row as the cause by incrementing its mvcc_cause counter.
//
// Hits are collected in a plain IntArray (guarded by `mutex`) and written back
// to `df` once at the end, instead of rebuilding the whole DataFrame for every
// single hit.
//
// NOTE: `df` is reassigned with the added counts, so re-running this cell without
// re-running the load cell adds the counts a second time.
val mutex = Mutex()
val jobs = mutableListOf<Job>()
val start = System.currentTimeMillis()
val rowCount = df.rowsCount()
// Progress is reported every `percentile` percent of the rows.
val percentile = 10
val threshold = rowCount.toDouble() * percentile.toDouble() / 100.0

// Snapshot the columns the scan reads. They are never modified by the scan, so
// the worker coroutines can read them without touching the mutable `df` variable.
val blockIds = df.blockid
val keys = df.key
val validationCodes = df.validation_code
val accessTypes = df.access_type
val versionBlocks = df.version_block

// causeCounts[r] = number of read conflicts attributed to row r during this run.
val causeCounts = IntArray(rowCount)

runBlocking {
    var idk = 1
    for (i in 0 until rowCount) {
        if (i > threshold * idk) {
            print("${percentile * idk}%...")
            idk++
        }
        if (validationCodes[i] != "MVCC_READ_CONFLICT" || accessTypes[i] != "READ") continue
        // Rows without a read version cannot be traced back to a write.
        val versionB = versionBlocks[i] ?: continue
        val key = keys[i]
        // Launched as a child of runBlocking so a failing worker is reported
        // instead of being silently dropped.
        jobs.add(launch(Default) {
            var j = i
            // `j > 0` keeps the scan inside the table: the previous condition
            // (`|| j==0`) decremented past the first row and indexed row -1.
            // NOTE(review): as before, the row reached after the decrement is
            // examined even if its block is no longer newer than versionB —
            // confirm this is intended.
            while (j > 0 && blockIds[j] > versionB) {
                j--
                if (keys[j] == key && validationCodes[j] == "VALID" && accessTypes[j] == "WRITE") {
                    mutex.withLock { causeCounts[j]++ }
                    break
                }
            }
        })
    }
    println("100%")
    print("waiting for background threads...")
    jobs.joinAll()
}

// All workers are finished; apply the collected counts in a single pass.
df = df.update { mvcc_cause }.with { it + causeCounts[index()] }

val end = System.currentTimeMillis()
val diff = end-start
print("Operation took: $diff ms")
df

10%...20%...30%...40%...50%...60%...70%...80%...90%...100%
waiting for background threads...Operation took: 95724 ms

In [3]:
// Total the mvcc_cause counters per key into "mvccs_caused", rank the keys from
// most to fewest conflicts caused, and drop keys that caused none.
var processed = df
    .groupBy { key }
    .sum("mvccs_caused") { mvcc_cause }
    .sortByDesc("mvccs_caused")
    .filter { "mvccs_caused"<Int>() > 0 }

// Persist the ranking next to the input data.
processed.writeCSV("data/postfault0_mvccs.csv")
print("done!")

done!