In [1]:
%useLatestDescriptors
%use dataframe

In [2]:
// Location of the CSV file that this notebook analyses.
val path = "./src/main/resources/Top500Answerers.csv"

// Load the file into a DataFrame and preview its first five rows.
val topFolks = DataFrame.read(path)
topFolks.head(5)


User Link,Answers,Average Answer Score
489564,15,909.07
115218,11,477.36
1445334,15,422.6
361023,23,402.04
214090,34,365.91


In [3]:
/**
 * Builds an HTML anchor element that shows [text] and points at [url].
 * Both values are inserted verbatim; no escaping is applied.
 */
fun htmlLink(text: String, url: String): String = """<a href="$url">$text</a>"""
/** Returns the Stack Overflow profile URL for the user identified by [userID]. */
fun soUrl(userID: String): String = "https://stackoverflow.com/users/" + userID


// The five rows with the highest average answer score, best first:
// sort ascending, keep the last five rows, then flip their order.
val topFive = topFolks
    .sortBy { cols -> cols.`Average Answer Score` }
    .takeLast(5)
    .reverse()


// Render the top five as an HTML bullet list: one <li> per row, each linking
// the user ID to that user's Stack Overflow profile URL.
val content = topFive
    .map { row ->
        val userID = row.`User Link`.toString()
        val linkMarkup = htmlLink(userID, soUrl(userID))
        "<li>User $linkMarkup with an average of ${row.`Average Answer Score`}</li>"
    }
    .joinToString(separator = "", prefix = "<ul>", postfix = "</ul>")

// Show the generated markup as rich output in the notebook.
DISPLAY(HTML(content))

In [4]:
%use kandy

In [5]:
// Bar chart of the five best average scores, one bar per user.
plot {
    // Each user ID is prefixed with "ID: " so the x value is a text label.
    val userLabels = topFive.map { row -> "ID: ${row.`User Link`}" }
    val averageScores = topFive.map { row -> row.`Average Answer Score` }
    bars {
        x(userLabels)
        y(averageScores)
    }
}


In [6]:
// Total number of answers: collect the Answers value of every row, then add them up.
topFolks.map { row -> row.Answers }.sum()


18216

In [7]:
// Same total as above, computed by folding the per-row values pairwise.
topFolks
    .map { row -> row.Answers }
    .reduce { runningTotal, answers -> runningTotal + answers }

18216

In [8]:
// Third way to total the answers: declare an Int column accessor named `Answers`
// through the `column` delegate, read it in each row by invoking it (`Answers()`),
// and sum the collected values. The output matches the two cells above.
val Answers by column<Int>()
topFolks.map { Answers() }.sum()

18216

In [9]:
// Replace the two column names that contain spaces with identifier-friendly ones.
// The Answers column keeps its name.
val topFolksClean = topFolks
    .rename { `User Link` }.into("UserID")
    .rename { `Average Answer Score` }.into("AverageScore")

// Show the renamed frame as the cell result.
topFolksClean

UserID,Answers,AverageScore
489564,15,909.07
115218,11,477.36
1445334,15,422.6
361023,23,402.04
214090,34,365.91
295800,14,293.93
4079317,19,268.79
19082,35,265.03
39933,52,262.73
245602,14,257.14


In [10]:
// IDs of the three best-scoring users among those with at least 20 answers,
// listed from the lowest to the highest average score.
topFolksClean
    .filter { row -> row.Answers >= 20 }
    .sortBy { cols -> cols.AverageScore }
    .takeLast(3)
    .select { cols -> cols.UserID }


UserID
19082
214090
361023


In [11]:
// How many different values occur in the Answers column:
// keep one row per value, then count the remaining rows.
topFolksClean
    .distinctBy { cols -> cols.Answers }
    .rowsCount()


93

In [12]:
// Every user whose answer count is exactly 15.
topFolksClean.filter { row -> row.Answers == 15 }


UserID,Answers,AverageScore
489564,15,909.07
1445334,15,422.6
717950,15,167.53
2862474,15,114.33
32617,15,104.33
12123,15,104.2
2605574,15,89.2
874672,15,87.47
349228,15,85.33
1390892,15,85.27


In [13]:
// One row per distinct answer count, ordered by answer count.
// The frame is sorted by ascending score first; in the output below the row kept
// for each answer count is a low-scoring one.
topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .distinctBy { cols -> cols.Answers }
    .sortBy { cols -> cols.Answers }


UserID,Answers,AverageScore
321730,11,58.18
1187046,12,59.42
355491,13,59.77
1820510,14,58.43
5674,15,58.33
2441506,16,59.13
2189578,17,58.24
495174,18,59.94
54999,19,59.26
183277,20,60.55


In [14]:
// One row per distinct answer count, ordered by answer count.
// The frame is sorted by descending score first; in the output below the row kept
// for each answer count is a high-scoring one.
topFolksClean
    .sortByDesc { cols -> cols.AverageScore }
    .distinctBy { cols -> cols.Answers }
    .sortBy { cols -> cols.Answers }


UserID,Answers,AverageScore
115218,11,477.36
2446509,12,200.75
2063,13,210.92
295800,14,293.93
489564,15,909.07
5902074,16,138.31
4682,17,161.0
634089,18,165.83
4079317,19,268.79
422476,20,174.35


In [15]:
// Take the ten rows with the highest average score and group them by answer count.
topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .takeLast(10)
    .groupBy { cols -> cols.Answers }


Answers,group,Unnamed: 2_level_0
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
14,DataFrame [2 x 3]UserIDAnswersAverageScore24560214257.14000029580014293.930000,
UserID,Answers,AverageScore
245602,14,257.140000
295800,14,293.930000
52,DataFrame [1 x 3]UserIDAnswersAverageScore3993352262.730000,
UserID,Answers,AverageScore
39933,52,262.730000
35,DataFrame [1 x 3]UserIDAnswersAverageScore1908235265.030000,
UserID,Answers,AverageScore
19082,35,265.030000

UserID,Answers,AverageScore
245602,14,257.14
295800,14,293.93

UserID,Answers,AverageScore
39933,52,262.73

UserID,Answers,AverageScore
19082,35,265.03

UserID,Answers,AverageScore
4079317,19,268.79

UserID,Answers,AverageScore
214090,34,365.91

UserID,Answers,AverageScore
361023,23,402.04

UserID,Answers,AverageScore
1445334,15,422.6
489564,15,909.07

UserID,Answers,AverageScore
115218,11,477.36


In [16]:
// Group the ten highest-scoring rows by their answer count and keep the grouping
// in a variable so that the following cells can inspect it.
val groupedData = topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .takeLast(10)
    .groupBy { cols -> cols.Answers }

// The grouping keys: one row per distinct answer count.
groupedData.keys


Answers
14
52
35
19
34
23
15
11


In [17]:
// The grouped rows themselves: one nested DataFrame per grouping key (see output below).
groupedData.groups

group,Unnamed: 1_level_0,Unnamed: 2_level_0
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
UserID,Answers,AverageScore
DataFrame [2 x 3]UserIDAnswersAverageScore24560214257.14000029580014293.930000,,
UserID,Answers,AverageScore
245602,14,257.140000
295800,14,293.930000
DataFrame [1 x 3]UserIDAnswersAverageScore3993352262.730000,,
UserID,Answers,AverageScore
39933,52,262.730000
DataFrame [1 x 3]UserIDAnswersAverageScore1908235265.030000,,
UserID,Answers,AverageScore
19082,35,265.030000

UserID,Answers,AverageScore
245602,14,257.14
295800,14,293.93

UserID,Answers,AverageScore
39933,52,262.73

UserID,Answers,AverageScore
19082,35,265.03

UserID,Answers,AverageScore
4079317,19,268.79

UserID,Answers,AverageScore
214090,34,365.91

UserID,Answers,AverageScore
361023,23,402.04

UserID,Answers,AverageScore
1445334,15,422.6
489564,15,909.07

UserID,Answers,AverageScore
115218,11,477.36


In [18]:
// Print the answer count of every group that holds two or more rows.
groupedData
    .groups
    .filter { group -> group.rowsCount() >= 2 }
    .forEach { sharedGroup ->
        // All rows of a group share the same Answers value, so the first row suffices.
        println(sharedGroup.first().Answers)
    }


14
15


In [20]:
// Users ordered by ascending average score, with the Answers column removed and a
// text "Rating" column added:
//   below 100.0          -> "Low"
//   100.0 up to 300.0    -> "Medium"
//   300.0 and above      -> "High"
val ratedFolks = topFolksClean
    .sortBy { AverageScore }
    .remove("Answers")
    .add("Rating") {
        // Open-ended threshold checks instead of `in 0.0 ..< 100.0`: with the range
        // form a negative average matched neither range and was labelled "High".
        when {
            AverageScore < 100.0 -> "Low"
            AverageScore < 300.0 -> "Medium"
            else -> "High"
        }
    }


In [21]:
// ratedFolks is sorted by ascending score, so this stacks its first three rows
// (lowest scores) on top of its last three rows (highest scores).
val topAndBottom = ratedFolks.head(3).concat(ratedFolks.tail(3))
topAndBottom


UserID,AverageScore,Rating
321730,58.18,Low
177019,58.19,Low
2189578,58.24,Low
1445334,422.6,High
115218,477.36,High
489564,909.07,High


In [22]:
// For every distinct answer count, the number of users who have it,
// limited to answer counts shared by at least 20 users.
val answersPairedWithCounts = topFolksClean
    .groupBy { cols -> cols.Answers }
    .count()
    // The aggregated "count" column is looked up by name and read from each row.
    .filter { row -> row[column<Int>("count")] >= 20 }

In [23]:
// Display the filtered (Answers, count) pairs computed in the previous cell.
answersPairedWithCounts

Answers,count
15,22
11,63
14,31
13,31
12,38
17,29


In [24]:
// Bar chart of the frame above: the Answers column is mapped to x and the
// count column to y.
answersPairedWithCounts.plot {
    bars {
        x(Answers)
        y(count)
    }
}


In [25]:
// The ten highest-scoring rows, in ascending score order, as CSV text.
topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .takeLast(10)
    .toCsv()


UserID,Answers,AverageScore
245602,14,257.14
39933,52,262.73
19082,35,265.03
4079317,19,268.79
295800,14,293.93
214090,34,365.91
361023,23,402.04
1445334,15,422.6
115218,11,477.36
489564,15,909.07


In [26]:
// The ten highest-scoring rows, in ascending score order, as a JSON array of objects.
topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .takeLast(10)
    .toJson()


[{"UserID":245602,"Answers":14,"AverageScore":257.14},{"UserID":39933,"Answers":52,"AverageScore":262.73},{"UserID":19082,"Answers":35,"AverageScore":265.03},{"UserID":4079317,"Answers":19,"AverageScore":268.79},{"UserID":295800,"Answers":14,"AverageScore":293.93},{"UserID":214090,"Answers":34,"AverageScore":365.91},{"UserID":361023,"Answers":23,"AverageScore":402.04},{"UserID":1445334,"Answers":15,"AverageScore":422.6},{"UserID":115218,"Answers":11,"AverageScore":477.36},{"UserID":489564,"Answers":15,"AverageScore":909.07}]

In [27]:
// Convert the ten highest-scoring rows (ascending by score) to standalone HTML
// and open the result in the browser.
topFolksClean
    .sortBy { cols -> cols.AverageScore }
    .takeLast(10)
    .toStandaloneHTML()
    .openInBrowser()
