In [1]:
%useLatestDescriptors

// Combination of versions that works well in notebooks:
// dataframe 1.0.0-dev-7089
// kandy 0.8.1-dev-67
// kandy stats 0.4.2-dev-2
%use dataframe(v=1.0.0-dev-7089)
%use kandy(kandyVersion=0.8.1-dev-67, statsVersion=0.4.2-dev-2)

## [Get started with Kotlin Notebook](https://kotlinlang.org/docs/get-started-with-kotlin-notebooks.html)

<img alt="nb-logo.svg" src="nb-logo.svg" width="150"/>



### [Kotlin for Data Analysis](https://kotlinlang.org/docs/data-analysis-overview.html)

<img alt="frame2.png" src="frame2.png" width="200"/>

### [Work with data sources](https://kotlinlang.org/docs/data-analysis-work-with-data-sources.html)
### [Data visualization in Kotlin Notebook with Kandy](https://kotlinlang.org/docs/data-analysis-visualization.html)

# What is a dataframe?

[Useful functions](#useful-dataframe-functions)

In [3]:
// Old style: every column is passed as a "column name" to list-of-values pair.

dataFrameOf(
    "value" to listOf(1, 2, 3, 4, 5),
    "name" to listOf("one", "two", "three", "four", "five")
)

value,name
1,one
2,two
3,three
4,four
5,five


In [2]:
// Each column is created with columnOf(...) and receives its name through the infix `named`.
val table1 = dataFrameOf(
    columnOf(1, 2, 3, 4, 5) named "value",
    columnOf("one", "two", "three", "four", "five") named "name",
)

// The trailing expression is what gets rendered as the cell output.
table1

value,name
1,one
2,two
3,three
4,four
5,five


In [3]:
// Show every column name together with its type.
table1.schema()

value: Int
name: String

### Useful DataFrame functions:

In [4]:
// Result without the "value" column; later cells show that table1 itself still has it.
table1.remove { value }

name
one
two
three
four
five


In [5]:
// Convert the "value" column from Int to Double.
table1.convert { value }.toDouble()

value,name
1.0,one
2.0,two
3.0,three
4.0,four
5.0,five


In [6]:
// Keep only the rows whose name contains the letter "e".
table1.filter { "e" in name }

value,name
1,one
3,three
5,five


In [7]:
// Uppercase the name, but only in rows where value is greater than 3.
table1
    .update { name }.where { value > 3 }.with { it.uppercase() }

value,name
1,one
2,two
3,three
4,FOUR
5,FIVE


In [8]:
// Replace the "value" column with a version in which 0.5 has been added to every entry.
// NOTE(review): inside asColumn, `it` presumably is the column as a whole rather than a single cell — confirm.
table1
    .convert { value }.asColumn { it + 0.5 }

value,name
1.5,one
2.5,two
3.5,three
4.5,four
5.5,five


### Type-safe accessors:

In [9]:
// No `value` property was declared by hand on table1, yet this compiles.
// Presumably it resolves to a generated extension property like the ones %trackExecution prints in the next cell.
val value = table1.value // ?

In [10]:
%trackExecution
// With %trackExecution on, the output lists every snippet that gets executed, including the generated
// @DataSchema interface and the extension properties for the columns of the new frame.
// The values are given row by row: the output of the later cell shows column a holding 1 and 2.
val dataFrame = dataFrameOf("a", "b")(1, 3, 2, 4)

Executing:

val dataFrame = dataFrameOf("a", "b")(1, 3, 2, 4)

Executing:
@DataSchema
interface _DataFrameType1 {
    val a: Int
    val b: Int
}

val ColumnsScope<_DataFrameType1>.a: DataColumn<Int> @JvmName("_DataFrameType1_a") get() = this["a"] as DataColumn<Int>
val DataRow<_DataFrameType1>.a: Int @JvmName("_DataFrameType1_a") get() = this["a"] as Int
val ColumnsScope<_DataFrameType1>.b: DataColumn<Int> @JvmName("_DataFrameType1_b") get() = this["b"] as DataColumn<Int>
val DataRow<_DataFrameType1>.b: Int @JvmName("_DataFrameType1_b") get() = this["b"] as Int
(dataFrame as org.jetbrains.kotlinx.dataframe.DataFrame<*>).cast<_DataFrameType1>()
Executing:
val dataFrame = res34


In [11]:
%trackExecution off

In [12]:
// `a` and `b` are the generated extension properties shown in the tracked output; each is a DataColumn<Int>.
val a = dataFrame.a
val b = dataFrame.b

// Render column a as the cell output.
a

a
1
2


### Compatible with many data sources!

CSV and other delimiter-based text:

(drag-and-drop works too)

JSON can even be fetched straight from a URL (optionally with types from an OpenAPI schema),
and its hierarchical structure is reflected in the table:

In [2]:
// Read the JSON document from the URL, then take the first entry of its "users" frame column:
// a nested DataFrame with one row per user (see the output below).
val userData =
    DataFrame.readJson("https://dummyjson.com/users")
        .getFrameColumn("users")
        .first()

userData

id,firstName,lastName,maidenName,age,gender,email,phone,username,password,birthDate,image,bloodGroup,height,weight,eyeColor,hair,Unnamed: 17_level_0,ip,address,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,macAddress,university,bank,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,company,Unnamed: 35_level_0,Unnamed: 36_level_0,Unnamed: 37_level_0,Unnamed: 38_level_0,Unnamed: 39_level_0,Unnamed: 40_level_0,Unnamed: 41_level_0,Unnamed: 42_level_0,Unnamed: 43_level_0,Unnamed: 44_level_0,ein,ssn,userAgent,crypto,Unnamed: 49_level_0,Unnamed: 50_level_0,role
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,color,type,Unnamed: 18_level_1,address,city,state,stateCode,postalCode,coordinates,Unnamed: 25_level_1,country,Unnamed: 27_level_1,Unnamed: 28_level_1,cardExpire,cardNumber,cardType,currency,iban,department,name,title,address,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,coin,wallet,network,Unnamed: 51_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,lat,lng,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,address,city,state,stateCode,postalCode,coordinates,Unnamed: 43_level_2,country,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,lat,lng,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3
1,Emily,Johnson,Smith,28,female,emily.johnson@x.dummyjson.com,+81 965-431-3024,emilys,emilyspass,1996-5-30,https://dummyjson.com/icon/emilys/128,O-,193.240005,63.16,Green,Brown,Curly,42.48.100.32,626 Main Street,Phoenix,Mississippi,MS,29112,-77.162132,-92.084824,United States,47:fa:41:18:ec:eb,University of Wisconsin--Madison,03/26,9289760655481815,Elo,CNY,YPUXISOBI7TTHPK2BR3HAIXL,Engineering,"Dooley, Kozey and Cronin",Sales Manager,263 Tenth Street,San Francisco,Wisconsin,WI,37657,71.814522,-161.150269,United States,977-175,900-590-289,Mozilla/5.0 (Macintosh; Intel Mac OS ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),admin
2,Michael,Williams,,35,male,michael.williams@x.dummyjson.com,+49 258-627-6644,michaelw,michaelwpass,1989-8-10,https://dummyjson.com/icon/michaelw/128,B+,186.220001,76.32,Red,Green,Straight,12.13.116.142,385 Fifth Street,Houston,Alabama,AL,38807,22.815468,115.608582,United States,79:15:78:99:60:aa,Ohio State University,02/27,6737807858721625,Elo,SEK,83IDT77FWYLCJVR8ISDACFH0,Support,Spinka - Dickinson,Support Specialist,395 Main Street,Los Angeles,New Hampshire,NH,73442,79.098328,-119.624847,United States,912-602,108-953-962,Mozilla/5.0 (Windows NT 10.0; Win64; ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),admin
3,Sophia,Brown,,42,female,sophia.brown@x.dummyjson.com,+81 210-652-2785,sophiab,sophiabpass,1982-11-6,https://dummyjson.com/icon/sophiab/128,O-,177.720001,52.599998,Hazel,White,Wavy,214.225.51.195,1642 Ninth Street,Washington,Alabama,AL,32822,45.289368,46.832664,United States,12:a3:d3:6f:5c:5b,Pepperdine University,04/25,7795895470082859,Korean Express,SEK,90XYKT83LMM7AARZ8JN958JC,Research and Development,Schiller - Zieme,Accountant,1896 Washington Street,Dallas,Nevada,NV,88511,20.086742,-34.577106,United States,963-113,638-461-822,Mozilla/5.0 (Windows NT 10.0; Win64; ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),admin
4,James,Davis,,45,male,james.davis@x.dummyjson.com,+49 614-958-9364,jamesd,jamesdpass,1979-5-4,https://dummyjson.com/icon/jamesd/128,AB+,193.309998,62.099998,Amber,Blonde,Straight,101.118.131.66,238 Jefferson Street,Seattle,Pennsylvania,PA,68354,16.782513,-139.347229,United States,10:7d:df:1f:97:58,University of Southern California,05/29,5005519846254763,Mastercard,INR,7N7ZH1PJ8Q4WU1K965HQQR27,Support,Pagac and Sons,Research Analyst,1622 Lincoln Street,Fort Worth,Pennsylvania,PA,27768,54.91193,-79.498329,United States,904-810,116-951-314,Mozilla/5.0 (Macintosh; Intel Mac OS ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),admin
5,Emma,Miller,Johnson,30,female,emma.miller@x.dummyjson.com,+91 759-776-1614,emmaj,emmajpass,1994-6-13,https://dummyjson.com/icon/emmaj/128,AB-,192.800003,63.619999,Green,White,Straight,224.126.22.183,607 Fourth Street,Jacksonville,Colorado,CO,26593,0.505589,-157.432816,United States,32:b9:7e:8d:f5:e8,Northeastern University,03/26,5772950119588627,American Express,CAD,TAVHURD845KVBTB8W81AQXRY,Human Resources,Graham - Gulgowski,Quality Assurance Engineer,1460 Sixth Street,San Antonio,Idaho,ID,21965,44.346546,-26.9447,United States,403-505,526-210-885,Mozilla/5.0 (Windows NT 10.0; Win64; ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),admin
6,Olivia,Wilson,,22,female,olivia.wilson@x.dummyjson.com,+91 607-295-6448,oliviaw,oliviawpass,2002-4-20,https://dummyjson.com/icon/oliviaw/128,B+,182.610001,58.0,Hazel,Gray,Curly,249.178.112.207,547 First Street,Fort Worth,Tennessee,TN,83843,75.326271,-26.152849,United States,9c:7f:ea:34:18:19,University of North Carolina--Chapel ...,05/28,6771923832947881,Diners Club International,BRL,V6H0O5OE3Q4JVKWDTYWZABMD,Product Management,Pfannerstill Inc,Research Analyst,425 Sixth Street,Indianapolis,Oklahoma,OK,74263,74.986641,-132.916885,United States,921-709,836-772-168,Mozilla/5.0 (Macintosh; Intel Mac OS ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),moderator
7,Alexander,Jones,,38,male,alexander.jones@x.dummyjson.com,+61 260-824-4986,alexanderj,alexanderjpass,1986-10-20,https://dummyjson.com/icon/alexanderj...,AB-,153.889999,77.419998,Blue,White,Straight,166.204.84.32,664 Maple Street,Indianapolis,Delaware,DE,86684,35.289665,7.063255,United States,d2:64:58:2d:1c:46,University of Illinois--Urbana-Champaign,05/25,7344951706130140,JCB,EUR,49V4GVDVMP0MHIDD4VXMQ3A2,Engineering,Dickens - Beahan,Web Developer,996 Eighth Street,Washington,Kansas,KS,27858,-75.462364,-128.025696,United States,638-127,722-993-925,Mozilla/5.0 (Macintosh; Intel Mac OS ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),moderator
8,Ava,Taylor,,27,female,ava.taylor@x.dummyjson.com,+1 458-853-7877,avat,avatpass,1997-8-25,https://dummyjson.com/icon/avat/128,AB-,168.470001,57.080002,Hazel,Red,Kinky,150.73.197.233,1197 First Street,Fort Worth,Rhode Island,RI,24771,-81.194832,-87.948158,United States,8d:2e:c2:d6:e7:a8,University of Wisconsin--Madison,01/29,6412128967460199,Maestro,CNY,TS66YQ8R16OX7IJKLUONDQHP,Marketing,Nikolaus Inc,Chief Executive Officer,930 Lincoln Street,Austin,Colorado,CO,47592,87.970085,-42.769352,United States,297-762,257-419-109,Mozilla/5.0 (Windows NT 10.0; Win64; ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),moderator
9,Ethan,Martinez,,33,male,ethan.martinez@x.dummyjson.com,+92 933-608-5081,ethanm,ethanmpass,1991-2-12,https://dummyjson.com/icon/ethanm/128,AB+,159.190002,68.809998,Hazel,Purple,Curly,63.191.127.71,466 Pine Street,San Antonio,Louisiana,LA,72360,74.074921,-25.312702,United States,59:e:9e:e3:29:da,Syracuse University,02/25,7183482484317509,Visa,CAD,CW5U5KS23U7JYD22TVQL7SIH,Support,Gorczany - Gottlieb,Legal Counsel,1597 Oak Street,Chicago,Florida,FL,28100,-67.45208,-23.209887,United States,790-434,569-650-348,Mozilla/5.0 (Windows NT 10.0; Win64; ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),moderator
10,Isabella,Anderson,Davis,31,female,isabella.anderson@x.dummyjson.com,+49 770-658-4885,isabellad,isabelladpass,1993-6-10,https://dummyjson.com/icon/isabellad/128,A-,150.559998,50.099998,Brown,Blonde,Curly,114.9.114.205,1964 Oak Street,New York,Utah,UT,89352,41.331326,151.78273,United States,b1:b0:d0:a2:82:80,California Institute of Technology (C...,05/27,6118714010128731,NPS,CNY,GBZRGDMKUOTO34HBCI7A986J,Marketing,Pollich - Hilpert,Chief Financial Officer,1029 Adams Street,San Diego,Maryland,MD,63847,-25.843393,-62.69268,United States,127-297,902-438-728,Mozilla/5.0 (Macintosh; Intel Mac OS ...,Bitcoin,0xb9fc2fe63b2a6c003f1c324c3bfa5325916...,Ethereum (ERC20),moderator


And more: TSV, XLS(X), Arrow, JDBC databases... and of course, normal collections:

In [14]:
// Plain Kotlin class used to show that an ordinary collection can be turned into a DataFrame.
// The second favourite number is nullable (Bob below has none).
data class User(
    val name: String,
    val age: Int,
    val favoriteNumbers: Pair<Int, Int?>,
)

val users = listOf(
    User("Alice", 25, 1 to 2),
    User("Bob", 30, 3 to null),
    User("Charlie", 35, 4 to 5),
)

// NOTE(review): this bare expression is not the last one in the cell and does not show up in the output.
users

// NOTE(review): maxDepth = 1 presumably makes the conversion descend one level into each property, which would
// explain why favoriteNumbers appears as a group with `first` and `second` sub-columns in the output — confirm.
// The commented-out plain toDataFrame() call is kept for comparison.
val userDf = users
//    .toDataFrame()
    .toDataFrame(maxDepth = 1)
userDf

name,age,favoriteNumbers,Unnamed: 3_level_0
Unnamed: 0_level_1,Unnamed: 1_level_1,first,second
Alice,25,1,2.0
Bob,30,3,
Charlie,35,4,5.0


In [18]:
userDf.favoriteNumbers.first

first
1
3
4


# What's Kandy?

https://kotlin.github.io/kandy/examples.html

In [15]:
// %use kandy // already done

In [16]:
// Column-oriented data as a plain Map: column name -> list of values.
val monthTemp = mapOf(
    "month" to listOf(
        "January", "February",
        "March", "April", "May",
        "June", "July", "August",
        "September", "October", "November",
        "December"
    ),
    "temp" to listOf(-5, -3, 2, 10, 16, 20, 22, 21, 15, 9, 3, -2)
)

// The Map is plotted directly; its columns are referred to by their string names.
monthTemp.plot {
    line {
        x("month")
        y("temp") {
            scale = continuous(-10..25) // axis scale
        }
        // Colour the line by temperature, using a gradient built from three colours.
        color("temp") {
            scale = continuousColorGradientN(
                listOf(Color.hex("#7f52ff"), Color.hex("#c811e2"), Color.hex("#e54857"))
            )
        }
        width = 3.0
    }
}

In [17]:
// Sample points: one x value per integer step in -2000..2000, scaled down to the range -4.0..4.0.
val xs = (-2000..2000).map { step -> step / 500.0f }

// Curve to draw, kept as a function value so it can be applied to every sample.
val function: (Float) -> Float = { x -> sin(x) * cos(x * 2 + 1) * sin(3 * x + 2) }
val ys = xs.map { x -> function(x) }

// Area chart of the sampled curve.
plot {
    area {
        x(xs)
        y(ys)
    }
}

And we can even animate:

In [18]:
import kotlin.time.Duration.Companion.seconds

// NOTE(review): this top-level `return` sits before all of the animation code, so nothing below it runs as written.
// Presumably it was added to keep the animation from starting when the whole notebook is executed — TODO confirm.
return

// Same curve as in the previous cell, with an extra offset `c` inside each factor (c = 0 gives the original curve).
fun function(x: Float, c: Float = 0f) = sin(x + c) * cos((x + c) * 2 + 1) * sin(3 * x + 2 + c)
val xs = (-2000..2000).map { it / 500.0f }

// Offset that is increased by 0.1 every time the ANIMATE block below is evaluated.
var c = 0f
// NOTE(review): ANIMATE presumably re-evaluates the block every 0.5 seconds and shows the plot it returns — confirm.
ANIMATE(0.5.seconds) {
    val ys = xs.map { function(it, c) }
    c += .1f

    plot {
        area {
            x(xs)
            y(ys)
            borderLine.width = 2.0
            // Colour the border by the product y * x of each point, on a blue-to-red scale.
            borderLine.color(ys.zip(xs).map { it.first * it.second }) {
                scale = continuous(Color.BLUE .. Color.RED)
            }
        }
        y.axis.limits = -1..1
    }//.toBufferedImage()
}

org.jetbrains.kotlinx.jupyter.exceptions.ReplInterruptedException: The execution was interrupted

And it works with DataFrame with type-safe columns out of the box!

In [26]:
// Columns are referenced through type-safe accessors (weight, height, gender, eyeColor) instead of string names.
userData.plot { 
    points {
        x(weight)
        y(height)
        symbol(gender)
        color(eyeColor) {
            // NOTE(review): the userData output above also contains "Hazel" and "Red" eye colours,
            // which have no entry in this mapping — confirm how they should be rendered.
            scale = categorical(
                "Green" to Color.GREEN,
                "Brown" to Color.hex("#8b4513"),
                "Blue" to Color.BLUE,
                "Gray" to Color.GREY,
                "Amber" to Color.ORANGE,
            )
        }
        
        size = 5.0
    }
}

## Wanna see what else we can do?

- KotlinConf 2024 survey result plots
    - Reading from CSV, parsing, plotting interesting insights with Kandy 
- Train disruption data in the Netherlands
    - Reading from CSV, exploring delay causes and which places had it the worst
    - Reading from SQLite DB (JDBC), exploring train delays during my time at University 
- Live train departure data in the Netherlands/Denmark
    - Accessing live JSON API using types from OpenAPI schema (NL only) and generating a live departureBoard with Kandy (DK/NL)