# Dependency management

Each notebook has its own session.
Session state consists of:
- interpretation state (the code you write and execute),
- dependencies and libraries (which you provide).

In [None]:
// Simplest way to provide dependencies: a single line magic.
// NOTE(review): presumably makes later `%use` lines resolve against the latest library descriptors - confirm in the Kotlin Jupyter docs.
%useLatestDescriptors

In [None]:
// Configure repositories and dependencies in a Gradle-like manner.
// The empty strings below are placeholders: fill in the repository URL and the dependency notation.
USE {
    repositories {
        mavenLocal()
        maven {
            url = ""
//            credentials {
//
//            }
        }
    }
    dependencies {
        implementation("")
    }
}

In [None]:
// NOTE(review): presumably loads a library descriptor from a local JSON file; the path is a placeholder - confirm the syntax.
%use @/path/to.json

# Draft and run

In [1]:
%useLatestDescriptors

// Pin the library versions explicitly. Combination of versions that works well in notebooks:
//   dataframe   1.0.0-dev-7089
//   kandy       0.8.1-dev-67
//   kandy stats 0.4.2-dev-2
%use dataframe(v=1.0.0-dev-7089)
%use kandy(kandyVersion=0.8.1-dev-67, statsVersion=0.4.2-dev-2)

In [3]:
// First draft: read the CSV without any explicit column types and display the result.
val disruptions2023 = DataFrame.readCSV(
    "data/disruptions-2023.csv",
    delimiter = ',',
)
disruptions2023

rdt_id,ns_lines,rdt_lines,rdt_lines_id,rdt_station_names,rdt_station_codes,cause_nl,cause_en,statistical_cause_nl,statistical_cause_en,cause_group,start_time,end_time,duration_minutes
45999,Amsterdam-Rotterdam-Brussel (HSL),Amsterdam Centraal - Schiphol Airport...,2432.0,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864
46000,Zwolle-Leeuwarden,Leeuwarden - Zwolle,160.0,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24
46001,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130.0,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643
46002,Zutphen-Winterswijk,Winterswijk - Zutphen,83.0,"Vorden,Zutphen","VD, ZP",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179
46003,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130.0,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210
46004,Amersfoort-Ede-Wageningen,Amersfoort - Ede-Wageningen,47.0,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"Breda - Dordrecht, Dordrecht - Roosen...",170171.0,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38
46006,'s-Hertogenbosch-Tilburg,'s-Hertogenbosch - Tilburg,69.0,"'s-Hertogenbosch,Tilburg","HT, TB",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11
46007,Rotterdam-Breda (HSL),Breda - Rotterdam Centraal (HSL),15.0,"Breda,Rotterdam Centraal","BD, RTD",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35
46008,Amsterdam-Schiphol-Rotterdam (HSL),Amsterdam Centraal - Schiphol Airport...,2432.0,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28


In [4]:
// Count plot of disruptions per day, stacked by cause group.
disruptions2023
    // derive a "date" column from the start timestamp
    .add("date") { start_time.date }
    .groupBy { cause_group }
    .plot {
        countPlot("date") {
            // stack the groups on top of each other, with no bar outline
            position = Position.stack()
            borderLine.width = 0.0
        }
        layout {
            size = 1500 to 500
            title = "Disruptions per day"
        }
    }

In [None]:
// Show the schema of `disruptions2023`.
disruptions2023.schema()

In [5]:
// Re-read the CSV, this time forcing `rdt_lines_id` to be read as a String,
// then rename all columns to camelCase.
val disruptions2023 = DataFrame
    .readCsv(
        fileOrUrl = "data/disruptions-2023.csv",
        colTypes = mapOf("rdt_lines_id" to ColType.String),
        delimiter = ',',
    )
    .renameToCamelCase()

In [6]:
import kotlin.time.Duration.Companion.minutes

val df1 = disruptions2023
    // drop the rows that have no duration, so the `!!` below cannot fail
    .dropNulls { durationMinutes }
    // add a kotlin.time.Duration column built from the minutes, plus a date column
    .add {
        "duration" from { durationMinutes!!.minutes }
        "date" from { startTime.date }
    }
    // strip the "rdt" prefix and the "En" suffix from every column name
    .rename { all() }.into { column ->
        column.name
            .removePrefix("rdt")
            .replaceFirstChar { first -> first.lowercase() }
            .removeSuffix("En")
    }

df1

id,nsLines,lines,linesId,stationNames,stationCodes,causeNl,cause,statisticalCauseNl,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,Amsterdam-Rotterdam-Brussel (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46000,Zwolle-Leeuwarden,Leeuwarden - Zwolle,160,"Heerenveen,Wolvega,Heerenveen IJsstadion","HR, WV, HRY",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24,24m,2023-01-01
46001,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46002,Zutphen-Winterswijk,Winterswijk - Zutphen,83,"Vorden,Zutphen","VD, ZP",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179,2h 59m,2023-01-01
46003,Heerlen-Aachen Hbf,Aachen Hbf - Heerlen,130,"Aachen Hbf,Eygelshoven Markt,Heerlen,...","AHBF, EGHM, HRL, HRLK, HZ, LG, AW",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46004,Amersfoort-Ede-Wageningen,Amersfoort - Ede-Wageningen,47,"Amersfoort Centraal,Barneveld Centrum...","AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52,52m,2023-01-02
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"Breda - Dordrecht, Dordrecht - Roosen...",170171,"Dordrecht,Dordrecht Zuid,Lage Zwaluwe","DDR, DDZD, ZLW",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38,38m,2023-01-02
46006,'s-Hertogenbosch-Tilburg,'s-Hertogenbosch - Tilburg,69,"'s-Hertogenbosch,Tilburg","HT, TB",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11,11m,2023-01-02
46007,Rotterdam-Breda (HSL),Breda - Rotterdam Centraal (HSL),15,"Breda,Rotterdam Centraal","BD, RTD",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35,35m,2023-01-02
46008,Amsterdam-Schiphol-Rotterdam (HSL),Amsterdam Centraal - Schiphol Airport...,2432,"Amsterdam Centraal,Amsterdam Lelylaan...","ASD, ASDL, ASS, RTD, SHL",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02


In [7]:
// Split the comma-separated lines, linesId, stationNames and stationCodes columns,
// replacing each column in place.
val df2 = df1
    .split { lines and linesId and stationNames and stationCodes }
    .by(",")
    .inplace()

df2

id,nsLines,lines,linesId,stationNames,stationCodes,causeNl,cause,statisticalCauseNl,statisticalCause,causeGroup,startTime,endTime,durationMinutes,duration,date
45999,Amsterdam-Rotterdam-Brussel (HSL),[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]","[Amsterdam Centraal, Amsterdam Lelyla...","[ASD, ASDL, ASS, RTD, SHL]",wisselstoring,points failure,wisselstoring,points failure,infrastructure,2023-01-01T08:19:26,2023-01-01T22:43:08,864,14h 24m,2023-01-01
46000,Zwolle-Leeuwarden,[Leeuwarden - Zwolle],[160],"[Heerenveen, Wolvega, Heerenveen IJss...","[HR, WV, HRY]",dier op het spoor,an animal on the railway track,dier op het spoor,an animal on the railway track,external,2023-01-01T10:31:49,2023-01-01T10:56:17,24,24m,2023-01-01
46001,Heerlen-Aachen Hbf,[Aachen Hbf - Heerlen],[130],"[Aachen Hbf, Eygelshoven Markt, Heerl...","[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-01T13:19:24,2023-01-02T00:02:39,643,10h 43m,2023-01-01
46002,Zutphen-Winterswijk,[Winterswijk - Zutphen],[83],"[Vorden, Zutphen]","[VD, ZP]",aanrijding,collision,aanrijding,collision,accidents,2023-01-01T17:15:22,2023-01-01T20:14:23,179,2h 59m,2023-01-01
46003,Heerlen-Aachen Hbf,[Aachen Hbf - Heerlen],[130],"[Aachen Hbf, Eygelshoven Markt, Heerl...","[AHBF, EGHM, HRL, HRLK, HZ, LG, AW]",beperkingen in de materieelinzet,problems with the rolling stock,beperkingen in de materieelinzet,problems with the rolling stock,rolling stock,2023-01-02T05:57:27,2023-01-03T02:07:13,1210,20h 10m,2023-01-02
46004,Amersfoort-Ede-Wageningen,[Amersfoort - Ede-Wageningen],[47],"[Amersfoort Centraal, Barneveld Centr...","[AMF, BNC, BNN, ED, EDC, LTN, HVL, BNZ]",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T06:36:39,2023-01-02T07:28:16,52,52m,2023-01-02
46005,Dordrecht-Breda; Dordrecht-Roosendaal,"[Breda - Dordrecht, Dordrecht - Roose...","[170, 171]","[Dordrecht, Dordrecht Zuid, Lage Zwal...","[DDR, DDZD, ZLW]",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T07:31:33,2023-01-02T08:09:37,38,38m,2023-01-02
46006,'s-Hertogenbosch-Tilburg,['s-Hertogenbosch - Tilburg],[69],"['s-Hertogenbosch, Tilburg]","[HT, TB]",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T11:33:15,2023-01-02T11:44:27,11,11m,2023-01-02
46007,Rotterdam-Breda (HSL),[Breda - Rotterdam Centraal (HSL)],[15],"[Breda, Rotterdam Centraal]","[BD, RTD]",gestrande trein,stranded train,gestrande trein,stranded train,rolling stock,2023-01-02T11:50:11,2023-01-02T12:25:39,35,35m,2023-01-02
46008,Amsterdam-Schiphol-Rotterdam (HSL),[Amsterdam Centraal - Schiphol Airpor...,"[24, 32]","[Amsterdam Centraal, Amsterdam Lelyla...","[ASD, ASDL, ASS, RTD, SHL]",defecte trein,broken down train,defecte trein,broken down train,rolling stock,2023-01-02T12:40:11,2023-01-02T13:08:08,28,28m,2023-01-02


In [8]:
val causes = df2
    // group by causeGroup and store `valueCounts()` of statisticalCause per group in a "causes" column
    .groupBy { causeGroup }.aggregate {
        statisticalCause.valueCounts() into "causes"
    }
    // sort descending by the number of rows in causes
    // (the column is looked up by name; NOTE(review): presumably because no accessor exists for it yet - confirm)
    .sortByDesc {
        expr { getFrameColumn("causes").count() }
    }

causes

causeGroup,causes
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
statisticalCause,count
external,DataFrame [18 x 2]statisticalCausecountan emergency call151person on the railway track146people on the railway track72police action46fire alarm35... showing only top 5 of 18 rows
statisticalCause,count
an emergency call,151
person on the railway track,146
people on the railway track,72
police action,46
fire alarm,35
infrastructure,DataFrame [13 x 2]statisticalCausecountsignalling and points failure289points failure229signal failure163damaged overhead wires104defective railway track87... showing only top 5 of 13 rows
statisticalCause,count
signalling and points failure,289

statisticalCause,count
an emergency call,151
person on the railway track,146
people on the railway track,72
police action,46
fire alarm,35

statisticalCause,count
signalling and points failure,289
points failure,229
signal failure,163
damaged overhead wires,104
defective railway track,87

statisticalCause,count
weather circumstances,25
overhead wires covered with frost,11
lightning strike,9
slippery railway tracks,6
an amended timetable,3

statisticalCause,count
staffing problems,160
strike of Arriva staff,76
strike of Keolis staff,36
staff strikes abroad,24
strike of Connexxion staff,11

statisticalCause,count
broken down train,1704
stranded train,134
problems with the rolling stock,81
defective trains,2
the use of alternative train units,1

statisticalCause,count
logistical limitations,99
disruption elsewhere,78
railway problems abroad,39
an earlier disruption,14
excessive delays,1

statisticalCause,count
collision,493
damaged railway bridge,37
damaged level crossing,7

statisticalCause,count
repair works,146
over-running engineering works,47
engineering works,9

statisticalCause,count
technical investigation,70
multiple disruptions,7


In [9]:
// Column accessor for the per-group total that is added below.
val counts by column<Int>()

causes
    // total per cause group = sum of `count` over the nested causes frame
    .add(counts) { group -> group.causes.sumOf { count } }
    .plot {
        pie {
            slice(counts)
            fillColor(causeGroup)
            size = 25.0

            tooltips(anchor = Anchor.TOP_LEFT) {
                line(counts)
                line(causeGroup)
                // one "cause:  count" line per row of the nested frame
                line(causes.map { nested ->
                    nested.rows().joinToString("\n") { entry -> "${entry.statisticalCause}:  ${entry.count}" }
                })
            }
        }
        x.axis.breaksLabeled()
        y.axis.breaksLabeled()
        layout {
            style { blankAxes() }
            title = "Causes"
        }
    }

In [None]:
import kotlin.random.Random
import kotlin.random.nextUInt

// Side length, in pixels, of each individual pie plot.
val sizeInt = 290
// Look the palette list up once instead of calling `values()` twice per pie.
val sequentialPalettes = BrewerPalette.Sequential.values()
// Random starting palette. The index is drawn directly from [0, size), so it is never negative
// and stays far away from Int overflow while it is incremented below; a negative value would
// make the `%` yield a negative array index.
var i = Random.nextInt(sequentialPalettes.size)
// Render the cause groups three per row; `chunked(3)` keeps a shorter final row if needed.
for (rows in causes.rows().chunked(3)) {
    // One donut chart per cause group, each with the next palette in the cycle.
    val plots = rows.map { row ->
        row.causes.plot {
            pie {
                slice(count)
                fillColor(statisticalCause) {
                    legend.type = LegendType.None
                    scale = categoricalColorBrewer(
                        sequentialPalettes[i++ % sequentialPalettes.size]
                    )
                }
                size = 15.0
                hole = 0.4

                tooltips(count, statisticalCause, anchor = Anchor.TOP_CENTER)
            }
            x.axis.breaksLabeled()
            y.axis.breaksLabeled()
            layout {
                style { blankAxes() }
                title = row.causeGroup
                size = sizeInt to sizeInt
            }
        }
    }

    // Place the plots of this row next to each other, `sizeInt` pixels apart.
    DISPLAY(
        plotBunch {
            for ((index, plot) in plots.withIndex()) {
                add(plot = plot, x = index * sizeInt, y = 0)
            }
        }
    )
}