In [None]:
%use krangl, lets-plot

## [artificial-characters](https://www.openml.org/d/1459) dataset reading

In [3]:
// Location of the CSV file holding the artificial-characters dataset.
// NOTE(review): relative path, presumably resolved against the notebook's working directory — confirm.
val datasetPath = "artificial-characters.csv"

// Parse the CSV into a krangl DataFrame; later cells read it through `dataframe`.
val dataframe = DataFrame.readCSV(datasetPath)

// Show the frame's textual summary as a quick sanity check of the import.
print(dataframe)

A DataFrame: 10218 x 8
     V1   V2   V3   V4   V5      V6      V7   Class
 1    0    0    0    0   20      20    46.1       1
 2    1   19    0   19    8       8    46.1       1
 3    2    0   20   19    8   22.47    46.1       1
 4    3    0   20    8   42   23.41    46.1       1
 5    4   19    8    8   42   35.74    46.1       1
 6    0    0    0    0   12      12   37.66       1
 7    1    7    0    7    7       7   37.66       1
 8    2    0   12    7    7     8.6   37.66       1
 9    3    0   12    1   35   23.02   37.66       1
10    4    7    7    5   30   23.09   37.66       1
and 10208 more rows

In [4]:
/**
 * Plain numeric view of a table: column names plus rows of doubles.
 *
 * The rest of the notebook treats the last element of every row as the target
 * (class label) and all preceding elements as features.
 *
 * @property features column names, in the same order as the values inside each row
 * @property objects the rows; one `List<Double>` per object
 */
class Dataset(val features: List<String>, var objects: List<List<Double>>) {

    /**
     * Builds a dataset from a krangl [DataFrame].
     *
     * Every cell is converted with `toString().toDouble()`; `null` cells become `0.0`
     * unless the whole row is skipped.
     *
     * @param dataframe source table
     * @param objCount when non-null, the rows are shuffled and at most this many are kept
     *   (values outside `0..rowCount` are clamped); when `null`, all rows are kept in order
     * @param skipObjectsWithNull when `true`, rows containing at least one `null` cell are dropped
     * @throws NumberFormatException if a non-null cell is not parseable as a double
     */
    constructor(dataframe: DataFrame, objCount: Int?, skipObjectsWithNull: Boolean = false)
        : this(dataframe.names, emptyList()) {

        val values = mutableListOf<List<Double>>()

        for (row in dataframe.rows) {
            if (skipObjectsWithNull && row.containsValue(null)) {
                continue
            }
            values.add(row.values.map { cell -> cell?.toString()?.toDouble() ?: 0.0 })
        }

        objects = if (objCount != null) {
            values.shuffle()
            // `take` keeps exactly objCount rows; the previous `slice(0..objCount)` used an
            // inclusive range, returning objCount + 1 rows and throwing on small tables.
            values.take(objCount.coerceIn(0, values.size))
        } else {
            values
        }
    }
}


// Working dataset for the notebook: a shuffled subsample of the frame (requested size 300),
// dropping rows that contain null cells. Declared `var` because it is reassigned later.
var dataset = Dataset(
    dataframe,
    objCount = 300,
    skipObjectsWithNull = true
)

## Min-max normalization: $x_{scaled} = \frac{x - min(x)}{max(x) - min(x)}$

In [5]:
/**
 * Min-max scales every column of [dataset] except the last one.
 *
 * For each column the smallest and largest value over all rows are located; a value `x`
 * becomes `(x - min) / (max - min)`. A column whose min equals its max is mapped to `1.0`.
 * The last column is copied through unchanged.
 *
 * @param dataset the data to scale; it is not modified
 * @return a new [Dataset] with the same feature names and scaled rows, or [dataset] itself
 *   when there are columns but no rows to take a minimum/maximum from
 */
fun normalize(dataset: Dataset): Dataset {
    val columns = dataset.features
    val rows = dataset.objects

    // (min, max) per column; bail out with the input when there is nothing to scan.
    val bounds = columns.indices.map { col ->
        val lowestRow = rows.minByOrNull { it[col] } ?: return dataset
        val highestRow = rows.maxByOrNull { it[col] } ?: return dataset
        lowestRow[col] to highestRow[col]
    }

    val targetColumn = columns.size - 1
    val scaledRows = rows.map { row ->
        columns.indices.map { col ->
            val (low, high) = bounds[col]
            val span = high - low
            when {
                col == targetColumn -> row[col]
                span != 0.0 -> (row[col] - low) / span
                else -> 1.0
            }
        }
    }

    return Dataset(columns, scaledRows)
}

// Replace the working dataset with its min-max scaled version; all later cells use the scaled data.
dataset = normalize(dataset)

## Distances:

- manhattan: $d(p, q) = \sum_{i=1}^n |p_i-q_i|$
- euclidean: $d(p,q) = \sqrt{\sum_{k=1}^n (p_k-q_k)^2}$
- chebyshev: $d(p,q) = \max_{i = 1, \dots, n} |p_i - q_i|$

In [6]:
/**
 * Creates a Minkowski distance function of order [p].
 *
 * The returned function computes `(sum_i |a_i - b_i|^p)^(1/p)`, iterating over the indices
 * of its first argument; the second argument must have at least as many elements.
 *
 * @param p order of the metric (1.0 gives manhattan, 2.0 gives euclidean)
 * @return a function taking two vectors and returning their distance
 */
fun minkowskiDistance(p: Double): (List<Double>, List<Double>) -> Double = { vector1, vector2 ->
    val poweredSum = vector1.indices.fold(0.0) { acc, i ->
        acc + abs(vector1[i] - vector2[i]).pow(p)
    }
    poweredSum.pow(1 / p)
}
 
/**
 * Distance functions available to the classifier, keyed by name.
 *
 * Each function takes two feature vectors and iterates over the indices of the first one.
 * For empty vectors every metric yields `0.0`.
 */
val DISTANCES = mapOf(
    "manhattan" to minkowskiDistance(1.0),
    "euclidean" to minkowskiDistance(2.0),
    // Largest coordinate-wise absolute difference. Computed without an intermediate list and
    // without `!!`, so empty vectors give 0.0 instead of a NullPointerException.
    "chebyshev" to { v1: List<Double>, v2: List<Double> ->
        v1.indices.maxOfOrNull { abs(v1[it] - v2[it]) } ?: 0.0
    }
)

## Kernels:


- uniform: $K(u) = \frac12$
- triangular: $K(u) = (1-|u|)$
- epanechnikov: $K(u) = \frac{3}{4}(1-u^2)$
- quartic: $K(u) = \frac{15}{16}(1-u^2)^2$
- triweight: $K(u) = \frac{35}{32}(1-u^2)^3$
- tricube: $K(u) = \frac{70}{81}(1- {\left| u \right|}^3)^3$
- gaussian: $K(u) = \frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}u^2}$
- cosine: $K(u) = \frac{\pi}{4}\cos\left(\frac{\pi}{2}u\right)$
- logistic: $K(u) = \frac{1}{e^{u}+2+e^{-u}}$
- sigmoid: $K(u) = \frac{2}{\pi}\frac{1}{e^{u}+e^{-u}}$

In [7]:
/**
 * Restricts [f] to the open interval (-1, 1).
 *
 * @param f kernel formula, evaluated only for arguments strictly between -1 and 1
 * @return a function equal to [f] inside (-1, 1) and `0.0` everywhere else (including NaN)
 */
fun finiteKernel(f: (Double) -> Double): (Double) -> Double = { u ->
    val insideSupport = u > -1.0 && u < 1.0
    if (insideSupport) f(u) else 0.0
}
 
// Kernel functions available to the classifier, keyed by name.
// Entries wrapped in finiteKernel are evaluated only for |u| < 1 and are 0.0 elsewhere;
// gaussian, logistic and sigmoid are plain lambdas defined for every u.
val KERNELS = mapOf(
    // K(u) = 1/2
    "uniform" to finiteKernel { u -> 0.5 },
    // K(u) = 1 - |u|
    "triangular" to finiteKernel { u -> 1 - abs(u) },
    // K(u) = 3/4 * (1 - u^2)
    "epanechnikov" to finiteKernel { u -> 3 * (1 - u * u) / 4 },
    // K(u) = 15/16 * (1 - u^2)^2
    "quartic" to finiteKernel { u -> 15 * (1 - u * u).pow(2) / 16 },
    // K(u) = 35/32 * (1 - u^2)^3
    "triweight" to finiteKernel { u -> 35 * (1 - u * u).pow(3) / 32 },
    // K(u) = 70/81 * (1 - |u|^3)^3
    "tricube" to finiteKernel { u -> 70 * (1 - abs(u).pow(3)).pow(3) / 81 },
    // K(u) = exp(-u^2 / 2) / sqrt(2 * pi)
    "gaussian" to { u: Double -> exp(-u * u / 2) / sqrt(2 * PI) },
    // K(u) = pi/4 * cos(pi/2 * u)
    "cosine" to finiteKernel { u -> PI * cos(PI * u / 2) / 4 },
    // K(u) = 1 / (e^u + 2 + e^-u)
    "logistic" to { u: Double -> 1.0 / (exp(u) + 2 + exp(-u)) },
    // K(u) = 2 / (pi * (e^u + e^-u))
    "sigmoid" to { u: Double -> 2.0 / (PI * (exp(u) + exp(-u))) }
)

In [8]:
/**
 * Returns the rows of [train] closest to [testRow].
 *
 * Each training row is expected to carry its target as the last element; that element is
 * dropped before the distance to [testRow] is computed. The distance is evaluated exactly
 * once per training row, and rows with equal distance keep their original relative order.
 *
 * @param train training rows (features followed by the target)
 * @param testRow feature vector of the query object (no target)
 * @param distance metric applied to two feature vectors
 * @param neighborsCount how many rows to return
 * @return up to [neighborsCount] rows ordered by increasing distance; fewer when [train] is
 *   smaller, and an empty list when [neighborsCount] is not positive
 */
fun getNeighbors(
    train: List<List<Double>>,
    testRow: List<Double>,
    distance: (List<Double>, List<Double>) -> Double,
    neighborsCount: Int
): List<List<Double>> {
    if (neighborsCount <= 0) return emptyList()

    return train
        // Pair each row with its distance up front so the sort does not recompute it per comparison.
        .map { row -> row to distance(testRow, row.dropLast(1)) }
        .sortedBy { it.second }
        // `take` tolerates neighborsCount > train.size, unlike slicing by an index range.
        .take(neighborsCount)
        .map { it.first }
}

In [9]:
/**
 * Derives a window size for [obj] from a neighbour count.
 *
 * Asks [getNeighbors] for `neighborsCount + 1` rows and returns the distance from [obj] to
 * the row at index [neighborsCount] of that result.
 *
 * @param dataset candidate rows (features followed by the target)
 * @param obj feature vector of the query object (no target)
 * @param distance metric applied to two feature vectors
 * @param neighborsCount zero-based index of the neighbour whose distance becomes the window size
 * @return the distance between [obj] and the selected neighbour's features
 */
fun getWindowSizeByNeighborsCount(
    dataset: List<List<Double>>,
    obj: List<Double>,
    distance: (List<Double>, List<Double>) -> Double,
    neighborsCount: Int
): Double {
    val nearest = getNeighbors(dataset, obj, distance, neighborsCount + 1)
    val boundaryRow = nearest[neighborsCount]
    val boundaryFeatures = boundaryRow.dropLast(1)
    return distance(obj, boundaryFeatures)
}

<font size="4">$NW(x, D_{train})=\frac{\sum_{x_i \in D_{train}} y_i K(\frac{d(x_i, x)}{h})}{\sum_{x_i \in D_{train}} K(\frac{d(x_i, x)}{h})}$</font>

In [10]:
/**
 * Nadaraya-Watson kernel regression estimate for [testRow].
 *
 * Every training row contributes its target (last element) weighted by
 * `kernel(distance(testRow, rowFeatures) / normalizer)`. The kernel is evaluated exactly once
 * per training row.
 *
 * @param train training rows (features followed by the target)
 * @param testRow feature vector of the query object (no target)
 * @param distance metric applied to two feature vectors
 * @param normalizer window size the distance is divided by
 * @param kernel kernel function applied to the scaled distance
 * @return the weighted mean of the targets; when the weights sum to zero or to NaN (for
 *   example with a zero [normalizer]), the plain mean of all targets is returned instead
 */
fun nadarayaWatson(
    train: List<List<Double>>,
    testRow: List<Double>,
    distance: (List<Double>, List<Double>) -> Double,
    normalizer: Double,
    kernel: (Double) -> Double
): Double {
    // Compute each weight once and reuse it for both the numerator and the denominator.
    val weights = train.map { row -> kernel(distance(testRow, row.dropLast(1)) / normalizer) }
    val denominator = weights.sum()

    // No usable weight mass: fall back to the unweighted mean rather than dividing by 0 or NaN.
    if (denominator == 0.0 || denominator.isNaN()) {
        return train.map { it.last() }.average()
    }

    val numerator = train.indices.sumOf { i -> train[i].last() * weights[i] }
    return numerator / denominator
}

In [11]:
/**
 * Leave-one-out confusion matrix for the "naive" classifier.
 *
 * Each object is held out in turn, its target is estimated with [nadarayaWatson] over the
 * remaining objects using the numeric class labels directly, and the estimate is rounded to
 * the nearest integer to obtain the predicted class.
 *
 * @param dataset objects whose last value is an integer-valued class label
 * @param distance metric applied to two feature vectors
 * @param windowSize fixed window size, or a neighbour count when [useWindowSizeAsNeighborsCount] is set
 * @param kernel kernel function
 * @param useWindowSizeAsNeighborsCount when `true`, the window size is derived per object via
 *   [getWindowSizeByNeighborsCount] using `windowSize.toInt()` as the neighbour count
 * @return square matrix of size `maxClass + 1`, indexed as `[actual][predicted]`
 */
fun confusionMatrix(
    dataset: Dataset,
    distance: (List<Double>, List<Double>) -> Double,
    windowSize: Double,
    kernel: (Double) -> Double,
    useWindowSizeAsNeighborsCount: Boolean = false
): List<List<Int>> {
    val rows = dataset.objects
    // The largest label determines the matrix size; an empty dataset fails here.
    val classCount = rows.maxByOrNull { it.last() }!!.last().toInt() + 1
    val matrix = List(classCount) { MutableList(classCount) { 0 } }

    rows.forEachIndexed { heldOutIndex, heldOut ->
        val actual = heldOut.last().toInt()
        val query = heldOut.dropLast(1)
        val rest = rows.filterIndexed { index, _ -> index != heldOutIndex }

        val bandwidth = when {
            useWindowSizeAsNeighborsCount ->
                getWindowSizeByNeighborsCount(rest, query, distance, windowSize.toInt())
            else -> windowSize
        }

        val estimate = nadarayaWatson(rest, query, distance, bandwidth, kernel)
        val predicted = Math.round(estimate).toInt()

        matrix[actual][predicted] += 1
    }

    return matrix
}

In [12]:
/**
 * Leave-one-out confusion matrix for the one-hot classifier.
 *
 * Each object is held out in turn. For every class `c` in `0..maxClass` the remaining objects
 * are relabelled with the indicator "label == c" and [nadarayaWatson] is run on them; the
 * class with the largest estimate is the prediction (the lowest class wins ties).
 *
 * @param dataset objects whose last value is an integer-valued class label
 * @param distance metric applied to two feature vectors
 * @param windowSize fixed window size, or a neighbour count when [useWindowSizeAsNeighborsCount] is set
 * @param kernel kernel function
 * @param useWindowSizeAsNeighborsCount when `true`, the window size is derived per object via
 *   [getWindowSizeByNeighborsCount] using `windowSize.toInt()` as the neighbour count
 * @return square matrix of size `maxClass + 1`, indexed as `[actual][predicted]`
 */
fun oneHotConfusionMatrix(
    dataset: Dataset,
    distance: (List<Double>, List<Double>) -> Double,
    windowSize: Double,
    kernel: (Double) -> Double,
    useWindowSizeAsNeighborsCount: Boolean = false
): List<List<Int>> {
    val objects = dataset.objects

    // The largest label determines the matrix size; an empty dataset fails here.
    val maxClass = objects.maxByOrNull { it.last() }!!.last().toInt()
    val confusionMatrix = List(maxClass + 1) { MutableList(maxClass + 1) { 0 } }

    for (objIndex in objects.indices) {
        val obj = objects[objIndex]
        val objTarget = obj.last().toInt()
        val objFeatures = obj.dropLast(1)
        val datasetWithoutObject = objects.filterIndexed { index, _ -> index != objIndex }

        val normalizer = if (useWindowSizeAsNeighborsCount) {
            getWindowSizeByNeighborsCount(datasetWithoutObject, objFeatures, distance, windowSize.toInt())
        } else {
            windowSize
        }

        // Score every class the matrix has a slot for, including class 0 (the loop used to
        // start at 1, so a class-0 object could never receive a real score).
        val probabilities = List(maxClass + 1) { cl ->
            val label = cl.toDouble()
            // Replace the label (always the last element of a row) with the 0/1 indicator.
            val oneHotDataset = datasetWithoutObject.map { row ->
                row.dropLast(1) + (if (row.last() == label) 1.0 else 0.0)
            }
            nadarayaWatson(oneHotDataset, objFeatures, distance, normalizer, kernel)
        }

        val prediction = probabilities.indices.maxByOrNull { probabilities[it] }
            ?: error("no classes to choose from")
        confusionMatrix[objTarget][prediction]++
    }

    return confusionMatrix
}

$Recall = \dfrac{TP}{TP + FN}$

$Precision = \dfrac{TP}{TP + FP}$

## F-measure:

$F_\beta = (1 + \beta^2) \frac{Precision · Recall}{\beta^2 · Precision + Recall}$

$F_1 = 2 \frac{Precision · Recall}{Precision + Recall}$

In [13]:
/**
 * F-beta score: `(1 + beta^2) * precision * recall / (beta^2 * precision + recall)`.
 *
 * @param beta weight of recall relative to precision
 * @param precision precision value
 * @param recall recall value
 * @return the F-beta score, or `0.0` when the denominator is exactly zero
 */
fun fBetaMeasure(beta: Double, precision: Double, recall: Double): Double {
    val betaSquared = beta * beta
    val weightedSum = betaSquared * precision + recall
    return if (weightedSum == 0.0) {
        0.0
    } else {
        (1 + betaSquared) * (precision * recall) / weightedSum
    }
}
 
/**
 * F1 score: the F-beta score with `beta = 1`.
 *
 * @param precision precision value
 * @param recall recall value
 * @return the result of [fBetaMeasure] for beta 1.0
 */
fun f1Measure(precision: Double, recall: Double): Double =
    fBetaMeasure(beta = 1.0, precision = precision, recall = recall)

$Precision_W = \dfrac{\sum\limits_{i = 1}^{N} \dfrac{T_i P_i}{C_i}}{All}$

$Recall_W = \dfrac{\sum\limits_{i = 1}^{N} T_i}{All}$

$macro F_β = (1 + β^2) \dfrac{Precision_W \cdot Recall_W}{β^2 \cdot Precision_W + Recall_W}$

$micro F_β = \sum\limits_{c \in Classes} \dfrac{P_c F_β(c)}{All}$

In [14]:
/**
 * Aggregated F1 score of a confusion matrix indexed as `[actual][predicted]`.
 *
 * Per-class precision, recall and F1 are weighted by the class's row sum (number of objects
 * that actually belong to it) and divided by the total number of objects.
 *
 * @param confusionMatrix square matrix of counts
 * @param macroMeasure when `true`, returns F1 of the weighted precision and weighted recall;
 *   when `false`, returns the weighted average of the per-class F1 scores
 * @return the selected score; `0.0` for a matrix whose counts sum to zero
 */
fun fMeasure(confusionMatrix: List<List<Int>>, macroMeasure: Boolean = true): Double {
    val total = confusionMatrix.sumOf { row -> row.sum() }

    var weightedPrecision = 0.0
    var weightedRecall = 0.0
    var weightedF1 = 0.0

    confusionMatrix.forEachIndexed { cls, row ->
        val truePositives = row[cls].toDouble()
        val predictedCount = confusionMatrix.sumOf { it[cls] }
        val actualCount = row.sum()

        val precision = if (predictedCount != 0) truePositives / predictedCount else 0.0
        val recall = if (actualCount != 0) truePositives / actualCount else 0.0
        val weight = actualCount.toDouble()

        weightedRecall += recall * weight
        weightedPrecision += precision * weight
        weightedF1 += f1Measure(precision, recall) * weight
    }

    if (total != 0) {
        weightedF1 /= total
        weightedRecall /= total
        weightedPrecision /= total
    } else {
        weightedF1 = 0.0
        weightedRecall = 0.0
        weightedPrecision = 0.0
    }

    return if (macroMeasure) f1Measure(weightedPrecision, weightedRecall) else weightedF1
}

In [15]:
/**
 * Exhaustive search for the best distance / window size / kernel combination.
 *
 * Tries every entry of `DISTANCES`, every window size `k / 10000` for `k` in `1..20`, and
 * every entry of `KERNELS` on the notebook-level `dataset`, scoring each combination with
 * [fMeasure] of the corresponding leave-one-out confusion matrix.
 *
 * @param mode `"naive"` to use [confusionMatrix], `"one hot"` to use [oneHotConfusionMatrix]
 * @return a map with keys `"f measure"`, `"distance"`, `"window size"` and `"kernel"` for the
 *   best combination found; an empty map (after printing a message) for any other [mode]
 */
fun findOptimal(mode: String): Map<String, Any> {
    var bestScore = 0.0
    var bestDistance = "euclidean"
    var bestWindowSize = 0.05
    var bestKernel = "uniform"

    for ((distanceName, distanceFn) in DISTANCES) {
        for (step in 1..20) {
            val windowSize = step.toDouble() / 10000
            for ((kernelName, kernelFn) in KERNELS) {
                val matrix = if (mode == "naive") {
                    confusionMatrix(dataset, distanceFn, windowSize, kernelFn)
                } else if (mode == "one hot") {
                    oneHotConfusionMatrix(dataset, distanceFn, windowSize, kernelFn)
                } else {
                    println("Unsupported mode")
                    return mapOf()
                }

                val score = fMeasure(matrix)
                // Strict comparison: the first combination reaching a score is kept on ties.
                if (score > bestScore) {
                    bestScore = score
                    bestDistance = distanceName
                    bestWindowSize = windowSize
                    bestKernel = kernelName
                }
            }
        }
    }

    return mapOf(
        "f measure" to bestScore,
        "distance" to bestDistance,
        "window size" to bestWindowSize,
        "kernel" to bestKernel
    )
}

In [16]:
// Run the parameter search with the "naive" classifier (class labels regressed directly)
// and print the best configuration found.
val naive = findOptimal("naive")
println(naive)

{f measure=0.44368831872353326, distance=manhattan, window size=0.0014, kernel=logistic}


In [17]:
// Run the parameter search with the one-hot classifier (one regression per class)
// and print the best configuration found.
val oneHot = findOptimal("one hot")
println(oneHot)

{f measure=0.4529590624205767, distance=chebyshev, window size=5.0E-4, kernel=logistic}


In [18]:
// Confusion-matrix builder of whichever search scored the higher f-measure.
// The naive variant is kept on a tie.
val getConfusionMatrix = when {
    naive["f measure"].toString().toDouble() < oneHot["f measure"].toString().toDouble() ->
        ::oneHotConfusionMatrix
    else ->
        ::confusionMatrix
}

In [19]:
// F-measure as a function of a fixed window size.
//
// The sweep uses the distance and kernel of the search result that won, i.e. the same result
// that decides which confusion-matrix builder is used, instead of hard-coded
// "manhattan"/"logistic" values that only match one particular run of the search.
val bestResult =
    if (naive["f measure"].toString().toDouble() < oneHot["f measure"].toString().toDouble()) oneHot else naive
val bestDistance = DISTANCES.getValue(bestResult["distance"].toString())
val bestKernel = KERNELS.getValue(bestResult["kernel"].toString())

// Same grid as the parameter search: 0.0001, 0.0002, ..., 0.0020.
val windowSizes = List<Double>(20) { (it + 1.0) / 10000 }
val fMeasures = windowSizes.map { windowSize ->
    fMeasure(getConfusionMatrix(dataset, bestDistance, windowSize, bestKernel, false))
}
val plotDataWindowSize = mapOf(
    "windowSize" to windowSizes,
    "fMeasure" to fMeasures
)

lets_plot(plotDataWindowSize) +
    geom_line {
        x = "windowSize"
        y = "fMeasure"
    }

In [22]:
// F-measure as a function of the neighbour count that defines a per-object window size.
//
// The sweep uses the distance and kernel of the search result that won, i.e. the same result
// that decides which confusion-matrix builder is used, instead of hard-coded
// "manhattan"/"logistic" values that only match one particular run of the search.
val bestResult =
    if (naive["f measure"].toString().toDouble() < oneHot["f measure"].toString().toDouble()) oneHot else naive
val bestDistance = DISTANCES.getValue(bestResult["distance"].toString())
val bestKernel = KERNELS.getValue(bestResult["kernel"].toString())

// Neighbour counts 1..10, stored as doubles because the matrix builders take the count
// through their Double `windowSize` parameter.
val neighborsCount = List<Double>(10) { (it + 1).toDouble() }
val fMeasures = neighborsCount.map { count ->
    fMeasure(getConfusionMatrix(dataset, bestDistance, count, bestKernel, true))
}
val plotDataNeighborCount = mapOf(
    "neighborsCount" to neighborsCount,
    "fMeasure" to fMeasures
)

lets_plot(plotDataNeighborCount) +
    geom_line {
        x = "neighborsCount"
        y = "fMeasure"
    }