In [2]:
%use smile, lets-plot
import java.io.File
import kotlin.random.Random

In [3]:
operator fun List<Double>.plus(v: List<Double>): List<Double> =
    this.zip(v).map { it.first + it.second }

operator fun List<Double>.minus(v: List<Double>): List<Double> =
    this.zip(v).map { it.first - it.second }

operator fun List<Double>.times(x: Double): List<Double> =
    this.map { it * x }

operator fun Double.times(v: List<Double>): List<Double> =
    v * this

fun List<Double>.dot(v: List<Double>): Double =
    this.zip(v).map { it.first * it.second }.reduce { a, b -> a + b }

In [4]:
fun normalize(dataset: List<List<Double>>): List<List<Double>> {
    val featuresCount = dataset[0].size
    val minmax = MutableList(featuresCount) { Pair(0.0, 0.0) }
    for (feature in 0 until featuresCount) {
        val min = dataset.minByOrNull { it[feature] }
        val max = dataset.maxByOrNull { it[feature] }
        if (min == null || max == null) {
            return emptyList()
        }
        minmax[feature] = Pair(min[feature], max[feature])
    }

    val normalizedValues = List(dataset.size) { row ->
        List(featuresCount) { col ->
            val featureMinmax = minmax[col]
            val section = featureMinmax.second - featureMinmax.first
            if (col == featuresCount - 1) {
                dataset[row][col]
            } else if (section != 0.0) {
                (dataset[row][col] - featureMinmax.first) / section
            } else {
                1.0
            }
        }
    }

    return normalizedValues
}

In [5]:
fun addBias(values: List<Double>): List<Double> {
    val values = values.toMutableList()
    values.add(0, 1.0)
    return values
}

In [6]:
fun getXY(obj: List<Double>): Pair<List<Double>, Double> {
    return Pair(obj.dropLast(1), obj.last())
}

In [7]:
fun nrmse(objects: List<List<Double>>, w: List<Double>): Double {
    var sum = 0.0
    var maxY = Double.MIN_VALUE
    var minY = Double.MAX_VALUE
    
    for (obj in objects) {
        val (x, y) = getXY(obj)
        maxY = max(maxY, y)
        minY = min(minY, y)
        val a = w.dot(x)
        sum += (a - y).pow(2.0)
    }
    var normalizer = maxY - minY
    if (normalizer == 0.0) {
        normalizer = if (maxY == 0.0) 1.0 else maxY
    }
    return sqrt(sum) / normalizer
}

In [8]:
fun sgd(train: List<List<Double>>, stepsCount: Int, lambda: Double, p: Double, tau: Double): List<Double> {
    val featuresCount = train[0].size
    
    var w = List(featuresCount) { Random.nextDouble(10000.0) }

    for (i in 0 until stepsCount) {
        val step = lambda * (1.0 / (1.0 + i)).pow(p)
        val objInd = Random.nextInt(train.size)
        val (x, y) = getXY(train[objInd])
        val a = w.dot(x)
        val grad = (a - y) * x
        w = w * (1 - step * tau) - step * grad
    }
    
    return w
}

In [13]:
fun readFileAsLinesUsingUseLines(fileName: String): List<String> = File(fileName).useLines { it.toList() }

val inputFile = readFileAsLinesUsingUseLines("LR/2.txt")
val featuresCount = inputFile[0].toInt() + 1
val objectsCount = inputFile[1].toInt()
var objects = List(objectsCount) { i -> addBias(inputFile[i + 2].split(' ').map { it.toDouble() }) }
objects = normalize(objects)

val testData = inputFile.drop(2 + objectsCount)
val testObjectsCount = testData[0].toInt()
var testObjects = List(testObjectsCount) { i -> addBias(testData[i + 1].split(' ').map { it.toDouble() }) }
testObjects = normalize(testObjects)

val STEPS_COUNT = 100
val STEP_SIZE = 0.1
val TAU = 1.5

val stepsCount = List<Int>(300) { (it + 2) * 100 }
val nrmse = List<Double>(300) {i -> 
    val ww = sgd(objects, stepsCount[i], 0.0001, 0.45, TAU)
    nrmse(testObjects, ww)
}

val plotDataWindowSize = mapOf(
    "stepsCount" to stepsCount,
    "nrmse" to nrmse
)

lets_plot(plotDataWindowSize) +
    geom_line {
        x = "stepsCount"
        y = "nrmse"
    }

In [14]:
val stepsCount = List<Int>(300) { (it + 2) * 100 }
val nrmse = List<Double>(300) {i -> 
    val ww = sgd(objects, stepsCount[i], 0.0001, 0.45, TAU)
    nrmse(objects, ww)
}

val plotDataWindowSize = mapOf(
    "stepsCount" to stepsCount,
    "nrmse" to nrmse
)

lets_plot(plotDataWindowSize) +
    geom_line {
        x = "stepsCount"
        y = "nrmse"
    }

In [54]:
import smile.math.matrix.JMatrix

val F = objects.map { it.dropLast(1).toDoubleArray() }
val FMatrix = JMatrix(F.toTypedArray())
val svd = FMatrix.svd()
val v = svd.getV()
val s = svd.getS()
val u = svd.getU()
// println("$v\n$s\n$u")
var sum = List(featuresCount) { MutableList(objectsCount) {0.0} }
for (feature in 0 until featuresCount) {
    for (i in 0 until featuresCount) {
        for (j in 0 until objectsCount) {
            val lambda = s.get(feature, feature)
            sum[i][j] += (lambda / (lambda * lambda + 2.0)) * v.get(i, feature) * u.get(j, feature)
        }
    }
}

val w = List(featuresCount) {i -> 
    var ss = 0.0
    for (j in 0 until objectsCount) {
        ss += sum[i][j] * objects[j].last()
    } 
    ss
}

val nrmse = nrmse(testObjects, w)
println(nrmse)

val nrmse2 = nrmse(objects, w)
println(nrmse2)

2.1117965102184306
0.10729673754642742


In [55]:
fun naiveAnnealing(stepsCount: Int, samplesCount: Int, scatter: Double): List<Double> {
    var w = List(featuresCount) { Random.nextDouble(scatter) }
    for (i in 0 until stepsCount) {
        val ws = List(samplesCount) { List(featuresCount) {i -> w[i] + Random.nextDouble(scatter) } }
        var minNrmse = nrmse(testObjects, w)
        for (newW in ws) {
            val nrmse = nrmse(testObjects, newW)
            if (nrmse < minNrmse) {
                minNrmse = nrmse
                w = newW
            }
        }
    }
    return w
}

In [64]:
// val annealingW = naiveAnnealing(1000, 10, 10000.0)
// println(nrmse(testObjects, annealingW))

val stepsCount = List<Int>(10) { (it + 1) * 10 }
val nrmse = List<Double>(10) {i -> 
    val ww = naiveAnnealing(stepsCount[i], 10, 10000.0)
    nrmse(testObjects, ww)
}

val plotDataWindowSize = mapOf(
    "stepsCount" to stepsCount,
    "nrmse" to nrmse
)

lets_plot(plotDataWindowSize) +
    geom_line {
        x = "stepsCount"
        y = "nrmse"
    }

In [57]:
val stepsCount = List<Int>(10) { (it + 5) * 10 }
val nrmse = List<Double>(10) {i -> 
    val ww = naiveAnnealing(stepsCount[i], 10, 100000.0)
    nrmse(objects, ww)
}

val plotDataWindowSize = mapOf(
    "stepsCount" to stepsCount,
    "nrmse" to nrmse
)

lets_plot(plotDataWindowSize) +
    geom_line {
        x = "stepsCount"
        y = "nrmse"
    }