In [None]:
@file:DependsOn("/data/tools/actin-personalization/actin-personalization.jar")
import com.hartwig.actin.personalization.datamodel.serialization.ReferencePatientJson

// Read the reference patient records from the JSON file at this path; later cells flatten them into tumor entries.
val patients = ReferencePatientJson.read("/data/patient_like_me/ncr/patientRecords.json")

In [None]:
// Use similarity module from repo
@file:DependsOn("nz.ac.waikato.cms.weka:weka-stable:3.8.6")

import com.hartwig.actin.personalization.datamodel.AnorectalVergeDistanceCategory
import com.hartwig.actin.personalization.datamodel.NumberOfCciCategories
import com.hartwig.actin.personalization.datamodel.Episode
import com.hartwig.actin.personalization.datamodel.Location
import com.hartwig.actin.personalization.datamodel.PfsMeasure
import com.hartwig.actin.personalization.datamodel.PfsMeasureType
import com.hartwig.actin.personalization.datamodel.StageTnm
import com.hartwig.actin.personalization.datamodel.TumorEntry
import com.hartwig.actin.personalization.datamodel.TumorType
import com.hartwig.actin.personalization.similarity.weka.createPatientDb
import com.hartwig.actin.personalization.similarity.weka.DoubleField
import com.hartwig.actin.personalization.similarity.weka.IntField
import com.hartwig.actin.personalization.similarity.weka.NominalField.Companion.booleanField
import com.hartwig.actin.personalization.similarity.weka.NominalField.Companion.enumField

/**
 * Returns the smallest `intervalTumorIncidencePfsMeasureDays` across all PFS measures of every
 * episode in [entry], ignoring measures whose type is [PfsMeasureType.CENSOR].
 *
 * @return the minimum interval, or `null` when no non-censored measure carries a value.
 */
fun pfs(entry: TumorEntry): Int? =
    entry.episodes
        .flatMap { episode -> episode.pfsMeasures }
        .mapNotNull { measure ->
            // Censored measures contribute nothing; null intervals are dropped by mapNotNull.
            measure.intervalTumorIncidencePfsMeasureDays.takeUnless { measure.type == PfsMeasureType.CENSOR }
        }
        .minOrNull()

// Field definitions handed to createPatientDb and createInstancesForCluster.
// "pfs" must stay last: the cluster summary excludes it with fields.dropLast(1).
val fields = listOf(
    enumField(TumorType::class.java, { entry -> entry.diagnosis.consolidatedTumorType }),
    // Location is taken from the first episode only.
    enumField(Location::class.java, { entry -> entry.episodes.firstOrNull()?.tumorLocation }),
    // First episode that has a TNM stage with a numeric value.
    DoubleField("stage", { entry -> entry.episodes.firstNotNullOfOrNull { episode -> episode.stageTNM?.asNumeric } }),
    booleanField("hasHadPriorTumor", { entry -> entry.diagnosis.hasHadPriorTumor }),
    // First episode that has a WHO status recorded before treatment start.
    IntField("who", { entry -> entry.episodes.firstNotNullOfOrNull { episode -> episode.whoStatusPreTreatmentStart } }),

    // --- Disabled: Charlson comorbidity index (CCI) fields ---
    // IntField("cci", {it.diagnosis.cci }),
    // enumField(NumberOfCciCategories::class.java, { it.diagnosis.cciNumberOfCategories }),
    // booleanField("cciHasAids", { it.diagnosis.cciHasAids }),
    // booleanField("cciHasCongestiveHeartFailure", { it.diagnosis.cciHasCongestiveHeartFailure }),
    // booleanField("cciHasCollagenosis", { it.diagnosis.cciHasCollagenosis }),
    // booleanField("cciHasCopd", { it.diagnosis.cciHasCopd }),
    // booleanField("cciHasCerebrovascularDisease", { it.diagnosis.cciHasCerebrovascularDisease }),
    // booleanField("cciHasDementia", { it.diagnosis.cciHasDementia }),
    // booleanField("cciHasDiabetesMellitus", { it.diagnosis.cciHasDiabetesMellitus }),
    // booleanField("cciHasDiabetesMellitusWithEndOrganDamage", { it.diagnosis.cciHasDiabetesMellitusWithEndOrganDamage }),
    // booleanField("cciHasOtherMalignancy", { it.diagnosis.cciHasOtherMalignancy }),
    // booleanField("cciHasOtherMetastaticSolidTumor", { it.diagnosis.cciHasOtherMetastaticSolidTumor }),
    // booleanField("cciHasMyocardialInfarct", { it.diagnosis.cciHasMyocardialInfarct }),
    // booleanField("cciHasMildLiverDisease", { it.diagnosis.cciHasMildLiverDisease }),
    // booleanField("cciHasHemiplegiaOrParaplegia", { it.diagnosis.cciHasHemiplegiaOrParaplegia }),
    // booleanField("cciHasPeripheralVascularDisease", { it.diagnosis.cciHasPeripheralVascularDisease }),
    // booleanField("cciHasRenalDisease", { it.diagnosis.cciHasRenalDisease }),
    // booleanField("cciHasLiverDisease", { it.diagnosis.cciHasLiverDisease }),
    // booleanField("cciHasUlcerDisease", { it.diagnosis.cciHasUlcerDisease }),

    // --- Disabled: presentation fields ---
    // booleanField("presentedWithIleus", { it.diagnosis.presentedWithIleus }),
    // booleanField("presentedWithPerforation", { it.diagnosis.presentedWithPerforation }),
    // enumField(AnorectalVergeDistanceCategory::class.java, { it.diagnosis.anorectalVergeDistanceCategory }),

    // --- Disabled: molecular fields ---
    // booleanField("hasMsi", { it.diagnosis.hasMsi }),
    // booleanField("hasBrafMutation", { it.diagnosis.hasBrafMutation }),
    // booleanField("hasBrafV600EMutation", { it.diagnosis.hasBrafV600EMutation }),
    // booleanField("hasRasMutation", { it.diagnosis.hasRasMutation }),
    // booleanField("hasKrasG12CMutation", { it.diagnosis.hasKrasG12CMutation }),

    IntField("pfs", ::pfs)
)

// Flatten all patients into a single list of tumor entries.
val tumors = patients.flatMap { patient -> patient.tumorEntries }

// Keep only the entries for which pfs() yields a value.
val tumorEntries = tumors.filterNot { entry -> pfs(entry) == null }

// Build the Weka dataset from the retained entries and the configured fields.
val patientDb = createPatientDb(tumorEntries, fields)

In [None]:
import weka.classifiers.trees.REPTree
import weka.core.DenseInstance

// Fit a REPTree on the patient dataset.
val classifier = REPTree().also { tree -> tree.buildClassifier(patientDb) }

// Query instance with one slot per field, bound to patientDb so attributes can be looked up by name.
val newPatient = DenseInstance(fields.size)
newPatient.setDataset(patientDb)

// Values are set by attribute name; enum-backed attributes are given as the enum ordinal.
// No value is assigned to "pfs" here.
mapOf(
    "tumorType" to TumorType.CRC_ADENOCARCINOMA.ordinal.toDouble(),
    "location" to Location.COECUM.ordinal.toDouble(),
    "stage" to 4.0,
    "hasHadPriorTumor" to 0.0,
    "who" to 0.0,
    // "cciNumberOfCategories" to NumberOfCciCategories.ZERO_CATEGORIES.ordinal.toDouble(),
    // "hasMsi" to 0.0,
    // "hasBrafMutation" to 0.0,
    // "hasBrafV600EMutation" to 0.0,
    // "hasRasMutation" to 1.0
).forEach { (attributeName, attributeValue) ->
    newPatient.setValue(patientDb.attribute(attributeName), attributeValue)
}

// Last expression of the cell: the classifier's output for the query instance.
classifier.classifyInstance(newPatient)

In [None]:
import com.hartwig.actin.personalization.similarity.weka.Field
import weka.clusterers.SimpleKMeans
import weka.core.DenseInstance
import weka.core.Instances
import weka.core.Utils

// Value substituted whenever a field has no value for a tumor entry.
private val missingValue = Utils.missingValue()

/**
 * Builds a Weka [Instances] set named "patients" with one attribute per entry of [fields] and one
 * weight-1.0 [DenseInstance] per entry of [patients].
 *
 * A field that returns `null` for a given tumor entry is stored as [missingValue].
 *
 * @param patients tumor entries to convert, one instance each
 * @param fields field definitions supplying both the attributes and the per-entry values
 * @return the populated dataset
 */
fun createInstancesForCluster(patients: List<TumorEntry>, fields: List<Field>): Instances {
    val dataset = Instances("patients", ArrayList(fields.map(Field::toAttribute)), patients.size)

    for (entry in patients) {
        val row = DoubleArray(fields.size) { column -> fields[column].getFor(entry) ?: missingValue }
        val instance = DenseInstance(1.0, row)
        // Bind the instance to the dataset before adding it, as the original code did.
        instance.setDataset(dataset)
        dataset.add(instance)
    }

    return dataset
}

// Dataset used for clustering: same tumor entries and fields as the classifier dataset.
val clusterInstances = createInstancesForCluster(tumorEntries, fields)

// k-means with five clusters, built on the full clustering dataset.
val clusterer = SimpleKMeans().also { kMeans ->
    kMeans.setNumClusters(5)
    kMeans.buildClusterer(clusterInstances)
}

In [None]:
import com.hartwig.actin.personalization.similarity.weka.DoubleField
import com.hartwig.actin.personalization.similarity.weka.IntField
import weka.clusterers.ClusterEvaluation
import weka.core.Instance

/**
 * Prints, for every field in [fields], how often each distinct value occurs among [instances].
 *
 * Values of [IntField] and [DoubleField] fields are read with `instance.value(index)`; values of
 * all other fields are read with `instance.stringValue(index)`. The resulting map of
 * field name -> (value -> count) is printed via `toString()`, with a line break inserted after
 * every "}, ".
 *
 * @param instances instances to summarise
 * @param fields fields to include in the summary
 * @param attributeMap maps each field name to the attribute index used to read it from an instance
 * @throws IllegalArgumentException if a field's name has no entry in [attributeMap]; this is
 *   checked up front, so it is raised even when [instances] is empty
 */
fun printSummaryForInstances(instances: List<Instance>, fields: List<Field>, attributeMap: Map<String, Int>) {
    // Resolve each field's attribute index once, instead of once per instance, and fail with a
    // descriptive message rather than a bare NullPointerException when a mapping is absent.
    val indexedFields = fields.map { field ->
        field to requireNotNull(attributeMap[field.name]) {
            "No attribute index found for field '${field.name}'"
        }
    }

    val summary = instances.asSequence()
        .flatMap { instance ->
            indexedFields.map { (field, index) ->
                val value = when (field) {
                    is IntField, is DoubleField -> instance.value(index)
                    else -> instance.stringValue(index)
                }
                field.name to value
            }
        }
        .groupBy({ it.first }, { it.second })
        .mapValues { (_, values) -> values.groupingBy { it }.eachCount() }

    println(summary.toString().replace("}, ", "},\n"))
}

// Evaluate the clusterer on the clustering dataset to obtain a cluster assignment per instance.
val clusterEval = ClusterEvaluation().also { evaluation ->
    evaluation.setClusterer(clusterer)
    evaluation.evaluateClusterer(clusterInstances)
}
val clusterAssignments = clusterEval.clusterAssignments

// Cluster assigned to the query patient, as a Double so it compares directly with the assignments.
val clusterIndex = clusterer.clusterInstance(newPatient).toDouble()

// All instances whose assignment matches the query patient's cluster.
val instancesInCluster = clusterInstances
    .withIndex()
    .filter { (position, _) -> clusterAssignments[position] == clusterIndex }
    .map { indexed -> indexed.value }

// Summarise every field except the last one ("pfs"); indices follow the order of `fields`.
printSummaryForInstances(
    instancesInCluster,
    fields.dropLast(1),
    fields.withIndex().associate { (position, field) -> field.name to position }
)