In [None]:
@file:DependsOn("/data/repos/actin-personalization/ncr/target/ncr-local-SNAPSHOT-jar-with-dependencies.jar")

import com.hartwig.actin.personalization.datamodel.PatientRecord
import com.hartwig.actin.personalization.ncr.datamodel.NcrRecord
import com.hartwig.actin.personalization.ncr.interpretation.PatientRecordFactory
import com.hartwig.actin.personalization.ncr.serialization.NcrDataReader

// Read the NCR records from the CSV file at this fixed path; `records` is the input for the cells below.
val records = NcrDataReader.read("/data/patient_like_me/ncr/K23244.csv")

In [None]:
// Sanity check: histogram of the number of "DIA" records per (keyNkr, keyZid) pair.
// A single bucket with key 1 means every patient-tumor pair has exactly one DIA record.
println(
    records
        .groupBy { it.identification.keyNkr to it.identification.keyZid }
        .values
        .map { tumorRecords -> tumorRecords.count { record -> record.identification.epis == "DIA" } }
        .groupingBy { diaCount -> diaCount }
        .eachCount()
)

In [None]:
// Per tumor, pair the metaEpis of its single DIA record with the set of metaEpis values
// of all its other records, then count how often each such combination occurs.
// Note: `single()` throws if a tumor does not have exactly one DIA record.
println(
    records
        .groupBy { it.identification.keyNkr to it.identification.keyZid }
        .values
        .map { tumorRecords ->
            val (diagnosisRecords, otherRecords) = tumorRecords.partition { it.identification.epis == "DIA" }
            val diagnosisMetaEpis = diagnosisRecords.single().identification.metaEpis
            val otherMetaEpis = otherRecords.map { it.identification.metaEpis }.toSet()
            diagnosisMetaEpis to otherMetaEpis
        }
        .groupingBy { combination -> combination }
        .eachCount()
)

In [None]:
// Convert the raw NCR records with PatientRecordFactory; the result is used below as the collection of patients.
val patients = PatientRecordFactory.create(records)

In [None]:
// Flatten every patient's episodesPerTumorOfInterest map into one list of map entries (one per tumor).
val tumors = patients.flatMap { patient -> patient.episodesPerTumorOfInterest.entries }

In [None]:
// Number of entries produced by PatientRecordFactory (last expression of the cell).
patients.size

In [None]:
import com.hartwig.actin.personalization.datamodel.Episode
import com.hartwig.actin.personalization.datamodel.Drug
import com.hartwig.actin.personalization.datamodel.SystemicTreatmentComponent
import com.hartwig.actin.personalization.datamodel.SystemicTreatmentScheme

/**
 * Collects the systemic treatment schemes of all [episodes] into one flat list, in episode order.
 *
 * An episode without a scheme list contributes nothing, instead of aborting the whole analysis
 * with a NullPointerException as the former `!!` would. `orEmpty()` is a no-op when the property
 * is declared non-null, which is how `classifyEpisode` in this notebook already treats it.
 */
fun treatmentSchemesForTumorEpisodes(episodes: List<Episode>) =
    episodes.flatMap { episode -> episode.systemicTreatmentSchemes.orEmpty() }

/**
 * Returns the distinct drugs of the scheme's treatment components.
 *
 * TEGAFUR and TEGAFUR_OR_GIMERACIL_OR_OTERACIL are both reported as FLUOROURACIL,
 * so schemes that differ only in those drugs yield the same set.
 */
fun schemeToDrugSet(scheme: SystemicTreatmentScheme) =
    scheme.treatmentComponents
        .map { component ->
            when (val drug = component.drug) {
                Drug.TEGAFUR, Drug.TEGAFUR_OR_GIMERACIL_OR_OTERACIL -> Drug.FLUOROURACIL
                else -> drug
            }
        }
        .toSet()

In [None]:
// Unique treatment schemes by drug set, most frequent first; only sets occurring at least 10 times are kept.

val treatmentSchemeSets = patients.flatMap { it.episodesPerTumorOfInterest.values }
    .flatMap(::treatmentSchemesForTumorEpisodes)
    // schemeToDrugSet is the scheme-to-set helper defined in this notebook; the previously referenced
    // schemeToTreatmentNameSet is not defined anywhere in it, so the cell could not compile.
    .map(::schemeToDrugSet)
    .groupingBy { it }
    .eachCount()
    .entries.sortedByDescending { it.value }
    // Entries are sorted by descending count, so takeWhile keeps exactly those with count >= 10.
    .takeWhile { it.value >= 10 }

println(treatmentSchemeSets.joinToString("\n") { (drugs, count) -> "$drugs: $count" })
println(treatmentSchemeSets.size)



In [None]:
import com.hartwig.actin.personalization.datamodel.SystemicTreatmentPlan

// Unique patient tumors by treatment plan sequences in all schemes

/**
 * Classifies an episode by its first systemic treatment scheme only: the scheme's drug set
 * (see schemeToDrugSet) is handed to SystemicTreatmentPlan.findForDrugs.
 *
 * Returns null when the episode has no systemic treatment schemes; otherwise returns whatever
 * findForDrugs returns for that drug set.
 */
fun classifyEpisode(episode: Episode) =
    episode.systemicTreatmentSchemes.firstOrNull()?.let { firstScheme ->
        SystemicTreatmentPlan.findForDrugs(schemeToDrugSet(firstScheme))
    }

// For every tumor, build the list of non-null episode classifications (in episode order),
// then count identical lists and sort them by descending frequency.
val tumorTreatmentSets = patients.flatMap { patient -> patient.episodesPerTumorOfInterest.values }
    // Optional restriction, currently disabled:
    // .filter { episodesForTumor ->
    //     episodesForTumor.mapNotNull(Episode::distantMetastasesStatus).toSet() == setOf(1)
    //     && episodesForTumor.none { it.hasHadPreSurgerySystemicChemotherapy || it.hasHadPostSurgerySystemicChemotherapy
    //         || it.hasHadPreSurgerySystemicTargetedTherapy || it.hasHadPostSurgerySystemicTargetedTherapy }
    // }
    .map { tumorEpisodes -> tumorEpisodes.mapNotNull { episode -> classifyEpisode(episode) } }
    .groupingBy { planSequence -> planSequence }
    .eachCount()
    .entries.sortedByDescending { it.value }
    // Optional cut-off, currently disabled:
    //.takeWhile { (_, count) -> count >= 10 }

In [None]:
// One "<plan sequence>: <count>" line per distinct sequence, followed by the number of distinct sequences.
println(tumorTreatmentSets.joinToString("\n") { (plans, count) -> "$plans: $count" })
println(tumorTreatmentSets.size)

In [None]:
import com.hartwig.actin.personalization.datamodel.Location
import com.hartwig.actin.personalization.datamodel.LocationGroup

/**
 * Four yes/no similarity flags used as a grouping key.
 *
 * [toString] lists the names of the flags that are true, comma-separated and in declaration order,
 * or "no matching attributes" when none is true.
 */
data class RoughClassification(
    val sameLocation: Boolean,
    val sameMetastasisLocation: Boolean,
    val sameMolecular: Boolean,
    val approximateWhoMatch: Boolean
) {
    override fun toString(): String {
        val matchedNames = listOfNotNull(
            "sameLocation".takeIf { sameLocation },
            "sameMetastasisLocation".takeIf { sameMetastasisLocation },
            "sameMolecular".takeIf { sameMolecular },
            "approximateWhoMatch".takeIf { approximateWhoMatch }
        )
        return if (matchedNames.isEmpty()) "no matching attributes" else matchedNames.joinToString(", ")
    }
}

// Count tumors per combination of rough similarity flags against a fixed reference profile
// (the hard-coded criteria below), most frequent combination first; the cell result is one line per combination.
val tumors = patients.flatMap { patient -> patient.episodesPerTumorOfInterest.entries }
tumors.groupingBy { (diagnosis, episodes) ->
    RoughClassification(
        sameLocation = Location.COECUM in diagnosis.tumorLocations,
        sameMetastasisLocation = episodes.any { episode ->
            episode.metastases.any { it.metastasisLocation.locationGroup == LocationGroup.RETROPERITONEUM_AND_PERITONEUM }
        },
        // RAS mutation must be explicitly true; the other markers must not be true (null counts as "not true").
        sameMolecular = diagnosis.hasRasMutation == true &&
            listOf(diagnosis.hasMsi, diagnosis.hasBrafMutation, diagnosis.hasBrafV600EMutation, diagnosis.hasKrasG12CMutation)
                .none { it == true },
        // An episode with an unknown WHO status never counts as a match.
        approximateWhoMatch = episodes.any { episode -> episode.whoStatusPreTreatmentStart?.let { it <= 1 } == true }
    )
}
    .eachCount()
    .entries.sortedByDescending { it.value }
    .joinToString("\n") { (classification, tumorCount) -> "$classification: $tumorCount" }


In [None]:
import com.hartwig.actin.personalization.datamodel.CciNumberOfCategories
import com.hartwig.actin.personalization.datamodel.AnorectalVergeDistanceCategory

/**
 * Value-count tables, one per attribute: each property maps an attribute value (null is a valid key)
 * to an Int count, and every table defaults to an empty map.
 *
 * NOTE: this notebook constructs the class with positional arguments, so the declaration order of the
 * properties must not be changed without updating that call.
 */
data class TumorSummary(
    // Comorbidity (cci*) attributes
    val cci: Map<Int?, Int> = emptyMap(),
    val cciNumberOfCategories: Map<CciNumberOfCategories?, Int> = emptyMap(),
    val cciHasAids: Map<Boolean?, Int> = emptyMap(),
    val cciHasCongestiveHeartFailure: Map<Boolean?, Int> = emptyMap(),
    val cciHasCollagenosis: Map<Boolean?, Int> = emptyMap(),
    val cciHasCopd: Map<Boolean?, Int> = emptyMap(),
    val cciHasCerebrovascularDisease: Map<Boolean?, Int> = emptyMap(),
    val cciHasDementia: Map<Boolean?, Int> = emptyMap(),
    val cciHasDiabetesMellitus: Map<Boolean?, Int> = emptyMap(),
    val cciHasDiabetesMellitusWithEndOrganDamage: Map<Boolean?, Int> = emptyMap(),
    val cciHasOtherMalignancy: Map<Boolean?, Int> = emptyMap(),
    val cciHasOtherMetastaticSolidTumor: Map<Boolean?, Int> = emptyMap(),
    val cciHasMyocardialInfarct: Map<Boolean?, Int> = emptyMap(),
    val cciHasMildLiverDisease: Map<Boolean?, Int> = emptyMap(),
    val cciHasHemiplegiaOrParaplegia: Map<Boolean?, Int> = emptyMap(),
    val cciHasPeripheralVascularDisease: Map<Boolean?, Int> = emptyMap(),
    val cciHasRenalDisease: Map<Boolean?, Int> = emptyMap(),
    val cciHasLiverDisease: Map<Boolean?, Int> = emptyMap(),
    val cciHasUlcerDisease: Map<Boolean?, Int> = emptyMap(),

    // Presentation attributes
    val presentedWithIleus: Map<Boolean?, Int> = emptyMap(),
    val presentedWithPerforation: Map<Boolean?, Int> = emptyMap(),
    val anorectalVergeDistanceCategory: Map<AnorectalVergeDistanceCategory?, Int> = emptyMap(),

    // Molecular attributes
    val hasMsi: Map<Boolean?, Int> = emptyMap(),
    val hasBrafMutation: Map<Boolean?, Int> = emptyMap(),
    val hasBrafV600EMutation: Map<Boolean?, Int> = emptyMap(),
    val hasRasMutation: Map<Boolean?, Int> = emptyMap(),
    val hasKrasG12CMutation: Map<Boolean?, Int> = emptyMap(),
)

/**
 * Returns a copy of [oldMap] in which the count for [instance] is one higher than before
 * (a key that was absent starts from 0). [oldMap] itself is not modified.
 */
fun <T> updatedMap(oldMap: Map<T, Int>, instance: T): Map<T, Int> {
    val previousCount = oldMap[instance] ?: 0
    return oldMap + (instance to previousCount + 1)
}
    
// Fold over all tumors, incrementing for each attribute the count of the value found on the tumor's key.
// Named arguments keep every table paired with its own attribute.
val summary = tumors.fold(TumorSummary()) { counts, (diagnosis, _) ->
    TumorSummary(
        cci = updatedMap(counts.cci, diagnosis.cci),
        cciNumberOfCategories = updatedMap(counts.cciNumberOfCategories, diagnosis.cciNumberOfCategories),
        cciHasAids = updatedMap(counts.cciHasAids, diagnosis.cciHasAids),
        cciHasCongestiveHeartFailure = updatedMap(counts.cciHasCongestiveHeartFailure, diagnosis.cciHasCongestiveHeartFailure),
        cciHasCollagenosis = updatedMap(counts.cciHasCollagenosis, diagnosis.cciHasCollagenosis),
        cciHasCopd = updatedMap(counts.cciHasCopd, diagnosis.cciHasCopd),
        cciHasCerebrovascularDisease = updatedMap(counts.cciHasCerebrovascularDisease, diagnosis.cciHasCerebrovascularDisease),
        cciHasDementia = updatedMap(counts.cciHasDementia, diagnosis.cciHasDementia),
        cciHasDiabetesMellitus = updatedMap(counts.cciHasDiabetesMellitus, diagnosis.cciHasDiabetesMellitus),
        cciHasDiabetesMellitusWithEndOrganDamage = updatedMap(
            counts.cciHasDiabetesMellitusWithEndOrganDamage,
            diagnosis.cciHasDiabetesMellitusWithEndOrganDamage
        ),
        cciHasOtherMalignancy = updatedMap(counts.cciHasOtherMalignancy, diagnosis.cciHasOtherMalignancy),
        cciHasOtherMetastaticSolidTumor = updatedMap(counts.cciHasOtherMetastaticSolidTumor, diagnosis.cciHasOtherMetastaticSolidTumor),
        cciHasMyocardialInfarct = updatedMap(counts.cciHasMyocardialInfarct, diagnosis.cciHasMyocardialInfarct),
        cciHasMildLiverDisease = updatedMap(counts.cciHasMildLiverDisease, diagnosis.cciHasMildLiverDisease),
        cciHasHemiplegiaOrParaplegia = updatedMap(counts.cciHasHemiplegiaOrParaplegia, diagnosis.cciHasHemiplegiaOrParaplegia),
        cciHasPeripheralVascularDisease = updatedMap(counts.cciHasPeripheralVascularDisease, diagnosis.cciHasPeripheralVascularDisease),
        cciHasRenalDisease = updatedMap(counts.cciHasRenalDisease, diagnosis.cciHasRenalDisease),
        cciHasLiverDisease = updatedMap(counts.cciHasLiverDisease, diagnosis.cciHasLiverDisease),
        cciHasUlcerDisease = updatedMap(counts.cciHasUlcerDisease, diagnosis.cciHasUlcerDisease),
        presentedWithIleus = updatedMap(counts.presentedWithIleus, diagnosis.presentedWithIleus),
        presentedWithPerforation = updatedMap(counts.presentedWithPerforation, diagnosis.presentedWithPerforation),
        anorectalVergeDistanceCategory = updatedMap(counts.anorectalVergeDistanceCategory, diagnosis.anorectalVergeDistanceCategory),
        hasMsi = updatedMap(counts.hasMsi, diagnosis.hasMsi),
        hasBrafMutation = updatedMap(counts.hasBrafMutation, diagnosis.hasBrafMutation),
        hasBrafV600EMutation = updatedMap(counts.hasBrafV600EMutation, diagnosis.hasBrafV600EMutation),
        hasRasMutation = updatedMap(counts.hasRasMutation, diagnosis.hasRasMutation),
        hasKrasG12CMutation = updatedMap(counts.hasKrasG12CMutation, diagnosis.hasKrasG12CMutation),
    )
}
// Cell result: the data-class string with a line break after each table.
summary.toString().replace("}, ", "},\n")

In [None]:
@file:DependsOn("nz.ac.waikato.cms.weka:weka-stable:3.8.6")
import weka.core.Attribute
import weka.core.DenseInstance
import weka.core.Instance
import weka.core.Instances
import com.hartwig.actin.personalization.datamodel.AnorectalVergeDistanceCategory
import com.hartwig.actin.personalization.datamodel.CciNumberOfCategories
import com.hartwig.actin.personalization.datamodel.Episode
import com.hartwig.actin.personalization.datamodel.Diagnosis

typealias TumorEntry = Pair<Diagnosis, List<Episode>>

/**
 * Stores [value] in [instance] for [attribute], converting it to what the Weka setters used here accept:
 * Double and Int are written as a double, String as-is, Boolean and Enum values via their toString().
 *
 * @throws IllegalArgumentException for any other value type.
 */
fun setValue(instance: DenseInstance, attribute: Attribute, value: Any) {
    when (value) {
        is Double -> instance.setValue(attribute, value)
        is Int -> instance.setValue(attribute, value.toDouble())
        is String -> instance.setValue(attribute, value)
        // Boolean.toString() yields exactly "true" / "false"
        is Boolean, is Enum<*> -> instance.setValue(attribute, value.toString())
        else -> throw IllegalArgumentException("Unsupported value type: ${value::class.simpleName}")
    }
}
    
/**
 * Builds a Weka dataset named "patients" with one row per entry of [patients].
 *
 * Columns, in order: the numeric "cci" attribute, the nominal diagnosis attributes listed below, and finally
 * a numeric attribute named [classAttributeName] filled by [classAttributeFunction], which is set as the class.
 * A column whose getter returns null for an entry is skipped for that row (no setValue call is made).
 */
fun createPatientDb(patients: List<TumorEntry>, classAttributeName: String, classAttributeFunction: (TumorEntry) -> Int?): Instances {
    val trueFalse = listOf("true", "false")

    // Nominal column whose value is read from the diagnosis (first element) of the entry.
    fun nominal(name: String, labels: List<String>, read: (Diagnosis) -> Any?): Pair<Attribute, (TumorEntry) -> Any?> =
        Attribute(name, labels) to { entry: TumorEntry -> read(entry.first) }

    // Nominal column restricted to the labels "true" / "false".
    fun flag(name: String, read: (Diagnosis) -> Any?) = nominal(name, trueFalse, read)

    val featureColumns: List<Pair<Attribute, (TumorEntry) -> Any?>> = listOf(
        Attribute("cci") to { entry: TumorEntry -> entry.first.cci },

        nominal("cciNumberOfCategories", CciNumberOfCategories.values().map { it.toString() }) { it.cciNumberOfCategories },
        flag("cciHasAids") { it.cciHasAids },
        flag("cciHasCongestiveHeartFailure") { it.cciHasCongestiveHeartFailure },
        flag("cciHasCollagenosis") { it.cciHasCollagenosis },
        flag("cciHasCopd") { it.cciHasCopd },
        flag("cciHasCerebrovascularDisease") { it.cciHasCerebrovascularDisease },
        flag("cciHasDementia") { it.cciHasDementia },
        flag("cciHasDiabetesMellitus") { it.cciHasDiabetesMellitus },
        flag("cciHasDiabetesMellitusWithEndOrganDamage") { it.cciHasDiabetesMellitusWithEndOrganDamage },
        flag("cciHasOtherMalignancy") { it.cciHasOtherMalignancy },
        flag("cciHasOtherMetastaticSolidTumor") { it.cciHasOtherMetastaticSolidTumor },
        flag("cciHasMyocardialInfarct") { it.cciHasMyocardialInfarct },
        flag("cciHasMildLiverDisease") { it.cciHasMildLiverDisease },
        flag("cciHasHemiplegiaOrParaplegia") { it.cciHasHemiplegiaOrParaplegia },
        flag("cciHasPeripheralVascularDisease") { it.cciHasPeripheralVascularDisease },
        flag("cciHasRenalDisease") { it.cciHasRenalDisease },
        flag("cciHasLiverDisease") { it.cciHasLiverDisease },
        flag("cciHasUlcerDisease") { it.cciHasUlcerDisease },

        flag("presentedWithIleus") { it.presentedWithIleus },
        flag("presentedWithPerforation") { it.presentedWithPerforation },
        nominal("anorectalVergeDistanceCategory", AnorectalVergeDistanceCategory.values().map { it.toString() }) {
            it.anorectalVergeDistanceCategory
        },

        flag("hasMsi") { it.hasMsi },
        flag("hasBrafMutation") { it.hasBrafMutation },
        flag("hasBrafV600EMutation") { it.hasBrafV600EMutation },
        flag("hasRasMutation") { it.hasRasMutation },
        flag("hasKrasG12CMutation") { it.hasKrasG12CMutation }
    )
    val classColumn: Pair<Attribute, (TumorEntry) -> Any?> = Attribute(classAttributeName) to classAttributeFunction
    val columns = featureColumns + classColumn

    val patientDb = Instances("patients", ArrayList(columns.map { it.first }), patients.size)
    // The class attribute is the last column; its index is read from the attribute after the dataset exists.
    patientDb.setClassIndex(classColumn.first.index())

    for (entry in patients) {
        val row = DenseInstance(columns.size)
        row.setDataset(patientDb)
        for ((attribute, extract) in columns) {
            // A null value is not written; the cell keeps whatever DenseInstance initialised it to.
            val value = extract(entry) ?: continue
            setValue(row, attribute, value)
        }
        patientDb.add(row)
    }

    return patientDb
}

In [None]:
import com.hartwig.actin.personalization.datamodel.PfsMeasure
import com.hartwig.actin.personalization.datamodel.PfsMeasureType
import weka.classifiers.functions.MultilayerPerceptron

/**
 * Smallest non-null intervalTumorIncidencePfsMeasureDate among all PFS measures of the entry's episodes
 * whose type is not CENSOR, or null when there is no such value.
 */
fun pfs(entry: TumorEntry): Int? {
    val (_, episodes) = entry
    return episodes
        .flatMap { episode -> episode.pfsMeasures }
        .filterNot { measure -> measure.pfsMeasureType == PfsMeasureType.CENSOR }
        .mapNotNull { measure -> measure.intervalTumorIncidencePfsMeasureDate }
        .minOrNull()
}

// Build the dataset from tumors that have a non-null pfs value, with "pfs" as the class attribute,
// then fit a multilayer perceptron on it.
val patientDb = createPatientDb(
    patients = tumors.map { it.toPair() }.filterNot { entry -> pfs(entry) == null },
    classAttributeName = "pfs",
    classAttributeFunction = ::pfs
)

val predictor = MultilayerPerceptron().apply { buildClassifier(patientDb) }