In [2]:
import misc

/** Toy active-learning experiment over [[ZendoHyp]] hypotheses.
  *
  * Enumerates every hypothesis and every test (a group of up to three shapes),
  * then ranks the tests by the expected entropy of the hypothesis set that
  * would remain after observing the test's outcome.
  */
object ActiveLearning {

    val f1: Feature = Feature("f1")
    val f2: Feature = Feature("f2")
    // Declared but not included in `features`, so no generated shape or hypothesis uses it.
    val f3: Feature = Feature("f3")
    val features: Set[Feature] = Set(f1, f2)
    val fVals: Set[Int] = Set(1, 2, 3)

    /** Appends every element of `extensions` to every sequence in `heads`.
      *
      * An empty `extensions` leaves `heads` untouched (it does NOT collapse the
      * result to empty).
      */
    def expand[T](heads: IndexedSeq[IndexedSeq[T]], extensions: IndexedSeq[T]): IndexedSeq[IndexedSeq[T]] =
        if (extensions.isEmpty) heads
        else for (h <- heads; e <- extensions) yield h :+ e

    /** Cartesian product of the given factors, built left to right with [[expand]].
      *
      * @param v the factors; each result picks one element per factor, in order
      * @return all combinations. An empty `v` yields a single empty combination
      *         (previously this threw on `v.head`). An empty first factor yields
      *         no combinations; empty later factors are skipped by [[expand]].
      */
    def product[T](v: IndexedSeq[IndexedSeq[T]]): IndexedSeq[IndexedSeq[T]] =
        v.headOption match {
            case None => IndexedSeq(IndexedSeq.empty[T])
            case Some(first) =>
                val seed: IndexedSeq[IndexedSeq[T]] = first.map(IndexedSeq(_))
                v.tail.foldLeft(seed)((heads, extensions) => expand(heads, extensions))
        }

    /** Every hypothesis: one per (feature, feature value, count in 1..3). */
    lazy val allHyps: Set[ZendoHyp] =
        for (f <- features; fv <- fVals; fn <- 1 to 3) yield ZendoHyp(f, fv, fn)

    /** Every shape obtainable by assigning one value from `fVals` to each feature. */
    lazy val enumShapes: IndexedSeq[Shape] = {
        val featureValuePairs: IndexedSeq[IndexedSeq[(Feature, Int)]] =
            features.toIndexedSeq.map(f => fVals.toIndexedSeq.map(fv => (f, fv)))
        product(featureValuePairs).map(assignment => Shape(assignment.toMap))
    }

    /** Each enumerated shape wrapped in `Some`, plus `None` for "no shape in this slot". */
    lazy val possShapes: IndexedSeq[Option[Shape]] = enumShapes.map(s => Some(s)) :+ None

    /** All tests: three optional slots, with the empty slots dropped and duplicates merged. */
    lazy val allTests: Set[IndexedSeq[Shape]] =
        product(IndexedSeq.fill(3)(possShapes)).map(_.flatten).toSet

    /** Keeps only the hypotheses compatible with `event`. */
    def filterHyps(hyps: Set[ZendoHyp], event: ZEvent): Set[ZendoHyp] =
        hyps.filter(_.isCompatible(event))

    /** Base-2 log of the number of hypotheses.
      *
      * Note: an empty set gives `-Infinity` (log of zero).
      */
    def entropy(hyps: Set[ZendoHyp]): Double =
        math.log(hyps.size) / math.log(2)

    /** Average, over every hypothesis in `hyps`, of the entropy of the subset of
      * `hyps` compatible with the outcome that hypothesis predicts for `test`.
      *
      * @param hyps    current hypothesis set
      * @param test    the group of shapes whose outcome would be observed
      * @param verbose when true, prints how many hypotheses predict a positive outcome
      * @return the average described above; `NaN` when `hyps` is empty (0.0 / 0)
      */
    def expectedConditionalEntropy(hyps: Set[ZendoHyp], test: IndexedSeq[Shape], verbose: Boolean = false): Double = {
        val outcomes = hyps.toIndexedSeq.map(_.outcome(test))
        // The surviving set depends only on the outcome, so compute each entropy once
        // per distinct outcome instead of once per hypothesis.
        val entropyByOutcome: Map[Boolean, Double] =
            outcomes.distinct.map(o => o -> entropy(filterHyps(hyps, ZEvent(test, o)))).toMap
        if (verbose) {
            println(s"count(outcome=1)=${outcomes.count(o => o)} (of ${outcomes.size})")
        }
        outcomes.map(entropyByOutcome).sum / outcomes.size
    }

    /** Runs the demo: prints all hypotheses, conditions on one negative observation,
      * prints every test ranked by expected conditional entropy, and reports the
      * possible outcomes of the best-ranked test.
      *
      * @return the expected conditional entropy of the best-ranked test
      */
    def run(): Double = {
        println("All hypotheses:")
        println(allHyps.toIndexedSeq.sortBy(_.toString).mkString("\n"))

        // A shape whose every feature has value 1, observed with a negative outcome.
        val s11 = Shape(features.map(f => (f -> 1)).toMap)
        val fe = ZEvent(IndexedSeq(s11), outcome = false)

        val post11f = filterHyps(allHyps, fe)

        println(s"Initial hyps count: ${allHyps.size}, count after conditioning: ${post11f.size}")

        val ent11f = allTests.map(t => (t -> expectedConditionalEntropy(post11f, t))).toMap

        println(s"------ Expected entropy after different tests, having seen $fe ---------------------")
        // Two stable sorts: primary key is the entropy, ties are broken by the test's string form.
        val ranked = ent11f.toIndexedSeq.sortBy(_._1.toString).sortBy(_._2)
        println(ranked.map { case (test, h) =>
            val formatted = f"$h%2.2f"
            s"$test -> H=$formatted"
        }.mkString("\n"))

        println("---- Possible outcomes for top-1:")
        // allTests always contains at least the empty test, so `ranked` is non-empty.
        val top1 = ranked.head._1

        def reportEntropies(hyps: Set[ZendoHyp], test: IndexedSeq[Shape]): Unit = {
            for (outcome <- IndexedSeq(false, true)) {
                val ze = ZEvent(test, outcome)
                val condHyps = filterHyps(hyps, ze)
                println(s"If event is $ze, hyps are \n\t${condHyps.mkString("\n\t")}")
                println(s"Entropy would be ${entropy(condHyps)}")
            }
        }

        reportEntropies(post11f, top1)
        expectedConditionalEntropy(post11f, top1, verbose = true)
    }
}

/** An observation: a group of shapes together with the boolean outcome recorded for it. */
case class ZEvent(shapes: IndexedSeq[Shape],outcome: Boolean) {
    // Rendered lazily as "[shape,shape,...]:outcome".
    override lazy val toString: String = shapes.mkString("[", ",", s"]:$outcome")
}

/** A named feature; its string form is just the name. */
case class Feature(name: String) {
    // Replaces the default case-class rendering `Feature(name)` with the bare name.
    override val toString: String = this.name
}

/** A shape described by the value it takes for each of its features. */
case class Shape(fs: Map[Feature,Int]) {

    // Rendered as "shape(feature=value,feature=value,...)" in the map's iteration order.
    override val toString: String =
        fs.map { case (feature, value) => s"$feature=$value" }.mkString("shape(", ",", ")")
}

/** Simple equality hypothesis: a group of shapes is positive exactly when
  * `fNum` of its shapes have `feature` equal to `fVal`.
  */
case class ZendoHyp(feature: Feature, fVal: Int, fNum: Int) {

    /** True when the outcome this hypothesis predicts for `e.shapes` equals `e.outcome`. */
    def isCompatible(e: ZEvent): Boolean =
        outcome(e.shapes) == e.outcome

    /** The outcome this hypothesis predicts for a group of shapes.
      *
      * A shape that does not define `feature` is counted as non-matching instead of
      * raising `NoSuchElementException` from the map lookup.
      *
      * @param shapes the group of shapes under test
      * @return true when exactly `fNum` shapes have `feature` equal to `fVal`
      */
    def outcome(shapes: IndexedSeq[Shape]): Boolean =
        shapes.count(_.fs.get(feature).contains(fVal)) == fNum

    override val toString: String = s"hyp($feature=$fVal,count=$fNum)"

}

SyntaxError: invalid syntax (Temp/ipykernel_21276/3425502612.py, line 3)