# A Passionate Child's Introduction to Gen

## Load data

In [1]:
using DataFrames, CSV

input_path = "../Data/Intermediate_Files/"
output_path = "../Data/Processed_Data/"

# Load pacmap output data
df = CSV.read(output_path*"pacmap_output/pacmap_5d_output_acute_leukemia_cleaned.csv", DataFrame)

# Define X and y
X = Matrix(df[:, ["PaCMAP 1", "PaCMAP 2", "PaCMAP 3", "PaCMAP 4", "PaCMAP 5"]])  # shape (n_samples=1399, n_features=5)
y = df[:, "ELN AML 2022 Diagnosis"]  # shape (n_samples=1399,) with 11 string classes

X_train = X[df[!, "Train Test"] .== "Discovery (train) Samples", :]
y_train = y[df[!, "Train Test"] .== "Discovery (train) Samples"]
X_test = X[df[!, "Train Test"] .== "Validation (test) Samples", :]
y_test = y[df[!, "Train Test"] .== "Validation (test) Samples"]


110-element PooledArrays.PooledVector{String, UInt32, Vector{UInt32}}:
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1"
 "AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1"
 "AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11"
 "AML with other rare recurring translocations"
 "AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1"
 ⋮
 "AML with inv(16)(p13.1q22) or t(16;16)(p13.1;q22)/CBFB::MYH11"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with t(8;21)(q22;q22.1)/RUNX1::RUNX1T1"
 "AML with t(9;11)(p22;q23.3)/KMT2A-rearrangement"
 "AML with t(8;21)(q22;q22.1)/R

In [9]:
using Gen
using LinearAlgebra

@gen function classifier_model(features::Array{Float64}, num_classes::Int)
    # Define prior over weights and bias
    weights = @trace(mvnormal(zeros(num_classes, size(features, 2)), I(num_classes)), :weights)
    bias = @trace(normal(0, 1), :bias)
    
    # Compute the logits
    logits = weights * features' .+ bias
    
    # Define likelihood
    @trace(categorical_softmax(logits), :class)
end


DynamicDSLFunction{Any}(Dict{Symbol, Any}(), Dict{Symbol, Any}(), Type[Array{Float64}, Int64], false, Union{Nothing, Some{Any}}[nothing, nothing], var"##classifier_model#297", Bool[0, 0], false)

In [10]:
num_classes = length(unique(y_train))  # Assuming y_train is your vector of class labels


11

In [11]:
# Define a proposal distribution
@gen function proposal_distribution(prev_trace)
    # Sample new parameters around the previous parameters
    weights = @trace(mvnormal(get_retval(prev_trace[:weights]), 0.1), :weights)
    bias = @trace(normal(get_retval(prev_trace[:bias]), 0.1), :bias)
end

# Initialize the trace
initial_trace = Gen.simulate(classifier_model, (X_train, num_classes))
traces = [initial_trace]

# Perform inference
for i in 1:1000
    # Propose a new trace
    proposal_trace = Gen.simulate(proposal_distribution, (traces[end],))
    
    # Compute acceptance probability
    acc_prob = Gen.get_score(proposal_trace) - Gen.get_score(traces[end])
    
    # Accept or reject
    if rand() < exp(acc_prob)
        push!(traces, proposal_trace)
    end
end

# Make predictions for test data
predictions = []
for trace in traces
    weights = get_retval(trace[:weights])
    bias = get_retval(trace[:bias])
    logits = weights * X_test' .+ bias
    push!(predictions, argmax(logits, dims=1))
end

# Compute accuracy
accuracy = mean([predictions[i] == y_test[i] for i in 1:length(y_test)])

println("Accuracy: ", accuracy)


LoadError: MethodError: no method matching random(::Gen.MultivariateNormal, ::Matrix{Float64}, ::Diagonal{Bool, Vector{Bool}})

[0mClosest candidates are:
[0m  random([91m::Gen.TransformedDistribution{T, U}[39m, ::Any...) where {T, U}
[0m[90m   @[39m [35mGen[39m [90m~/.julia/packages/Gen/Dne3u/src/modeling_library/dist_dsl/[39m[90m[4mtransformed_distribution.jl:19[24m[39m
[0m  random([91m::HeterogeneousMixture{T}[39m, ::Any, ::Any...) where T
[0m[90m   @[39m [35mGen[39m [90m~/.julia/packages/Gen/Dne3u/src/modeling_library/[39m[90m[4mmixture.jl:214[24m[39m
[0m  random([91m::HomogeneousMixture[39m, ::Any, ::Any...)
[0m[90m   @[39m [35mGen[39m [90m~/.julia/packages/Gen/Dne3u/src/modeling_library/[39m[90m[4mmixture.jl:72[24m[39m
[0m  ...


## SPPL alternative

- __SPPL__: Sum-Product Probabilistic Language

- __Github__: [https://github.com/probsys/sppl](https://github.com/probsys/sppl)

- __Paper__: [SPPL: Probabilistic Programming with Fast Exact Symbolic Inference](https://arxiv.org/abs/2010.03485)

- __Intro on SPNs__: [Visualizing and understanding Sum-Product Networks](https://link.springer.com/article/10.1007/s10994-018-5760-y)