In [1]:
english::Vector{String} = ["A", "B", "C", "D", "E", "I"];
greek::Vector{String} = ["α", "β", "γ", "δ", "ϵ", "ϕ"];
alphabets::Vector{String} = [];

append!(alphabets, english)
append!(alphabets, greek)

12-element Vector{String}:
 "A"
 "B"
 "C"
 "D"
 "E"
 "I"
 "α"
 "β"
 "γ"
 "δ"
 "ϵ"
 "ϕ"

In [2]:
P = Dict();
for alphabet in alphabets
    if string(alphabet) ∈ greek
        P[alphabet] = 1 // 12;
    elseif string(alphabet) ∈ ["A", "E", "I"]
        P[alphabet] = 1 // 9;
    else
        P[alphabet] = 1 // 18;
    end
end

P

Dict{Any, Any} with 12 entries:
  "C" => 1//18
  "ϵ" => 1//12
  "δ" => 1//12
  "B" => 1//18
  "A" => 1//9
  "ϕ" => 1//12
  "D" => 1//18
  "α" => 1//12
  "E" => 1//9
  "γ" => 1//12
  "I" => 1//9
  "β" => 1//12

In [3]:
P_2 = Dict();
for (alphabet, probability) in P
    for (alphabet_2, probability_2) in P
        P_2[alphabet * alphabet_2] = probability * probability_2;
    end
end

P_3 = Dict();
for (alphabet, probability) in P
    for (alphabet_2, probability_2) in P_2
        P_3[alphabet * alphabet_2] = probability * probability_2;
    end
end

In [164]:
function sort_prob(P_Dict)
    probabilities = [];
    alphabets = [];
    for (alphabet, probability) in P_Dict
        # println("$(typeof(alphabet))")
        push!(probabilities, probability)
        push!(alphabets, alphabet)
    end

    sorted_idx = sortperm(probabilities);
    probabilities = probabilities[sorted_idx];
    alphabets = alphabets[sorted_idx];

    println("$alphabets")
    println("$probabilities")

    P_sorted_dict = Dict();
    for (idx, probability) in enumerate(probabilities)
        P_sorted_dict[ string(alphabets[idx]) ] = probability
    end
    return probabilities, alphabets, P_sorted_dict
end

sorted_probabilities, sorted_alphabets, P_sorted_dict = sort_prob(P)

Any["C", "B", "D", "ϵ", "δ", "ϕ", "α", "γ", "β", "A", "E", "I"]
Any[1//18, 1//18, 1//18, 1//12, 1//12, 1//12, 1//12, 1//12, 1//12, 1//9, 1//9, 1//9]


(Any[1//18, 1//18, 1//18, 1//12, 1//12, 1//12, 1//12, 1//12, 1//12, 1//9, 1//9, 1//9], Any["C", "B", "D", "ϵ", "δ", "ϕ", "α", "γ", "β", "A", "E", "I"], Dict{Any, Any}("C" => 1//18, "ϵ" => 1//12, "δ" => 1//12, "B" => 1//18, "A" => 1//9, "ϕ" => 1//12, "D" => 1//18, "α" => 1//12, "E" => 1//9, "γ" => 1//12…))

In [184]:
mutable struct huffman_node{A, B}
    code::A
    probability::B
    left::Union{Nothing, huffman_node}
    right::Union{Nothing, huffman_node}
end
# For Huffman Leaf Construction:
huffman_node(x::A, y::B) where A where B = huffman_node{A, B}(x, y, nothing, nothing)

function construct_huffman_tree(P_sorted_dict)
    trees = [huffman_node( alphabet, probability) for (alphabet, probability) in P_sorted_dict]
    while length(trees) > 1
        sort!(trees, lt = (x, y) -> x.probability < y.probability)
        smallest = popfirst!(trees)
        next_smallest = popfirst!(trees)
        tree = huffman_node("", smallest.probability + next_smallest.probability)
        tree.left = smallest
        tree.right = next_smallest
        push!(trees, tree )
    end
    return trees[1]
end

huffman_tree = construct_huffman_tree(P_sorted_dict)

huffman_node{String, Rational{Int64}}("", 1//1, huffman_node{String, Rational{Int64}}("", 5//12, huffman_node{String, Rational{Int64}}("", 7//36, huffman_node{String, Rational{Int64}}("β", 1//12, nothing, nothing), huffman_node{String, Rational{Int64}}("A", 1//9, nothing, nothing)), huffman_node{String, Rational{Int64}}("", 2//9, huffman_node{String, Rational{Int64}}("E", 1//9, nothing, nothing), huffman_node{String, Rational{Int64}}("I", 1//9, nothing, nothing))), huffman_node{String, Rational{Int64}}("", 7//12, huffman_node{String, Rational{Int64}}("", 1//4, huffman_node{String, Rational{Int64}}("", 1//9, huffman_node{String, Rational{Int64}}("C", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("B", 1//18, nothing, nothing)), huffman_node{String, Rational{Int64}}("", 5//36, huffman_node{String, Rational{Int64}}("D", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("ϵ", 1//12, nothing, nothing))), huffman_node{String, Rational{Int64}}("", 1//3, huffman_n

In [185]:
function print_encoding(node, code, encoder)
    code *= "0"
    if ~(isnothing(node.left))
        print_encoding(node.left, code, encoder)
    end
    code = code[1:end-1]

    code *= "1"
    if ~(isnothing(node.right))
        print_encoding(node.right, code, encoder)
    end
    code = code[1:end-1]
    encoder[node.code] = code

    delete!(encoder, "")
end

encoder = Dict()
@time print_encoding(huffman_tree, "", encoder)

  0.023589 seconds (52.52 k allocations: 2.514 MiB, 99.79% compilation time)


Dict{Any, Any} with 12 entries:
  "C" => "1000"
  "ϵ" => "1011"
  "δ" => "1100"
  "B" => "1001"
  "A" => "001"
  "ϕ" => "1101"
  "D" => "1010"
  "α" => "1110"
  "E" => "010"
  "γ" => "1111"
  "I" => "011"
  "β" => "000"

In [186]:
encoder

Dict{Any, Any} with 12 entries:
  "C" => "1000"
  "ϵ" => "1011"
  "δ" => "1100"
  "B" => "1001"
  "A" => "001"
  "ϕ" => "1101"
  "D" => "1010"
  "α" => "1110"
  "E" => "010"
  "γ" => "1111"
  "I" => "011"
  "β" => "000"

In [183]:
delete!(encoder, "")

Dict{Any, Any} with 13 entries:
  "C" => "1000"
  "ϵ" => "1011"
  "δ" => "1100"
  "B" => "1001"
  "s" => ""
  "A" => "001"
  "ϕ" => "1101"
  "D" => "1010"
  "α" => "1110"
  "E" => "010"
  "γ" => "1111"
  "I" => "011"
  "β" => "000"

In [9]:
length(encoder)

12

In [10]:
# encoder   : Dictionary encoding alphabet -> codeword
# P         : Dictionary describing probability mass function (alphabet -> probability)
expected_length(encoder, P) = sum([P[alphabet] * length(codeword) for (alphabet, codeword) in encoder])

expected_length(encoder, P)

43//12

In [11]:
function mcmillan_inequality(encoder)
    value = sum([ (2//1) ^ (-1 * length(codeword)) for (___, codeword) in encoder]);
    return value
end

mcmillan_inequality(encoder)

1//1

In [12]:
P_Y = Dict();
P_Y["english"] = 0;
P_Y["greek"] = 0;
for alphabet in alphabets
    if string(alphabet) ∈ english
        P_Y["english"] += P[alphabet];
    elseif string(alphabet) ∈ greek
        P_Y["greek"] += P[alphabet];
    end
end

In [13]:
P_Y

Dict{Any, Any} with 2 entries:
  "greek"   => 1//2
  "english" => 1//2

In [189]:
function decode_huffman(huffman_tree, codeword)
    ans = ""
    curr = huffman_tree

    is_leaf(node) = isnothing(node.left) && isnothing(node.right)
    for idx in eachindex(codeword) #idx = 1:length(codeword)
        if string(codeword[idx]) == "0" && ~( is_leaf(curr) )
            println("Going left")
            curr = curr.left
        elseif string(codeword[idx]) == "1" && ~( is_leaf(curr) )
            println("Going Right")
            curr = curr.right
        end

        if is_leaf(curr)
            println("Successfuly Decoded: $(curr.code)")
            ans *= curr.code
            curr = huffman_tree
        end
    end

    return ans * "\0"
end

decode_huffman(huffman_tree, "000010")

Going left
Going left
Going left
Successfuly Decoded: β
Going left
Going Right
Going left
Successfuly Decoded: E


"βE\0"

In [15]:
huffman_tree

huffman_node{String, Rational{Int64}}("s", 1//1, huffman_node{String, Rational{Int64}}("s", 5//12, huffman_node{String, Rational{Int64}}("s", 7//36, huffman_node{String, Rational{Int64}}("β", 1//12, nothing, nothing), huffman_node{String, Rational{Int64}}("A", 1//9, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 2//9, huffman_node{String, Rational{Int64}}("E", 1//9, nothing, nothing), huffman_node{String, Rational{Int64}}("I", 1//9, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 7//12, huffman_node{String, Rational{Int64}}("s", 1//4, huffman_node{String, Rational{Int64}}("s", 1//9, huffman_node{String, Rational{Int64}}("C", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("B", 1//18, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 5//36, huffman_node{String, Rational{Int64}}("D", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("ϵ", 1//12, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 1//3, 

In [19]:
P_english = Dict();
P_greek = Dict();
for alphabet in alphabets
    if string(alphabet) ∈ greek
        P_greek[alphabet] = 1 // 12 // (1 // 2);
    elseif string(alphabet) ∈ ["A", "E", "I"]
        P_english[alphabet] = 1 // 9 // (1 // 2);
    else
        P_english[alphabet] = 1 // 18 // (1 // 2);
    end
end


In [26]:
___, ___, P_sorted_english = sort_prob(P_english)
___, ___, P_sorted_greek = sort_prob(P_greek)

encoder_english = Dict();
encoder_greek = Dict();
huffman_english = construct_huffman_tree(P_sorted_english);
huffman_greek = construct_huffman_tree(P_sorted_greek);

print_encoding(huffman_english, "", encoder_english);
print_encoding(huffman_greek, "", encoder_greek);

delete!(encoder_english, "s");
delete!(encoder_greek, "s");

Any['B', 'C', 'D', 'A', 'I', 'E']
Any[1//9, 1//9, 1//9, 2//9, 2//9, 2//9]
Any['γ', 'ϕ', 'ϵ', 'α', 'δ', 'β']
Any[1//6, 1//6, 1//6, 1//6, 1//6, 1//6]


In [33]:
encoder_side_info = Dict();
merge!(encoder_side_info, encoder_english, encoder_greek);

In [36]:
expected_length(encoder_side_info, P)

47//18

In [55]:
language_dict = Dict();
for alphabet in alphabets
    if alphabet ∈ english
        language_dict[alphabet] = "e";
    elseif alphabet ∈ greek
        language_dict[alphabet] = "g";
    end
end


In [128]:
#=
values  : Vector containing all values to permute thru
k       : Number of Permutations to go
=#
function permutation_with_repetition(values, k)
    x = vec(collect(Base.Iterators.product(Base.Iterators.repeated(values, k)...)))
    y = [];
    for tup in x
        dummy = [];
        for (idx, value) in enumerate(tup)
            append!(dummy, value);
        end
        push!(y, dummy)
    end

    return y
end

permutation_with_repetition (generic function with 1 method)

In [129]:
y = permutation_with_repetition(["0", "1"], 5)

32-element Vector{Any}:
 Any['0', '0', '0', '0', '0']
 Any['1', '0', '0', '0', '0']
 Any['0', '1', '0', '0', '0']
 Any['1', '1', '0', '0', '0']
 Any['0', '0', '1', '0', '0']
 Any['1', '0', '1', '0', '0']
 Any['0', '1', '1', '0', '0']
 Any['1', '1', '1', '0', '0']
 Any['0', '0', '0', '1', '0']
 Any['1', '0', '0', '1', '0']
 ⋮
 Any['1', '1', '1', '0', '1']
 Any['0', '0', '0', '1', '1']
 Any['1', '0', '0', '1', '1']
 Any['0', '1', '0', '1', '1']
 Any['1', '1', '0', '1', '1']
 Any['0', '0', '1', '1', '1']
 Any['1', '0', '1', '1', '1']
 Any['0', '1', '1', '1', '1']
 Any['1', '1', '1', '1', '1']

In [208]:
# import Pkg; Pkg.add("Combinatorics")
using Combinatorics
block_length = 2;
    
#=
sorted_alphabets    : Sorted Source Symbols according to its probabilities
sorted_probabilities: Sorted Probabilities corresponding to source symbols
block_length        : Length of Block on blocking symbols (Ex. aa -> 2)

Output:
blocks              : Vector containing all possible block codes using the defined source symbols
prob_blocks         : Corresponding Probabilities of the block codes
=#
function create_block_coding(sorted_alphabets, sorted_probabilities, block_length=2)
    possibilites = collect(permutation_with_repetition(sorted_alphabets, block_length))
    blocks = [prod(block_alphabets) for block_alphabets in possibilites]
    prob = collect(permutation_with_repetition(sorted_probabilities, block_length))
    
    prob_blocks = [prod(block_prob) for block_prob in prob]
    # println("possibilites: $possibilites")
    # println("prob: $prob")
    

    return blocks, prob_blocks
end
alphabet_2_list, prob_2_list = create_block_coding(sorted_alphabets, sorted_probabilities, 2);

side_info_2 = create_side_information_codebook(alphabet_2_list, language_dict);

In [209]:
side_info_2

Dict{Any, Any} with 144 entries:
  "DC" => "ee"
  "αD" => "ge"
  "δα" => "gg"
  "EC" => "ee"
  "DB" => "ee"
  "γA" => "ge"
  "CC" => "ee"
  "BC" => "ee"
  "BB" => "ee"
  "Eϕ" => "eg"
  "Dγ" => "eg"
  "Bβ" => "eg"
  "Dα" => "eg"
  "ϵA" => "ge"
  "αα" => "gg"
  "αϕ" => "gg"
  "ϕA" => "ge"
  "ϕI" => "ge"
  "γγ" => "gg"
  ⋮    => ⋮

In [204]:
#=

Input:
block_symbols   : Vector containing all possible (partitioned) block symbols
side_info_dict  : Dictionary mapping symbol => side information symbol
                    Ex. [a, b, c, γ, ϕ] => [e, e, e, g, g]
                        where e: English letters, g: Greek letters
Output:
side_informations   : Dictionary mapping block symbols => side information block symbol
                    Ex. "abγ" => "eeg"
=#
function create_side_information_codebook(block_symbols, side_info_dict)
    side_informations = Dict();

    # Concatenating symbols in each block
    # blocks = [prod(block_symbol) for block_symbol in block_symbols];
    for (idx, block_symbol) in enumerate(block_symbols)
        side_info = "";
        for symbol in block_symbol
            side_info *= side_info_dict[string(symbol)]
        end
        side_informations[ prod(block_symbol) ] = side_info;
    end

    return side_informations
end

create_side_information_codebook (generic function with 1 method)

In [191]:
side_info_2

Dict{Any, Any} with 144 entries:
  "DC" => "ee"
  "αD" => "ge"
  "δα" => "gg"
  "EC" => "ee"
  "DB" => "ee"
  "γA" => "ge"
  "CC" => "ee"
  "BC" => "ee"
  "BB" => "ee"
  "Eϕ" => "eg"
  "Dγ" => "eg"
  "Bβ" => "eg"
  "Dα" => "eg"
  "ϵA" => "ge"
  "αα" => "gg"
  "αϕ" => "gg"
  "ϕA" => "ge"
  "ϕI" => "ge"
  "γγ" => "gg"
  ⋮    => ⋮

In [210]:
dict_list(key_list::Vector) = Dict([(key, []) for key in key_list]);

#=
Input:
alphabet_2_list : Vector containing block symbols
prob_2_list     : Vector containing corresponding probabilities of block symbols
side_info_2     : Dictionary mapping block symbols => side information block symbols
=#
function side_information_table(alphabet_2_list, prob_2_list, side_info_2)
    unique_side_info2 = unique!([value for (key, value) in side_info_2])
    inv_side_info2 = dict_list(unique_side_info2)
    inv_side_prob2 = dict_list(unique_side_info2)
    for (idx, source_block) in enumerate(alphabet_2_list)
        side_info = side_info_2[source_block]
        println("Now collecting: $source_block -> $(prob_2_list[idx]) for $side_info")
        push!(inv_side_info2[side_info], string(source_block))
        push!(inv_side_prob2[side_info], prob_2_list[idx])
    end

    renormalization(probabilities::Vector) = probabilities ./ sum(probabilities); 

    # Renormalization based on given side information
    for (side_info, probabilities) in inv_side_prob2
        inv_side_prob2[side_info] = renormalization(probabilities)
    end

    return inv_side_info2, inv_side_prob2
end

side_information_table (generic function with 1 method)

In [211]:
inv_side_info2, inv_side_prob2 = side_information_table(alphabet_2_list, prob_2_list, side_info_2)

Now collecting: CC -> 1//324 for ee
Now collecting: BC -> 1//324 for ee
Now collecting: DC -> 1//324 for ee
Now collecting: ϵC -> 1//216 for ge
Now collecting: δC -> 1//216 for ge
Now collecting: ϕC -> 1//216 for ge
Now collecting: αC -> 1//216 for ge
Now collecting: γC -> 1//216 for ge
Now collecting: βC -> 1//216 for ge
Now collecting: AC -> 1//162 for ee
Now collecting: EC -> 1//162 for ee
Now collecting: IC -> 1//162 for ee
Now collecting: CB -> 1//324 for ee
Now collecting: BB -> 1//324 for ee
Now collecting: DB -> 1//324 for ee
Now collecting: ϵB -> 1//216 for ge
Now collecting: δB -> 1//216 for ge
Now collecting: ϕB -> 1//216 for ge
Now collecting: αB -> 1//216 for ge
Now collecting: γB -> 1//216 for ge
Now collecting: βB -> 1//216 for ge
Now collecting: AB -> 1//162 for ee
Now collecting: EB -> 1//162 for ee
Now collecting: IB -> 1//162 for ee
Now collecting: CD -> 1//324 for ee
Now collecting: BD -> 1//324 for ee
Now collecting: DD -> 1//324 for ee
Now collecting: ϵD -> 1//216

(Dict{String, Vector{Any}}("ee" => ["CC", "BC", "DC", "AC", "EC", "IC", "CB", "BB", "DB", "AB"  …  "DE", "AE", "EE", "IE", "CI", "BI", "DI", "AI", "EI", "II"], "eg" => ["Cϵ", "Bϵ", "Dϵ", "Aϵ", "Eϵ", "Iϵ", "Cδ", "Bδ", "Dδ", "Aδ"  …  "Dγ", "Aγ", "Eγ", "Iγ", "Cβ", "Bβ", "Dβ", "Aβ", "Eβ", "Iβ"], "gg" => ["ϵϵ", "δϵ", "ϕϵ", "αϵ", "γϵ", "βϵ", "ϵδ", "δδ", "ϕδ", "αδ"  …  "ϕγ", "αγ", "γγ", "βγ", "ϵβ", "δβ", "ϕβ", "αβ", "γβ", "ββ"], "ge" => ["ϵC", "δC", "ϕC", "αC", "γC", "βC", "ϵB", "δB", "ϕB", "αB"  …  "ϕE", "αE", "γE", "βE", "ϵI", "δI", "ϕI", "αI", "γI", "βI"]), Dict{String, Vector{Any}}("ee" => [1//81, 1//81, 1//81, 2//81, 2//81, 2//81, 1//81, 1//81, 1//81, 2//81  …  2//81, 4//81, 4//81, 4//81, 2//81, 2//81, 2//81, 4//81, 4//81, 4//81], "eg" => [1//54, 1//54, 1//54, 1//27, 1//27, 1//27, 1//54, 1//54, 1//54, 1//27  …  1//54, 1//27, 1//27, 1//27, 1//54, 1//54, 1//54, 1//27, 1//27, 1//27], "gg" => [1//36, 1//36, 1//36, 1//36, 1//36, 1//36, 1//36, 1//36, 1//36, 1//36  …  1//36, 1//36, 1//36, 1//36

In [216]:
inv_side_prob2["gg"]

36-element Vector{Any}:
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
  ⋮
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36
 1//36

In [135]:
unique_side_info2 = unique!([value for (key, value) in side_info_2])

4-element Vector{String}:
 "ee"
 "ge"
 "gg"
 "eg"

In [155]:
dict_list(key_list::Vector) = Dict([(key, []) for key in key_list]); # Creates Dictionary of Lists with defined keys

In [156]:
inv_side_info2 = dict_list(unique_side_info2) # Dict([(unique_side_info, []) for unique_side_info in unique_side_info2]);
inv_side_prob2 = dict_list(unique_side_info2) # Dict([(unique_side_info, []) for unique_side_info in unique_side_info2]);
for (idx, source_block) in enumerate(alphabet_2_list)
    side_info = side_info_2[source_block]
    println("Now collecting: $source_block -> $(prob_2_list[idx]) for $side_info")
    push!(inv_side_info2[side_info], string(source_block))
    push!(inv_side_prob2[side_info], prob_2_list[idx])
end

renormalization(probabilities::Vector) = probabilities ./ sum(probabilities); 

# Renormalization based on given side information
for (side_info, probabilities) in inv_side_prob2
    inv_side_prob2[side_info] = renormalization(probabilities)
end

Now collecting: CC -> 1//324 for ee
Now collecting: BC -> 1//324 for ee
Now collecting: DC -> 1//324 for ee
Now collecting: ϵC -> 1//216 for ge
Now collecting: δC -> 1//216 for ge
Now collecting: ϕC -> 1//216 for ge
Now collecting: αC -> 1//216 for ge
Now collecting: γC -> 1//216 for ge
Now collecting: βC -> 1//216 for ge
Now collecting: AC -> 1//162 for ee
Now collecting: EC -> 1//162 for ee
Now collecting: IC -> 1//162 for ee
Now collecting: CB -> 1//324 for ee
Now collecting: BB -> 1//324 for ee
Now collecting: DB -> 1//324 for ee
Now collecting: ϵB -> 1//216 for ge
Now collecting: δB -> 1//216 for ge
Now collecting: ϕB -> 1//216 for ge
Now collecting: αB -> 1//216 for ge
Now collecting: γB -> 1//216 for ge
Now collecting: βB -> 1//216 for ge
Now collecting: AB -> 1//162 for ee
Now collecting: EB -> 1//162 for ee
Now collecting: IB -> 1//162 for ee
Now collecting: CD -> 1//324 for ee
Now collecting: BD -> 1//324 for ee
Now collecting: DD -> 1//324 for ee
Now collecting: ϵD -> 1//216

In [171]:
unique_trees = Dict();  #dict_list(unique_side_info2);
unique_encoders = Dict(); #dict_list(unique_side_info2);
# unique_decoders = Dict(); #dict_list(unique_side_info2);
for (side_info, conditional_probabilities) in inv_side_prob2
    conditioned_alphabets = inv_side_info2[side_info];
    P_dict = Dict([(conditioned_alphabets[idx], cond_prob) for (idx, cond_prob) in enumerate(conditional_probabilities)])
    ___, ____, P_dict = sort_prob(P_dict);
    tree = construct_huffman_tree(P_dict);
    encoder = Dict();
    print_encoding(tree, "", encoder);
    delete!(encoder, "s");

    unique_trees[side_info] = tree;
    unique_encoders[side_info] = encoder;
end
    

Any["CD", "DC", "DB", "CC", "BC", "BB", "CB", "BD", "DD", "ED", "BI", "DI", "EC", "DE", "IC", "IB", "CI", "DA", "EB", "ID", "CA", "AD", "AC", "CE", "BE", "BA", "AB", "EE", "II", "AE", "EA", "EI", "AI", "IE", "AA", "IA"]
Any[1//81, 1//81, 1//81, 1//81, 1//81, 1//81, 1//81, 1//81, 1//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 2//81, 4//81, 4//81, 4//81, 4//81, 4//81, 4//81, 4//81, 4//81, 4//81]
Any["Dϕ", "Dγ", "Cϵ", "Dα", "Bβ", "Cδ", "Cα", "Bγ", "Dβ", "Dδ", "Cϕ", "Bδ", "Cγ", "Cβ", "Bα", "Bϵ", "Bϕ", "Dϵ", "Eδ", "Aα", "Eγ", "Eϕ", "Eα", "Aϕ", "Iϕ", "Aϵ", "Aβ", "Iϵ", "Aγ", "Eϵ", "Iβ", "Eβ", "Iγ", "Aδ", "Iδ", "Iα"]
Any[1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//54, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27, 1//27]
Any["ϕα", "βα", "δα", "ϕβ", "αβ", "γϵ", "ϕδ", "α

In [176]:
unique_encoders["gg"]

Dict{Any, Any} with 36 entries:
  "ϕα" => "111000"
  "βα" => "111001"
  "δα" => "111010"
  "ϕβ" => "111011"
  "αβ" => "111100"
  "γϵ" => "111101"
  "ϕδ" => "111110"
  "αγ" => "111111"
  "ϵϕ" => "00000"
  "γδ" => "00001"
  "ϕϕ" => "00010"
  "βϵ" => "00011"
  "γϕ" => "00100"
  "αϕ" => "00101"
  "αα" => "00110"
  "δγ" => "00111"
  "ϕγ" => "01000"
  "γγ" => "01001"
  "ϵδ" => "01010"
  ⋮    => ⋮

In [173]:
decode_huffman(unique_trees["ee"], "00010")

Going left
Going left
Going left
Going Right
Going left
Successfuly Decoded: DI


"DI\0"

In [178]:
include("./test.jl")
tester("Hello World!")

Hello World!


In [217]:
greek

6-element Vector{String}:
 "α"
 "β"
 "γ"
 "δ"
 "ϵ"
 "ϕ"

In [221]:
alphabets
probabilities

12-element Vector{Any}:
 1//18
 1//18
 1//18
 1//12
 1//12
 1//12
 1//12
 1//12
 1//12
 1//9
 1//9
 1//9

In [378]:
choice = "A"
num_states = length(P);
transition_matrix = zeros(num_states, num_states);
ϵ = 0.02;
for (idx, alphabet) in enumerate(alphabets)
    transition_matrix[idx, idx] = (1 - ϵ); # * P(itself) // P(itself) -> 1
end
for (row_idx, row) in enumerate(eachrow(transition_matrix))
    normalization = (1//1) - probabilities[row_idx];
    for (entry_idx, prob) in enumerate(row)
        if entry_idx == row_idx
            row[entry_idx] = (1 - ϵ); # * P(itself) // P(itself) -> 1
        else
            row[entry_idx] = ϵ * (probabilities[entry_idx] // normalization)
        end
    end
    println("$row_idx : $row w/ norm = $normalization")
end

# transition_matrix = rationalize.(transition_matrix);

1 : [0.98, 0.0011764705882352942, 0.0011764705882352942, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0023529411764705885, 0.0023529411764705885, 0.0023529411764705885] w/ norm = 17//18
2 : [0.0011764705882352942, 0.98, 0.0011764705882352942, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0023529411764705885, 0.0023529411764705885, 0.0023529411764705885] w/ norm = 17//18
3 : [0.0011764705882352942, 0.0011764705882352942, 0.98, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0017647058823529412, 0.0023529411764705885, 0.0023529411764705885, 0.0023529411764705885] w/ norm = 17//18
4 : [0.0012121212121212121, 0.0012121212121212121, 0.0012121212121212121, 0.98, 0.0018181818181818182, 0.0018181818181818182, 0.0018181818181818182, 0.001818

In [406]:
function create_emission_matrix(probabilities, ϵ = 0.02)
    num_states = length(probabilities);
    emission_matrix = zeros(num_states, num_states);

    for (row_idx, row) in enumerate(eachrow(emission_matrix))
        normalization = (1//1) - probabilities[row_idx];
        for (entry_idx, prob) in enumerate(row)
            if entry_idx == row_idx
                row[entry_idx] = (1 - ϵ); # * P(itself) // P(itself) -> 1
            else
                row[entry_idx] = ϵ * (probabilities[entry_idx] // normalization)
            end
        end
        # println("$row_idx : $row w/ norm = $normalization")
    end

    return emission_matrix
end

# function recreate_emission_matrix(probabilities, state_idx = 2, ϵ = 0.02)
#     next_emission_matrix = zeros(num_states, num_states);
#     for (row_idx, row) in enumerate(eachrow(next_emission_matrix))
#         normalization = (1//1) - probabilities[row_idx] - probabilities[prev_idx];
#         for (entry_idx, prob) in enumerate(row)
#             if entry_idx == row_idx
#                 row[entry_idx] = (1 - ϵ); # * P(itself) // P(itself) -> 1
#             else
#                 row[entry_idx] = ϵ * (probabilities[entry_idx] // normalization)
#             end
#         end
#         # println("$row_idx : $row w/ norm = $normalization")
#         # println(sum(row))
#     end

#     next_emission_matrix[prev_idx, :] .= 0;
#     next_emission_matrix[:, prev_idx] .= 0;

#     return next_emission_matrix
# end

function source_transition_matrix(probabilities)
    num_states = length(probabilities);
    reset_transition_matrix = zeros(num_states, num_states);
    for (row_idx, row) in enumerate(eachrow(reset_transition_matrix))
        reset_transition_matrix[row_idx, :] = probabilities;
    end
    return reset_transition_matrix
end

source_transition_matrix (generic function with 1 method)

In [284]:
prev_idx = 2;
num_states = length(P);
next_transition_matrix = zeros(num_states, num_states);
for (row_idx, row) in enumerate(eachrow(next_transition_matrix))
    normalization = (1//1) - probabilities[row_idx] - probabilities[prev_idx];
    for (entry_idx, prob) in enumerate(row)
        if entry_idx == row_idx
            row[entry_idx] = (1 - ϵ); # * P(itself) // P(itself) -> 1
        else
            row[entry_idx] = ϵ * (probabilities[entry_idx] // normalization)
        end
        # if entry_idx == prev_idx
        #     row[prev_idx] = 0;
        # end
    end
    # println("$row_idx : $row w/ norm = $normalization")
    # println(sum(row))
end

next_transition_matrix[prev_idx, :] .= 0;
next_transition_matrix[:, prev_idx] .= 0;

In [318]:
reset_transition_matrix = zeros(num_states, num_states);
for (row_idx, row) in enumerate(eachrow(reset_transition_matrix))
    reset_transition_matrix[row_idx, :] = probabilities;
end
# reset_transition_matrix = rationalize.(reset_transition_matrix)

12×12 Matrix{Rational{Int64}}:
 1//18  1//18  1//18  1//12  1//12  1//12  …  1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12  …  1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12  …  1//12  1//12  1//9  1//9  1//9
 1//18  1//18  1//18  1//12  1//12  1//12     1//12  1//12  1//9  1//9  1//9

In [410]:
using LinearAlgebra
θ_transition = Matrix(1I, num_states, num_states)
Z_sequence = ["A", "β", "β", "β", "D", "D"];

using LinearAlgebra
Matrix(1I, num_states, num_states);

function Viterbi(Y_sequence = Z_sequence, θ_transition = θ, Y_emission = transition_matrix,
                 Y_states=alphabets, θ_states = alphabets, θ_0 = probabilities,
                 θ_reset = reset_transition_matrix, Y_delete = next_transition_matrix, n = 6)
    T1 = Matrix(undef, size(θ_states, 1), size(Y_sequence, 1)); # Collect Values (Trellis)
    T2 = Matrix(undef, size(θ_states, 1), size(Y_sequence, 1)); # Track Hidden States (Pointers)
    Y_indexes = Dict([(Y_state, idx) for (idx, Y_state) in enumerate(Y_states)])
    # println(Y_emission)

    # Initialization of Markov Chain
    for (θ_idx, θ_state) in enumerate(θ_states)
        Y_idx = Y_indexes[ Y_sequence[1] ];
        T1[θ_idx, 1] = θ_0[θ_idx] * Y_emission[θ_idx, Y_idx]
        T2[θ_idx, 1] = 0;
    end

    θ_trans = θ_transition;
    Y_emiss = Y_emission;

    Y_changes = Dict();
    # Collect the MAPs
    for t = 2:size(T1, 2)
        Y_idx = Y_indexes[ Y_sequence[t] ];

        if t == n;
            θ_trans = source_transition_matrix(θ_0);
            # θ_most_likely = argmax(T1[:, t-1]);
            # println("θ change: $(θ_most_likely)")
            # Y_emiss = recreate_emission_matrix(θ_0, θ_most_likely, 0.02);
        else
            θ_trans = θ_transition;
        end

        for (θ_idx, θ_state) in enumerate(θ_states)
            # println(Y_emission[θ_idx, Y_idx])
            # if t == n

            compare = [];
            for state in 1:size(T1, 1)
                # println(T1[state, t-1])
                value = T1[state, t-1] * θ_trans[state, θ_idx] * Y_emiss[θ_idx, Y_idx];
                push!(compare, value)
            end
            # println("compare: $(size(compare))")
            T1[θ_idx, t], T2[θ_idx, t] = findmax( compare );
        end
    end

    println("T1: $T1")
    println("T2: $T2")
    # println("$(size(θ_states))")
    # println("T2: $(θ_states[ T2[3, 3] ] )")

    # Backtrack on finding the Hidden State Sequence
    z = argmax(T1[:, size(Y_sequence, 1)]);
    x = Vector(undef, size(Y_sequence, 1)); # Best path
    x[end] = θ_states[z] ;
    for k = size(T1, 2):-1:2
        z = T2[z, k];
        x[k-1] = θ_states[z];
    end

    return x, T1, T2
end

x, T1, T2 = Viterbi()

T1: Any[0.05444444444444444 9.607843137254902e-5 1.6955017301038063e-7 2.9920618766537757e-10 5.280109194094899e-13 1.6946506058805523e-11; 6.535947712418301e-5 1.1534025374855826e-7 2.0354162426216164e-10 3.591911016391088e-13 6.338666499513685e-16 1.6946506058805523e-11; 6.535947712418301e-5 1.1534025374855826e-7 2.0354162426216164e-10 3.591911016391088e-13 6.338666499513685e-16 1.6946506058805523e-11; 0.00010101010101010101 1.8365472910927456e-7 3.339176892895901e-10 6.071230714356184e-13 5.94980610006906e-13 1.4116439546984999e-8; 0.00010101010101010101 1.8365472910927456e-7 3.339176892895901e-10 6.071230714356184e-13 1.1038601298829424e-15 2.6190054818153986e-11; 0.00010101010101010101 1.8365472910927456e-7 3.339176892895901e-10 6.071230714356184e-13 1.1038601298829424e-15 2.6190054818153986e-11; 0.00010101010101010101 1.8365472910927456e-7 3.339176892895901e-10 6.071230714356184e-13 1.1038601298829424e-15 2.6190054818153986e-11; 0.00010101010101010101 9.898989898989899e-5 9.70101

(Any["β", "β", "β", "β", "β", "D"], Any[0.05444444444444444 9.607843137254902e-5 … 5.280109194094899e-13 1.6946506058805523e-11; 6.535947712418301e-5 1.1534025374855826e-7 … 6.338666499513685e-16 1.6946506058805523e-11; … ; 0.0001388888888888889 2.604166666666667e-7 … 1.7166137695312498e-15 3.601132537496173e-11; 0.0001388888888888889 2.604166666666667e-7 … 1.7166137695312498e-15 3.601132537496173e-11], Any[0 1 … 1 8; 0 2 … 2 8; … ; 0 11 … 11 8; 0 12 … 12 8])

In [401]:
T1

12×6 Matrix{Any}:
 0.0544444    9.60784e-5  1.6955e-7    2.99206e-10  5.28011e-13  1.80057e-11
 6.53595e-5   1.1534e-7   2.03542e-10  3.59191e-13  6.33867e-16  0.0
 6.53595e-5   1.1534e-7   2.03542e-10  3.59191e-13  6.33867e-16  1.80057e-11
 0.00010101   1.83655e-7  3.33918e-10  6.07123e-13  5.94981e-13  1.41164e-8
 0.00010101   1.83655e-7  3.33918e-10  6.07123e-13  1.10386e-15  2.78797e-11
 0.00010101   1.83655e-7  3.33918e-10  6.07123e-13  1.10386e-15  2.78797e-11
 0.00010101   1.83655e-7  3.33918e-10  6.07123e-13  1.10386e-15  2.78797e-11
 0.00010101   9.89899e-5  9.70101e-5   9.50699e-5   1.72854e-7   2.78797e-11
 0.00010101   1.83655e-7  3.33918e-10  6.07123e-13  1.10386e-15  2.78797e-11
 0.000138889  2.60417e-7  4.88281e-10  9.15527e-13  1.71661e-15  3.84121e-11
 0.000138889  2.60417e-7  4.88281e-10  9.15527e-13  1.71661e-15  3.84121e-11
 0.000138889  2.60417e-7  4.88281e-10  9.15527e-13  1.71661e-15  3.84121e-11

In [402]:
T2

12×6 Matrix{Any}:
 0   1   1   1   1  8
 0   2   2   2   2  1
 0   3   3   3   3  8
 0   4   4   4   4  8
 0   5   5   5   5  8
 0   6   6   6   6  8
 0   7   7   7   7  8
 0   8   8   8   8  8
 0   9   9   9   9  8
 0  10  10  10  10  8
 0  11  11  11  11  8
 0  12  12  12  12  8

In [411]:
x3, T1_3, T2_3 = Viterbi(Z_sequence, θ, transition_matrix,
                            alphabets, alphabets, probabilities,
                            reset_transition_matrix, next_transition_matrix, 3)

T1: Any[0.05444444444444444 9.607843137254902e-5 9.704892057833234e-9 1.712628010205865e-11 3.022284723892703e-14 5.333443630398888e-17; 6.535947712418301e-5 1.1534025374855826e-7 9.704892057833234e-9 1.712628010205865e-11 3.022284723892703e-14 5.333443630398888e-17; 6.535947712418301e-5 1.1534025374855826e-7 9.704892057833234e-9 1.712628010205865e-11 3.022284723892703e-14 5.333443630398888e-17; 0.00010101010101010101 1.8365472910927456e-7 1.499846954392409e-8 2.726994462531653e-11 2.6724545732810196e-11 2.6190054818153992e-11; 0.00010101010101010101 1.8365472910927456e-7 1.499846954392409e-8 2.726994462531653e-11 4.958171750057551e-14 9.014857727377365e-17; 0.00010101010101010101 1.8365472910927456e-7 1.499846954392409e-8 2.726994462531653e-11 4.958171750057551e-14 9.014857727377365e-17; 0.00010101010101010101 1.8365472910927456e-7 1.499846954392409e-8 2.726994462531653e-11 4.958171750057551e-14 9.014857727377365e-17; 0.00010101010101010101 9.898989898989899e-5 8.084175084175085e-6 7.

(Any["β", "β", "D", "D", "D", "D"], Any[0.05444444444444444 9.607843137254902e-5 … 3.022284723892703e-14 5.333443630398888e-17; 6.535947712418301e-5 1.1534025374855826e-7 … 3.022284723892703e-14 5.333443630398888e-17; … ; 0.0001388888888888889 2.604166666666667e-7 … 7.25023674242424e-14 1.3594193892045452e-16; 0.0001388888888888889 2.604166666666667e-7 … 7.25023674242424e-14 1.3594193892045452e-16], Any[0 1 … 1 1; 0 2 … 2 2; … ; 0 11 … 11 11; 0 12 … 12 12])

In [322]:
sum(reset_transition_matrix[2, :])

1//1

In [230]:
test_vec = [];
for (idx, alphabet) in alphabets
    if alphabet ∉ alphabets[4]
        push!(test_vec, alpha);
    end
end

In [232]:
zeros(2,2)

2×2 Matrix{Float64}:
 0.0  0.0
 0.0  0.0