In [1]:
english::Vector{String} = ["A", "B", "C", "D", "E", "I"];
greek::Vector{String} = ["α", "β", "γ", "δ", "ϵ", "ϕ"];
alphabets::Vector{String} = [];

append!(alphabets, english)
append!(alphabets, greek)

12-element Vector{String}:
 "A"
 "B"
 "C"
 "D"
 "E"
 "I"
 "α"
 "β"
 "γ"
 "δ"
 "ϵ"
 "ϕ"

In [2]:
P = Dict();
for alphabet in alphabets
    if string(alphabet) ∈ greek
        P[alphabet] = 1 // 12;
    elseif string(alphabet) ∈ ["A", "E", "I"]
        P[alphabet] = 1 // 9;
    else
        P[alphabet] = 1 // 18;
    end
end

P

Dict{Any, Any} with 12 entries:
  "C" => 1//18
  "ϵ" => 1//12
  "δ" => 1//12
  "B" => 1//18
  "A" => 1//9
  "ϕ" => 1//12
  "D" => 1//18
  "α" => 1//12
  "E" => 1//9
  "γ" => 1//12
  "I" => 1//9
  "β" => 1//12

In [3]:
P_2 = Dict();
for (alphabet, probability) in P
    for (alphabet_2, probability_2) in P
        P_2[alphabet * alphabet_2] = probability * probability_2;
    end
end

P_3 = Dict();
for (alphabet, probability) in P
    for (alphabet_2, probability_2) in P_2
        P_3[alphabet * alphabet_2] = probability * probability_2;
    end
end

In [4]:
function sort_prob(P_Dict)
    probabilities = [];
    alphabets = [];
    for (alphabet, probability) in P_Dict
        # println("$(typeof(alphabet))")
        append!(probabilities, probability)
        append!(alphabets, alphabet)
    end

    sorted_idx = sortperm(probabilities);
    probabilities = probabilities[sorted_idx];
    alphabets = alphabets[sorted_idx];

    println("$alphabets")
    println("$probabilities")

    P_sorted_dict = Dict();
    for (idx, probability) in enumerate(probabilities)
        P_sorted_dict[ string(alphabets[idx]) ] = probability
    end
    return probabilities, alphabets, P_sorted_dict
end

probabilities, alphabets2, P_sorted_dict = sort_prob(P)

Any['C', 'B', 'D', 'ϵ', 'δ', 'ϕ', 'α', 'γ', 'β', 'A', 'E', 'I']
Any[1//18, 1//18, 1//18, 1//12, 1//12, 1//12, 1//12, 1//12, 1//12, 1//9, 1//9, 1//9]


(Any[1//18, 1//18, 1//18, 1//12, 1//12, 1//12, 1//12, 1//12, 1//12, 1//9, 1//9, 1//9], Any['C', 'B', 'D', 'ϵ', 'δ', 'ϕ', 'α', 'γ', 'β', 'A', 'E', 'I'], Dict{Any, Any}("C" => 1//18, "ϵ" => 1//12, "δ" => 1//12, "B" => 1//18, "A" => 1//9, "ϕ" => 1//12, "D" => 1//18, "α" => 1//12, "E" => 1//9, "γ" => 1//12…))

In [5]:
mutable struct huffman_node{A, B}
    code::A
    probability::B
    left::Union{Nothing, huffman_node}
    right::Union{Nothing, huffman_node}
end
# For Huffman Leaf Construction:
huffman_node(x::A, y::B) where A where B = huffman_node{A, B}(x, y, nothing, nothing)

function construct_huffman_tree(P_sorted_dict)
    trees = [huffman_node( alphabet, probability) for (alphabet, probability) in P_sorted_dict]
    while length(trees) > 1
        sort!(trees, lt = (x, y) -> x.probability < y.probability)
        smallest = popfirst!(trees)
        next_smallest = popfirst!(trees)
        tree = huffman_node("s", smallest.probability + next_smallest.probability)
        tree.left = smallest
        tree.right = next_smallest
        push!(trees, tree )
    end
    return trees[1]
end

huffman_tree = construct_huffman_tree(P_sorted_dict)

huffman_node{String, Rational{Int64}}("s", 1//1, huffman_node{String, Rational{Int64}}("s", 5//12, huffman_node{String, Rational{Int64}}("s", 7//36, huffman_node{String, Rational{Int64}}("β", 1//12, nothing, nothing), huffman_node{String, Rational{Int64}}("A", 1//9, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 2//9, huffman_node{String, Rational{Int64}}("E", 1//9, nothing, nothing), huffman_node{String, Rational{Int64}}("I", 1//9, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 7//12, huffman_node{String, Rational{Int64}}("s", 1//4, huffman_node{String, Rational{Int64}}("s", 1//9, huffman_node{String, Rational{Int64}}("C", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("B", 1//18, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 5//36, huffman_node{String, Rational{Int64}}("D", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("ϵ", 1//12, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 1//3, 

In [7]:
function print_encoding(node, code, encoder)
    code *= "0"
    if ~(isnothing(node.left))
        print_encoding(node.left, code, encoder)
    end
    code = code[1:end-1]

    code *= "1"
    if ~(isnothing(node.right))
        print_encoding(node.right, code, encoder)
    end
    code = code[1:end-1]
    encoder[node.code] = code

end

encoder = Dict()
@time print_encoding(huffman_tree, "", encoder)

  0.031132 seconds (74.31 k allocations: 3.648 MiB, 99.60% compilation time)


""

In [9]:
encoder

Dict{Any, Any} with 13 entries:
  "C" => "1000"
  "ϵ" => "1011"
  "δ" => "1100"
  "B" => "1001"
  "s" => ""
  "A" => "001"
  "ϕ" => "1101"
  "D" => "1010"
  "α" => "1110"
  "E" => "010"
  "γ" => "1111"
  "I" => "011"
  "β" => "000"

In [10]:
delete!(encoder, "s")

Dict{Any, Any} with 12 entries:
  "C" => "1000"
  "ϵ" => "1011"
  "δ" => "1100"
  "B" => "1001"
  "A" => "001"
  "ϕ" => "1101"
  "D" => "1010"
  "α" => "1110"
  "E" => "010"
  "γ" => "1111"
  "I" => "011"
  "β" => "000"

In [22]:
length(encoder)

12

In [11]:
# encoder   : Dictionary encoding alphabet -> codeword
# P         : Dictionary describing probability mass function (alphabet -> probability)
expected_length(encoder, P) = sum([P[alphabet] * length(codeword) for (alphabet, codeword) in encoder])

expected_length(encoder, P)

43//12

In [27]:
function mcmillan_inequality(encoder)
    value = sum([ (2//1) ^ (-1 * length(codeword)) for (___, codeword) in encoder]);
    return value
end

mcmillan_inequality(encoder)

1//1

In [12]:
P_Y = Dict();
P_Y["english"] = 0;
P_Y["greek"] = 0;
for alphabet in alphabets
    if string(alphabet) ∈ english
        P_Y["english"] += P[alphabet];
    elseif string(alphabet) ∈ greek
        P_Y["greek"] += P[alphabet];
    end
end

In [13]:
P_Y

Dict{Any, Any} with 2 entries:
  "greek"   => 1//2
  "english" => 1//2

In [21]:
function decode_huffman(huffman_tree, codeword)
    ans = ""
    curr = huffman_tree

    is_leaf(node) = isnothing(node.left) && isnothing(node.right)
    for idx = 1:length(codeword)
        if string(codeword[idx]) == "0" && ~( is_leaf(curr) )
            println("Going left")
            curr = curr.left
        elseif string(codeword[idx]) == "1" && ~( is_leaf(curr) )
            println("Going Right")
            curr = curr.right
        end

        if is_leaf(curr)
            println("Successfuly Decoded: $(curr.code)")
            ans *= curr.code
            curr = huffman_tree
        end
    end

    return ans * "\0"
end

decode_huffman(huffman_tree, "000010")

Going left
Going left
Going left
Successfuly Decoded: β
Going left
Going Right
Going left
Successfuly Decoded: E


"βE\0"

In [15]:
huffman_tree

huffman_node{String, Rational{Int64}}("s", 1//1, huffman_node{String, Rational{Int64}}("s", 5//12, huffman_node{String, Rational{Int64}}("s", 7//36, huffman_node{String, Rational{Int64}}("β", 1//12, nothing, nothing), huffman_node{String, Rational{Int64}}("A", 1//9, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 2//9, huffman_node{String, Rational{Int64}}("E", 1//9, nothing, nothing), huffman_node{String, Rational{Int64}}("I", 1//9, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 7//12, huffman_node{String, Rational{Int64}}("s", 1//4, huffman_node{String, Rational{Int64}}("s", 1//9, huffman_node{String, Rational{Int64}}("C", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("B", 1//18, nothing, nothing)), huffman_node{String, Rational{Int64}}("s", 5//36, huffman_node{String, Rational{Int64}}("D", 1//18, nothing, nothing), huffman_node{String, Rational{Int64}}("ϵ", 1//12, nothing, nothing))), huffman_node{String, Rational{Int64}}("s", 1//3, 