In [161]:
include("forward_pass.jl")
include("backward_pass.jl");
include("utils.jl");
include("convolution.jl");
include("graph_building.jl");
include("load_data.jl");
#include("scalar_operators.jl");
include("broadcasted_operators.jl")
include("network.jl")

build_graph (generic function with 1 method)

In [162]:
# Build the computation graph. The call yields three values: the node list
# (bound to `net`) plus two more that are bound to `x` and `y`.
net, x, y = build_graph();
# Bare expression on the last line so the notebook displays the node list.
net

(28, 28, 1, 1)(3, 3, 1, 6)(1014, 84)(84, 10)(10, 1)

13-element Vector{Any}:
 var x
 ┣━ ^ 28×28×1×1 Array{Float32, 4}
 ┗━ ∇ Nothing
 var wh1
 ┣━ ^ 3×3×1×6 Array{Float32, 4}
 ┗━ ∇ Nothing
 op.?(typeof(conv))
 op.x1(typeof(relu))
 op.x2(typeof(maxpool))
 op.x3(typeof(flatten))
 var wh2
 ┣━ ^ 1014×84 Matrix{Float64}
 ┗━ ∇ Nothing
 op.?(typeof(mul!))
 op.x4(typeof(relu))
 var wo
 ┣━ ^ 84×10 Matrix{Float64}
 ┗━ ∇ Nothing
 op.x5(typeof(mul!))
 var wo2
 ┣━ ^ 10×1 Matrix{Float64}
 ┗━ ∇ Nothing
 op.x6(typeof(mul!))

In [163]:

# Run the forward pass over the node list; the notebook displays the value it returns.
forward!(net)


1×1 Matrix{Float64}:
 -3.3646563621008285

In [164]:

# Back-propagate through the graph, then list every node with its 1-based position.
backward!(net)

for (position, node) in enumerate(net)
    println(position, ". ", node)
end

1. var x
 ┣━ ^ 28×28×1×1 Array{Float32, 4}
 ┗━ ∇ 28×28×1 Array{Float64, 3}
2. var wh1
 ┣━ ^ 3×3×1×6 Array{Float32, 4}
 ┗━ ∇ 3×3×1×6 Array{Float64, 4}
3. op.?(typeof(conv))
4. op.x1(typeof(relu))
5. op.x2(typeof(maxpool))
6. op.x3(typeof(flatten))
7. var wh2
 ┣━ ^ 1014×84 Matrix{Float64}
 ┗━ ∇ 1014×84 Matrix{Float64}
8. op.?(typeof(mul!))
9. op.x4(typeof(relu))
10. var wo
 ┣━ ^ 84×10 Matrix{Float64}
 ┗━ ∇ 84×10 Matrix{Float64}
11. op.x5(typeof(mul!))
12. var wo2
 ┣━ ^ 10×1 Matrix{Float64}
 ┗━ ∇ 10×1 Matrix{Float64}
13. op.x6(typeof(mul!))


In [3]:
using MLDatasets, Base, Flux
# Load the MNIST training and test splits through MLDatasets.
train_data = MLDatasets.MNIST(split=:train)
test_data  = MLDatasets.MNIST(split=:test)

"""
    loader(data; batchsize::Int=1)

Wrap `data` in a shuffled `Flux.DataLoader`.

`data.features` is reshaped to `28×28×1×N` and `data.targets` is one-hot encoded
against the labels `0:9`; the loader is built over the `(features, onehot)` pair with
`batchsize` observations per batch.
"""
function loader(data; batchsize::Int=1)
    images = reshape(data.features, 28, 28, 1, :)
    labels = Flux.onehotbatch(data.targets, 0:9)
    return Flux.DataLoader((images, labels); batchsize=batchsize, shuffle=true)
end

loader (generic function with 1 method)

In [4]:
# Pull one batch from the training loader. `loader` defaults to batchsize=1 and
# shuffles, so this is a single (image, one-hot label) pair in shuffled order.
x1, y1 = first(loader(train_data))

(Float32[0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0;;;;], Bool[0; 0; … ; 0; 0;;])

In [5]:
"""
    dense(w, b, x, activation)
    dense(w, x, activation)
    dense(w, x)

Fully connected layer: `w * x`, plus the bias `b` (broadcast) when supplied,
passed through `activation` when supplied.
"""
dense(w, b, x, activation) = activation(w * x .+ b)
dense(w, x, activation) = activation(w * x)
dense(w, x) = w * x

"""
    mean_squared_loss(y, ŷ)

Element-wise half squared error `0.5 .* (y .- ŷ) .^ 2` between `y` and `ŷ`.

Both scalars are wrapped in `Constant`. Despite the name, no averaging or summing
over elements happens in this function.
"""
mean_squared_loss(y, ŷ) = Constant(0.5) .* ((y .- ŷ) .^ Constant(2))
#=
net = Chain(
    Conv((3, 3), 1 => 6,  relu, bias=false),
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(13*13*6 => 84, relu, bias=false), 
    Dense(84 => 10, identity, bias=false)
)
=#

mean_squared_loss (generic function with 1 method)

In [6]:
# Parameters and data for the small dense example network defined in the next cell.
Wh  = Variable(randn(10,2), name="wh")  # 10×2 random weights, first dense layer
Wo  = Variable(randn(1,10), name="wo")  # 1×10 random weights, second dense layer
x = Variable([1.98, 4.434], name="x")  # 2-element input vector
y = Variable([0.064], name="y")  # 1-element value handed to the loss as `y`

var y
 ┣━ ^ 1-element Vector{Float64}
 ┗━ ∇ Nothing

In [7]:
"""
    net(x, wh, wo, y)

Assemble the graph `mean_squared_loss(y, wo * relu(wh * x))` and return its nodes in
the order produced by `topological_sort`.

The hidden activation, the output and the loss nodes are given the names `x̂`, `ŷ`
and `loss` respectively.
"""
function net(x, wh, wo, y)
    hidden = dense(wh, x, relu)
    hidden.name = "x̂"

    prediction = dense(wo, hidden)
    prediction.name = "ŷ"

    loss = mean_squared_loss(y, prediction)
    loss.name = "loss"

    return topological_sort(loss)
end
# Build the node list for the toy network, then run one forward and one backward
# pass over it.
graph = net(x, Wh, Wo, y)
forward!(graph)
backward!(graph)


In [8]:

# List every node of the graph preceded by its 1-based position.
for (position, node) in enumerate(graph)
    println(position, ". ", node)
end

1. const 0.5
2. var y
 ┣━ ^ 1-element Vector{Float64}
 ┗━ ∇ 1×1 Matrix{Float64}
3. var wo
 ┣━ ^ 1×10 Matrix{Float64}
 ┗━ ∇ 1×10 Matrix{Float64}
4. var wh
 ┣━ ^ 10×2 Matrix{Float64}
 ┗━ ∇ 10×2 Matrix{Float64}
5. var x
 ┣━ ^ 2-element Vector{Float64}
 ┗━ ∇ 2×1 Matrix{Float64}
6. op.?(typeof(mul!))
7. op.x̂(typeof(relu))
8. op.ŷ(typeof(mul!))
9. op.?(typeof(-))
10. const 2
11. op.?(typeof(^))
12. op.loss(typeof(*))


In [9]:
using Flux

"""
    max_pool_backward(input, indices, grad_output, pool_size)

Backward pass for a max-pooling operation.

# Arguments
- `input`: input array used in the forward pass; only its size is read.
- `indices`: array whose entry `[j, i]` is the index into `input` (linear index or
  `CartesianIndex`) of the maximum selected for pooled cell `(j, i)`.
- `grad_output`: gradient of the loss with respect to the pooling output; it is read
  at every position `[j, i]` present in `indices`.
- `pool_size`: size of the pooling window `(height, width)`. Not used by the
  computation; kept so existing callers keep working.

# Returns
A `Float64` array of zeros with the size of `input` in which every
`grad_output[j, i]` has been accumulated at position `indices[j, i]`.
"""
function max_pool_backward(input, indices, grad_output, pool_size)
    grad_input = zeros(size(input))

    # Only the winning element of each pooled region receives gradient. `+=` accumulates
    # in case the same input element is referenced by more than one pooled cell.
    for cell in CartesianIndices((size(indices, 1), size(indices, 2)))
        grad_input[indices[cell]] += grad_output[cell]
    end

    return grad_input
end

# Example usage
input = rand(4, 4)  # Input feature map
pool_size = (2, 2)  # Pooling window size
indices = Flux.argmax(input, dims=(1, 2))  # Indices of maximum values
grad_output = rand(2, 2)  # Gradient of the loss function with respect to the output of max-pooling

# Perform backward pass
grad_input = max_pool_backward(input, indices, grad_output, pool_size)


4×4 Matrix{Float64}:
 0.136007  0.0  0.0  0.0
 0.0       0.0  0.0  0.0
 0.0       0.0  0.0  0.0
 0.0       0.0  0.0  0.0

In [44]:
# Example usage
sizer = (6, 6)  # Size of the array
gain = 1.0  # Gain factor
fan_in = 10  # Number of input units
fan_out = 5  # Number of output units

result = uniform_rand(sizer, gain, fan_in, fan_out)

6×6 Matrix{Float64}:
  0.499089  -0.174685  -0.568829   0.289587  -0.372544   -0.0210263
  0.189035  -0.273204  -0.10566   -0.167201  -0.514422   -0.404746
 -0.347376  -0.444532  -0.601563   0.116733   0.197133    0.556858
  0.631024  -0.557323   0.231998  -0.609688   0.418192    0.277283
  0.48974   -0.133705   0.398736   0.206601   0.0790698  -0.183227
 -0.628861   0.363727   0.233374  -0.170211   0.22924     0.483696

In [45]:
"""
    forwarder(x)

2×2 max pooling with stride 2 over the matrix `x`.

Returns a `Float64` matrix of size `(size(x, 1) ÷ 2, size(x, 2) ÷ 2)` whose entry
`[j, i]` is the maximum of `x[2j-1:2j, 2i-1:2i]`. A trailing odd row or column is
ignored.
"""
function forwarder(x)
    n_rows, n_cols = size(x)
    pooled_rows = n_rows ÷ 2
    pooled_cols = n_cols ÷ 2

    return Float64[
        maximum(x[(2r - 1):(2r), (2c - 1):(2c)]) for r in 1:pooled_rows, c in 1:pooled_cols
    ]
end

forwarder (generic function with 1 method)

var x
 ┣━ ^ 28×28×1×1 Array{Float32, 4}
 ┗━ ∇ Nothing