In [1]:
import Pkg
Pkg.instantiate()

In [2]:
using BenchmarkTools: @btime, @belapsed, @ballocated
using LoopVectorization: @turbo

In [3]:
using LinearAlgebra: mul!

In [4]:
using CairoMakie

In [63]:
function add_to_A_B_times_C!(A, B, C)
    @turbo for j in axes(C, 2)
        for k in axes(B, 2)
            for i in axes(A, 1)
                A[i, j] += B[i, k] * C[k, j]
            end
        end
    end
end

function add_to_A_B_times_C!(A, B, C, bks)
    variable = zeros(7, 2)
    for j in 1:ceil(Int, size(C, 2)/bks)
        # println('j', j)
        for k in 1:ceil(Int, size(B, 2)/bks)
            # print('k', k)
            for i in 1:ceil(Int, size(A, 1)/bks)
                # print('i', i)
                for block_j in Int((j-1)*bks):min(Int((j)*bks-1), size(C, 2)-1)
                    # println()
                    # print(' ', 'b', ' ', 'j', block_j)
                    for block_k in Int((k-1)*bks):min(Int((k)*bks-1), size(B, 2)-1)
                        # print(' ', 'k', block_k)
                        for block_i in Int((i-1)*bks):min(Int((i)*bks-1), size(A, 1)-1)
                            # print(' ', 'i', block_i)
                            A[block_i+1, block_j+1] += B[block_i+1, block_k+1] * C[block_k+1, block_j+1]
                        end
                    end
                end
                # println()
            end
        end
    end
end

function oblivious_add_to_A_B_times_C!(A, B, C, bks)
    i_size = size(A, 1)
    j_size = size(C, 2)
    k_size = size(B, 2)

    # div(i_size, 2) = div(div(i_size, 2),2)
    # div(j_size, 2) = div(div(j_size, 2),2)
    # div(k_size, 2) = div(div(k_size, 2),2)

    # If we want to further subdivide
    if min(i_size, j_size, k_size) > bks
        # a11 = b11*c11 + b12 * c21
        oblivious_add_to_A_B_times_C!(
            @view(A[1:div(i_size, 2), 1:div(j_size, 2)]),
            @view(B[1:div(i_size, 2), 1:div(k_size, 2)]),
            @view(C[1:div(k_size, 2), 1:div(j_size, 2)]),
            bks
        )
        oblivious_add_to_A_B_times_C!(
            @view(A[1:div(i_size, 2), 1:div(j_size, 2)]),
            @view(B[1:div(i_size, 2), div(k_size, 2)+1:end]),
            @view(C[div(k_size, 2)+1:end, 1:div(j_size, 2)]),
            bks
        )
        # a12 = b11*c12 + b12 * c22
        oblivious_add_to_A_B_times_C!(
            @view(A[1:div(i_size, 2), div(j_size, 2)+1:end]),
            @view(B[1:div(i_size, 2), 1:div(k_size, 2)]),
            @view(C[1:div(k_size, 2), div(j_size, 2)+1:end]),
            bks
        )
        oblivious_add_to_A_B_times_C!(
            @view(A[1:div(i_size, 2), div(j_size, 2)+1:end]),
            @view(B[1:div(i_size, 2), div(k_size, 2)+1:end]),
            @view(C[div(k_size, 2)+1:end, div(j_size, 2)+1:end]),
            bks
        )
        # a21 = b21*c11 + b22 * c21
        oblivious_add_to_A_B_times_C!(
            @view(A[div(i_size, 2)+1:end, 1:div(j_size, 2)]),
            @view(B[div(i_size, 2)+1:end, 1:div(k_size, 2)]),
            @view(C[1:div(k_size, 2), 1:div(j_size, 2)]),
            bks
        )
        oblivious_add_to_A_B_times_C!(
            @view(A[div(i_size, 2)+1:end, 1:div(j_size, 2)]),
            @view(B[div(i_size, 2)+1:end, div(k_size, 2)+1:end]),
            @view(C[div(k_size, 2)+1:end, 1:div(j_size, 2)]),
            bks
        )
        # a22 = b21*c12 + b22 * c22
        oblivious_add_to_A_B_times_C!(
            @view(A[div(i_size, 2)+1:end, div(j_size, 2)+1:end]),
            @view(B[div(i_size, 2)+1:end, 1:div(k_size, 2)]),
            @view(C[1:div(k_size, 2), div(j_size, 2)+1:end]),
            bks
        )
        oblivious_add_to_A_B_times_C!(
            @view(A[div(i_size, 2)+1:end, div(j_size, 2)+1:end]),
            @view(B[div(i_size, 2)+1:end, div(k_size, 2)+1:end]),
            @view(C[div(k_size, 2)+1:end, div(j_size, 2)+1:end]),
            bks
        )
    #If we are ready to break the recursion
    else
        add_to_A_B_times_C!(A, B, C)
    end
end

oblivious_add_to_A_B_times_C! (generic function with 1 method)

In [64]:
# Testing blocked matmul function
i = 10
A = zeros(i, 6)
B = rand(1:6, i, 6)
C = rand(1:6, 6, 6)
# println(A)
# println(B)
# println(C)
println("Testing for memory allocation of blocked matmul...")
allocated_memory = @ballocated add_to_A_B_times_C!(A, B, C, 2)
print(allocated_memory)
@assert allocated_memory == 0
println("No memory allocated, good!")
println("Testing correctness of blocked matmul...")
A .= 0; 
# println()
# display(A)
oblivious_add_to_A_B_times_C!(A, B, C, 2)
display(A)
display(B*C)
# display(A)

@assert A ≈ B * C
println("Result correct, good!")

Testing for memory allocation of blocked matmul...


176

AssertionError: AssertionError: allocated_memory == 0

In [None]:
# Testing blocked matmul function
A = zeros(6, 2)
B = rand(1:6, 6, 4)
C = rand(1:6, 4, 2)
# println(A)
# println(B)
# println(C)
# println("Testing for memory allocation of blocked matmul...")
# allocated_memory = @ballocated add_to_A_B_times_C!(A, B, C, 301)
# @assert allocated_memory == 0
# println("No memory allocated, good!")
# println("Testing correctness of blocked matmul...")
A .= 0; 
# println()
println(add_to_A_B_times_C!(A, B, C, 2))
println(B*C)

@assert A ≈ B * C
println("Result correct, good!")

In [None]:
# Testing blocked matmul function
A = zeros(2014, 301)
B = randn(2014, 1037)
C = randn(1037, 301)
println("Testing for memory allocation of blocked matmul...")
allocated_memory = @ballocated add_to_A_B_times_C!(A, B, C, 301)
print(allocated_memory)
@assert allocated_memory == 0
println("No memory allocated, good!")
println("Testing correctness of blocked matmul...")
A .= 0; add_to_A_B_times_C!(A, B, C, 301)
@assert A ≈ B * C
println("Result correct, good!")

In [13]:
function test!(x)
    for i in axes(x, 1)
        for j in axes(x, 2)
            x[i, j] += 5
        end
    end
end

A = zeros(5,4)

display(A)
# display(test(view(A, 1:3, 2:4)))
test!(A)
# A *= 5
display(A)
# parent(A)

@view A



5×4 Matrix{Float64}:
 0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0

5×4 Matrix{Float64}:
 5.0  5.0  5.0  5.0
 5.0  5.0  5.0  5.0
 5.0  5.0  5.0  5.0
 5.0  5.0  5.0  5.0
 5.0  5.0  5.0  5.0

LoadError: LoadError: ArgumentError: Invalid use of @view macro: argument must be a reference expression A[...].
in expression starting at /home/jslaby6/classes/6643/HW2_code/test.ipynb:18