# 17. 컴퓨터 비전

합성곱 신경망을 이용한 컴퓨터 비전

- 이미지 분류, 전이 학습, 이미지 생성, 객체 탐지 등

## 17.1 합성곱 신경망

### 패션 아이템 분류

In [1]:
import MLDatasets

In [2]:
import Flux

In [3]:
import NNlib

In [4]:
import Flux: onehotbatch 

In [5]:
import MLUtils: DataLoader

In [6]:
import Zygote

In [7]:
import Optimisers

In [8]:
import Formatting: printfmtln

In [9]:
using Random: MersenneTwister

In [10]:
import Plots

In [11]:
using DataFrames

In [12]:
using CSV

In [13]:
ENV["DATADEPS_ALWAYS_ACCEPT"] = true # 필요 데이터 자동 다운로드

true

In [14]:
# M2의 gpu 사용을 위해 Metal.jl 패키지를 로딩
# using CUDA
using Metal

# Flux.gpu_backend!("Metal")
# Flux.GPU_BACKEND

In [15]:
Metal.versioninfo()

macOS 13.5.0, Darwin 22.6.0

Toolchain:
- Julia: 1.9.2
- LLVM: 14.0.6

Julia packages: 
- Metal.jl: 0.5.1
- Metal_LLVM_Tools_jll: 0.5.1+0

1 device:
- Apple M2 (64.000 KiB allocated)


In [16]:
Metal.functional()

true

In [17]:
# 비교 모델 (16장 mlp 모델)
function build_mlp_model(rng)
    Flux.Chain(
        Flux.flatten,
        Flux.Dense(28 * 28 => 512 , NNlib.relu; init = init(rng)),
        Flux.Dense(512 => 512, NNlib.relu; init = init(rng)),
        Flux.Dense(512 => 10 ; init = init(rng)),
    )
end

build_mlp_model (generic function with 1 method)

#### 합성곱 기본 모델 만들기

- build_cnn_model 만들기

In [29]:
function build_cnn_model(rng)
    same = Flux.SamePad()
    Flux.Chain(
        Flux.Conv((3,3), 1 => 32, NNlib.relu, pad = same;), # init=init(rng)),
        Flux.MaxPool((2,2)), # (28 x 28) => (14, 14)
        
        Flux.Conv((3,3), 32 => 64, NNlib.relu, pad = same;), #  init = init(rng)),
        Flux.MaxPool((2,2)), # (14 x 14) => (7 x 7)
        
        Flux.Conv((3, 3), 64 => 64, NNlib.relu, pad = same;), #  init = init(rng)),
        Flux.MaxPool((2,2)), # (7 x 7) => (3 x 3)
        
        Flux.flatten, 
        Flux.Dense(3 * 3 * 64 => 64, NNlib.relu;), #  init = init(rng)),
        Flux.Dense(64 => 10;), #  init = init(rng)), 
    )
end

build_cnn_model (generic function with 1 method)

* 파이토치는 특성의 차원에 따라 Conv1d, Conv2d, Conv3d 등이 구분되지만 플럭스의 합성곱 계층은 Conv 계층으로 1, 2, 3 차원 데이터를 모두 처리함
* 각 합성곱층 및 밀집층은 앞 장에서와 마찬가지로 init 키워드 인수에 init 함수를 넘겨서 가중치를 초기화 함
* Chain으로 묶인 전체 모델은 입력 데이터에 대해 합성곱 계층과 풀링 계층을 세 번 반복한 후 평탄화하여 밀집 계층으로 넘겨줌

#### 드롭아웃과 배치 정규화 적용 모델 만들기

- build_cnn2_model

기본적인 합성곱 모델에 드롭아웃과 배치 정규화를 적용한 모델을 cnn2라는 이름으로 만들어 봄

In [19]:
function build_cnn2_model(rng)
    same = Flux.SamePad()
    Flux.Chain(
        Flux.Conv((3,3), 1 => 32, NNlib.relu, pad = same; init=init(rng)),
        Flux.MaxPool((2,2)), # (28 x 28) => (14 x 14)
        Flux.Dropout(0.2),
        Flux.BatchNorm(32),

        Flux.Conv((3,3), 32 => 64, NNlib.relu, pad = same; init=init(rng)),
        Flux.MaxPool((2,2)), # (14 x 14) => (7 x 7)
        Flux.Dropout(0.2),
        Flux.BatchNorm(64),

        Flux.Conv((3,3), 64 => 64, NNlib.relu, pad = same; init=init(rng)),
        Flux.MaxPool((2,2)), # (7 x 7) => (3 x 3)
        Flux.Dropout(0.2),
        Flux.BatchNorm(64),

        Flux.flatten,
        Flux.Dense(3*3*64 => 64, NNlib.relu; init = init(rng)),
        Flux.Dropout(0.2),
        Flux.Dense(64 => 10; init = init(rng)),        
    )
end

build_cnn2_model (generic function with 1 method)

* Dropout 타입은 키워드 인수로 난수 생성기를 지정할 수 있지만 이는 CPU에서 학습할 때만 적용되고 GPU에서는 난수 생성기를 지정하고 모델을 돌리면 명시적으로 에러가 발생


#### get_data, train, test 함수(16장 동일)

In [20]:
function get_data(batchsize = 64)
    xtrain, ytrain = MLDatasets.FashionMNIST(:train)[:]
    xtest, ytest = MLDatasets.FashionMNIST(:test)[:]

    xtrain = reshape(xtrain, 28, 28, 1, :)
    xtest = reshape(xtest, 28, 28, 1, :)

    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    train_loader = DataLoader((xtrain, ytrain), batchsize = batchsize)
    test_loader = DataLoader((xtest, ytest), batchsize = batchsize)

    return train_loader, test_loader
end

get_data (generic function with 2 methods)

In [67]:
function train(loader, model, loss_fn, optimizer)
    num_batches = length(loader)
    losses = Float32[]
    Flux.testmode!(model, false)
    for (batch, (X, y)) in enumerate(loader)
        # X = X |> Flux.gpu
        # y = y |> Flux.gpu
        X, y = Flux.gpu(X), Flux.gpu(y)
        grad = Zygote.gradient(m -> loss_fn(m, X, y), model)[1]
        optimizer, model = Optimisers.update(optimizer, model, grad)
        if batch % 100 == 0
            loss = loss_fn(model, X, y)
            printfmtln("[Train] loss: {:.7f} [{:>3d}/{:>3d}]",
                loss, batch, num_batches)
            push!(losses, loss)
        end
    end
    model, optimizer, losses
end

train (generic function with 1 method)

In [66]:
function test(loader, model, loss_fn)
    num_batches = length(loader)
    Flux.testmode!(model, true)
    acc, tot = 0, 0
    loss = 0f0
    for (X, y) in loader
        # X = X |> Flux.gpu
        # y = y |> Flux.gpu
        X, y = Flux.gpu(X), Flux.gpu(y)
        pred = model(X)
        acc += sum(Flux.onecold(pred) .== Flux.onecold(y))
        tot += size(X)[end]
        loss += loss_fn(model, X, y)
    end
    acc, avg_loss = acc / tot * 100, loss / num_batches
    printfmtln("[Test] Accuracy: {:.1f}, Avg loss: {:.7f}", acc, avg_loss)
    acc, avg_loss
end

test (generic function with 1 method)

In [23]:
init(rng) = Flux.glorot_uniform(rng)

init (generic function with 1 method)

배치 실행 함수 run_batch 작성

In [43]:
function run_epochs(loaders, model, loss_fn, optimizer, epochs)
    train_loader, test_loader = loaders
    train_losses, accuracies = [], [];
    for t in 1:epochs
        println("Epochs $t")
        println("---------------------------")
        model, optimizer, losses =
            train(train_loader, model, loss_fn, optimizer)
        train_losses = vcat(train_losses, losses)
        acc, _ = test(test_loader, model, loss_fn)
        push!(accuracies, acc)
    end
    model, train_losses, accuracies
end

run_epochs (generic function with 1 method)

In [44]:
function run_batch(loaders, models, epochs)
    train_loader, test_loader = loaders
    loss_fn(m, x, y) = Flux.Losses.logitcrossentropy(m(x), y)
    train_losses, accuracies = [], []
    for (k, model) in enumerate(models)
        train_loss, accuracy = [], []
        optimizer = Optimisers.setup(Optimisers.Adam(), model)
        for t in 1:epochs
            println("[Model $k] Epoch $t")
            println("---------------------------------")
            model, optimizer, losses = train(train_loader, model, loss_fn, optimizer)
            train_loss = vcat(train_loss, losses)
            acc, _ = test(test_loader, model, loss_fn)
            push!(accuracy, acc)
        end
        push!(train_losses, train_loss)
        push!(accuracies, accuracy)
    end
    train_losses, accuracies
end

run_batch (generic function with 1 method)

### mlp, cnn, cnn2 비교

In [45]:
rng = MersenneTwister(1);

In [68]:
# 단일 CNN 모델 돌리기 
model = build_cnn_model(rng) |> Flux.gpu

loss_fn = (m, x, y) -> Flux.Losses.logitcrossentropy(m(Flux.gpu(x), Flux.gpu(y));
optimizer = Optimisers.setup(Optimisers.Descent(0.001f0), model)

LoadError: syntax: incomplete: premature end of input

In [69]:
model, losses, accu = run_epochs(get_data(), model, loss_fn, optimizer, 5)

Epochs 1
---------------------------


[33m[1m│ [22m[39mInvocation of getindex resulted in scalar indexing of a GPU array.
[33m[1m│ [22m[39mThis is typically caused by calling an iterating implementation of a method.
[33m[1m│ [22m[39mSuch implementations *do not* execute on the GPU, but very slowly on the CPU,
[33m[1m│ [22m[39mand therefore are only permitted from the REPL for prototyping purposes.
[33m[1m│ [22m[39mIf you did intend to index this array, annotate the caller with @allowscalar.
[33m[1m└ [22m[39m[90m@ GPUArraysCore ~/.julia/packages/GPUArraysCore/uOYfN/src/GPUArraysCore.jl:106[39m


LoadError: TaskFailedException

[91m    nested task error: [39mTaskFailedException
    
    [91m    nested task error: [39mArgumentError: cannot take the CPU address of a MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}
        Stacktrace:
         [1] [0m[1munsafe_convert[22m[0m[1m([22m[90m#unused#[39m::[0mType[90m{Ptr{Float32}}[39m, [90mx[39m::[0mMtlArray[90m{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}[39m[0m[1m)[22m
        [90m   @[39m [35mMetal[39m [90m~/.julia/packages/Metal/lnkVP/src/[39m[90m[4marray.jl:148[24m[39m
         [2] [0m[1munsafe_convert[22m[0m[1m([22m[90m#unused#[39m::[0mType[90m{Ptr{Float32}}[39m, [90mV[39m::[0mSubArray[90m{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}[39m[0m[1m)[22m
        [90m   @[39m [90mBase[39m [90m./[39m[90m[4msubarray.jl:437[24m[39m
         [3] [0m[1mpointer[22m
        [90m   @[39m [90m./[39m[90m[4mabstractarray.jl:1243[24m[39m[90m [inlined][39m
         [4] [0m[1mmacro expansion[22m
        [90m   @[39m [90m~/.julia/packages/NNlib/5iRSB/src/impl/[39m[90m[4mconv_im2col.jl:58[24m[39m[90m [inlined][39m
         [5] [0m[1m(::NNlib.var"#647#648"{MtlArray{Float32, 3, Metal.MTL.MTLResourceStorageModePrivate}, Float32, Float32, SubArray{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}, SubArray{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, NNlib.DenseConvDims{3, 3, 3, 6, 3}, Int64, Int64, Int64, UnitRange{Int64}, Int64})[22m[0m[1m([22m[0m[1m)[22m
        [90m   @[39m [36mNNlib[39m [90m./[39m[90m[4mthreadingconstructs.jl:404[24m[39m
    Stacktrace:
     [1] [0m[1msync_end[22m[0m[1m([22m[90mc[39m::[0mChannel[90m{Any}[39m[0m[1m)[22m
    [90m   @[39m [90mBase[39m [90m./[39m[90m[4mtask.jl:445[24m[39m
     [2] [0m[1mmacro expansion[22m
    [90m   @[39m [90m./[39m[90m[4mtask.jl:477[24m[39m[90m [inlined][39m
     [3] [0m[1mconv_im2col![22m[0m[1m([22m[90my[39m::[0mSubArray[90m{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}[39m, [90mx[39m::[0mSubArray[90m{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}[39m, [90mw[39m::[0mMtlArray[90m{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}[39m, [90mcdims[39m::[0mNNlib.DenseConvDims[90m{3, 3, 3, 6, 3}[39m; [90mcol[39m::[0mMtlArray[90m{Float32, 3, Metal.MTL.MTLResourceStorageModePrivate}[39m, [90malpha[39m::[0mFloat32, [90mbeta[39m::[0mFloat32, [90mntasks[39m::[0mInt64[0m[1m)[22m
    [90m   @[39m [36mNNlib[39m [90m~/.julia/packages/NNlib/5iRSB/src/impl/[39m[90m[4mconv_im2col.jl:50[24m[39m
     [4] [0m[1mconv_im2col![22m[0m[1m([22m[90my[39m::[0mSubArray[90m{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}[39m, [90mx[39m::[0mSubArray[90m{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}[39m, [90mw[39m::[0mMtlArray[90m{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}[39m, [90mcdims[39m::[0mNNlib.DenseConvDims[90m{3, 3, 3, 6, 3}[39m[0m[1m)[22m
    [90m   @[39m [36mNNlib[39m [90m~/.julia/packages/NNlib/5iRSB/src/impl/[39m[90m[4mconv_im2col.jl:23[24m[39m
     [5] [0m[1m(::NNlib.var"#305#309"{Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, NNlib.DenseConvDims{3, 3, 3, 6, 3}, SubArray{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, SubArray{Float32, 5, MtlArray{Float32, 5, Metal.MTL.MTLResourceStorageModePrivate}, Tuple{Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}})[22m[0m[1m([22m[0m[1m)[22m
    [90m   @[39m [36mNNlib[39m [90m./[39m[90m[4mthreadingconstructs.jl:404[24m[39m

In [None]:
# models = [build_mlp_model(rng), build_cnn_model(rng), build_cnn2_model(rng)]
models2 = [build_cnn_model(rng), build_cnn2_model(rng)]

In [None]:
# models = models .|> Flux.gpu;
models2 = models2 .|> Flux.gpu;

- 

In [None]:
# models
models2

In [None]:
# epochs 설정
epochs = 10;

In [None]:
# model = build_cnn_model(rng) |> Flux.gpu;

In [None]:
Flux.GPU_BACKEND

In [None]:
train_losses, accuracies = run_batch(get_data(), models2, epochs);
# train_losses, accuracies = run_batch(get_data(), models, epochs);

In [None]:
label = ["mlp", "cnn", "cnn2"];
title = "Fashion Item Accuracy";
Plots.plot(1:epochs, accuracies, label = label, title = title)

### 숫자 손글씨 분류

- 앞의 get_data 함수의 MLDatasets.FashionMNIST를 MLDatasets.MNIST로 바꾸기만 하면 패션 아이템 이미지 대신 숫자 손글씨 이미지에 대해서도 분류할 수 있다.

In [None]:
# title = "Handwritten Digit Accuaracy";
# Plots.plot(1:epochs, accuracies, label = label, title = title)

## 17.2 전이학습

In [38]:
using GPUArrays

methods(GPUArrays.device)

In [None]:
Metal.MTL.MTLResourceStorageModePrivate

In [50]:
ENV["METAL_CAPTURE_ENABLED"] = 1

1

In [51]:
using Metal