# GPU (remote)

Let's start a Julia worker process on a GPU node of a remote supercomputer cluster.

In [None]:
using Distributed

# Start one Julia worker on the remote GPU node (machine spec + worker count).
# `exename` is the Julia binary to run on the remote machine, `exeflags` and
# `dir` select the project environment and working directory there, and
# `tunnel=true` asks for the connection to go through an SSH tunnel.
addprocs(
    [("cbauer17@gpu2", 1)];
    exename=`/projects/ag-trebst/bauer/bin/julia-1.3.1/bin/julia`,
    exeflags=`--project=/projects/ag-trebst/bauer/JuliaOulu20/backup/gpu_remote`,
    dir="/projects/ag-trebst/bauer/JuliaOulu20/backup/gpu_remote",
    tunnel=true
)

# Sanity check: ask a worker for its hostname to confirm where the code runs.
@fetch gethostname()

Extract the GPU name:

In [None]:
# Load CUDAdrv on the worker. `@eval` quotes the `using` statement so that it is
# evaluated at top level on the process that runs the closure.
@fetch @eval using CUDAdrv

# Look up device 0 on the worker and return its name to the master.
@fetch begin
    device = CuDevice(0)
    CUDAdrv.name(device)
end

## Benchmark matmul

In [None]:
# Macros used inside `@fetch begin ... end` (such as `@btime` in the benchmark
# cell) are expanded on the master process before the closure is sent to the
# worker, so BenchmarkTools must be loaded here as well.
# NOTE(review): assumes BenchmarkTools is installed in the master's environment.
using BenchmarkTools

# Load the GPU array package and BenchmarkTools on the worker.
@fetch @eval using CuArrays, BenchmarkTools

In [None]:
# Benchmark on the worker: host -> device copy, a 1000x1000 matrix product on
# the CPU and on the GPU, and device -> host copy. The block returns `nothing`;
# the timings are printed by `@btime`.
@fetch begin
    A, B = rand(1000, 1000), rand(1000, 1000)
    Agpu, Bgpu = CuArray(A), CuArray(B)

    println("Move array CPU -> GPU")
    @btime CuArray($A)

    println("A*B (cpu)")
    @btime $A * $B

    println("A*B (gpu)")
    # GPU operations are queued asynchronously: without waiting for the device,
    # the benchmark would mostly time the kernel launch, not the multiplication.
    # `CUDAdrv.synchronize()` blocks until the queued GPU work has finished
    # (CUDAdrv is loaded on the worker in the "Extract the GPU name" cell).
    @btime begin
        $Agpu * $Bgpu
        CUDAdrv.synchronize()
    end

    println("Move array GPU -> CPU")
    Cgpu = Agpu * Bgpu
    @btime Array($Cgpu)

    nothing
end

## Machine learning on the GPU

In [None]:
# Load Flux on the worker, where the model is built and trained.
@fetch @eval using Flux

In [None]:
# Load Flux on the master too: the trained model is fetched back to this
# process and called here, which presumably requires Flux's types locally.
using Flux

In [None]:
# Build a small classifier on the worker, move the model and the fake data to
# the GPU, run one training step there (the data set holds a single batch), and
# return the model converted back to CPU arrays.
m_trained = @fetch begin
    m = Chain(
        Dense(100, 10),
        Dense(10, 5),
        Dense(5, 2),
        softmax # normalize output neurons
    ) |> gpu

    data = rand(100, 100) |> gpu # fake data
    labels = fill(0.5, 2, 100) |> gpu # fake data

    # Mean squared error; `Flux.mse` already reduces to a scalar, so no extra
    # `sum` is needed around it.
    loss(x, y) = Flux.mse(m(x), y)

    opt = Descent(0.01) # or ADAM

    Flux.train!(loss, params(m), [(data, labels)], opt)

    m |> cpu
end

In [None]:
# Evaluate the fetched model locally (on the master, CPU) on random input with
# 100 features and 100 samples.
rand(100, 100) |> m_trained