<a href="https://colab.research.google.com/github/carlogalli/colab-gpu/blob/main/starter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installation

In [1]:
# Installation cell
%%capture
%%shell
# Stop at the first failing command so a broken download or install is not
# silently followed by 'Done'.
set -e
# Install Julia only when no `julia` executable is already on PATH.
# Both stdout and stderr of the probe are discarded (the original `3>&1`
# redirected an unused descriptor instead of stderr).
if ! command -v julia > /dev/null 2>&1
then
    # Fetch the Julia 1.7.2 Linux x86_64 tarball quietly into /tmp.
    wget -q 'https://julialang-s3.julialang.org/bin/linux/x64/1.7/julia-1.7.2-linux-x86_64.tar.gz' \
        -O /tmp/julia.tar.gz
    # Unpack into /usr/local, dropping the archive's top-level directory.
    tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1
    rm /tmp/julia.tar.gz
fi
# Add the IJulia package and precompile.
julia -e 'using Pkg; pkg"add IJulia; precompile;"'
echo 'Done'

After you run the first cell (the cell directly above this text), go to Colab's menu bar, open **Edit**, and choose **Notebook settings** from the drop-down menu. Select *Julia 1.7* under Runtime type. You can also select your preferred hardware acceleration (defaults to GPU).

In [1]:
VERSION   # check and print Julia version

v"1.7.2"

In [2]:
# Install every package this notebook loads (network access required).
using Pkg
Pkg.add([
    "CUDA",
    "Random",
    "Distributions",
    "Printf",
    "PyPlot",
    "PrettyTables",
    "Adapt",
    "DataFrames",
    "CSV",
    "Interpolations",
])

# Set before CUDA is loaded below. NOTE(review): presumably this asks CUDA.jl
# not to use its BinaryBuilder-provided toolkit — confirm that the installed
# CUDA.jl version still reads this variable.
ENV["JULIA_CUDA_USE_BINARYBUILDER"] = false

# Load the packages in the same order as before.
using Random, Distributions, CUDA, Printf, PyPlot,
      PrettyTables, Adapt, DataFrames, CSV, Interpolations

"""
    print_gpu_properties()

Print a short report to standard output for every device yielded by
`CUDA.devices()`: device name, compute capability, clock rate, whether copy
overlap and the kernel execution timeout are enabled, the multiprocessor
count, the maximum blocks per multiprocessor and the maximum threads per
block. The report for each device ends with a 1×4 row holding the last three
values followed by their product.

Returns `nothing`.
"""
function print_gpu_properties()
    for (idx, dev) in enumerate(CUDA.devices())
        # Query a single driver attribute of the current device.
        query(attr) = CUDA.attribute(dev, attr)
        # Render a numeric flag as text: any positive value counts as "enabled".
        onoff(flag) = flag > 0 ? "enabled" : "disabled"

        println("*** General properties for device $idx ***")

        dev_name = CUDA.name(dev)
        println("Device name: $(dev_name)")

        cc_major = query(CUDA.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR)
        cc_minor = query(CUDA.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
        println("Compute capabilities: $(cc_major).$(cc_minor)")

        clock = query(CUDA.CU_DEVICE_ATTRIBUTE_CLOCK_RATE)
        println("Clock rate: $(clock)")

        overlap = query(CUDA.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP)
        println("Device copy overlap: ", onoff(overlap))

        timeout = query(CUDA.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT)
        println("Kernel execution timeout: ", onoff(timeout))

        # Other limits that can be queried the same way:
        # CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X.
        mp_count = query(CUDA.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT)
        println("Number of multiprocessors: $(mp_count)")

        blocks_per_mp = query(CUDA.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR)
        println("Max blocks per MP: $(blocks_per_mp)")

        threads_per_block = query(CUDA.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK)
        println("Max threads per block: $(threads_per_block)")

        # Summary row: the three limits above and their product.
        product = mp_count * blocks_per_mp * threads_per_block
        println(hcat(mp_count, blocks_per_mp, threads_per_block, product))
    end
end
# Print the hardware report for the GPU(s) visible to this session.
print_gpu_properties()
# Recorded timings. NOTE(review): "falseENV option" presumably refers to the
# ENV["JULIA_CUDA_USE_BINARYBUILDER"] = false line above, and the timings
# presumably cover this whole cell — confirm.
# with the falseENV option it takes 117.357304 seconds (35.94 M allocations: 2.301 GiB, 1.29% gc time, 11.71% compilation time)
# without the falseENV option it takes  124.465413 seconds (39.74 M allocations: 2.537 GiB, 1.27% gc time, 12.65% compilation time)


#Pkg.add("CpuId")
#using CpuId
#cpuinfo()

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m GPUArraysCore ─────────────── v0.1.5
[32m[1m   Installed[22m[39m IrrationalConstants ───────── v0.2.2
[32m[1m   Installed[22m[39m Scratch ───────────────────── v1.2.0
[32m[1m   Installed[22m[39m Adapt ─────────────────────── v3.6.2
[32m[1m   Installed[22m[39m ColorTypes ────────────────── v0.11.4
[32m[1m   Installed[22m[39m OffsetArrays ──────────────── v1.12.10
[32m[1m   Installed[22m[39m DualNumbers ───────────────── v0.6.8
[32m[1m   Installed[22m[39m Rmath ─────────────────────── v0.7.1
[32m[1m   Installed[22m[39m CUDA_Driver_jll ───────────── v0.5.0+1
[32m[1m   Installed[22m[39m TableTraits ───────────────── v1.0.1
[32m[1m   Installed[22m[39m StatsFuns ─────────────────── v1.3.0
[32m[1m   Installed[22m[39m HypergeometricFunctions ───── v0.3.23
[32m[1m   Installed[22m[39m Cal

*** General properties for device 1 ***
Device name: Tesla T4
Compute capabilities: 7.5
Clock rate: 1590000
Device copy overlap: enabled
Kernel execution timeout: disabled
Number of multiprocessors: 40
Max blocks per MP: 16
Max threads per block: 1024
Int32[40 16 1024 655360]


[92m+ DataAPI v1.15.0[39m
 [90m [a93c6f00] [39m[92m+ DataFrames v1.6.1[39m
 [90m [864edb3b] [39m[92m+ DataStructures v0.18.15[39m
 [90m [e2d170a0] [39m[92m+ DataValueInterfaces v1.0.0[39m
 [90m [b429d917] [39m[92m+ DensityInterface v0.4.0[39m
 [90m [31c24e10] [39m[92m+ Distributions v0.25.99[39m
 [90m [ffbed154] [39m[92m+ DocStringExtensions v0.9.3[39m
 [90m [fa6b7ba4] [39m[92m+ DualNumbers v0.6.8[39m
 [90m [e2ba6199] [39m[92m+ ExprTools v0.1.10[39m
 [90m [48062228] [39m[92m+ FilePathsBase v0.9.20[39m
 [90m [1a297f60] [39m[92m+ FillArrays v1.5.0[39m
 [90m [53c48c17] [39m[92m+ FixedPointNumbers v0.8.4[39m
 [90m [0c68f7d7] [39m[92m+ GPUArrays v8.8.1[39m
 [90m [46192b85] [39m[92m+ GPUArraysCore v0.1.5[39m
 [90m [61eb1bfa] [39m[92m+ GPUCompiler v0.21.4[39m
 [90m [34004b35] [39m[92m+ HypergeometricFunctions v0.3.23[39m
 [90m [842dd82b] [39m[92m+ InlineStrings v1.4.0[39m
 [90m [a98d9a8b] [39m[92m+ Interpolations v0.14.7[39