# Install cuplyr on Google Colab

GPU-accelerated dplyr for R.

## Setup
1. **Runtime → Change runtime type**
2. Set **Runtime type = R**, **Hardware accelerator = T4 GPU**
3. Click **Save**, then **Run all**

First run installs everything and auto-restarts the kernel.  
After restart, click **Run all** again — the install is skipped.

In [None]:
# ── Install cuplyr (runs once, then skips) ────────────────────────
# Skip the whole install when cuplyr is already usable: its namespace must
# load (warnings silenced) and `cuplyr::has_gpu` must resolve to a function.
# The tryCatch turns a failed lookup into NULL, so the test evaluates to
# FALSE instead of raising an error. Otherwise the else-branch below runs the
# full installation.
if (suppressWarnings(requireNamespace('cuplyr', quietly = TRUE)) &&
    is.function(tryCatch(cuplyr::has_gpu, error = function(e) NULL))) {
  cat('\u2713 cuplyr already installed, skipping\n')
} else {

# GPU + CUDA
# Query the GPU through nvidia-smi. A command that cannot be run at all is
# turned into an empty result, and a non-zero exit status is treated as
# "no GPU" even if some text was printed, so the user always gets the
# actionable message below rather than a generic command error.
gpu_info <- tryCatch(
  suppressWarnings(
    system2('nvidia-smi',
            c('--query-gpu=name,driver_version,memory.total',
              '--format=csv,noheader'),
            stdout = TRUE)),
  error = function(e) character(0))
gpu_status <- attr(gpu_info, 'status')
if (length(gpu_info) == 0 || (!is.null(gpu_status) && gpu_status != 0)) {
  stop('No GPU. Runtime -> Change runtime type -> GPU')
}
cat('GPU:', gpu_info, '\n')

# Point CUDA_HOME at the first candidate prefix that ships include/cuda.h.
# When none matches, CUDA_HOME is left as it was.
cuda_candidates <- c('/usr/local/cuda', '/usr/local/cuda-12.8',
                     '/usr/local/cuda-12.4', '/usr/local/cuda-12')
cuda_found <- cuda_candidates[
  file.exists(file.path(cuda_candidates, 'include', 'cuda.h'))]
if (length(cuda_found) > 0) Sys.setenv(CUDA_HOME = cuda_found[1])
cat('CUDA:', Sys.getenv('CUDA_HOME'), '\n\n')

# mamba
# Bootstrap Miniforge under /opt/miniforge when its mamba binary is absent,
# then put the Miniforge bin directory at the front of PATH.
miniforge_dir <- '/opt/miniforge'
mamba <- file.path(miniforge_dir, 'bin', 'mamba')
if (!file.exists(mamba)) {
  cat('Installing Miniforge...\n')
  download.file(
    url = 'https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh',
    destfile = '/tmp/miniforge.sh',
    quiet = TRUE
  )
  # Run the downloaded installer silently; its output is discarded.
  system2('bash',
          args = c('/tmp/miniforge.sh', '-b', '-p', miniforge_dir),
          stdout = FALSE,
          stderr = FALSE)
}
# Abort when the installer did not produce the mamba binary.
stopifnot('mamba not found' = file.exists(mamba))
Sys.setenv(PATH = paste(dirname(mamba), Sys.getenv('PATH'), sep = ':'))
cat('mamba ready\n')

# RAPIDS
# Create the RAPIDS environment under /opt/rapids unless libcudf.so is already
# there. The mamba output is kept so that its tail can be shown when the
# install did not produce the library; otherwise a failure would surface only
# as the bare 'RAPIDS missing' error below.
rapids_dir <- '/opt/rapids'
rapids_channels <- c('-c', 'rapidsai', '-c', 'conda-forge', '-c', 'nvidia')
if (!file.exists(file.path(rapids_dir, 'lib', 'libcudf.so'))) {
  cat('Installing RAPIDS (2-3 min)...\n')
  mamba_log <- suppressWarnings(system2(
    'mamba',
    c('create', '-y', '-p', rapids_dir, rapids_channels,
      'libcudf=25.12', 'librmm=25.12', 'libkvikio=25.12', 'spdlog', 'fmt'),
    stdout = TRUE, stderr = TRUE))
  # Headers are needed to compile cuplyr; install the -dev packages when the
  # runtime packages did not bring them.
  if (!file.exists(file.path(rapids_dir, 'include', 'cudf', 'types.hpp'))) {
    mamba_log <- c(mamba_log, suppressWarnings(system2(
      'mamba',
      c('install', '-y', '-p', rapids_dir, rapids_channels,
        'libcudf-dev=25.12', 'librmm-dev=25.12', 'libkvikio-dev=25.12'),
      stdout = TRUE, stderr = TRUE)))
  }
  if (!file.exists(file.path(rapids_dir, 'lib', 'libcudf.so'))) {
    cat(tail(mamba_log, 30), sep = '\n')
  }
}
stopifnot('RAPIDS missing' = file.exists(file.path(rapids_dir, 'lib', 'libcudf.so')))
cat('RAPIDS ready\n')

# Replace system libstdc++ with RAPIDS version (ABI-compatible, just newer)
# Only done when the system copy does not contain the GLIBCXX_3.4.31 version
# string. Every step is checked before the system symlink is touched, and the
# symlink is swapped by renaming a freshly created link over it, so there is
# never a moment where libstdc++.so.6 is missing or points at a file that was
# not copied.
sys_lib <- '/usr/lib/x86_64-linux-gnu/libstdc++.so.6'
sys_versions <- system2('strings', sys_lib, stdout = TRUE)
if (!any(grepl('GLIBCXX_3\\.4\\.31', sys_versions))) {
  rapids_lib <- file.path(rapids_dir, 'lib', 'libstdc++.so.6')
  if (!file.exists(rapids_lib)) {
    stop('RAPIDS libstdc++ not found: ', rapids_lib)
  }
  # Resolve to the real versioned file; works for a symlink chain of any depth.
  rapids_real <- normalizePath(rapids_lib, mustWork = TRUE)
  fname <- basename(rapids_real)
  # A link named libstdc++.so.6 pointing at a file of the same name would be a
  # self-referencing symlink, so refuse that layout.
  if (identical(fname, basename(sys_lib))) {
    stop('Unexpected RAPIDS libstdc++ layout (no versioned file): ', rapids_real)
  }
  if (!file.copy(rapids_real, file.path(dirname(sys_lib), fname), overwrite = TRUE)) {
    stop('Could not copy ', rapids_real, ' into ', dirname(sys_lib))
  }
  new_link <- paste0(sys_lib, '.cuplyr-new')
  unlink(new_link)
  if (!file.symlink(fname, new_link) || !file.rename(new_link, sys_lib)) {
    unlink(new_link)
    stop('Could not repoint ', sys_lib, ' to ', fname)
  }
  system2('ldconfig', stdout = FALSE, stderr = FALSE)
}
cat('libstdc++ ready\n')

# Disable CUDA stubs
# Any libcuda.so / libcuda.so.1 inside the RAPIDS tree that is smaller than
# 1e6 bytes is renamed with a '.disabled' suffix. Paths are probed one at a
# time, in the order: both directories for libcuda.so, then both for
# libcuda.so.1.
stub_dirs <- c(file.path(rapids_dir, 'lib', 'stubs'), file.path(rapids_dir, 'lib'))
stub_paths <- c(file.path(stub_dirs, 'libcuda.so'),
                file.path(stub_dirs, 'libcuda.so.1'))
for (stub in stub_paths) {
  looks_like_stub <- file.exists(stub) && file.info(stub)$size < 1e6
  if (looks_like_stub) {
    file.rename(stub, paste0(stub, '.disabled'))
  }
}

# Find NVIDIA driver
# A directory qualifies when it holds a libcuda.so.1 of at least 1e6 bytes.
has_real_libcuda <- function(dir) {
  so <- file.path(dir, 'libcuda.so.1')
  file.exists(so) && file.info(so)$size >= 1e6
}
driver_lib <- Find(has_real_libcuda,
                   c('/usr/lib64-nvidia', '/usr/lib/x86_64-linux-gnu'))

# Fallback: take the directory of the first libcuda.so.1 entry that
# `ldconfig -p` reports.
if (is.null(driver_lib)) {
  cache_hits <- grep('libcuda\\.so\\.1.*=>',
                     system2('ldconfig', '-p', stdout = TRUE),
                     value = TRUE)
  if (length(cache_hits) > 0) {
    driver_lib <- dirname(trimws(sub('.* => ', '', cache_hits[1])))
  }
}
stopifnot('Driver not found' = !is.null(driver_lib))

# Configure environment (.Renviron persists across restart)
# The same search path (RAPIDS libs, driver dir, CUDA lib64) is applied in
# three places: this session's environment variables, the ld.so configuration,
# and ~/.Renviron.
cuda_home <- Sys.getenv('CUDA_HOME', '/usr/local/cuda')
lib_path <- paste(
  c(file.path(rapids_dir, 'lib'), driver_lib, file.path(cuda_home, 'lib64')),
  collapse = ':')

# Prepend lib_path to the current value of an environment variable.
with_lib_prefix <- function(var) paste0(lib_path, ':', Sys.getenv(var))
Sys.setenv(
  CONDA_PREFIX = rapids_dir,
  LD_LIBRARY_PATH = with_lib_prefix('LD_LIBRARY_PATH'),
  R_LD_LIBRARY_PATH = with_lib_prefix('R_LD_LIBRARY_PATH')
)

# Register the RAPIDS and driver directories with ld.so and refresh its cache.
writeLines(c(file.path(rapids_dir, 'lib'), driver_lib),
           con = '/etc/ld.so.conf.d/00-cuplyr-rapids.conf')
system2('ldconfig', stdout = FALSE, stderr = FALSE)

# Rewrite ~/.Renviron: keep existing lines except ones written by an earlier
# run of this cell, then append a fresh cuplyr section.
renviron <- path.expand('~/.Renviron')
old_lines <- character(0)
if (file.exists(renviron)) {
  old_lines <- grep('cuplyr|LD_LIBRARY_PATH=.*rapids',
                    readLines(renviron, warn = FALSE),
                    value = TRUE, invert = TRUE)
}
cuplyr_section <- c(
  '',
  '# cuplyr',
  sprintf('LD_LIBRARY_PATH="%s:${LD_LIBRARY_PATH}"', lib_path),
  sprintf('R_LD_LIBRARY_PATH="%s:${R_LD_LIBRARY_PATH}"', lib_path)
)
writeLines(c(old_lines, cuplyr_section), con = renviron)
cat('Environment ready\n')

# Compiler flags
# Write a user-level Makevars that sets the C++20 compiler, standard and flags
# (any existing ~/.R/Makevars is overwritten).
user_r_dir <- path.expand('~/.R')
dir.create(user_r_dir, showWarnings = FALSE)
writeLines(
  c('CXX20=g++', 'CXX20STD=-std=gnu++20', 'CXX20FLAGS=-O2 -fPIC'),
  con = file.path(user_r_dir, 'Makevars')
)

# Clone and build cuplyr
# Fetch a shallow copy of the 'install' branch unless a checkout with a
# DESCRIPTION file already exists. A partial directory left by an earlier
# attempt is removed first. The git output is captured and shown only when the
# clone did not produce a usable checkout, so the failure is reported here
# instead of as an unrelated error later in the build.
repo_dir <- '/content/cuplyr'
if (!file.exists(file.path(repo_dir, 'DESCRIPTION'))) {
  if (dir.exists(repo_dir)) unlink(repo_dir, recursive = TRUE)
  clone_log <- suppressWarnings(system2(
    'git',
    c('clone', '--depth', '1', '-b', 'install',
      'https://github.com/bbtheo/cuplyr.git', repo_dir),
    stdout = TRUE, stderr = TRUE))
  if (!file.exists(file.path(repo_dir, 'DESCRIPTION'))) {
    cat(tail(clone_log, 30), sep = '\n')
    stop('git clone failed: no DESCRIPTION in ', repo_dir)
  }
}

cat('Building cuplyr (2-3 min)...\n')
# The build runs inside the checkout. tryCatch(finally = ...) restores the
# previous working directory on every exit path, including a failed build.
old_wd <- getwd()
setwd(repo_dir)
tryCatch({
  # Run configure and keep its log; a non-zero exit is reported but does not
  # abort, matching the original best-effort behaviour.
  configure_log <- suppressWarnings(
    system2('./configure', stdout = TRUE, stderr = TRUE))
  configure_exit <- attr(configure_log, 'status')
  if (!is.null(configure_exit) && configure_exit != 0) {
    cat('configure exited with status', configure_exit, '\n')
    cat(tail(configure_log, 30), sep = '\n')
  }

  # Patch Makevars RUNPATH
  # Prefix the first PKG_LIBS line with rpath entries for the RAPIDS, driver
  # and CUDA library directories, then drop stale objects so they are rebuilt.
  if (file.exists('src/Makevars')) {
    mk <- readLines('src/Makevars', warn = FALSE)
    idx <- grep('^PKG_LIBS=', mk)
    if (length(idx) > 0) {
      existing_flags <- sub('^PKG_LIBS=', '', mk[idx[1]])
      mk[idx[1]] <- sprintf(
        'PKG_LIBS=-Wl,--enable-new-dtags -Wl,-rpath,%s/lib -Wl,-rpath,%s -Wl,-rpath,%s/lib64 %s',
        rapids_dir, driver_lib, cuda_home, existing_flags)
      writeLines(mk, 'src/Makevars')
    }
    unlink(list.files('src', pattern = '\\.(o|so)$', full.names = TRUE))
  }

  # system2() attaches a 'status' attribute only when the exit code is
  # non-zero, hence the is.null() guard.
  status <- system2('R', c('CMD', 'INSTALL', '.'), stdout = TRUE, stderr = TRUE)
  build_exit <- attr(status, 'status')
  if (!is.null(build_exit) && build_exit != 0) {
    cat(tail(status, 30), sep = '\n')
    stop('Build failed', call. = FALSE)
  }
}, finally = setwd(old_wd))
cat('cuplyr built\n\n')

# Restart R so it picks up the new system libstdc++
# The session is ended without saving the workspace; the user is told to
# re-run the notebook afterwards.
cat('Install complete. Restarting kernel...\n',
    'After restart, click Run all again.\n',
    sep = '')
quit(save = 'no')

} # end install

In [None]:
# Ensure environment is set (after restart, .Renviron provides LD_LIBRARY_PATH)
# CONDA_PREFIX is not written to .Renviron, so point it at the RAPIDS prefix
# here when it is empty and that directory exists.
if (Sys.getenv('CONDA_PREFIX') == '' && dir.exists('/opt/rapids')) {
  Sys.setenv(CONDA_PREFIX = '/opt/rapids')
}

# Attach cuplyr and report whether it sees a GPU.
library(cuplyr)
cat('GPU available:', has_gpu(), '\n')

In [None]:
# Demo 1: row filter, derived column, column selection and sorting.
# Keeps cars with mpg above 20, adds kpl = mpg * 0.425, keeps four columns and
# orders by mpg, highest first.
# NOTE(review): tbl_gpu() and collect() come from cuplyr; presumably they move
# the data frame to the GPU and bring the result back to R — confirm against
# the cuplyr documentation.
tbl_gpu(mtcars) |>
  filter(mpg > 20) |>
  mutate(kpl = mpg * 0.425) |>
  select(mpg, kpl, cyl, hp) |>
  arrange(desc(mpg)) |>
  collect()

In [None]:
# Demo 2: grouped aggregation.
# For each value of cyl, computes the row count and the means of mpg and hp,
# then orders the result by cyl.
tbl_gpu(mtcars) |>
  group_by(cyl) |>
  summarise(count = n(), avg_mpg = mean(mpg), avg_hp = mean(hp)) |>
  arrange(cyl) |>
  collect()

In [None]:
# Demo 3: the same kind of pipeline created with lazy = TRUE.
# Filters to mpg above 15, derives power_weight = hp / wt, averages it per cyl
# and orders by that average, highest first.
# NOTE(review): lazy = TRUE presumably defers execution until collect() —
# confirm against the cuplyr documentation.
tbl_gpu(mtcars, lazy = TRUE) |>
  filter(mpg > 15) |>
  mutate(power_weight = hp / wt) |>
  group_by(cyl) |>
  summarise(avg_pw = mean(power_weight)) |>
  arrange(desc(avg_pw)) |>
  collect()