# Install cuplyr on Google Colab

GPU-accelerated dplyr for R.

## Setup
1. **Runtime → Change runtime type**
2. Set **Runtime type = R**, **Hardware accelerator = T4 GPU**
3. Click **Save**, then **Run all**

## Step 1: Pre-flight

In [None]:
# Pre-flight: confirm a GPU is attached and locate the CUDA toolkit headers.
#
# An empty-output check alone is not enough: nvidia-smi can exit non-zero
# while still printing a diagnostic line, and system2() may raise an R error
# when the binary cannot be run at all. Both cases are reported as "no GPU".
gpu_info <- tryCatch(
  system2(
    'nvidia-smi',
    c('--query-gpu=name,driver_version,memory.total', '--format=csv,noheader'),
    stdout = TRUE
  ),
  error = function(e) character(0)
)
# system2(stdout = TRUE) only attaches a "status" attribute on non-zero exit.
gpu_status <- attr(gpu_info, 'status')
if (length(gpu_info) == 0 || (!is.null(gpu_status) && gpu_status != 0)) {
  # Show whatever nvidia-smi printed so the failure is diagnosable.
  if (length(gpu_info) > 0) cat(gpu_info, sep = '\n')
  stop('No GPU. Runtime → Change runtime type → GPU')
}
cat('GPU:', gpu_info, '\n')

# Use the first candidate directory that actually contains cuda.h.
cuda_candidates <- c('/usr/local/cuda', '/usr/local/cuda-12.8',
                     '/usr/local/cuda-12.4', '/usr/local/cuda-12')
has_cuda_header <- file.exists(file.path(cuda_candidates, 'include', 'cuda.h'))
if (any(has_cuda_header)) {
  Sys.setenv(CUDA_HOME = cuda_candidates[which(has_cuda_header)[1]])
} else if (!nzchar(Sys.getenv('CUDA_HOME'))) {
  # Previously this case silently printed an empty CUDA_HOME.
  warning('cuda.h not found in the usual locations; CUDA_HOME was not set')
}
cat('CUDA:', Sys.getenv('CUDA_HOME'), '\n')

## Step 2: Install mamba

In [None]:
# Install Miniforge (which provides mamba) into /opt/miniforge unless mamba is
# already there, then make sure its bin/ directory is on PATH.
miniforge_dir <- '/opt/miniforge'
miniforge_bin <- file.path(miniforge_dir, 'bin')
mamba <- file.path(miniforge_bin, 'mamba')

if (!file.exists(mamba)) {
  cat('Installing Miniforge...\n')
  download.file(
    'https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh',
    '/tmp/miniforge.sh', quiet = TRUE
  )
  # Capture the installer output rather than discarding it, so a failed
  # install shows its own error instead of only "mamba not found" below.
  install_log <- system2('bash', c('/tmp/miniforge.sh', '-b', '-p', miniforge_dir),
                         stdout = TRUE, stderr = TRUE)
  install_status <- attr(install_log, 'status')
  if (!is.null(install_status) && install_status != 0) {
    cat(tail(install_log, 20), sep = '\n')
  }
}

stopifnot('mamba not found' = file.exists(mamba))

# Prepend only when absent so re-running the cell does not keep growing PATH.
path_entries <- strsplit(Sys.getenv('PATH'), ':', fixed = TRUE)[[1]]
if (!(miniforge_bin %in% path_entries)) {
  Sys.setenv(PATH = paste0(miniforge_bin, ':', Sys.getenv('PATH')))
}
cat('✓ mamba ready\n')

## Step 3: Install RAPIDS

In [None]:
# Install the RAPIDS C++ libraries (libcudf, librmm, libkvikio) into a
# dedicated conda prefix, skipping the work when both the shared library and
# the development headers are already present.
rapids_dir <- '/opt/rapids'
rapids_so <- file.path(rapids_dir, 'lib', 'libcudf.so')
rapids_header <- file.path(rapids_dir, 'include', 'cudf', 'types.hpp')
rapids_ok <- file.exists(rapids_so) && file.exists(rapids_header)

# Run mamba with combined stdout/stderr captured; on a non-zero exit print the
# tail of the log so the cause is visible. The file checks below still decide
# whether the installation counts as successful.
run_mamba <- function(args) {
  log <- system2('mamba', args, stdout = TRUE, stderr = TRUE)
  exit_code <- attr(log, 'status')
  if (!is.null(exit_code) && exit_code != 0) {
    cat(tail(log, 30), sep = '\n')
    cat('mamba exited with status', exit_code, '\n')
  }
  invisible(log)
}

if (rapids_ok) {
  cat('✓ RAPIDS already installed\n')
} else {
  cat('Installing RAPIDS (2-3 min)...\n')
  rapids_channels <- c('-c', 'rapidsai', '-c', 'conda-forge', '-c', 'nvidia')
  run_mamba(c(
    'create', '-y', '-p', rapids_dir,
    rapids_channels,
    'libcudf=25.12', 'librmm=25.12', 'libkvikio=25.12', 'spdlog', 'fmt'
  ))

  # Install dev headers if the runtime packages did not bring them along
  if (!file.exists(rapids_header)) {
    run_mamba(c(
      'install', '-y', '-p', rapids_dir,
      rapids_channels,
      'libcudf-dev=25.12', 'librmm-dev=25.12', 'libkvikio-dev=25.12'
    ))
  }

  stopifnot('libcudf.so missing' = file.exists(rapids_so))
  stopifnot('cudf headers missing' = file.exists(rapids_header))
  cat('✓ RAPIDS installed\n')
}

## Step 4: Fix system libstdc++

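RAPIDS needs `GLIBCXX_3.4.31`, which the Colab system libstdc++ may not provide. If the symbol version is missing, we replace the system library with the copy shipped with RAPIDS. libstdc++ is backward ABI-compatible, so the newer version is a drop-in replacement for existing binaries.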

In [None]:
# Make the system libstdc++ provide GLIBCXX_3.4.31 by swapping in the copy
# shipped with RAPIDS. Every step is checked before the system symlink is
# touched, so a failed copy can no longer leave the system without libstdc++.
sys_lib <- '/usr/lib/x86_64-linux-gnu/libstdc++.so.6'
rapids_lib <- file.path(rapids_dir, 'lib', 'libstdc++.so.6')

# TRUE when the given library file mentions the GLIBCXX_3.4.31 symbol version.
has_glibcxx <- function(lib) {
  any(grepl('GLIBCXX_3\\.4\\.31', system2('strings', shQuote(lib), stdout = TRUE)))
}

# Check if system already has GLIBCXX_3.4.31
already_ok <- has_glibcxx(sys_lib)

if (already_ok) {
  cat('✓ System libstdc++ already has GLIBCXX_3.4.31\n')
} else {
  cat('System libstdc++ is missing GLIBCXX_3.4.31\n')
  cat('Replacing with RAPIDS version (ABI-compatible)...\n')

  # Resolve the whole symlink chain to the real file. Unlike Sys.readlink(),
  # this also works when the path is a regular file, and it errors if missing.
  rapids_real <- normalizePath(rapids_lib, mustWork = TRUE)
  stopifnot('RAPIDS libstdc++ lacks GLIBCXX_3.4.31' = has_glibcxx(rapids_real))

  # Copy the actual file next to the system library
  rapids_filename <- basename(rapids_real)
  sys_dir <- dirname(sys_lib)
  dest <- file.path(sys_dir, rapids_filename)

  copied <- file.copy(rapids_real, dest, overwrite = TRUE)
  stopifnot('Copying RAPIDS libstdc++ failed' = isTRUE(copied))

  # Update the symlink. A temporary link is renamed over the old one so the
  # system path never disappears in between. If the copied file already has
  # the system link's name, the copy wrote through the link and no new link
  # is needed (a link named after itself would be a loop).
  if (rapids_filename != basename(sys_lib)) {
    tmp_link <- paste0(sys_lib, '.cuplyr-tmp')
    unlink(tmp_link)
    stopifnot('Creating replacement symlink failed' =
                isTRUE(file.symlink(rapids_filename, tmp_link)))
    stopifnot('Activating replacement symlink failed' =
                isTRUE(file.rename(tmp_link, sys_lib)))
  }

  # Update linker cache
  system2('ldconfig', stdout = FALSE, stderr = FALSE)

  # Verify
  stopifnot('Replacement failed' = has_glibcxx(sys_lib))
  cat('✓ System libstdc++ now has GLIBCXX_3.4.31\n')
}

## Step 5: Disable CUDA stubs

In [None]:
# Rename small libcuda stub libraries inside the RAPIDS prefix by appending
# '.disabled' to their names. Files of 1 MB or more are treated as the real
# driver and left alone.
stub_dirs <- c(file.path(rapids_dir, 'lib', 'stubs'), file.path(rapids_dir, 'lib'))
for (stub_name in c('libcuda.so', 'libcuda.so.1')) {
  for (loc in stub_dirs) {
    stub <- file.path(loc, stub_name)
    if (!file.exists(stub)) next
    stub_size <- file.info(stub)$size
    if (is.na(stub_size) || stub_size >= 1e6) next  # Real driver is 30MB+
    # Report success only when the rename actually happened; file.rename()
    # returns FALSE (with its own warning) on failure.
    if (isTRUE(file.rename(stub, paste0(stub, '.disabled')))) {
      cat('Disabled stub:', stub, '\n')
    } else {
      cat('Could not disable stub:', stub, '\n')
    }
  }
}
cat('✓ Stubs handled\n')

## Step 6: Configure environment

In [None]:
# Locate the real NVIDIA driver library, then expose the RAPIDS, driver and
# CUDA library directories through environment variables, the system linker
# cache, and the user-level R compiler flags.

# TRUE when `path` exists and is at least 1 MB (stubs are much smaller).
# isTRUE() guards against file.info() returning an NA size.
is_real_driver <- function(path) {
  file.exists(path) && isTRUE(file.info(path)$size >= 1e6)
}

# Find real NVIDIA driver
driver_lib <- NULL
for (p in c('/usr/lib64-nvidia', '/usr/lib/x86_64-linux-gnu')) {
  if (is_real_driver(file.path(p, 'libcuda.so.1'))) {
    driver_lib <- p
    break
  }
}

if (is.null(driver_lib)) {
  # Fall back to the linker cache listing
  ld_lines <- system2('ldconfig', '-p', stdout = TRUE)
  hit <- grep('libcuda\\.so\\.1\\b.*=>\\s*/', ld_lines, value = TRUE)[1]
  if (!is.na(hit)) {
    path <- trimws(sub('.* => ', '', hit))
    if (is_real_driver(path)) {
      driver_lib <- dirname(path)
    }
  }
}

stopifnot('NVIDIA driver not found' = !is.null(driver_lib))
cat('Driver:', driver_lib, '\n')

# Set library paths
cuda_home <- Sys.getenv('CUDA_HOME', '/usr/local/cuda')
lib_dirs <- c(file.path(rapids_dir, 'lib'), driver_lib,
              file.path(cuda_home, 'lib64'))
lib_path <- paste(lib_dirs, collapse = ':')

# Put `dirs` in front of the current value of environment variable `var`.
# Empty components are dropped: a leading or trailing ':' in a library search
# path means "current directory", which an unset variable would otherwise
# introduce. Duplicates are removed so re-running the cell is harmless.
prepend_dirs <- function(dirs, var) {
  current <- strsplit(Sys.getenv(var), ':', fixed = TRUE)[[1]]
  paste(unique(c(dirs, current[nzchar(current)])), collapse = ':')
}

Sys.setenv(
  CONDA_PREFIX = rapids_dir,
  LD_LIBRARY_PATH = prepend_dirs(lib_dirs, 'LD_LIBRARY_PATH'),
  R_LD_LIBRARY_PATH = prepend_dirs(lib_dirs, 'R_LD_LIBRARY_PATH')
)

# ldconfig: register the RAPIDS and driver directories system-wide
writeLines(c(file.path(rapids_dir, 'lib'), driver_lib),
           '/etc/ld.so.conf.d/00-cuplyr-rapids.conf')
system2('ldconfig', stdout = FALSE, stderr = FALSE)

# Compiler flags (note: this overwrites any existing ~/.R/Makevars)
dir.create(path.expand('~/.R'), showWarnings = FALSE)
writeLines(c('CXX20=g++', 'CXX20STD=-std=gnu++20', 'CXX20FLAGS=-O2 -fPIC'),
           path.expand('~/.R/Makevars'))

cat('✓ Environment configured\n')

## Step 7: Build cuplyr

In [None]:
# Clone (if needed), configure, patch and install cuplyr from source.
# The working directory is changed for the build and is always restored,
# including when configure or the build stops with an error.
repo_dir <- '/content/cuplyr'

# Clone if needed
if (!file.exists(file.path(repo_dir, 'DESCRIPTION'))) {
  if (dir.exists(repo_dir)) unlink(repo_dir, recursive = TRUE)
  status <- system2('git', c('clone', '--depth', '1', '-b', 'install',
                             'https://github.com/bbtheo/cuplyr.git', repo_dir),
                    stdout = FALSE, stderr = FALSE)
  stopifnot('Clone failed' = status == 0)
}

cat('Configuring...\n')
old_wd <- getwd()
setwd(repo_dir)

tryCatch({
  conf_log <- system2('./configure', stdout = TRUE, stderr = TRUE)
  conf_status <- attr(conf_log, 'status')
  if (!is.null(conf_status) && conf_status != 0) {
    cat(tail(conf_log, 20), sep = '\n')
    stop('./configure failed')
  }

  # Patch Makevars for cloud RUNPATH
  if (file.exists('src/Makevars')) {
    mk <- readLines('src/Makevars', warn = FALSE)
    rpath_flags <- sprintf(
      '-Wl,--enable-new-dtags -Wl,-rpath,%s/lib -Wl,-rpath,%s',
      rapids_dir, driver_lib
    )
    # Accept optional whitespace before '=' ("PKG_LIBS = ...").
    pkg_libs_re <- '^PKG_LIBS[[:space:]]*='
    idx <- grep(pkg_libs_re, mk)
    if (length(idx) == 0) {
      warning('No PKG_LIBS line found in src/Makevars; RUNPATH not patched')
    } else if (!grepl(rpath_flags, mk[idx[1]], fixed = TRUE)) {
      # Skip when already patched, so a re-run does not stack duplicate flags.
      existing <- trimws(sub(pkg_libs_re, '', mk[idx[1]]))
      mk[idx[1]] <- paste0('PKG_LIBS=', rpath_flags, ' ', existing)
      writeLines(mk, 'src/Makevars')
    }
    # Clean stale objects
    unlink(list.files('src', pattern = '\\.(o|so)$', full.names = TRUE))
  }

  cat('Building (2-3 min)...\n')
  # Invoke the R binary of the running session rather than whichever "R"
  # happens to be first on PATH.
  r_bin <- file.path(R.home('bin'), 'R')
  build_log <- system2(r_bin, c('CMD', 'INSTALL', '.'), stdout = TRUE, stderr = TRUE)
  build_status <- attr(build_log, 'status')
  if (!is.null(build_status) && build_status != 0) {
    cat(tail(build_log, 30), sep = '\n')
    stop('Build failed')
  }
}, finally = setwd(old_wd))

cat('✓ cuplyr built and installed\n')

## Step 8: Verify build

In [None]:
# Inspect the installed cuplyr shared object: print its RPATH/RUNPATH entries
# and how ldd resolves libstdc++ and libcudf. The success line is printed only
# when ldd reports no unresolved dependency.
so <- file.path(.libPaths()[1], 'cuplyr', 'libs', 'cuplyr.so')
stopifnot('cuplyr.so not found' = file.exists(so))

# Check RUNPATH
rpath <- system2('readelf', c('-d', so), stdout = TRUE)
rpath_lines <- grep('RPATH|RUNPATH', rpath, value = TRUE)
cat('RUNPATH:', rpath_lines, '\n')

# Check ldd resolution
ldd_out <- system2('ldd', so, stdout = TRUE)
key_libs <- ldd_out[grepl('libstdc\\+\\+|libcudf', ldd_out)]
cat(key_libs, sep = '\n')

# ldd marks dependencies it cannot locate with "not found".
unresolved <- grep('not found', ldd_out, fixed = TRUE, value = TRUE)
if (length(unresolved) > 0) {
  cat('\nUnresolved libraries:\n')
  cat(unresolved, sep = '\n')
  warning('cuplyr.so has unresolved shared-library dependencies')
} else {
  cat('\n✓ Build looks good\n')
}

## Step 9: Load cuplyr

In [None]:
# Attach the package. library() raises an error if it cannot be loaded, so
# the confirmation below is printed only on success.
library('cuplyr')
cat('✓ cuplyr loaded\n')

## Try it

In [None]:
# Filter rows, add a derived column, keep four columns, sort by mpg
# descending, then collect() the result. One verb per named step.
cars_gpu <- tbl_gpu(mtcars)
cars_over_20 <- filter(cars_gpu, mpg > 20)
cars_with_kpl <- mutate(cars_over_20, kpl = mpg * 0.425)
cars_selected <- select(cars_with_kpl, mpg, kpl, cyl, hp)
cars_sorted <- arrange(cars_selected, desc(mpg))
collect(cars_sorted)

In [None]:
# Group by cylinder count, compute per-group count and means, order by cyl,
# then collect() the result. One verb per named step.
by_cyl <- group_by(tbl_gpu(mtcars), cyl)
cyl_stats <- summarise(by_cyl, count = n(), avg_mpg = mean(mpg), avg_hp = mean(hp))
cyl_stats_sorted <- arrange(cyl_stats, cyl)
collect(cyl_stats_sorted)

In [None]:
# Same style of pipeline, built on a table created with lazy = TRUE:
# filter, derive hp / wt, average it per cyl, sort descending, then collect().
lazy_cars <- tbl_gpu(mtcars, lazy = TRUE)
lazy_over_15 <- filter(lazy_cars, mpg > 15)
lazy_with_pw <- mutate(lazy_over_15, power_weight = hp / wt)
lazy_by_cyl <- group_by(lazy_with_pw, cyl)
lazy_avg_pw <- summarise(lazy_by_cyl, avg_pw = mean(power_weight))
lazy_sorted <- arrange(lazy_avg_pw, desc(avg_pw))
collect(lazy_sorted)