# Install cuplyr on Google Colab (Native R)

This notebook installs **cuplyr** for native use in Colab's R runtime.

## Before running:
1. **Runtime -> Change runtime type**
2. Set **Runtime type = R**
3. Set **Hardware accelerator = T4 GPU** (or any available GPU)
4. Click **Save**

After installation, you can use cuplyr directly in any R cell!

## Step 1: Verify GPU and CUDA

In [None]:
# Report the R runtime, then fail fast unless a GPU and a CUDA toolkit are visible.
cat('R version:', R.version.string, '\n\n')

# Check GPU
# A missing nvidia-smi binary or a non-zero exit status counts as "no GPU";
# inspecting only the first output line would let messages such as
# "No devices were found" slip through.
gpu_check <- tryCatch(
  suppressWarnings(system('nvidia-smi -L', intern = TRUE)),
  error = function(e) character(0)
)
gpu_status <- attr(gpu_check, 'status')  # only set when the command exited non-zero
gpu_failed <- length(gpu_check) == 0 ||
  (!is.null(gpu_status) && gpu_status != 0) ||
  grepl('error|fail', gpu_check[1], ignore.case = TRUE)
if (gpu_failed) {
  stop('No GPU detected! Go to Runtime -> Change runtime type -> GPU')
}
cat('GPU:', gpu_check[1], '\n\n')

# Check CUDA
# Same candidate list as the configure cell; the first directory that
# contains include/cuda.h wins.
cuda_paths <- c('/usr/local/cuda', '/usr/local/cuda-12.8', '/usr/local/cuda-12.4', '/usr/local/cuda-12')
cuda_hits <- cuda_paths[file.exists(file.path(cuda_paths, 'include', 'cuda.h'))]
cuda_home <- if (length(cuda_hits) > 0) cuda_hits[1] else NULL

if (is.null(cuda_home)) {
  stop('CUDA not found! Expected at /usr/local/cuda')
}

cat('CUDA found at:', cuda_home, '\n')
system(sprintf('%s/bin/nvcc --version | grep release', cuda_home))

## Step 2: Install RAPIDS libraries

In [None]:
# Install miniforge if needed
# The mamba executable under miniforge_dir is the marker for "already installed".
miniforge_dir <- '/opt/miniforge'
mamba <- file.path(miniforge_dir, 'bin', 'mamba')

if (!file.exists(mamba)) {
  cat('Installing Miniforge (fast conda)...\n')
  installer <- '/tmp/miniforge.sh'

  # Stop on a failed download: wget -O still creates the (empty) output file,
  # so running it afterwards would "succeed" without installing anything.
  download_status <- system(
    sprintf(
      'wget -q https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -O %s',
      installer
    ),
    intern = FALSE
  )
  if (download_status != 0) {
    stop('Miniforge download failed (wget exit status ', download_status, ')')
  }

  # -b: batch (non-interactive) mode, -p: install prefix
  install_status <- system(sprintf('bash %s -b -p %s 2>&1', installer, miniforge_dir), intern = FALSE)
  if (install_status != 0) {
    cat('Miniforge installer exited with status', install_status, '\n')
  }
}

# Final gate: whatever happened above, mamba must exist now
if (!file.exists(mamba)) stop('Miniforge installation failed')
cat('Mamba ready at:', mamba, '\n')

In [None]:
# Install RAPIDS with development headers needed for C++ builds
# The environment is (re)created only when no RMM per_device_resource header
# is present under rapids_env.
rapids_env <- '/opt/rapids'
mamba <- '/opt/miniforge/bin/mamba'

# RMM layout differs by version; accept either header location
legacy_rmm_header <- file.path(rapids_env, 'include', 'rmm', 'mr', 'per_device_resource.hpp')
device_rmm_header <- file.path(rapids_env, 'include', 'rmm', 'mr', 'device', 'per_device_resource.hpp')
# The cuDF join header likewise has two possible locations
nested_join_header <- file.path(rapids_env, 'include', 'cudf', 'join', 'join.hpp')
flat_join_header <- file.path(rapids_env, 'include', 'cudf', 'join.hpp')
rmm_header_candidates <- c(legacy_rmm_header, device_rmm_header)
join_header_candidates <- c(nested_join_header, flat_join_header)

if (!any(file.exists(rmm_header_candidates))) {
  # Fail with a clear message instead of a shell "not found" error
  if (!file.exists(mamba)) {
    stop('mamba not found at ', mamba, ' - run the Miniforge cell first')
  }

  cat('Installing RAPIDS libraries with development headers...\n')
  cat('This takes 3-5 minutes on first run.\n\n')

  # Remove old incomplete install (unlink avoids building an unquoted shell command)
  if (dir.exists(rapids_env)) {
    unlink(rapids_env, recursive = TRUE, force = TRUE)
  }

  # Install C++ RAPIDS libs (not the Python rmm package)
  cmd <- sprintf(
    '%s create -y -p %s -c rapidsai -c conda-forge -c nvidia libcudf=25.02 librmm=25.02 libkvikio=25.02 spdlog fmt 2>&1',
    mamba, rapids_env
  )

  cat('Running:', cmd, '\n\n')
  # With intern = FALSE, system() returns the command's exit status
  result <- system(cmd, intern = FALSE)
  if (result != 0) stop('RAPIDS installation failed')
}

# CRITICAL: Disable CUDA stubs that ship with RAPIDS
# These stubs allow compilation but return 0 devices at runtime
cat('Disabling RAPIDS CUDA stubs (if present)...\n')
stub_locations <- c(
  file.path(rapids_env, 'lib', 'stubs'),
  file.path(rapids_env, 'lib')
)

# Candidate files, directory by directory, libcuda.so before libcuda.so.1
stub_candidates <- unlist(lapply(
  stub_locations,
  function(loc) file.path(loc, c('libcuda.so', 'libcuda.so.1'))
))

for (stub_path in stub_candidates) {
  disabled_path <- paste0(stub_path, '.disabled')

  # Nothing to do if the file is absent or a disabled copy already exists
  if (!file.exists(stub_path) || file.exists(disabled_path)) next

  # Size heuristic: stubs are small, real driver is ~30MB+
  stub_size <- file.info(stub_path)$size
  if (is.na(stub_size) || stub_size >= 1000000) next

  # Best effort: a failed rename is tolerated and simply not reported
  ok <- tryCatch(file.rename(stub_path, disabled_path), error = function(e) FALSE)
  if (isTRUE(ok)) {
    cat('  Disabled stub:', stub_path, '\n')
  }
}

# If only the new nested header exists, create a compat symlink for legacy includes
if (!file.exists(legacy_rmm_header) && file.exists(device_rmm_header)) {
  cat('Creating compatibility symlink for rmm/mr/per_device_resource.hpp...\n')
  linked <- suppressWarnings(file.symlink(device_rmm_header, legacy_rmm_header))
  # Report a failed link here: the checks below still pass on the nested
  # header alone, so otherwise the failure would go unnoticed.
  if (!isTRUE(linked)) {
    cat('  WARNING: could not create symlink at', legacy_rmm_header, '\n')
  }
}

# Verify all required headers exist
required_headers <- c(
  file.path(rapids_env, 'include', 'cudf', 'types.hpp'),
  file.path(rapids_env, 'include', 'rmm', 'device_buffer.hpp')
)

# List every missing header at once rather than stopping at the first
missing_headers <- required_headers[!file.exists(required_headers)]
if (length(missing_headers) > 0) {
  cat(paste('Missing header:', missing_headers), sep = '\n')
  stop('Required header not found: ', paste(basename(missing_headers), collapse = ', '))
}

# Either RMM header location is acceptable
if (!any(file.exists(rmm_header_candidates))) {
  cat('Missing one of:\n')
  cat(paste(' -', rmm_header_candidates), sep = '\n')
  cat('\nAvailable headers in rmm/mr/:\n')
  system(sprintf('ls -la %s/include/rmm/mr/ 2>/dev/null | head -20', rapids_env))
  stop('Required header not found: per_device_resource.hpp')
}

# Either cuDF join header location is acceptable
if (!any(file.exists(join_header_candidates))) {
  cat('Missing one of:\n')
  cat(paste(' -', join_header_candidates), sep = '\n')
  cat('\nAvailable headers in cudf/join*:\n')
  system(sprintf('ls -la %s/include/cudf/join* 2>/dev/null', rapids_env))
  stop('Required header not found: cudf join header')
}

cat('\nRAPIDS installed successfully!\n')
cat('Headers:', file.path(rapids_env, 'include'), '\n')
cat('Libraries:', file.path(rapids_env, 'lib'), '\n')

## Step 3: Clone and build cuplyr

In [None]:
# Clone cuplyr
# Fetches a fresh shallow copy of the sources into repo_dir.
repo_dir <- '/content/cuplyr'

# Start from a clean slate: drop any checkout left behind by an earlier run
if (dir.exists(repo_dir)) unlink(repo_dir, recursive = TRUE)

clone_cmd <- sprintf(
  'git clone --depth 1 https://github.com/bbtheo/cuplyr.git %s 2>&1',
  repo_dir
)
result <- system(clone_cmd, intern = FALSE)  # exit status of git
if (result != 0) {
  stop('Git clone failed')
}

cat('Cloned cuplyr\n')

# Show which commit was checked out
log_cmd <- sprintf('cd %s && git log -1 --oneline', repo_dir)
system(log_cmd)

In [None]:
# Patch source for RAPIDS header/API compatibility (Colab images vary)
# Rewrites src/ops_join.cpp in place; the include patch is skipped when the
# __has_include guard is already present, so the cell can be re-run.
repo_dir <- '/content/cuplyr'
join_cpp <- file.path(repo_dir, 'src', 'ops_join.cpp')
if (!file.exists(join_cpp)) {
  stop('Source file not found: ', join_cpp, ' - run the clone cell first')
}
src <- readLines(join_cpp, warn = FALSE)

# 1) Join header path compatibility: cudf/join/join.hpp vs cudf/join.hpp
if (!any(grepl('__has_include(<cudf/join/join.hpp>)', src, fixed = TRUE))) {
  include_idx <- grep('^#include <cudf/join/join.hpp>$', src)
  if (length(include_idx) == 1) {
    replacement <- c(
      '#if __has_include(<cudf/join/join.hpp>)',
      '#include <cudf/join/join.hpp>',
      '#elif __has_include(<cudf/join.hpp>)',
      '#include <cudf/join.hpp>',
      '#else',
      '#error "cuDF join headers not found (expected cudf/join/join.hpp or cudf/join.hpp)"',
      '#endif'
    )
    # Splice: lines before the include, the guarded block, lines after it.
    # seq_len() keeps both edge cases (first/last line) correct.
    src <- c(
      src[seq_len(include_idx - 1)],
      replacement,
      src[-seq_len(include_idx)]
    )
  } else {
    # Say so explicitly; otherwise the "Patched" message below would be misleading
    cat(
      'Note: expected exactly one "#include <cudf/join/join.hpp>" line, found ',
      length(include_idx), '; include left unchanged\n',
      sep = ''
    )
  }
}

# 2) Join no-match sentinel compatibility across cuDF versions
src <- gsub('if (v == cudf::JoinNoMatch) v = nrows;', 'if (v < 0) v = nrows;', src, fixed = TRUE)

writeLines(src, join_cpp)
cat('Patched:', join_cpp, '\n')
# Print the relevant lines so the result can be checked by eye
system(sprintf("grep -n 'cudf/join\\|JoinNoMatch\\|v < 0' %s | head -20", join_cpp))

In [None]:
# Configure cuplyr
repo_dir <- '/content/cuplyr'
rapids_env <- '/opt/rapids'

# Find system CUDA
# Candidates are tried in order; a directory qualifies when it contains
# include/cuda.h. cuda_home stays NULL when none qualifies.
cuda_paths <- c('/usr/local/cuda', '/usr/local/cuda-12.8', '/usr/local/cuda-12.4', '/usr/local/cuda-12')
cuda_hits <- cuda_paths[file.exists(file.path(cuda_paths, 'include', 'cuda.h'))]
cuda_home <- if (length(cuda_hits) > 0) cuda_hits[1] else NULL
stopifnot('CUDA not found' = !is.null(cuda_home))

# Detect real CUDA driver library (avoid compat/stubs)
#
# Returns the directory containing libcuda.so.1, or NULL when none is found.
# Three strategies are tried in order: fixed directories, the ldconfig
# cache, and finally a filesystem search under /usr.
find_driver_lib <- function() {
  # Known Colab locations for real driver
  known_dirs <- c(
    '/usr/lib64-nvidia',
    '/usr/lib/x86_64-linux-gnu'
  )
  present <- file.exists(file.path(known_dirs, 'libcuda.so.1'))
  if (any(present)) {
    return(known_dirs[which(present)[1]])
  }

  # Try ldconfig: keep the path after "=>" on each matching cache line
  cache_lines <- system('ldconfig -p 2>/dev/null', intern = TRUE)
  candidates <- sub('.* => ', '', grep('libcuda.so.1', cache_lines, value = TRUE))
  usable <- nzchar(candidates) &
    file.exists(candidates) &
    !grepl('/compat|/stubs|/opt/rapids', candidates)
  candidates <- candidates[usable]
  if (length(candidates) > 0) {
    return(dirname(candidates[1]))
  }

  # Fallback search
  found <- system("find /usr -name 'libcuda.so.1' 2>/dev/null | grep -v -E '/compat|/stubs|/opt' | head -1", intern = TRUE)
  if (length(found) > 0 && nzchar(found[1])) {
    return(dirname(found[1]))
  }

  NULL
}

# Locate the directory holding the real NVIDIA driver; abort when none is found
driver_lib <- find_driver_lib()
if (is.null(driver_lib)) {
  stop('Could not find real libcuda.so.1. Ensure Colab GPU runtime is attached.')
}

# Set environment variables for configure
# CRITICAL: driver_lib must come FIRST in LD_LIBRARY_PATH to override any stubs
current_ld <- Sys.getenv('LD_LIBRARY_PATH')
ld_parts <- c(
  driver_lib,                           # Real NVIDIA driver FIRST
  file.path(cuda_home, 'lib64'),        # CUDA runtime
  file.path(rapids_env, 'lib'),         # RAPIDS libraries
  strsplit(current_ld, ':', fixed = TRUE)[[1]]
)
# Drop empty entries and duplicates; unique() keeps first occurrences in order
ld_parts <- unique(ld_parts[nzchar(ld_parts)])

Sys.setenv(
  CUDA_HOME = cuda_home,
  CONDA_PREFIX = rapids_env,
  LD_LIBRARY_PATH = paste(ld_parts, collapse = ':')
)

cat('Environment:\n')
cat('  CUDA_HOME:', Sys.getenv('CUDA_HOME'), '\n')
cat('  CONDA_PREFIX:', Sys.getenv('CONDA_PREFIX'), '\n')
cat('  CUDA_DRIVER_LIB:', driver_lib, '\n')
cat('  Driver file:', file.path(driver_lib, 'libcuda.so.1'), '\n')

# Verify driver is real (not a stub)
driver_size <- file.info(file.path(driver_lib, 'libcuda.so.1'))$size
if (!is.na(driver_size) && driver_size < 1000000) {
  stop('Driver at ', driver_lib, ' appears to be a stub (', driver_size, ' bytes). Real driver should be 30MB+')
}
if (is.na(driver_size)) {
  # Do not claim "verified" when the size could not be read
  cat('  Driver size: unknown (could not read file size)\n')
} else {
  cat('  Driver size:', round(driver_size / 1e6, 1), 'MB (verified real)\n')
}

# Set up C++20 for R
# Note: this overwrites any existing ~/.R/Makevars
r_makevars <- path.expand('~/.R/Makevars')
dir.create(dirname(r_makevars), showWarnings = FALSE, recursive = TRUE)
writeLines(c(
  'CXX20=g++',
  'CXX20STD=-std=gnu++20',
  'CXX20FLAGS=-O2 -fPIC'
), r_makevars)
cat('  CXX20 configured\n\n')

# Run configure
# The working directory is switched only for the duration of the call and
# restored even if system() itself raises an error.
cat('Running ./configure...\n\n')
old_wd <- setwd(repo_dir)
result <- tryCatch(
  suppressWarnings(system('./configure 2>&1', intern = TRUE)),
  finally = setwd(old_wd)
)
cat(result, sep = '\n')

# Check if configure succeeded
# With intern = TRUE a non-zero exit status is attached as the "status"
# attribute; checking it catches failures that leave an old src/Makevars
# from a previous run in place.
configure_status <- attr(result, 'status')
if (!is.null(configure_status) && configure_status != 0) {
  stop('Configure failed with exit status ', configure_status)
}
makevars_path <- file.path(repo_dir, 'src', 'Makevars')
if (!file.exists(makevars_path)) {
  stop('Configure failed - src/Makevars not created')
}

# Patch Makevars:
# 1. Add preprocessor defines for RMM compatibility
# 2. Use --enable-new-dtags so RUNPATH is used (can be overridden by LD_LIBRARY_PATH)
# 3. Put driver_lib FIRST in rpath so real driver is found before any conda stubs
mk <- readLines(makevars_path, warn = FALSE)

# Add RMM compatibility flags if not present
if (!any(grepl('DRMM_ENABLE_LEGACY', mk))) {
  if (!any(grepl('^PKG_CPPFLAGS=', mk))) {
    # sub() below would silently do nothing; make that visible
    cat('  Note: no PKG_CPPFLAGS= line in src/Makevars; RMM compatibility flags not added\n')
  }
  mk <- sub(
    '^PKG_CPPFLAGS=(.*)$',
    'PKG_CPPFLAGS=-DRMM_ENABLE_LEGACY_MR_INTERFACE -DLIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE \\1',
    mk
  )
}

# Fix PKG_LIBS: use RUNPATH (--enable-new-dtags) and put driver_lib FIRST
# Note: We don't directly link libcuda.so - cudart loads it dynamically
if (!any(grepl('^PKG_LIBS=', mk))) {
  cat('  Note: no PKG_LIBS= line in src/Makevars; linker flags not patched\n')
}
mk <- sub(
  '^PKG_LIBS=(.*)$',
  sprintf(
    'PKG_LIBS=-Wl,--enable-new-dtags -Wl,-rpath,%s -L$(CUDF_LIB) -Wl,-rpath,$(CUDF_LIB) -Wl,-rpath,$(RAPIDS_RPATHS) -lcudf -L$(CUDA_HOME)/lib64 -Wl,-rpath,$(CUDA_HOME)/lib64 -lcudart',
    driver_lib
  ),
  mk
)

writeLines(mk, makevars_path)
cat('\nPatched src/Makevars for Colab runtime linking.\n')
cat('PKG_CPPFLAGS and PKG_LIBS:\n')
cat(grep('^PKG_(CPPFLAGS|LIBS)=', mk, value = TRUE), sep = '\n')

cat('\nConfigure successful!\n')

In [None]:
# Build and install cuplyr
# Runs from inside the checkout and leaves the working directory at /content
# once the build has finished or failed.
repo_dir <- '/content/cuplyr'
setwd(repo_dir)

# Regenerate Rcpp exports
cat('Regenerating Rcpp exports...\n')
rcpp_status <- system("Rscript -e \"Rcpp::compileAttributes('.')\" 2>&1", intern = FALSE)
if (rcpp_status != 0) {
  # Not fatal: the build may still succeed with the exports already in the repo
  cat('WARNING: Rcpp::compileAttributes exited with status', rcpp_status, '\n')
}

# Remove existing installation
# find.package() with an explicit lib.loc looks only at the library
# directories (not at loaded namespaces) and avoids scanning every installed
# package the way installed.packages() does.
if (length(find.package('cuplyr', lib.loc = .libPaths(), quiet = TRUE)) > 0) {
  cat('Removing existing cuplyr install...\n')
  try(remove.packages('cuplyr'), silent = TRUE)
}

# Build and install
cat('\nBuilding cuplyr (1-2 minutes)...\n\n')
result <- system('R CMD INSTALL --preclean . 2>&1', intern = TRUE)
# With intern = TRUE the "status" attribute is only set on a non-zero exit
status <- attr(result, 'status')
if (is.null(status)) status <- 0L

# Check for errors
# Case-insensitive, so one pattern covers error:/Error:/ERROR:
has_error <- any(grepl('error:', result, ignore.case = TRUE))
if (status != 0L || has_error) {
  cat('Build FAILED. Output:\n\n')
  cat(result, sep = '\n')
  setwd('/content')
  stop('Compilation failed')
}

# Show last few lines
cat(tail(result, 15), sep = '\n')

setwd('/content')

# Verify installation
if (length(find.package('cuplyr', lib.loc = .libPaths(), quiet = TRUE)) == 0) {
  stop('cuplyr not found after install')
}

cat('\ncuplyr installed successfully!\n')
cat('Run the next cells to configure paths and test.\n')

## Step 4: Configure library paths

**Run this cell once per session** to set up library paths.

In [None]:
# Set library paths - RUN THIS ONCE PER SESSION
# This ensures the real NVIDIA driver is found before any RAPIDS stubs

rapids_lib <- '/opt/rapids/lib'
cuda_lib <- '/usr/local/cuda/lib64'

# Find real NVIDIA driver
#
# Returns the directory containing libcuda.so.1, or NULL when none is found.
# Uses the same three strategies as the build-time lookup (fixed directories,
# ldconfig cache, filesystem search) so that a driver located while building
# is also located when a session is set up.
find_driver_lib <- function() {
  preferred <- c('/usr/lib64-nvidia', '/usr/lib/x86_64-linux-gnu')
  for (p in preferred) {
    if (file.exists(file.path(p, 'libcuda.so.1'))) return(p)
  }

  # ldconfig cache: keep the path after "=>" and drop compat/stub/RAPIDS copies
  ld_lines <- system('ldconfig -p 2>/dev/null', intern = TRUE)
  hits <- grep('libcuda.so.1', ld_lines, value = TRUE)
  hits <- sub('.* => ', '', hits)
  hits <- hits[nzchar(hits) & file.exists(hits)]
  hits <- hits[!grepl('/compat|/stubs|/opt/rapids', hits)]
  if (length(hits) > 0) return(dirname(hits[1]))

  # Fallback search
  hits <- system("find /usr -name 'libcuda.so.1' 2>/dev/null | grep -v -E '/compat|/stubs|/opt' | head -1", intern = TRUE)
  if (length(hits) > 0 && nzchar(hits[1])) return(dirname(hits[1]))

  NULL
}

# Resolve the driver directory; without it nothing below makes sense
driver_lib <- find_driver_lib()
if (is.null(driver_lib)) stop('Could not find real libcuda.so.1. Ensure Colab GPU runtime is attached.')

# Verify it's the real driver (not a stub)
# An unreadable size (NA) is tolerated; only a known-small file is rejected.
driver_size <- file.info(file.path(driver_lib, 'libcuda.so.1'))$size
looks_like_stub <- !is.na(driver_size) && driver_size < 1000000
if (looks_like_stub) {
  stop('Driver appears to be a stub. Check Colab GPU runtime.')
}

# Set LD_LIBRARY_PATH with driver_lib FIRST
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process
# start, so this presumably affects child processes only, with the in-session
# load relying on the RUNPATH set at build time - confirm.
current <- Sys.getenv('LD_LIBRARY_PATH')
inherited <- strsplit(current, ':', fixed = TRUE)[[1]]
parts <- c(driver_lib, cuda_lib, rapids_lib, inherited)
parts <- parts[nzchar(parts)]
parts <- unique(parts)
Sys.setenv(LD_LIBRARY_PATH = paste(parts, collapse = ':'))

# Unload cuplyr if already loaded so it picks up new paths
# Both steps are best effort; failures are deliberately ignored.
attached <- 'package:cuplyr' %in% search()
if (attached) {
  try(detach('package:cuplyr', unload = TRUE, character.only = TRUE), silent = TRUE)
}
still_loaded <- 'cuplyr' %in% loadedNamespaces()
if (still_loaded) {
  try(unloadNamespace('cuplyr'), silent = TRUE)
}

cat('Library paths configured\n')
cat('Driver lib:', driver_lib, '\n')
cat('Now run: library(cuplyr)\n')

## Step 5: Test cuplyr

Now use cuplyr directly in R cells!

In [None]:
# Test cuplyr
# Loads the package, prints its version, then reports what gpu_details() says.
library(cuplyr)

cat('cuplyr version:', as.character(packageVersion('cuplyr')), '\n\n')

info <- gpu_details()
if (!isTRUE(info$available)) {
  # Failure path first: point the user at the diagnostics cell
  cat(
    'GPU NOT DETECTED\n\n',
    'This usually means a CUDA stub library was loaded instead of the real driver.\n',
    'Run the Troubleshooting cell below for diagnostics.\n',
    sep = ''
  )
} else {
  cat('GPU:', info$name, '\n')
  # total_memory is divided by 1e9 and labelled GB
  cat('Memory:', round(info$total_memory / 1e9, 1), 'GB\n')
  cat('Compute Capability:', info$compute_capability, '\n')
}

In [None]:
# Basic operations
# The same filter -> mutate -> select -> arrange -> collect chain, written
# one named step at a time instead of as a single pipe.
library(cuplyr)

cars_gpu <- tbl_gpu(mtcars)
efficient <- filter(cars_gpu, mpg > 20)
with_kpl <- mutate(efficient, kpl = mpg * 0.425)
trimmed <- select(with_kpl, mpg, kpl, cyl, hp)
ordered <- arrange(trimmed, desc(mpg))
result <- collect(ordered)

print(result)

In [None]:
# Group by and summarise
# Per-cylinder row count and mean mpg/hp, written as named steps.
library(cuplyr)

cars_gpu <- tbl_gpu(mtcars)
by_cyl <- group_by(cars_gpu, cyl)
cyl_stats <- summarise(
  by_cyl,
  count = n(),
  avg_mpg = mean(mpg),
  avg_hp = mean(hp)
)
result <- collect(arrange(cyl_stats, cyl))

print(result)

## Troubleshooting

Run this cell if the GPU is not detected (for example, if the Step 5 test cell prints `GPU NOT DETECTED`).

In [None]:
# Troubleshooting diagnostics
# Read-only checks that print their findings; nothing here changes the setup.
cat('=== DIAGNOSTICS ===\n\n')

# 1. Check GPU visibility
cat('1. GPU visibility (nvidia-smi):\n')
system('nvidia-smi -L 2>&1 || echo "nvidia-smi not found"')
cat('\n')

# 2. Check device nodes
cat('2. Device nodes:\n')
system('ls -l /dev/nvidia* 2>&1 || echo "No /dev/nvidia* devices"')
cat('\n')

# 3. Find all libcuda.so files
cat('3. All libcuda.so files on system:\n')
system("find /usr /opt -name 'libcuda.so*' 2>/dev/null | head -20")
cat('\n')

# 4. Check which is being used
cat('4. Library resolution (ldconfig):\n')
system('ldconfig -p 2>/dev/null | grep libcuda.so')
cat('\n')

# 5. Check LD_LIBRARY_PATH
cat('5. LD_LIBRARY_PATH:\n')
ld_path <- Sys.getenv('LD_LIBRARY_PATH')
parts <- strsplit(ld_path, ':', fixed = TRUE)[[1]]
# Show at most the first 10 entries. seq_len() yields an empty loop when the
# variable is unset, where 1:min(10, 0) would have produced an "NA: NA" line.
if (length(parts) == 0) {
  cat('  (empty)\n')
}
for (i in seq_len(min(10, length(parts)))) {
  cat(' ', i, ': ', parts[i], '\n', sep = '')
}
cat('\n')

# 6. Check cuplyr.so linking
cat('6. cuplyr.so dependencies:\n')
so_path <- system.file('libs', 'cuplyr.so', package = 'cuplyr')
if (nzchar(so_path)) {
  cat('Location:', so_path, '\n\n')
  system(sprintf('ldd %s 2>&1 | grep -E "cuda|cudf|rmm|not found"', shQuote(so_path)))
  cat('\nRUNPATH/RPATH:\n')
  system(sprintf("readelf -d %s 2>/dev/null | grep -E 'RPATH|RUNPATH' || echo 'No RPATH/RUNPATH'", shQuote(so_path)))
} else {
  cat('cuplyr not installed\n')
}
cat('\n')

# 7. Check RAPIDS stubs
# Covers the same four locations the install cell tries to disable.
cat('7. RAPIDS CUDA stubs:\n')
stub_files <- c(
  '/opt/rapids/lib/libcuda.so',
  '/opt/rapids/lib/libcuda.so.1',
  '/opt/rapids/lib/stubs/libcuda.so',
  '/opt/rapids/lib/stubs/libcuda.so.1'
)
for (stub in stub_files) {
  if (file.exists(stub)) {
    info <- file.info(stub)
    cat(' ', stub, ':', info$size, 'bytes')
    if (info$size < 1000000) cat(' (STUB - should be disabled!)')
    cat('\n')
  } else if (file.exists(paste0(stub, '.disabled'))) {
    # paste0 keeps the file name intact; cat() would insert a space before ".disabled"
    cat(' ', paste0(stub, '.disabled'), '(good)\n')
  }
}
cat('\n')

# 8. Test GPU detection in subprocess with LD_PRELOAD
cat('8. GPU detection with LD_PRELOAD:\n')
driver_paths <- c('/usr/lib64-nvidia/libcuda.so.1', '/usr/lib/x86_64-linux-gnu/libcuda.so.1')
# First existing path, or NA when neither exists
driver_so <- driver_paths[file.exists(driver_paths)][1]
if (!is.na(driver_so)) {
  cmd <- sprintf(
    'LD_PRELOAD=%s Rscript -e "library(cuplyr); cat(gpu_details()$available, \\"\\n\\")" 2>&1',
    shQuote(driver_so)
  )
  cat('Command:', cmd, '\n')
  result <- system(cmd, intern = TRUE)
  cat('Result:', paste(result, collapse = '\n'), '\n')

  if (any(grepl('TRUE', result))) {
    cat('\n*** GPU works with LD_PRELOAD! ***\n')
    cat('Fix: Re-run cells 5 (disable stubs), 9 (configure), and 10 (build) in order.\n')
  }
} else {
  cat('Could not find real driver to test\n')
}