# Connect to PostgreSQL from Jupyter (R)

This notebook uses the R kernel to connect to a PostgreSQL database using environment variables stored in a `.env` file. It is beginner-friendly and ready to run on macOS when Jupyter is running on the same machine as PostgreSQL.

What this notebook does:
- Shows a safe way to load a `.env` file (without printing secrets)
- Connects to PostgreSQL using DBI + RPostgres
- Runs a sample query and shows the results

Notes before running:
- Create a `.env` file next to this notebook (example below).
- Make sure the R kernel (IRkernel) is installed and selected.
- On macOS you may need to have libpq available (the Homebrew `libpq` or `postgresql` formula provides it).


## Example .env file

Create a file named `.env` in the same folder as this notebook with these contents (replace values):

```
PGHOST=localhost
PGPORT=5432
PGUSER=myuser
PGPASSWORD=mysecretpassword
PGDATABASE=mydb
```

Do NOT commit `.env` to version control. Add `.env` to your `.gitignore`.


In [None]:
# Install required packages if they are missing (runs in R).
# dbplyr is included because dplyr::tbl() needs it as the backend whenever the
# data source is a DBI connection (used in the dplyr example further down).
required_pkgs <- c("DBI", "RPostgres", "dplyr", "dbplyr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cloud.r-project.org")
}

# dotenv is optional; we'll fallback to a small loader if it's not available
if (!requireNamespace("dotenv", quietly = TRUE)) {
  message("Package 'dotenv' not installed; falling back to a built-in .env loader (that's fine).")
} else {
  message("Package 'dotenv' is available; we'll prefer it to load .env if present.")
}


In [None]:
# Load libraries
library(DBI)
library(RPostgres)
library(dplyr)

# Helper: small .env parser (works without extra packages)
#
# Reads `file` line by line, ignores blank lines and lines starting with `#`,
# and exports every KEY=VALUE pair into the process environment with
# Sys.setenv(). Values are never printed.
#
# Args:
#   file: Path to the .env file. Defaults to ".env" in the working directory.
#
# Returns (invisibly):
#   TRUE if the file exists and was processed, FALSE if it does not exist.
load_env_file <- function(file = ".env") {
  if (!file.exists(file)) {
    message("No .env file found at: ", file)
    return(invisible(FALSE))
  }
  lines <- trimws(readLines(file, warn = FALSE))
  lines <- lines[nzchar(lines) & !startsWith(lines, "#")]
  for (line in lines) {
    # Split on the FIRST "=" only, so values may themselves contain "="
    eq_pos <- regexpr("=", line, fixed = TRUE)
    # Skip malformed lines: no "=" at all, or nothing before it
    if (eq_pos < 2L) next
    key <- trimws(substr(line, 1L, eq_pos - 1L))
    if (!nzchar(key)) next
    val <- trimws(substr(line, eq_pos + 1L, nchar(line)))
    # Remove one pair of matching surrounding quotes ("..." or '...') if present
    if (grepl("^\".*\"$", val) || grepl("^'.*'$", val)) {
      val <- substr(val, 2L, nchar(val) - 1L)
    }
    # Sys.setenv() takes the variable name as an argument name, so build the
    # call dynamically with do.call()
    do.call(Sys.setenv, setNames(list(val), key))
  }
  invisible(TRUE)
}

# Populate the environment from .env. The dotenv package gets the first
# attempt when it is installed; if it is absent, or its loader raises an
# error, the built-in load_env_file() helper is used instead.
env_loaded <- FALSE
if (requireNamespace("dotenv", quietly = TRUE)) {
  env_loaded <- tryCatch(
    {
      dotenv::load_dot_env()
      TRUE
    },
    error = function(e) FALSE
  )
}
if (!env_loaded) {
  load_env_file(".env")
}

# Read values from environment.
# Host and port fall back to the usual PostgreSQL defaults when unset; user,
# password and database fall back to "".
pg_host <- Sys.getenv("PGHOST", unset = "localhost")

# PGPORT needs care: a set-but-empty or non-numeric value would make
# as.integer() return NA, which would then be passed on to dbConnect().
# Treat an empty value as "use the default" and reject anything that is not a
# valid TCP port with a clear error.
pg_port_raw <- trimws(Sys.getenv("PGPORT", unset = "5432"))
if (!nzchar(pg_port_raw)) {
  pg_port_raw <- "5432"
}
pg_port <- if (grepl("^[0-9]+$", pg_port_raw)) {
  # suppressWarnings: digit strings too large for an integer become NA
  suppressWarnings(as.integer(pg_port_raw))
} else {
  NA_integer_
}
if (is.na(pg_port) || pg_port < 1L || pg_port > 65535L) {
  stop("PGPORT must be a whole number between 1 and 65535, got: '", pg_port_raw, "'")
}

pg_user <- Sys.getenv("PGUSER", unset = "")
pg_password <- Sys.getenv("PGPASSWORD", unset = "")
pg_db <- Sys.getenv("PGDATABASE", unset = "")

# Print non-secret connection info for confirmation
cat("Host:", pg_host, "\n")
cat("Port:", pg_port, "\n")
cat("Database:", pg_db, "\n")
cat("User:", pg_user, "\n")
# Only report whether a password is present; never echo its value
if (nzchar(pg_password)) cat("Password: (loaded, not printed)\n") else cat("Password: (not set)\n")


In [None]:
# Fail fast: abort with an actionable message when any mandatory connection
# setting is absent or empty in the environment.
required <- c("PGUSER", "PGPASSWORD", "PGDATABASE")
missing <- Filter(function(var_name) !nzchar(Sys.getenv(var_name)), required)
if (length(missing) != 0L) {
  stop(
    "Missing required environment variables: ",
    paste(missing, collapse = ", "),
    "\n\nPlease create a .env file with these values next to this notebook, or set the env vars in your shell."
  )
}


In [None]:
# Open the PostgreSQL connection using the settings read from the environment
con <- DBI::dbConnect(
  drv = RPostgres::Postgres(),
  dbname = pg_db,
  host = pg_host,
  port = pg_port,
  user = pg_user,
  password = pg_password
)

cat("Connection established.\n")

# Fetch the table names and show at most the first 20 of them
tables <- DBI::dbListTables(con)
cat("Tables (first 20):\n")
print(utils::head(tables, n = 20))


In [None]:
# Example: read a small sample from a table named 'my_table'
# Replace 'my_table' with a real table name in your database.
sample_table_name <- "my_table"  # <-- change this to your table

if (sample_table_name %in% tables) {
  # Quote the table name through the driver rather than pasting it in raw:
  # unquoted names break on mixed case, spaces or reserved words, and raw
  # interpolation lets arbitrary SQL through.
  df <- dbGetQuery(
    con,
    sprintf(
      "SELECT * FROM %s LIMIT 10;",
      DBI::dbQuoteIdentifier(con, sample_table_name)
    )
  )
  print(df)
} else {
  cat("Table '", sample_table_name, "' not found. Pick a table from the list above.\n", sep = "")
}


In [None]:
# The same preview expressed with dplyr verbs: head() is applied to the remote
# table reference and collect() pulls the result into a local data frame.
if (!(sample_table_name %in% tables)) {
  cat("Skipping dplyr example because table not found.\n")
} else {
  remote_tbl <- tbl(con, sample_table_name)
  preview <- collect(head(remote_tbl, 10))
  print(preview)
}


In [None]:
# Clean up: disconnect when you're done.
# The dbIsValid() guard makes this cell safe to re-run: calling dbDisconnect()
# again on a connection that is already closed is avoided.
if (dbIsValid(con)) {
  dbDisconnect(con)
  cat("Disconnected.\n")
} else {
  cat("Connection was already closed.\n")
}


## Troubleshooting tips

- If you get an error installing RPostgres, you may need libpq on macOS. Install via Homebrew:
  ```bash
  brew install libpq
  # You may also need to add libpq to your PATH or set PKG_CONFIG_PATH before installing RPostgres
  ```
- If you see authentication or connection refused errors:
  - Verify PostgreSQL is running (Postgres.app or `brew services start postgresql`).
  - Verify `.env` values (host, port, user, password, database).
  - Try `psql -h localhost -U myuser -d mydb -p 5432` from Terminal to confirm connectivity.
- If the notebook kernel isn't R, install IRkernel and select the R kernel:
  ```r
  install.packages('IRkernel')
  IRkernel::installspec(user = TRUE)
  ```

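If none of these tips resolves the problem, copy the exact error message: it usually indicates whether the failure is in package installation, authentication, or network connectivity.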
