In [2]:
# Commit of ITSleeds/UK2GTFS to install. Note: the pin is only applied when
# UK2GTFS is not installed yet; an existing installation is used as-is.
uk2gtfs_version <- "f15694a655c508f8caebaf99328b0a2d1bc8dfa5"

# requireNamespace() only tests whether a package is installed. Unlike
# require(), it does not attach the package as a side effect of the check.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
pacman::p_load(remotes, rnaturalearth, sf)

if (!requireNamespace("UK2GTFS", quietly = TRUE)) {
  remotes::install_github("ITSleeds/UK2GTFS", ref = uk2gtfs_version)
}

# Attach explicitly; library() errors loudly if the install above failed.
library(UK2GTFS)

Loading required package: pacman
Loading required package: UK2GTFS
Your UK2GTFS data is up to date


In [3]:
# ---- Run configuration ----
# Base folder for everything this notebook writes, and the base name of the
# final feed (used for both the .zip and the unzipped folder).
out_dir <- "./out"
out_name <- "rail_scot_gtfs"

# When TRUE, the feed is trimmed to a short date window further down.
preview_mode <- TRUE

# Make sure the output folder exists before anything is written into it.
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

# Locations of the downloaded timetable archive and its unpacked contents.
timetable_dir <- file.path(out_dir, "timetable")
timetable_zip <- file.path(out_dir, "timetable.zip")

In [4]:
# Download the Natural Earth 1:10m cultural "map_subunits" layer as an sf
# object and keep only the features whose SU_A3 code is "SCT".
scotland_highres <- subset(
  rnaturalearth::ne_download(
    scale = 10L,
    type = "map_subunits",
    category = "cultural",
    returnclass = "sf"
  ),
  SU_A3 == "SCT"
)

# Pad the outline by 1000 before it is used for clipping.
# NOTE(review): presumably metres (sf/s2 on lon/lat data) - confirm.
scotland <- sf::st_buffer(scotland_highres, 1000)

[1m[22mReading ]8;;file:///Users/daniel/projects/munro-access/ne_10m_admin_0_map_subunits.zip[34mne_10m_admin_0_map_subunits.zip[39m]8;; from naturalearth...


In [5]:
# Fetch the NRDP timetable archive and unpack it, reusing what is on disk.
#
# The archive (timetable_zip) is what the conversion step reads, so its
# presence decides whether a download is needed; the unpacked folder alone is
# not enough to skip the download.
timetable_zip_cached <- file.exists(timetable_zip)
timetable_dir_cached <- dir.exists(timetable_dir) &&
  length(list.files(timetable_dir)) > 0

if (!timetable_zip_cached) {
  print(paste("Downloading timetable from NRDP to", timetable_zip, "..."))
  nrdp_timetable(timetable_zip)

  # Fail here rather than later with a confusing unzip/parse message.
  if (!file.exists(timetable_zip)) {
    stop("Timetable download did not produce ", timetable_zip, call. = FALSE)
  }
}

if (timetable_zip_cached && timetable_dir_cached) {
  print(paste("Using existing timetable from", timetable_dir))
} else {
  # Re-unpack whenever the archive is new or the folder is missing/empty.
  print(paste("Unzipping timetable to", timetable_dir, "..."))
  # showWarnings = FALSE: the folder may already exist but be empty.
  dir.create(timetable_dir, recursive = TRUE, showWarnings = FALSE)
  unzip(timetable_zip, exdir = timetable_dir)
}

[1] "Using existing timetable from ./out/timetable"


In [6]:
# Convert the downloaded ATOC timetable archive into an in-memory GTFS object.
print("atoc2gtfs...")

# Stop early with a clear message if the archive is not where we expect it.
if (!file.exists(timetable_zip)) {
  stop("Timetable archive not found: ", timetable_zip, call. = FALSE)
}

gtfs_raw <- atoc2gtfs(
  # Use the configured path instead of repeating the literal, so changing
  # out_dir keeps the download and conversion steps pointing at the same file.
  path_in = timetable_zip,
  ncores = 4
)

[1] "atoc2gtfs..."
Adding 69 missing tiplocs, these may have unreliable location data
2025-12-09 23:40:33.428323 Some calendar dates had incorrect start or end dates that did not align with operating day bitmask.
 Services=G37699,C25987,C27192,L36706,L36756,L36896,L36952,L11878,L11845,L11888
2025-12-09 23:40:33.823018 Constructing calendar and calendar_dates


In readLines(con = file, n = -1) :
  incomplete final line found on 'tmp/RJTTF673.MSN'


In [24]:
# Clip the national feed to Scotland, add shapes, clean it up and validate it.

print("Clipping to Scotland...")
gtfs <- gtfs_clip(gtfs_raw, scotland)

print("Creating shapes...")
gtfs <- ATOC_shapes(gtfs)

# Fix bug: ATOC_shapes incorrectly adds shape_id to stop_times
# shape_id should only be in trips.txt, not stop_times.txt
if ("shape_id" %in% names(gtfs$stop_times)) {
  print("Removing invalid shape_id column from stop_times...")
  gtfs$stop_times$shape_id <- NULL
}

print("Validate (before cleaning):")
gtfs_validate_internal(gtfs)

print("Cleaning...")

gtfs <- gtfs_clean(gtfs, public_only = TRUE)

# Drop transfers that reference stops no longer present in the feed.
if (!is.null(gtfs$transfers)) {
  gtfs$transfers <- gtfs$transfers[
    gtfs$transfers$from_stop_id %in% gtfs$stops$stop_id &
      gtfs$transfers$to_stop_id %in% gtfs$stops$stop_id,
  ]
}

# Swap arrival/departure on rows where arrival is later than departure.
out_of_order_times <-
  gtfs$stop_times$arrival_time > gtfs$stop_times$departure_time
# The comparison is NA when either time is missing. Treat those rows as
# "not out of order": an NA in the mask would otherwise break any()/sum() and
# is not allowed in the subscripted assignments below.
out_of_order_times[is.na(out_of_order_times)] <- FALSE
if (any(out_of_order_times)) {
  message(sprintf(
    "Swapping %d stop_times with arrival > departure",
    sum(out_of_order_times)
  ))
  temp <- gtfs$stop_times$arrival_time[out_of_order_times]
  gtfs$stop_times$arrival_time[out_of_order_times] <-
    gtfs$stop_times$departure_time[out_of_order_times]
  gtfs$stop_times$departure_time[out_of_order_times] <- temp
}

# gtfs-to-html requires short names
empty_short_names <- is.na(gtfs$routes$route_short_name) |
  gtfs$routes$route_short_name == ""
if (any(empty_short_names)) {
  print("Fixing empty route_short_name fields...")
  gtfs$routes$route_short_name[empty_short_names] <-
    gtfs$routes$route_long_name[empty_short_names]
  print(paste(
    "Copied route_long_name to route_short_name for",
    sum(empty_short_names),
    "routes"
  ))
}

print("Validate:")
gtfs_validate_internal(gtfs)

# gtfs <- gtfs_compress(gtfs)

# In preview mode keep only a short, fixed date window of the feed.
# Note: this trim happens after the validation above.
if (preview_mode) {
  gtfs <- gtfs_trim_dates(
    gtfs,
    startdate = 20251210L,
    enddate = 20251220L
  )
}

# TODO: trip grouping?

[1] "Clipping to Scotland..."
[1] "Creating shapes..."
2025-12-10 00:21:38.064392 Starting routing
2025-12-10 00:21:39.290376 converting routes to GTFS format
2025-12-10 00:21:39.559958 Invert routes
2025-12-10 00:21:39.860203 final formatting
[1] "Removing invalid shape_id column from stop_times..."
[1] "Validate (before cleaning):"
train_category are invalid columns in routes.txt
power_type are invalid columns in trips.txt
NA values in stops
NA values in routes
NA values in trips
NA values in stop_times
[1] "Cleaning..."
Swapping 703 stop_times with arrival > departure
[1] "Fixing empty route_short_name fields..."
[1] "Copied route_long_name to route_short_name for 547 routes"
[1] "Validate:"
train_category are invalid columns in routes.txt
power_type are invalid columns in trips.txt
NA values in stops
NA values in trips
NA values in stop_times
Trimming GTFS between 20251210 and 20251220


In [None]:
# Write the feed to <out_dir>/<out_name>.zip and unpack a copy next to it.
out_zip_path <- file.path(out_dir, paste0(out_name, ".zip"))
unzipped_path <- file.path(out_dir, out_name)

print(paste("Writing", out_zip_path, "..."))
gtfs_write(
  gtfs,
  # Use the configured folder so the zip lands where out_zip_path points.
  folder = out_dir,
  name = out_name # gtfs_write takes without .zip
)

# unzip() only warns on a missing archive; stop instead of reporting success.
if (!file.exists(out_zip_path)) {
  stop("Expected GTFS archive was not written: ", out_zip_path, call. = FALSE)
}

dir.create(unzipped_path, showWarnings = FALSE)
unzip(out_zip_path, exdir = unzipped_path)

print("All done.")

[1] "Writing ./out/rail_scot_gtfs.zip ..."
