# In [None]:
# Author: Arthur BARREAU
# Date: 2025-07-15
# R version: 4.3.3
# Description: 
#   • Loads NetCDF data,
#   • Applies a mask based on ASD zones,
#   • Computes monthly averages,
#   • Produces anomaly and/or surface air temperature plots based on selection.
# Source data: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels?tab=download
# ==============================================================================
# 0. PACKAGE & LIBRARY INSTALLATION ----
# ==============================================================================

# Install the system library and pinned R packages through conda before any
# package is loaded. The commands run in order; their exit statuses are not
# inspected.
conda_commands <- c(
  "conda install -c conda-forge libgdal-grib -y",
  "conda install -y r-terra=1.8_42 r-sf=1.0_20 r-gridExtra=2.3"
)
for (conda_command in conda_commands) {
  system(conda_command)
}

library(sf)
library(terra)
library(dplyr)
library(ggplot2)
library(jsonlite)
library(gridExtra)

# Raise R's `timeout` option to 3600 seconds (one hour); this option governs
# the download.file() calls made further down when fetching the shapefile.
options(timeout = 3600)

# ==============================================================================
# 1. PARAMETERS AND INPUT LOADING ----
# ==============================================================================

# Read the tool parameters from the JSON file produced for this run.
json_data <- fromJSON("galaxy_inputs/galaxy_inputs.json")

# `year` is a comma-separated string, presumably "start,end" (TODO confirm
# against the tool definition). Only the first two values are used, as before.
# range() makes a single year ("2010") or a reversed pair ("2020,2001") work
# instead of failing inside seq().
requested_years <- as.numeric(
  trimws(unlist(strsplit(as.character(json_data$year), ",")))
)
requested_years <- head(requested_years, 2)
if (length(requested_years) == 0 || anyNA(requested_years)) {
  stop(
    "`year` must contain one or two comma-separated numeric years.",
    call. = FALSE
  )
}
year_bounds <- range(requested_years)

# Years the user asked to display.
list_year_chose <- seq(year_bounds[1], year_bounds[2])
# Years to process: the selection widened so it always spans 2001-2020, the
# period used later as the anomaly baseline.
list_year <- seq(min(list_year_chose, 2001), max(list_year_chose, 2020))

# Comma-separated sub-area codes; surrounding blanks are trimmed so that
# "48.1, 48.2" matches the same features as "48.1,48.2".
subarea <- json_data$subarea
subarea_list <- trimws(unlist(strsplit(subarea, ",")))

# Which figure(s) to produce ("AT", "ATa" or "AT_ATa") and the NetCDF path.
chose_list <- json_data$graphXselection
path_file <- json_data$data$path
# ==============================================================================
# 2. LOAD & PROJECT NETCDF DATA ----
# ==============================================================================

# Open the NetCDF file as a multi-layer raster.
netcdf_filename <- path_file
netcdf_raster_data <- rast(netcdf_filename)

# Reproject in groups of at most `chunk_size` layers rather than in one call
# (presumably to bound memory use on long time series — TODO confirm).
n_layers <- nlyr(netcdf_raster_data)
chunk_size <- 10000
num_chunks <- ceiling(n_layers / chunk_size)

# Preallocate one slot per chunk instead of growing the list.
projected_rasters <- vector("list", num_chunks)

# seq_len() yields an empty sequence when there are no chunks, whereas
# 1:num_chunks would iterate over c(1, 0).
for (i in seq_len(num_chunks)) {
  start_layer <- (i - 1) * chunk_size + 1
  end_layer <- min(i * chunk_size, n_layers)

  chunk <- subset(netcdf_raster_data, start_layer:end_layer)
  # EPSG:6932 is the CRS code that also appears in the ASD shapefile name.
  projected_rasters[[i]] <- project(chunk, "EPSG:6932")
}

# Recombine the projected chunks into a single raster, in layer order.
netcdf_raster_data <- do.call(c, projected_rasters)

# ==============================================================================
# 3. LOAD & FILTER ASD SHAPEFILE ----
# ==============================================================================

# Download the components of the CCAMLR ASD shapefile (tag v0.5.0) into the
# local "asd" directory and read the layer with sf.
#
# Takes no arguments. Returns the object produced by st_read() on the
# downloaded .shp file.
download_asd_data <- function() {
  target_dir <- "asd"
  dir.create(target_dir, showWarnings = FALSE)

  base_url <- "https://raw.githubusercontent.com/ccamlr/data/refs/tags/v0.5.0/geographical_data/asd/asd-shapefile-EPSG6932."
  extensions <- c("shp", "shx", "dbf", "prj", "cst")

  # Fetch every sidecar file in binary mode, keeping the remote file name.
  for (extension in extensions) {
    remote_file <- paste0(base_url, extension)
    local_file <- file.path(target_dir, basename(remote_file))
    download.file(remote_file, local_file, mode = "wb")
  }

  st_read(file.path(target_dir, "asd-shapefile-EPSG6932.shp"))
}

# Fetch the ASD polygons, keep the features whose GAR_Short_ code is among
# the requested sub-areas, and convert them to a terra vector for masking.
asd_data <- download_asd_data()
selected_subareas <- asd_data %>%
  filter(GAR_Short_ %in% subarea_list) %>%
  vect()

# ==============================================================================
# 4. MASK NETCDF DATA ----
# ==============================================================================

# Crop the raster to the extent of the selected sub-areas, then mask out the
# cells that fall outside the polygons.
masked_netcdf_data <- mask(
  crop(netcdf_raster_data, selected_subareas),
  selected_subareas
)
# One row per cell with x/y coordinates followed by one column per layer.
netcdf_data_df <- as.data.frame(masked_netcdf_data, xy = TRUE)

# ==============================================================================
# 5. CALCULATE MONTHLY MEAN & ANOMALIES ----
# ==============================================================================

# Days per month for a non-leap year, named with the English three-letter
# month abbreviations (Jan ... Dec). February is adjusted per year later on.
month_days <- setNames(
  c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31),
  month.abb
)

# Test whether a year is a leap year in the Gregorian calendar.
#
# year: numeric vector of years (a single year works as before).
# Returns a logical vector of the same length: TRUE when the year is
# divisible by 4 but not by 100, or divisible by 400.
#
# Element-wise `&` / `|` are used so that vector input is accepted; the scalar
# operators `&&` / `||` raise an error on length > 1 input since R 4.3.
is_leap_year <- function(year) {
  (year %% 4 == 0 & year %% 100 != 0) | (year %% 400 == 0)
}

# Number of time steps per calendar day in the layer sequence.
# NOTE(review): 8 implies 3-hourly data that starts on 1 January of
# list_year[1] and has no gaps — confirm against the downloaded file.
steps_per_day <- 8

# Spatial mean of every layer (one value per time step). Columns 1 and 2 hold
# the x/y coordinates; drop = FALSE keeps a data frame when only one layer
# column remains, which colMeans() requires.
netcdf_data_mean_position <- data.frame(
  mean_position = colMeans(
    netcdf_data_df[, -c(1, 2), drop = FALSE],
    na.rm = TRUE
  )
)

# Preallocate the result columns instead of rbind()-ing inside the loop.
n_monthly_rows <- length(list_year) * length(month_days)
monthly_dates <- character(n_monthly_rows)
monthly_means <- numeric(n_monthly_rows)
monthly_row <- 0

# Number of time steps consumed by the years already processed.
cumulative_row_count <- 0

for (year in list_year) {
  # Updates the script-level month_days so February matches the current year.
  month_days["Feb"] <- if (is_leap_year(year)) 29 else 28

  # Last and first day-of-year of each month.
  month_indices <- cumsum(month_days)
  month_start_indices <- c(1, head(month_indices, -1) + 1)
  year_month_names <- paste(year, names(month_days), sep = "_")

  for (i in seq_along(year_month_names)) {
    # First step of the month's first day: (day - 1) * steps + 1.
    # Using day * steps here would point at the LAST step of that day and
    # silently drop the first seven steps of every month.
    start_idx <- (month_start_indices[i] - 1) * steps_per_day + 1 +
      cumulative_row_count
    # Last step of the month's last day.
    end_idx <- month_indices[i] * steps_per_day + cumulative_row_count

    monthly_row <- monthly_row + 1
    monthly_dates[monthly_row] <- year_month_names[i]
    # No na.rm here: a month reaching past the available layers yields NA.
    monthly_means[monthly_row] <- mean(
      netcdf_data_mean_position$mean_position[start_idx:end_idx]
    )
  }
  cumulative_row_count <- cumulative_row_count +
    unname(month_indices[12]) * steps_per_day
}

# One row per year-month, labelled "<year>_<Mon>".
monthly_mean_df <- data.frame(date = monthly_dates, mean_value = monthly_means)

# ==============================================================================
# 6. CALCULATE ANOMALIES ----
# ==============================================================================

# Baseline: monthly means whose year lies in 2001-2020 (inclusive).
filtered_monthly_mean_df <- monthly_mean_df %>%
  filter(
    as.numeric(substr(date, 1, 4)) >= 2001,
    as.numeric(substr(date, 1, 4)) <= 2020
  )

# Baseline mean for each calendar month, stored as a list named by month.
month_list <- names(month_days)
monthly_mean_by_month <- setNames(
  lapply(month_list, function(month_name) {
    in_month <- grepl(paste0("_", month_name), filtered_monthly_mean_df$date)
    mean(filtered_monthly_mean_df$mean_value[in_month], na.rm = TRUE)
  }),
  month_list
)

# Anomaly = monthly mean minus the baseline mean of the same calendar month.
# Characters 6-8 of the "<year>_<Mon>" label are the month abbreviation.
df_with_anomalies <- monthly_mean_df %>%
  mutate(
    anomaly = mean_value - unlist(monthly_mean_by_month[substr(date, 6, 8)])
  )

# Keep the labels in their current (chronological) order on a discrete axis,
# and expose the year as a character column.
df_with_anomalies[["date"]] <- factor(
  df_with_anomalies[["date"]],
  levels = unique(df_with_anomalies[["date"]])
)
df_with_anomalies[["year"]] <- substr(
  as.character(df_with_anomalies[["date"]]), 1, 4
)

# Rows belonging to the years the user selected.
df_filtered_years_choose <- filter(
  df_with_anomalies,
  as.numeric(year) %in% list_year_chose
)

# Axis breaks: the January label of the years closest to the min, quartiles
# and max of the selected years.
quartiles <- quantile(
  as.numeric(unique(df_filtered_years_choose$year)),
  probs = seq(0, 1, by = 0.25)
)
selected_years <- unique(round(quartiles))

filtered_break <- filter(
  df_filtered_years_choose,
  grepl("_Jan", date),
  as.numeric(year) %in% selected_years
)

# Temperature in degrees Celsius (assumes mean_value is in kelvin — TODO
# confirm against the input variable).
# NOTE(review): this table is built from ALL processed years, not only the
# selected ones, unlike df_filtered_years_choose — confirm this is intended.
df_with_anomalies_Celsius <- df_with_anomalies
df_with_anomalies_Celsius[["mean_temperature_Celsius"]] <-
  df_with_anomalies_Celsius[["mean_value"]] - 273.15

# ==============================================================================
# 7. PLOT RESULTS ----
# ==============================================================================

# Bar chart of monthly anomalies for the selected years, built when the
# selection is "AT" or "AT_ATa". Bars are blue for anomalies <= 0 and red for
# anomalies > 0.
if (chose_list == "AT" || chose_list == "AT_ATa") {
  anomalies_temperature <- ggplot(
    df_filtered_years_choose,
    aes(x = date, y = anomaly, fill = anomaly > 0)
  ) +
    geom_col(width = 0.6) +
    # Name the colours by level: with unnamed values, a series containing only
    # positive anomalies would get the first (blue) colour for TRUE.
    scale_fill_manual(values = c("FALSE" = "#327cb5", "TRUE" = "#bf0404")) +
    scale_x_discrete(
      breaks = filtered_break$date,
      labels = substr(filtered_break$date, 1, 4)  # Display only the year labels
    ) +
    scale_y_continuous(
      breaks = c(-1, 0, 1),      # Positions of the y-axis tick marks
      labels = c(-1, 0, 1)       # Labels shown at those tick marks
    ) +
    labs(x = "Year", y = "Anomaly") +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.title.x = element_text(size = 10),
      axis.title.y = element_text(size = 8),
      axis.text.x = element_text(size = 8),
      axis.text.y = element_text(size = 8)
    )
}

# Line chart of the monthly mean air temperature in degrees Celsius, built
# when the selection is "ATa" or "AT_ATa".
# NOTE(review): the data used here covers every processed year, while the axis
# breaks (and the anomaly panel) only cover the selected years — confirm that
# this difference is intended.
if (chose_list == "ATa" || chose_list == "AT_ATa") {
  temperature_plot <- ggplot(
    df_with_anomalies_Celsius,
    aes(x = date, y = mean_temperature_Celsius, group = 1)
  ) +
    # `linewidth` replaces `size` for lines (ggplot2 >= 3.4.0); `size` on
    # geom_line() triggers a deprecation warning there.
    geom_line(color = "#327cb5", linewidth = 0.5) +
    geom_point(color = "#327cb5", size = 1) +
    scale_x_discrete(
      breaks = filtered_break$date,
      labels = substr(filtered_break$date, 1, 4)  # Display only the year labels
    ) +
    labs(x = "Year", y = "Air Temp (°C)") +
    theme_minimal() +
    theme(
      legend.position = "none",
      axis.title.x = element_text(size = 10),
      axis.title.y = element_text(size = 10),
      axis.text.x = element_text(size = 8),
      axis.text.y = element_text(size = 8)
    )
}

# Write the requested figure(s) as PNG files under outputs/collection.
# The directory is created when missing so ggsave() cannot fail on it.
dir.create("outputs/collection", recursive = TRUE, showWarnings = FALSE)

if (chose_list == "AT_ATa") {
  # Combined figure: temperature on top without its x axis, anomalies below.
  temperature_plot <- temperature_plot +
    theme(axis.text.x = element_blank(), axis.title.x = element_blank())

  # arrangeGrob() builds the same layout as grid.arrange() without drawing it
  # on the current device, which avoids a stray Rplots.pdf in batch runs.
  combined_plot <- arrangeGrob(temperature_plot, anomalies_temperature, nrow = 2)
  ggsave("outputs/collection/Fig8.png", plot = combined_plot, width = 12, height = 6, units = "in", bg = "white")
} else if (chose_list == "AT") {
  ggsave("outputs/collection/Fig8_AT.png", plot = anomalies_temperature, width = 12, height = 3, units = "in", bg = "white")
} else if (chose_list == "ATa") {
  ggsave("outputs/collection/Fig8_ATa.png", plot = temperature_plot, width = 12, height = 3, units = "in", bg = "white")
}

