In [106]:
library(ncdf4)

# Open the SINMOD feature file; the handle is reused by the cells below
nc_data <- nc_open(
  filename = "/cluster/home/haroldh/spGDMM/1_data/4_interim/SINMOD_features.nc"
)

# Names of all data variables stored in the file
var_names <- names(nc_data[["var"]])

# Echo the variable names for a quick visual check
var_names

In [107]:
# Flatten every netCDF variable into a (grid cell x statistic) table and bind
# the tables side by side into one data frame of environmental predictors.

# Labels of the 'stat' dimension. They do not depend on the variable being
# processed, so they are looked up once instead of on every iteration.
stat_vals <- nc_data$dim$stat$vals  # Adjust if the dimension name isn't 'stat'

midnor_env_predictors <- lapply(var_names, function(var) {
  # Extract data for the variable; expected shape: (x, y, stat)
  var_data <- ncvar_get(nc_data, var)
  var_dims <- dim(var_data)

  # Fail with a readable message instead of a cryptic `dim<-` error when a
  # variable does not have the expected three dimensions.
  if (length(var_dims) != 3L) {
    stop(
      "Variable '", var, "' has ", length(var_dims),
      " dimension(s); expected 3 (x, y, stat).",
      call. = FALSE
    )
  }

  # Collapse the two spatial dimensions: (x, y, stat) -> (grid cells, stat)
  n_stats <- var_dims[3]
  dim(var_data) <- c(prod(var_dims[1:2]), n_stats)

  # Use the stored statistic labels when available, generic names otherwise
  stat_labels <- stat_vals
  if (is.null(stat_labels)) {
    stat_labels <- paste0("stat", seq_len(n_stats))
  } else if (length(stat_labels) != n_stats) {
    stop(
      "Variable '", var, "' has ", n_stats, " statistic column(s) but the ",
      "'stat' dimension provides ", length(stat_labels), " label(s).",
      call. = FALSE
    )
  }

  # Column names follow the pattern <variable>_<statistic>
  colnames(var_data) <- paste0(var, "_", stat_labels)

  as.data.frame(var_data)
})

# Combine all variables into a single data frame (one row per grid cell)
midnor_env_predictors <- do.call(cbind, midnor_env_predictors)

# Print column names to verify
print(colnames(midnor_env_predictors))

[1] "temperature_mean"            "temperature_10th_percentile"
[3] "temperature_90th_percentile" "salinity_mean"              
[5] "salinity_10th_percentile"    "salinity_90th_percentile"   


In [108]:
library(splines2)

# I-spline settings.
# `degree` is the polynomial degree wanted for the I-splines. splines2 defines
# the `degree` argument of iSpline() as the degree of the underlying M-spline,
# which is why `degree - 1` is passed below.
degree <- 3
df <- 4  # Degrees of freedom (including intercept)

# Compute the I-spline basis of every predictor column and bind the bases
# side by side: one block of basis columns per feature.
# NOTE(review): with `df` given and no explicit `knots`, the knot positions are
# derived from the values passed in here. If these bases are meant to match a
# model fitted on other data, confirm that the knots agree.
i_spline_matrix <- do.call(
  cbind,
  lapply(colnames(midnor_env_predictors), function(col) {
    # `skipna` was dropped: it is not a documented iSpline() argument and
    # would only be absorbed by `...`. Rows with missing predictor values
    # stay NA in the basis (see the printed matrix).
    spline_matrix <- iSpline(
      midnor_env_predictors[[col]],
      degree = degree - 1,
      df = df,
      intercept = TRUE
    )

    # Name the columns <feature>_I1, <feature>_I2, ... based on the number of
    # basis columns actually returned, so the names can never get out of step
    # with the matrix.
    colnames(spline_matrix) <- paste0(
      col, "_I", seq_len(ncol(spline_matrix))
    )

    spline_matrix
  })
)

# Print dimensions and column names for verification
print(dim(i_spline_matrix))  # Dimensions: (num rows, num features * df)
print(colnames(i_spline_matrix))  # Column names: feature_I1, feature_I2, ...

# Persist the basis matrix for the prediction step
write.csv(i_spline_matrix, "/cluster/home/haroldh/spGDMM/1_data/2_processed/prediction/X_GDM_predictors_bs.csv", row.names = FALSE)

[1] 525746     24
 [1] "temperature_mean_I1"            "temperature_mean_I2"           
 [3] "temperature_mean_I3"            "temperature_mean_I4"           
 [5] "temperature_10th_percentile_I1" "temperature_10th_percentile_I2"
 [7] "temperature_10th_percentile_I3" "temperature_10th_percentile_I4"
 [9] "temperature_90th_percentile_I1" "temperature_90th_percentile_I2"
[11] "temperature_90th_percentile_I3" "temperature_90th_percentile_I4"
[13] "salinity_mean_I1"               "salinity_mean_I2"              
[15] "salinity_mean_I3"               "salinity_mean_I4"              
[17] "salinity_10th_percentile_I1"    "salinity_10th_percentile_I2"   
[19] "salinity_10th_percentile_I3"    "salinity_10th_percentile_I4"   
[21] "salinity_90th_percentile_I1"    "salinity_90th_percentile_I2"   
[23] "salinity_90th_percentile_I3"    "salinity_90th_percentile_I4"   


In [100]:
# One iSpline() term per predictor column; backticks protect column names
# that are not syntactic R names.
spline_terms <- paste0(
  "iSpline(`", colnames(midnor_env_predictors), "`,degree=", degree - 1,
  ",df = ", df, " ,intercept = TRUE)"
)

# Intercept-free formula that sums all spline terms
formula_use <- as.formula(paste("~ 0 +", paste(spline_terms, collapse = "+")))

# Expand the formula into a design matrix.
# NOTE(review): the printed result keeps only some row numbers (39, 40, ...),
# which suggests rows with missing predictors are dropped here - confirm this
# is intended when comparing against `i_spline_matrix`.
I_spline_bases <- model.matrix(formula_use, data = midnor_env_predictors)

In [101]:
# Display the design matrix produced by model.matrix() for inspection
I_spline_bases

Unnamed: 0,"iSpline(temperature_mean, degree = 2, df = 5, intercept = TRUE)1","iSpline(temperature_mean, degree = 2, df = 5, intercept = TRUE)2","iSpline(temperature_mean, degree = 2, df = 5, intercept = TRUE)3","iSpline(temperature_mean, degree = 2, df = 5, intercept = TRUE)4","iSpline(temperature_mean, degree = 2, df = 5, intercept = TRUE)5","iSpline(temperature_10th_percentile, degree = 2, df = 5, intercept = TRUE)1","iSpline(temperature_10th_percentile, degree = 2, df = 5, intercept = TRUE)2","iSpline(temperature_10th_percentile, degree = 2, df = 5, intercept = TRUE)3","iSpline(temperature_10th_percentile, degree = 2, df = 5, intercept = TRUE)4","iSpline(temperature_10th_percentile, degree = 2, df = 5, intercept = TRUE)5",⋯,"iSpline(salinity_10th_percentile, degree = 2, df = 5, intercept = TRUE)1","iSpline(salinity_10th_percentile, degree = 2, df = 5, intercept = TRUE)2","iSpline(salinity_10th_percentile, degree = 2, df = 5, intercept = TRUE)3","iSpline(salinity_10th_percentile, degree = 2, df = 5, intercept = TRUE)4","iSpline(salinity_10th_percentile, degree = 2, df = 5, intercept = TRUE)5","iSpline(salinity_90th_percentile, degree = 2, df = 5, intercept = TRUE)1","iSpline(salinity_90th_percentile, degree = 2, df = 5, intercept = TRUE)2","iSpline(salinity_90th_percentile, degree = 2, df = 5, intercept = TRUE)3","iSpline(salinity_90th_percentile, degree = 2, df = 5, intercept = TRUE)4","iSpline(salinity_90th_percentile, degree = 2, df = 5, intercept = TRUE)5"
39,0.9656115,0.6880278,0.20492731,0.0,0.0,1.0,1.0,0.77957585,0.2311526,0.002874345,⋯,0.0,0.0,0.0,0.0,0.0,0.953278,0.6268075,0.1700104,0.0,0.0
40,0.9829495,0.7725188,0.27311903,0.0,0.0,1.0,0.8900487,0.227882763,0.002848352,0.0,⋯,0.91282246,0.456830661,0.07133584,0.0,0.0,0.9988171,0.9099205,0.4641375,0.0,0.0
49,1.0,0.9931353,0.74821239,0.004530651,0.0,1.0,1.0,0.689950859,0.1403906,5.869577e-05,⋯,0.99186878,0.748756522,0.2109549,0.0,0.0,0.9999522,0.9570646,0.5809348,0.0,0.0
50,1.0,1.0,0.99197388,0.70312015,0.11381181,1.0,1.0,0.971617114,0.7157915,0.1821181,⋯,0.98919275,0.726830081,0.1955022,0.0,0.0,0.9999953,0.9670611,0.6169602,0.0,0.0
58,1.0,1.0,0.96031516,0.349230677,0.001814837,1.0,1.0,0.919085641,0.5041505,0.05741856,⋯,1.0,0.956133541,0.4999063,0.0013332413,0.0,1.0,0.9987158,0.8813759,0.105348,0.0
59,1.0,1.0,0.966975,0.397412111,0.005279819,1.0,1.0,0.960784171,0.6601275,0.138621,⋯,0.99999624,0.913191867,0.3946979,0.0,0.0,1.0,0.9922037,0.7737717,0.01467059,0.0
60,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,⋯,0.99850225,0.835458977,0.2873321,0.0,0.0,1.0,0.9729573,0.6422071,0.0,0.0
66,1.0,1.0,0.9907396,0.678962152,0.096983397,1.0,0.9848981,0.434208928,0.02887396,0.0,⋯,1.0,1.0,0.927883,0.3506301206,0.00114569,1.0,1.0,0.9999799,0.9919084,0.755402964
67,1.0,1.0,0.99717733,0.83695052,0.257514485,1.0,0.9988595,0.563911427,0.06808823,0.0,⋯,1.0,0.999410659,0.8245836,0.137375899,0.0,1.0,1.0,0.9947437,0.7440675,0.079126746
68,1.0,1.0,0.97038852,0.425575554,0.008442948,1.0,1.0,0.730258497,0.1759985,0.0005607251,⋯,1.0,0.977353623,0.5876137,0.009992208,0.0,1.0,0.9998724,0.9373781,0.2388721,0.0


In [102]:
# Display the per-feature iSpline() bases that were combined with cbind()
i_spline_matrix

temperature_mean_I1,temperature_mean_I2,temperature_mean_I3,temperature_mean_I4,temperature_mean_I5,temperature_10th_percentile_I1,temperature_10th_percentile_I2,temperature_10th_percentile_I3,temperature_10th_percentile_I4,temperature_10th_percentile_I5,⋯,salinity_10th_percentile_I1,salinity_10th_percentile_I2,salinity_10th_percentile_I3,salinity_10th_percentile_I4,salinity_10th_percentile_I5,salinity_90th_percentile_I1,salinity_90th_percentile_I2,salinity_90th_percentile_I3,salinity_90th_percentile_I4,salinity_90th_percentile_I5
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,
,,,,,,,,,,⋯,,,,,,,,,,


In [95]:
library(splines2)

# Toy predictor used to inspect the shape of the basis
feature <- c(1, 2, 3, 4, 5)

# I-spline basis with four columns, keeping the intercept basis
spline_matrix <- iSpline(
  x = feature,
  df = 4,
  degree = 2,
  intercept = TRUE
)

# Show the resulting basis matrix
print(spline_matrix)


         1       2       3     4
[1,] 0.000 0.00000 0.00000 0.000
[2,] 0.875 0.28125 0.03125 0.000
[3,] 1.000 0.75000 0.25000 0.000
[4,] 1.000 0.96875 0.71875 0.125
[5,] 1.000 1.00000 1.00000 1.000


In [96]:
library(splines2)

# Toy predictor used to inspect the shape of the basis
feature <- c(1, 2, 3, 4, 5)

# Same settings as the previous example, but with intercept = FALSE
spline_matrix <- iSpline(
  x = feature,
  df = 4,
  degree = 2,
  intercept = FALSE
)

# Show the resulting basis matrix
print(spline_matrix)


             1         2         3        4
[1,] 0.0000000 0.0000000 0.0000000 0.000000
[2,] 0.5273438 0.0703125 0.0000000 0.000000
[3,] 0.9687500 0.5000000 0.0312500 0.000000
[4,] 1.0000000 0.9296875 0.4726562 0.015625
[5,] 1.0000000 1.0000000 1.0000000 1.000000


In [97]:
# Echo the toy predictor vector used in the examples above
feature