# Converting the feature matrix to I-splines

In [1]:
library(ncdf4)

# Open the netCDF file holding the EOF feature fields
nc_data <- nc_open("/cluster/home/haroldh/spGDMM/1_data/4_interim/EOF_8tsuv_features.nc")

# Names of the data variables stored in the file. The extraction step below
# hands `var_name` straight to ncvar_get() as a single variable, so fail early
# with a clear message if the file does not contain exactly one variable.
var_name <- names(nc_data$var)
if (length(var_name) != 1L) {
  stop(
    "Expected exactly one variable in the netCDF file, found ",
    length(var_name), ": ", paste(var_name, collapse = ", "),
    call. = FALSE
  )
}

In [5]:
# Specify the time index to extract (remember that R indices are 1-based)
time_index <- 286

# Ensure the time index is within the range of the file's time dimension
time_dim_length <- nc_data$dim$time$len
if (time_index < 1 || time_index > time_dim_length) {
  stop("Time_index is out of range")
}

# Locate the time axis of the variable by name instead of assuming that it is
# the first axis of the array returned by ncvar_get().
var_dim_names <- vapply(
  nc_data$var[[var_name]]$dim,
  function(d) d$name,
  character(1)
)
time_axis <- match("time", var_dim_names)
if (is.na(time_axis)) {
  stop("Variable '", var_name, "' has no 'time' dimension")
}

# Read only the requested time slice. Loading the whole variable and indexing
# it afterwards would pull every time step into memory just to keep one.
n_dims <- length(var_dim_names)
start <- rep(1, n_dims)
start[time_axis] <- time_index
count <- rep(-1, n_dims)  # -1 means "read the full extent of this dimension"
count[time_axis] <- 1

# ncvar_get() drops length-1 dimensions by default, so the time axis is removed
# from the result. Expected shape: x, y, mode
data <- ncvar_get(nc_data, var_name, start = start, count = count)
if (length(dim(data)) != 3L) {
  stop("Expected a 3-D (x, y, mode) slice after selecting the time step")
}

# Reshape to (grid cells, EOF mode)
dim(data) <- c(prod(dim(data)[1:2]), dim(data)[3])

# Coordinate values of the 'mode' dimension (NULL if no such dimension exists)
modes <- nc_data$dim$mode$vals

# Name each column after its mode value: mode_1, mode_2, ...
colnames(data) <- paste0("mode_", modes)

# Convert to a data frame
eof_predictors <- as.data.frame(data)

# Replace exact zeros with NaN
# NOTE(review): presumably 0 marks masked / no-data grid cells -- confirm
eof_predictors[eof_predictors == 0] <- NaN

# Print column names to verify
print(colnames(eof_predictors))

[1] "mode_1" "mode_2" "mode_3" "mode_4" "mode_5" "mode_6" "mode_7" "mode_8"


In [9]:
library(splines2)

# Spline settings.
# `degree` is the intended polynomial degree of the I-spline basis. Per the
# splines2 documentation, iSpline()'s `degree` argument is the degree of the
# associated M-spline (one lower), hence `degree - 1` in the call below.
degree <- 3
df <- 5  # Degrees of freedom = number of basis columns per feature

# Compute the I-spline basis of every feature and bind the bases column-wise
i_spline_matrix <- do.call(
  cbind,
  lapply(colnames(eof_predictors), function(col) {

    # Missing values in the feature need no extra argument: the splines2
    # documentation states they are allowed and returned as they are.
    # (iSpline() has no `skipna` parameter; it was silently ignored via `...`.)
    spline_matrix <- iSpline(
      eof_predictors[[col]],
      degree = degree - 1,
      df = df,
      intercept = TRUE
    )

    # Name the bases <feature>_I1, <feature>_I2, ... using the number of
    # columns actually returned rather than assuming it equals `df`.
    colnames(spline_matrix) <- paste0(
      col, "_I", seq_len(ncol(spline_matrix))
    )

    spline_matrix
  })
)

# Print dimensions and column names for verification
print(dim(i_spline_matrix))  # Dimensions: (num rows, num features * df)
print(colnames(i_spline_matrix))  # Column names: mode_1_I1, mode_1_I2, ...

[1] 527250     40
 [1] "mode_1_I1" "mode_1_I2" "mode_1_I3" "mode_1_I4" "mode_1_I5" "mode_2_I1"
 [7] "mode_2_I2" "mode_2_I3" "mode_2_I4" "mode_2_I5" "mode_3_I1" "mode_3_I2"
[13] "mode_3_I3" "mode_3_I4" "mode_3_I5" "mode_4_I1" "mode_4_I2" "mode_4_I3"
[19] "mode_4_I4" "mode_4_I5" "mode_5_I1" "mode_5_I2" "mode_5_I3" "mode_5_I4"
[25] "mode_5_I5" "mode_6_I1" "mode_6_I2" "mode_6_I3" "mode_6_I4" "mode_6_I5"
[31] "mode_7_I1" "mode_7_I2" "mode_7_I3" "mode_7_I4" "mode_7_I5" "mode_8_I1"
[37] "mode_8_I2" "mode_8_I3" "mode_8_I4" "mode_8_I5"


In [None]:
# Save the I-spline predictor matrix as CSV, without a row-name column
write.csv(
  i_spline_matrix,
  file = "/cluster/home/haroldh/spGDMM/1_data/2_processed/prediction/EOF_8tsuv_predictors.csv",
  row.names = FALSE
)

# Examining the effect of the `intercept` argument of `iSpline()`

In [95]:
library(splines2)

# Toy feature: the values 1 to 5 (as doubles)
feature <- as.numeric(1:5)

# I-spline basis with the intercept term included
spline_matrix <- iSpline(
  feature,
  df = 4,
  degree = 2,
  intercept = TRUE
)

# Show the resulting basis matrix (one row per value, one column per basis)
print(spline_matrix)


         1       2       3     4
[1,] 0.000 0.00000 0.00000 0.000
[2,] 0.875 0.28125 0.03125 0.000
[3,] 1.000 0.75000 0.25000 0.000
[4,] 1.000 0.96875 0.71875 0.125
[5,] 1.000 1.00000 1.00000 1.000


In [96]:
library(splines2)

# Toy feature: the values 1 to 5 (as doubles)
feature <- as.numeric(1:5)

# I-spline basis with the intercept term excluded, for comparison with the
# intercept = TRUE case
spline_matrix <- iSpline(
  feature,
  df = 4,
  degree = 2,
  intercept = FALSE
)

# Show the resulting basis matrix (one row per value, one column per basis)
print(spline_matrix)


             1         2         3        4
[1,] 0.0000000 0.0000000 0.0000000 0.000000
[2,] 0.5273438 0.0703125 0.0000000 0.000000
[3,] 0.9687500 0.5000000 0.0312500 0.000000
[4,] 1.0000000 0.9296875 0.4726562 0.015625
[5,] 1.0000000 1.0000000 1.0000000 1.000000
