In [None]:
# Cell 1: Setup and Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from scipy.linalg import cholesky, solve_triangular
from scipy.special import kv, gamma
import seaborn as sns


: 

In [None]:
# Cell 2: Read a Single CSV File
# The input path can be overridden without editing the notebook by setting the
# GP_CSV_FILE environment variable; the original location remains the default.
csv_file = os.environ.get("GP_CSV_FILE", "/home/blazar/Codes/GP/Data/feb15.csv")

# Read file into a DataFrame
data = pd.read_csv(csv_file)
print(f"Loaded {len(data)} data points from: {csv_file}")

# Display first few rows
print("First few rows of raw data:")
print(data.head())


In [None]:
# Cell 3: Preprocess Data: Alias Latitude/Longitude and Depth Columns

# Each derived column used by the GP is a plain copy of a raw column; no
# conversion is applied. Latitude is stored in Y_coord, Longitude in X_coord,
# and the sonar depth reading in Depth_m.
derived_columns = {
    "Y_coord": "Latitude",
    "X_coord": "Longitude",
    "Depth_m": "Depth (Sonar)",
}
for new_name, raw_name in derived_columns.items():
    data[new_name] = data[raw_name]

# Show the raw coordinates next to their derived copies as a quick sanity check
preview_columns = ["Latitude", "Longitude", "X_coord", "Y_coord", "Depth_m"]
print("Sample processed data:")
print(data[preview_columns].head())


In [None]:
# Cell 4: Subsample Data and Define Feature/Target Arrays

max_points_per_file = 20000
n = len(data)

# The subsample is used below to index NumPy arrays, and that indexing is
# positional. Sampling from a frame whose index has been reset to 0..n-1 makes
# the drawn labels equal to row positions even if `data` ever carries a
# non-default index (e.g. after filtering rows or reading with index_col).
# With the default RangeIndex produced by read_csv, labels and positions
# already coincide, so the selected rows are unchanged.
sampled_indices = (
    data.reset_index(drop=True)
    .sample(min(n, max_points_per_file), random_state=42)
    .index
)
# Keep the training rows in their original file order.
sampled_indices = sorted(sampled_indices)

# Define target variable (Temperature)
target_var = "Temperature (°C)"
y = data[target_var].values

# Define the 3D feature matrix for GP: [X_coord, Y_coord, Depth_m]
X_features = data[["X_coord", "Y_coord", "Depth_m"]].values

# Create training set (for GP)
X_train = X_features[sampled_indices]
y_train = y[sampled_indices]

print(f"Total data points: {len(data)}, Training subset size: {X_train.shape[0]}")
print("Example training point (3D):", X_train[0])


In [None]:
# Cell 5: Define Hyperparameters and Nonstationary Matérn Kernel

# Hand-set kernel hyperparameters; nothing in this cell optimises them.
nu = 1.5                         # Matérn smoothness parameter
sigma_f = np.std(y_train)        # signal scale, taken from the spread of the training targets
sigma_n = 0.1                    # observation-noise standard deviation
base_lengthscale_space = 0.0001  # base spatial length-scale (in degrees, if lat/lon remain in degrees)
lengthscale_depth = 2            # length-scale along the depth axis (m)

# Single print call; sep="\n" puts each summary on its own line.
print(
    "\nHyperparameters:",
    f"nu = {nu}, sigma_f = {sigma_f:.2f}, sigma_n = {sigma_n}",
    f"base_lengthscale_space = {base_lengthscale_space}, lengthscale_depth = {lengthscale_depth}",
    sep="\n",
)

def Sigma_matrix(x, rho_xy=0.2, rho_xz=0.1, rho_yz=0.15):
    """
    Build the 3x3 local matrix Σ(x) for a point x = (X_coord, Y_coord, Depth_m).

    Only the third component of ``x`` is read. The two horizontal scales are
    equal and shrink exponentially with that component
    (``base_lengthscale_space * exp(-0.1 * x[2])``); the third scale is the
    constant ``lengthscale_depth``. Off-diagonal entries are
    ``rho_ab * sigma_a * sigma_b``.

    Parameters
    ----------
    x : sequence of length >= 3
        Input location; ``x[2]`` is the depth coordinate.
    rho_xy, rho_xz, rho_yz : float
        Pairwise correlation coefficients between the three axes.

    Returns
    -------
    numpy.ndarray
        Symmetric 3x3 array.
    """
    depth = x[2]
    sigma_x = base_lengthscale_space * np.exp(-0.1 * depth)
    sigma_y = sigma_x  # isotropic in the horizontal plane
    sigma_z = lengthscale_depth

    # Each off-diagonal term is shared by two symmetric entries.
    cov_xy = rho_xy * sigma_x * sigma_y
    cov_xz = rho_xz * sigma_x * sigma_z
    cov_yz = rho_yz * sigma_y * sigma_z

    row_x = [sigma_x**2, cov_xy, cov_xz]
    row_y = [cov_xy, sigma_y**2, cov_yz]
    row_z = [cov_xz, cov_yz, sigma_z**2]
    return np.array([row_x, row_y, row_z])

# Example usage: inspect Σ(x) and its eigendecomposition at a single location.
sample_x = [0, 0, 5]  # for a point at depth 5
Sigma_ex = Sigma_matrix(sample_x)
# eigh is used because Σ(x) is symmetric.
eigenvalues, eigenvectors = np.linalg.eigh(Sigma_ex)
for heading, values in (
    ("Full Covariance Matrix Σ(x):\n", Sigma_ex),
    ("Eigenvalues:\n", eigenvalues),
    ("Eigenvectors:\n", eigenvectors),
):
    print(heading, values)


def matern_covariance(x, x_prime, nu=nu, sigma_f=sigma_f):
    """
    Computes the Nonstationary Matérn covariance between x and x_prime.
    Adjusted for a 3D input vector (X_coord, Y_coord, Depth_m).

    The returned value is

        sigma_f^2 * |Σ_i|^(1/4) * |Σ_j|^(1/4) / |M|^(1/2)
                  * (sqrt(2 nu Q))^nu * K_nu(sqrt(2 nu Q)) / (Γ(nu) * 2^(nu - 1))

    where Σ_i = Sigma_matrix(x), Σ_j = Sigma_matrix(x_prime),
    M = (Σ_i + Σ_j) / 2 and Q = (x - x_prime)^T M^{-1} (x - x_prime).

    Parameters
    ----------
    x, x_prime : array-like of length 3
        The two input locations.
    nu : float
        Matérn smoothness; defaults to the module-level ``nu`` captured when
        this function was defined.
    sigma_f : float
        Signal scale; defaults to the module-level ``sigma_f`` captured when
        this function was defined.

    Returns
    -------
    float
        The covariance. When Q < 1e-12 (effectively coincident points) the
        limit value ``sigma_f**2`` is returned directly.

    Raises
    ------
    ValueError
        If Σ_i, Σ_j or M has a non-positive determinant, in which case the
        prefactor is undefined.
    """
    Σ_i = Sigma_matrix(x)        # 3x3 matrix
    Σ_j = Sigma_matrix(x_prime)  # 3x3 matrix
    M = (Σ_i + Σ_j) / 2.0        # averaged 3x3 matrix, computed once and reused
    diff = np.asarray(x, dtype=float) - np.asarray(x_prime, dtype=float)

    # Mahalanobis-like distance Q_ij; fall back to the pseudo-inverse only
    # when M is exactly singular.
    try:
        v = np.linalg.solve(M, diff)
    except np.linalg.LinAlgError:
        v = np.linalg.pinv(M).dot(diff)
    Q_ij = float(diff.dot(v))

    # Coincident points: return the limit before doing any determinant work
    # (this is the path taken for every diagonal entry of the kernel matrix).
    if Q_ij < 1e-12:
        return sigma_f**2

    # Work with log-determinants: with very small spatial length-scales the raw
    # determinants can underflow to 0 (or go slightly negative through
    # round-off), which would make the fractional powers 0/0 or NaN.
    sign_i, logdet_i = np.linalg.slogdet(Σ_i)
    sign_j, logdet_j = np.linalg.slogdet(Σ_j)
    sign_m, logdet_m = np.linalg.slogdet(M)
    if sign_i <= 0 or sign_j <= 0 or sign_m <= 0:
        raise ValueError(
            "Sigma_matrix produced a matrix with non-positive determinant; "
            "the nonstationary prefactor is undefined."
        )
    prefactor = np.exp(0.25 * (logdet_i + logdet_j) - 0.5 * logdet_m)

    arg = np.sqrt(2 * nu * Q_ij)
    matern_part = (arg**nu) * kv(nu, arg)
    norm_const = 1.0 / (gamma(nu) * 2**(nu - 1))

    return sigma_f**2 * prefactor * norm_const * matern_part

print("\nNonstationary Matérn kernel defined.")


In [None]:
# Cell 6: Compute Kernel Matrix and Cholesky Decomposition

N = X_train.shape[0]
print(f"\nComputing kernel matrix for {N} training points...")
K = np.zeros((N, N))
# Only the upper triangle (j >= i) is evaluated; each value is mirrored into
# the lower triangle so K is symmetric by construction.
for i in range(N):
    for j in range(i, N):
        cov_ij = matern_covariance(X_train[i], X_train[j])
        K[i, j] = cov_ij
        K[j, i] = cov_ij
    if i % 50 == 0:
        print(f"Processed {i}/{N} rows")

print("Kernel matrix computed. Shape:", K.shape)

# Add the noise variance in place on the diagonal only. Building
# sigma_n**2 * np.eye(N) would allocate additional dense N x N temporaries
# just to add a constant to N entries.
K[np.diag_indices_from(K)] += sigma_n**2
print("Noise variance added to the diagonal of K.")

print("Performing Cholesky decomposition...")
# Lower-triangular factor with K = L @ L.T; reused for the weights and variance.
L = cholesky(K, lower=True)
print("Cholesky decomposition complete.")


In [None]:
# Cell 7: Compute GP Weights and Predictions

print("Solving for GP weights (α)...")
# α solves (L Lᵀ) α = y: forward-substitute through L, then back-substitute
# through Lᵀ.
z = solve_triangular(L, y_train, lower=True)
alpha = solve_triangular(L.T, z, lower=False)
print("GP weights computed.")

# 50 x 50 horizontal grid spanning the data extent, evaluated at one fixed depth
test_depth = 1.0
grid_x = np.linspace(data["X_coord"].min(), data["X_coord"].max(), 50)
grid_y = np.linspace(data["Y_coord"].min(), data["Y_coord"].max(), 50)

# Rows of X_test are [X_coord, Y_coord, Depth_m]. With indexing="ij" and a
# row-major ravel, X_coord varies slowest and Y_coord fastest.
mesh_x, mesh_y = np.meshgrid(grid_x, grid_y, indexing="ij")
X_test = np.column_stack(
    [mesh_x.ravel(), mesh_y.ravel(), np.full(mesh_x.size, test_depth)]
)
print(f"Generated {X_test.shape[0]} test points for prediction.")

print("Computing cross-covariance K_star...")
# K_star[r, c] = k(test point r, training point c)
K_star = np.empty((X_test.shape[0], X_train.shape[0]))
for r, x_star in enumerate(X_test):
    for c, x_obs in enumerate(X_train):
        K_star[r, c] = matern_covariance(x_star, x_obs)
print("Cross-covariance computed.")

# Posterior mean at the test points
mu_pred = K_star @ alpha
print("Mean predictions computed.")


In [None]:
# Cell 8: Visualization - GP Mean Prediction and Uncertainty


def _show_field(field_grid, cmap, colorbar_label, title, train_label=None):
    """
    Draw one filled-contour map over (grid_x, grid_y) with the training
    locations overlaid, show it, and return the contour set.

    A legend is added only when ``train_label`` is given.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    contours = ax.contourf(grid_x, grid_y, field_grid, cmap=cmap, levels=15)
    fig.colorbar(contours, ax=ax, label=colorbar_label)

    marker_style = {"c": 'white', "s": 5, "edgecolors": 'k'}
    if train_label is not None:
        marker_style["label"] = train_label
    ax.scatter(X_train[:, 0], X_train[:, 1], **marker_style)

    ax.set_title(title)
    ax.set_xlabel("X_coord (Longitude)")
    ax.set_ylabel("Y_coord (Latitude)")
    if train_label is not None:
        ax.legend(loc='upper right')
    fig.tight_layout()
    plt.show()
    return contours


# Predictions are ordered with X_coord as the slow axis; transposing puts
# Y_coord on rows, which is the layout contourf expects for (grid_x, grid_y).
Mu_grid = mu_pred.reshape(len(grid_x), len(grid_y)).T
cs = _show_field(
    Mu_grid,
    "viridis",
    "Predicted Temperature (°C)",
    "GP Mean Prediction (Temperature)",
    train_label="Training points",
)
print("GP Mean Prediction plot displayed.")

# Compute predictive variance: prior variance minus the part explained by the
# training data, using the Cholesky factor L.
print("Computing predictive variance...")
v = solve_triangular(L, K_star.T, lower=True)
K_ss_diag = np.array([matern_covariance(x, x) for x in X_test])
# Clip at a small positive floor so round-off cannot yield a negative variance.
var_pred = np.maximum(K_ss_diag - np.sum(v**2, axis=0), 1e-10)
std_pred = np.sqrt(var_pred)

Std_grid = std_pred.reshape(len(grid_x), len(grid_y)).T
cs2 = _show_field(
    Std_grid,
    "plasma",
    "Predictive Standard Deviation (°C)",
    "GP Uncertainty (Standard Deviation) - Temperature",
)
print("GP Uncertainty plot displayed.")


In [None]:
# Cell 9: Additional Analysis

# Plot the pairwise correlation matrix of the three GP input variables.
corr_vars = ["Latitude", "Longitude", "Depth_m"]
corr_matrix = data[corr_vars].corr()

plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
# The title names exactly the variables in corr_vars; no time column is part
# of this matrix.
plt.title("Correlation Matrix: Latitude, Longitude, Depth")
plt.show()
print("Correlation heatmap displayed.")

# Report the extent of the data along each input axis.
print("Latitude range:", data["Latitude"].min(), data["Latitude"].max())
print("Longitude range:", data["Longitude"].min(), data["Longitude"].max())
print("Depth range:", data["Depth (Sonar)"].min(), data["Depth (Sonar)"].max())


In [None]:
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import Matern, ConstantKernel as C
import numpy as np

# Create a 1D grid over which to define our "eigenvalue process"
# NOTE: this cell rebinds the module-level names X and K; K previously held the
# training kernel matrix built in Cell 6.
X = np.linspace(0, 10, 100)[:, None]

# Define a Matérn kernel (you might fix nu=2.5 for smoothness, as in the paper they use a very smooth GP for eigenprocesses)
kernel = C(1.0, (1e-3, 1e3)) * Matern(length_scale=1.0, nu=2.5)

# Compute the covariance matrix for our grid
K = kernel(X)

# Draw samples from the GP prior (mean is zero here).
# A seeded Generator is used so that the figure is identical on every
# Restart & Run All instead of depending on the global NumPy RNG state.
n_samples = 3
rng = np.random.default_rng(42)
samples = rng.multivariate_normal(np.zeros(X.shape[0]), K, n_samples)

# One curve per prior draw
plt.figure(figsize=(8, 6))
for i in range(n_samples):
    plt.plot(X, samples[i], label=f'Sample {i+1}')
plt.title('Samples from a GP Prior for an Eigenvalue Process')
plt.xlabel('Input (e.g., spatial location)')
plt.ylabel('Log Eigenvalue')
plt.legend()
plt.show()
