In [4]:
# cell 1
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel, RBF
from sklearn.preprocessing import StandardScaler

import pyproj

# For interactive display (works in Jupyter)
import ipywidgets as widgets
from IPython.display import display, Image, clear_output

# ── GPU imports ─────────────────────────────────────────────────────────
import torch, gpytorch
from gpytorch.models import ExactGP
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood
from gpytorch.kernels import ScaleKernel, RBFKernel, MaternKernel
# Use the CUDA device when PyTorch reports one as available, otherwise the CPU.
# Every tensor and module created by the GP helpers below is placed on `device`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Running on:", device)


import os, re, pathlib
# Master image directory: the root under which generated figures are saved.
# It is the single place to edit if the project folder moves.
# NOTE(review): absolute, machine-specific Windows path — adjust before running
# this notebook on another machine.
IMAGES_ROOT = r"C:\ASU\Semester 2\space robotics and ai\codeyy\GP\images"

def sanitize(txt: str) -> str:
    """Turn *txt* into a token that is safe to use as a folder or file name.

    The argument is first converted with ``str()``. Every run of characters
    other than ASCII letters, digits or underscore is then collapsed into a
    single underscore.
    """
    unsafe_run = re.compile(r'[^0-9A-Za-z_]+')
    return unsafe_run.sub('_', str(txt))


Running on: cuda


In [26]:
# cell 2
def load_data(filename, sep=','):
    """
    Read a delimited text file into a pandas DataFrame.

    Parameters
    ----------
    filename : path or file-like object handed to ``pd.read_csv``.
    sep      : column delimiter (default ``','``).

    On success the detected column names are printed and the DataFrame is
    returned. Any exception raised while reading or reporting is caught,
    printed as an error message, and ``None`` is returned instead, so callers
    can skip unreadable files.
    """
    frame = None
    try:
        loaded = pd.read_csv(filename, sep=sep)
        print(f"{filename} - Detected columns:", loaded.columns.tolist())
        frame = loaded
    except Exception as e:
        print(f"Error reading {filename}: {e}")
    return frame


In [27]:
# cell 3
def preprocess_data(data, variable_name, utm_epsg="EPSG:32612"):
    """
    Prepare one variable of a survey DataFrame for GP regression.

    Steps:
      - Check that the columns ['Latitude', 'Longitude', variable_name] exist.
      - Convert them to numeric on a private copy (unparseable values become
        NaN); the caller's DataFrame is left untouched.
      - Drop rows with NaN in any of those columns.
      - Project WGS84 longitude/latitude (EPSG:4326) to the CRS given by
        ``utm_epsg``.
      - Standardise the projected coordinates with StandardScaler.

    Parameters
    ----------
    data          : DataFrame holding at least the three required columns.
    variable_name : name of the column to model.
    utm_epsg      : target projected CRS; defaults to UTM zone 12N
                    ("EPSG:32612"), the value that was previously hard-coded.

    Returns
    -------
    (X_scaled, y, scaler)
        X_scaled : (n, 2) array of standardised coordinates, columns ordered
                   [northing, easting].
        y        : (n,) array with the values of ``variable_name``.
        scaler   : the fitted StandardScaler.
        When no valid row remains, returns (empty (0, 2) array, empty array,
        None).

    Raises
    ------
    ValueError if a required column is missing.
    """
    required_columns = ['Latitude', 'Longitude', variable_name]
    for col in required_columns:
        if col not in data.columns:
            raise ValueError(f"Missing required column: {col}")

    # De-duplicate while keeping order, in case variable_name names a coordinate.
    unique_cols = list(dict.fromkeys(required_columns))

    # Work on a copy: the same DataFrame is passed in once per variable, so
    # coercing its columns in place would silently rewrite the caller's data.
    working = data[unique_cols].copy()
    for col in unique_cols:
        working[col] = pd.to_numeric(working[col], errors='coerce')

    valid_data = working.dropna(subset=unique_cols)
    if len(valid_data) == 0:
        return np.empty((0, 2)), np.array([]), None

    # always_xy=True fixes the axis order to (lon, lat) in and (easting, northing) out.
    transformer = pyproj.Transformer.from_crs(
        pyproj.CRS("EPSG:4326"), pyproj.CRS(utm_epsg), always_xy=True
    )
    easting, northing = transformer.transform(
        valid_data["Longitude"].values,
        valid_data["Latitude"].values,
    )

    # Keep the projected coordinates in local arrays instead of writing new
    # columns onto a dropna() result (avoids chained-assignment problems).
    X = np.column_stack([northing, easting])
    y = valid_data[variable_name].values

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    print(f"Processing {variable_name} - Valid Data Points: {len(X_scaled)}")
    return X_scaled, y, scaler


In [28]:
# cell 4
def create_grid(X_scaled, scaler, grid_size=150, padding=0.01):
    """
    Build a regular, padded prediction grid covering the extent of X_scaled.

    Column 0 of X_scaled supplies the first ("lat") axis and column 1 the
    second ("lon") axis. Each axis is widened on both sides by ``padding``
    times its range and sampled at ``grid_size`` evenly spaced values.
    ``scaler`` is accepted but not used here: the grid is produced directly in
    the coordinates of X_scaled.

    Returns (lat_mesh, lon_mesh, grid_points_scaled), where the meshes are
    (grid_size, grid_size) arrays and grid_points_scaled is a
    (grid_size**2, 2) array of [lat, lon] rows. Returns (None, None, None)
    when X_scaled is empty.
    """
    if len(X_scaled) == 0:
        return None, None, None

    def padded_axis(values):
        # Evenly spaced samples over [min - margin, max + margin].
        low, high = values.min(), values.max()
        margin = padding * (high - low)
        return np.linspace(low - margin, high + margin, grid_size)

    lat_grid = padded_axis(X_scaled[:, 0])
    lon_grid = padded_axis(X_scaled[:, 1])

    lon_mesh, lat_mesh = np.meshgrid(lon_grid, lat_grid)
    grid_points_scaled = np.column_stack((lat_mesh.ravel(), lon_mesh.ravel()))

    return lat_mesh, lon_mesh, grid_points_scaled


In [29]:
# cell 5*
# ── GPyTorch helpers for GPU‐accelerated GP fitting & prediction ──────────

class GPModel(ExactGP):
    """Exact GP with a constant mean and a scaled covariance kernel.

    ``kernel`` is wrapped in a ``ScaleKernel`` and moved to the module-level
    ``device``; the training data and likelihood are handed to ``ExactGP``.
    """

    def __init__(self, train_x, train_y, likelihood, kernel):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        scaled_kernel = ScaleKernel(kernel)
        self.covar_module = scaled_kernel.to(device)

    def forward(self, x):
        """Return the multivariate normal built from the mean and covariance at ``x``."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train_gp_torch(kernel, X, y, iters=80, lr=0.1):
    """
    Fit an exact GP on ``device`` by maximising the marginal log likelihood.

    Parameters
    ----------
    kernel : gpytorch kernel used as the base covariance. It is deep-copied
             before training, so the object passed in is never modified and
             can be reused for independent fits.
    X, y   : training inputs and targets; converted to float32 tensors on
             ``device``.
    iters  : number of Adam steps.
    lr     : Adam learning rate.

    Returns
    -------
    (model, lik) : the trained ``GPModel`` and its ``GaussianLikelihood``,
                   both left in training mode.
    """
    import copy  # local import so this cell does not depend on edits elsewhere

    X_t = torch.as_tensor(X, dtype=torch.float32, device=device)
    y_t = torch.as_tensor(y, dtype=torch.float32, device=device)

    # Adam updates the kernel's parameters in place. Callers keep long-lived
    # kernel objects and pass the same instance to many fits; without a copy,
    # each fit would start from the previous fit's hyperparameters.
    kernel = copy.deepcopy(kernel)

    lik = GaussianLikelihood().to(device)
    model = GPModel(X_t, y_t, lik, kernel).to(device)
    model.train()
    lik.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    mll = ExactMarginalLogLikelihood(lik, model)
    for _ in range(iters):
        optimizer.zero_grad()
        loss = -mll(model(X_t), y_t)  # negative marginal log likelihood
        loss.backward()
        optimizer.step()
    return model, lik

@torch.no_grad()
def predict_gp_torch(model, lik, pts, y_mean=0.0):
    """
    Evaluate a trained GP at ``pts`` and return NumPy results.

    ``model`` and ``lik`` are switched to eval mode. ``pts`` is converted to a
    float32 tensor on ``device``, and the prediction passed through the
    likelihood is computed under ``gpytorch.settings.fast_pred_var()``.

    Returns (mu, std): the predictive mean with ``y_mean`` added back, and the
    predictive standard deviation, both as CPU NumPy arrays.
    """
    model.eval()
    lik.eval()
    query = torch.as_tensor(pts, dtype=torch.float32, device=device)
    with gpytorch.settings.fast_pred_var():
        predictive = lik(model(query))
    mean_np = predictive.mean.cpu().numpy()
    mu = mean_np + y_mean
    std = predictive.stddev.cpu().numpy()
    return mu, std


In [30]:
# cell 6
def plot_mean_prediction(lat_mesh, lon_mesh, y_pred, variable_name, save_path=None):
    """
    Draw a filled-contour map of the predicted mean and optionally save it.

    Parameters
    ----------
    lat_mesh, lon_mesh : 2-D coordinate meshes (y axis and x axis).
    y_pred             : 2-D array of predicted values on the same mesh.
    variable_name      : used in the title and as the colour-bar label.
    save_path          : if given, the figure is written there at 600 dpi
                         before being shown.

    Prints a message and returns without plotting if any of the three arrays
    is None. The figure is always closed afterwards, so calling this function
    in a long loop does not accumulate open figures.

    NOTE(review): the axis labels are fixed to 'Longitude'/'Latitude'; the
    processing loop in this notebook passes meshes built from standardised
    projected coordinates — confirm that labelling is intended.
    """
    if lat_mesh is None or lon_mesh is None or y_pred is None:
        print("No valid grid for plotting.")
        return

    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        heatmap = ax.contourf(lon_mesh, lat_mesh, y_pred, levels=100, cmap='coolwarm')
        ax.set_xlabel('Longitude', fontsize=16)
        ax.set_ylabel('Latitude', fontsize=16)
        ax.set_title(f'Hotspot Map ({variable_name})', fontsize=20)
        cbar = fig.colorbar(heatmap, ax=ax)
        cbar.ax.tick_params(labelsize=14)
        cbar.set_label(variable_name, fontsize=16)
        if save_path:
            fig.savefig(save_path, dpi=600)
            print(f"Saved mean prediction image: {save_path}")
        plt.show()
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)


In [31]:
# cell 7
def plot_uncertainty(lat_mesh, lon_mesh, y_std, variable_name, save_path=None):
    """
    Draw a filled-contour map of the predictive standard deviation.

    Parameters
    ----------
    lat_mesh, lon_mesh : 2-D coordinate meshes (y axis and x axis).
    y_std              : 2-D array of standard deviations on the same mesh.
    variable_name      : used in the title.
    save_path          : if given, the figure is written there at 600 dpi
                         before being shown.

    Prints a message and returns without plotting if any of the three arrays
    is None. The figure is always closed afterwards, so calling this function
    in a long loop does not accumulate open figures.
    """
    if lat_mesh is None or lon_mesh is None or y_std is None:
        print("No valid grid for plotting.")
        return

    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        std_map = ax.contourf(lon_mesh, lat_mesh, y_std, levels=100, cmap='viridis')
        ax.set_xlabel('Longitude', fontsize=16)
        ax.set_ylabel('Latitude', fontsize=16)
        ax.set_title(f'Uncertainty Map ({variable_name})', fontsize=20)
        cbar = fig.colorbar(std_map, ax=ax)
        cbar.ax.tick_params(labelsize=14)
        cbar.set_label('Standard Deviation', fontsize=16)
        if save_path:
            fig.savefig(save_path, dpi=600)
            print(f"Saved uncertainty image: {save_path}")
        plt.show()
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)


In [32]:
# cell 8
def plot_covariance_matrix(cov_matrix, title="Covariance Matrix"):
    """
    Show ``cov_matrix`` as an image with a colour bar.

    Both axes are labelled 'Data Point Index'. Prints a message and returns
    without plotting when ``cov_matrix`` is None.
    """
    if cov_matrix is None:
        print("No covariance matrix to plot.")
        return

    plt.figure(figsize=(8, 6))
    image = plt.imshow(cov_matrix, interpolation='none', cmap='viridis')
    plt.title(title, fontsize=16)
    plt.xlabel('Data Point Index', fontsize=14)
    plt.ylabel('Data Point Index', fontsize=14)
    plt.colorbar(image, label='Covariance Value')
    plt.show()


In [None]:
# cell 9
# -- Prediction grid (passed to create_grid by the processing loop) ----------
grid_size = 200   # number of samples along each grid axis
padding = 0.08    # fraction of the data range added on every side

# -- Kernel / optimiser settings ---------------------------------------------
# NOTE(review): none of the values below are read by the GPyTorch cells shown
# in this notebook; presumably kept from an earlier scikit-learn version.
constant_value, constant_bounds = 1.0, (1e-3, 1e4)
length_scale, length_scale_bounds = 1.0, (1e-7, 1e5)
nu = 1.5
n_restarts_optimizer = 10
alpha = 1e-2
normalize_y = True

print("Hyperparameter configuration loaded.")


In [34]:
# cell 10
from gpytorch.kernels import RBFKernel, MaternKernel

# Candidate covariance functions keyed by the label used in log lines, plot
# titles and output folder names. "Exponential" is the Matern kernel with
# nu=0.5. Insertion order (RBF first, then increasing nu) is the order in
# which the kernels are fitted.
kernel_dict = {"RBF": RBFKernel()}
kernel_dict.update(
    (label, MaternKernel(nu=smoothness))
    for label, smoothness in (
        ("Exponential", 0.5),
        ("Matern_3/2", 1.5),
        ("Matern_5/2", 2.5),
    )
)


In [35]:
# cell 11
# Survey CSV files to process. The processing loop uses each file's basename
# without extension (e.g. "dec6") as the name of its output sub-folder.
# NOTE(review): absolute, machine-specific Windows paths — adjust before
# running this notebook elsewhere.
files = [
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\dec6.csv',
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\dec17.csv',
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\jan31.csv',
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\feb15.csv',
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\sep19.csv',
    r'C:\ASU\Semester 2\space robotics and ai\codeyy\GP\data\oct3.csv'
]
results_folder = IMAGES_ROOT            # root for all saved figures; IMAGES_ROOT is set in the first cell
os.makedirs(results_folder, exist_ok=True)  # create the root once; no error if it already exists


# Columns that the processing loop does NOT model: every other column of a
# loaded file is treated as a sensor variable and gets its own GP fit.
non_sensor_cols = [
    'Latitude','Longitude','Time (UTC)',
    'Depth (m)','CDOM (ppb)','Turbidity (NTU)'
]


In [None]:
# cell 12
import re

# For every survey file, every sensor variable and every kernel: fit a GP,
# predict on a padded grid, and save the mean and uncertainty maps under
#   <results_folder>/<file tag>/<variable>/<kernel>/
for file in files:
    df = load_data(file)
    if df is None:
        continue  # load_data has already printed the reason

    tag = os.path.splitext(os.path.basename(file))[0]
    date_safe = tag                       # dec6, feb15, …
    out_root = os.path.join(results_folder, date_safe)

    sensor_vars = [c for c in df.columns if c not in non_sensor_cols]
    for var in sensor_vars:
        Xs, y, scaler = preprocess_data(df, var)
        if y.size == 0:
            continue  # no valid numeric rows for this variable

        # Centre the targets for fitting; the mean is added back at prediction time.
        y_mean = y.mean()
        y_cent = y - y_mean

        lat_mesh, lon_mesh, grid = create_grid(Xs, scaler, grid_size, padding)
        if grid is None:
            continue

        # Depends only on the variable, so compute it once rather than per kernel.
        var_safe = sanitize(var)          # Temperature (°C) → Temperature_C_

        for name, kern in kernel_dict.items():
            print(f"▶ Processing {tag} / {var} / {name}")
            model, lik = train_gp_torch(kern.to(device), Xs, y_cent)
            mu, std = predict_gp_torch(model, lik, grid, y_mean)
            mu  = mu.reshape(lat_mesh.shape)
            std = std.reshape(lat_mesh.shape)

            kern_safe = sanitize(name)    # Matern_3/2      → Matern_3_2

            # Same location as results_folder/date_safe/var_safe/kern_safe,
            # built from the per-file root computed above.
            folder = os.path.join(out_root, var_safe, kern_safe)
            os.makedirs(folder, exist_ok=True)

            plot_mean_prediction(lat_mesh, lon_mesh, mu,
                                 f"{var}–{name}",
                                 os.path.join(folder, "mean.png"))

            plot_uncertainty(lat_mesh, lon_mesh, std,
                             f"{var}–{name}",
                             os.path.join(folder, "uncert.png"))



In [None]:
# cell 13
# ── Artificial‐Field Demo on GPU with Boustrophedon (Lawnmower) Sampling ──

# 1) Define the true field
def true_field(x, y):
    """
    Synthetic ground-truth field evaluated at coordinates (x, y).

    Sum of three parts: a sin(0.1*x) * cos(0.1*y) ripple, a linear trend
    0.05*x + 0.05*y, and a Gaussian bump of height 10 centred at (50, 50)
    with variance 200. Works element-wise on scalars or NumPy arrays.
    """
    ripple = np.sin(0.1 * x) * np.cos(0.1 * y)
    dist_sq = (x - 50)**2 + (y - 50)**2
    bump = 10 * np.exp(-dist_sq / (2 * 200))
    return ripple + 0.05 * x + 0.05 * y + bump

# 2) Build the high-res grid
# 200 x 200 evaluation grid over [0, 100] x [0, 100].
x_range = np.linspace(0, 100, 200)
y_range = np.linspace(0, 100, 200)
lon_mesh_art, lat_mesh_art = np.meshgrid(x_range, y_range)
# One [lat, lon] row per grid node, in the row-major order of the meshes.
coords_art = np.column_stack((lat_mesh_art.ravel(), lon_mesh_art.ravel()))
# true_field is element-wise, so it can be evaluated on the meshes directly.
Z_true = true_field(lon_mesh_art, lat_mesh_art)

# 3) Generate boustrophedon (lawnmower) samples
def generate_lawnmower_samples(x_min, x_max, y_min, y_max, dx, dy):
    """
    Return two arrays (sample_x, sample_y) tracing a boustrophedon pattern:
      - rows Δy apart from y_max down to y_min
      - points Δx apart along each row
      - even rows left→right, odd rows right→left

    Both arrays are 1-D float arrays of equal length, ordered along the path.
    If the requested rectangle yields no rows or no columns (for example
    x_min > x_max), two empty arrays are returned.

    Raises
    ------
    ValueError if dx or dy is not strictly positive.
    """
    if dx <= 0 or dy <= 0:
        raise ValueError("dx and dy must be positive")

    # The small epsilon keeps the far boundary when the span is an exact
    # multiple of the step.
    xs = np.arange(x_min, x_max + 1e-6, dx)
    ys = np.arange(y_max, y_min - 1e-6, -dy)
    if xs.size == 0 or ys.size == 0:
        return np.array([]), np.array([])

    # One copy of xs per row, then reverse every odd row to get the
    # back-and-forth sweep.
    rows = np.tile(xs, (ys.size, 1))
    rows[1::2] = xs[::-1]

    sample_x = rows.ravel()
    sample_y = np.repeat(ys, xs.size)
    return sample_x, sample_y

# pick your spacings
dx, dy = 5.0, 10.0
sample_x, sample_y = generate_lawnmower_samples(0, 100, 0, 100, dx, dy)
z_samples = true_field(sample_x, sample_y)  # ground truth at sample points

# 4) Scale sample coordinates
# Rows are [lat, lon], matching the column order of coords_art.
train_coords = np.column_stack((sample_y, sample_x))
scaler_art = StandardScaler()
train_coords_scaled = scaler_art.fit_transform(train_coords)

# 5) Scale full grid coordinates
# Reuse the scaler fitted on the samples so both live in the same space.
grid_coords_art_scaled = scaler_art.transform(coords_art)

# 6) GPU train & predict with each kernel
for name, kern in kernel_dict.items():
    print(f"▶ Artificial / {name}")
    model, lik = train_gp_torch(kern.to(device),
                                train_coords_scaled, z_samples)
    mu, std = predict_gp_torch(model, lik, grid_coords_art_scaled)
    mu  = mu.reshape(lat_mesh_art.shape)
    std = std.reshape(lat_mesh_art.shape)
    err = np.abs(Z_true - mu)

    # 7) Plot true field, prediction, and error
    # Explicit axes and mappables: with the implicit pyplot interface the
    # scatter overlay becomes the "current" mappable, so a bare plt.colorbar()
    # in the middle panel describes the black sample markers, not the GP mean.
    fig, (ax_true, ax_pred, ax_err) = plt.subplots(1, 3, figsize=(18, 5))

    # True Field
    true_map = ax_true.contourf(lon_mesh_art, lat_mesh_art, Z_true,
                                levels=100, cmap='coolwarm')
    ax_true.set_title("True Field")
    fig.colorbar(true_map, ax=ax_true)

    # GP Prediction (mean), with the sample locations overlaid
    pred_map = ax_pred.contourf(lon_mesh_art, lat_mesh_art, mu,
                                levels=100, cmap='coolwarm')
    ax_pred.scatter(sample_x, sample_y, c='k', s=20)
    ax_pred.set_title(f"GP Prediction ({name})")
    fig.colorbar(pred_map, ax=ax_pred)

    # Absolute Error
    err_map = ax_err.contourf(lon_mesh_art, lat_mesh_art, err,
                              levels=100, cmap='viridis')
    ax_err.set_title(f"Absolute Error ({name})")
    fig.colorbar(err_map, ax=ax_err)

    fig.suptitle(f"Artificial Field GP Regression with {name}", fontsize=16)
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()
    plt.close(fig)  # one figure per kernel; release it once shown
