In [9]:
# Configuration
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"

# --- Standard Libraries ---
import sys
import os
import json
import time
import copy
import cmath
import pickle
import logging
import argparse

# Path configuration: make the GEMS_TCO sources importable.
# Guarded so that re-running this cell does not append duplicate entries.
if gems_tco_path not in sys.path:
    sys.path.append(gems_tco_path)

# --- Third-Party Libraries ---
from pathlib import Path
from typing import Optional, List, Tuple, Dict, Any, Callable
from json import JSONEncoder

# Data manipulation and analysis
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
import typer

# Torch and Numerical Libraries
import torch
import torch.optim as optim
import torch.fft
import torch.nn.functional as F
from torch.nn import Parameter
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
import matplotlib.pyplot as plt 

# --- Custom (GEMS_TCO) Imports ---
import GEMS_TCO
from GEMS_TCO import kernels, kernels_new, kernels_reparam_space_time 
from GEMS_TCO import data_preprocess, data_preprocess as dmbh
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import load_data
from GEMS_TCO import alg_optimization, alg_opt_Encoder
from GEMS_TCO import configuration as config
from GEMS_TCO.data_loader import load_data2
from GEMS_TCO import debiased_whittle

Load monthly data

In [10]:
# Resolution is written as strings and converted to integers for the loader.
space: List[str] = ['2', '8']
lat_lon_resolution = list(map(int, space))
mm_cond_number: int = 20
years = ['2024']
month_range = [7]

# Input and output share the same directory object.
input_path = Path(config.mac_estimates_day_path)
output_path = input_path
data_load_instance = load_data2(config.mac_data_load_path)

# Load the selected year/month, restricted to the given lat/lon window.
df_map, ord_mm, nns_map = data_load_instance.load_maxmin_ordered_data_bymonthyear(
    lat_lon_resolution=lat_lon_resolution,
    mm_cond_number=mm_cond_number,
    years_=years,
    months_=month_range,
    lat_range=[0.0, 5.0],
    lon_range=[123.0, 133.0],
)

#days: List[str] = ['0', '31']
#days_s_e = [int(d) for d in days]
#days_list = list(range(days_s_e[0], days_s_e[1]))

Subsetting data to lat: [0.0, 5.0], lon: [123.0, 133.0]


In [11]:
# Split the loaded month into per-day chunks of consecutive time indices.
DAYS_IN_MONTH = 31   # number of day chunks to load
SLICES_PER_DAY = 8   # consecutive time indices that make up one day chunk

daily_aggregated_tensors = []
daily_hourly_maps = []

for day_index in range(DAYS_IN_MONTH):
    # [start, end] pair of time indices handed to the loader for this day.
    hour_start_index = day_index * SLICES_PER_DAY
    hour_end_index = hour_start_index + SLICES_PER_DAY
    hour_indices = [hour_start_index, hour_end_index]

    # Load the data for the current day.
    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
        df_map,
        hour_indices,
        ord_mm=None,
        dtype=torch.float,
    )

    # Keep both views of the day in parallel lists (same index = same day).
    daily_aggregated_tensors.append(day_aggregated_tensor)
    daily_hourly_maps.append(day_hourly_map)

print(daily_aggregated_tensors[0].shape)

torch.Size([9120, 4])


In [13]:
# Number of rows in the first day's aggregated tensor.
N = daily_aggregated_tensors[0].size(0)
# NOTE(review): the origin of the 4.1887 factor is not shown in this notebook — confirm.
N * 4.1887

38200.943999999996

In [14]:
# Seven-value parameter vector passed to the likelihood object.
a = [
    11.0474, 0.0623, 0.2445, 1.0972,
    0.0101, -0.1671, 1.1825,
]

# Settings forwarded to the Vecchia model set-up below.
v = 0.5
nheads = 100

instance = debiased_whittle.full_vecc_dw_likeihloods(
    daily_aggregated_tensors,
    daily_hourly_maps,
    day_idx=0,
    params_list=a,
)
instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)

# Evaluate and display the likelihood values.
res = instance.likelihood_wrapper()
res

Using device: cpu
Pre-computing J-vector (Hamming taper)...
Starting with FIXED params (raw log-scale): [4.2042, 1.6348, 0.4721, -3.7632, 0.0218, -0.1689, -1.3984]


[tensor(4.6461, dtype=torch.float64, grad_fn=<DivBackward0>),
 tensor(5.2310, dtype=torch.float64, grad_fn=<DivBackward0>),
 tensor(11.4951, grad_fn=<DivBackward0>)]

Spatially differenced data


In [15]:
# Same seven-value parameter vector as used for the likelihood object.
a = [
    11.0474, 0.0623, 0.2445, 1.0972,
    0.0101, -0.1671, 1.1825,
]
db = debiased_whittle.debiased_whittle_preprocess(
    daily_aggregated_tensors,
    daily_hourly_maps,
    day_idx=0,
    params_list=a,
)

# The four bounds match the lat [0, 5] / lon [123, 133] window used when loading
# (presumably lat min, lat max, lon min, lon max — confirm against the library).
subsetted_aggregated_day = db.generate_spatially_filtered_days(0, 5, 123, 133)
print(subsetted_aggregated_day.shape)
subsetted_aggregated_day[:5]

torch.Size([8512, 4])


tensor([[ 4.8000e-02,  1.2341e+02,  2.4390e+00,  2.1000e+01],
        [ 4.8000e-02,  1.2391e+02,  1.3797e+00,  2.1000e+01],
        [ 4.8000e-02,  1.2442e+02, -4.2522e+00,  2.1000e+01],
        [ 4.8000e-02,  1.2492e+02, -1.5892e+00,  2.1000e+01],
        [ 4.8000e-02,  1.2542e+02, -2.2357e+00,  2.1000e+01]])

likelihood calculation

In [12]:
def likelihood_wrapper(
    subsetted_aggregated_day,
    *,
    init_sigmasq=13.059,
    init_range_lat=0.154,
    init_range_lon=0.195,
    init_nugget=0.247,
    init_range_time=1.28,
    init_advec_lat=0.0218,
    init_advec_lon=-0.1689,
    delta_lat=0.044,
    delta_lon=0.063,
):
    """Evaluate the tapered debiased-Whittle loss once at a fixed parameter set.

    The input is split into one slice per unique value of the time column,
    the tapered J-vector, sample periodogram and taper autocorrelation are
    pre-computed, and the loss is evaluated at the parameters derived from
    the ``init_*`` arguments. No optimisation is performed.

    Args:
        subsetted_aggregated_day: 2-D tensor; columns 0, 1, 2 and 3 are read
            as latitude, longitude, value and time respectively.
        init_sigmasq, init_range_lat, init_range_lon, init_nugget,
        init_range_time, init_advec_lat, init_advec_lon: natural-scale
            starting values. They are re-parameterised as
            ``phi2 = 1 / range_lon``, ``phi1 = sigmasq * phi2``,
            ``phi3 = (range_lon / range_lat)**2`` and
            ``phi4 = (range_lon / range_time)**2``; the phis and the nugget
            are passed on the log scale, the advection terms as-is.
        delta_lat, delta_lon: grid spacings forwarded as ``delta1``/``delta2``.

    Returns:
        Whatever ``whittle_likelihood_loss_tapered`` returns (a scalar tensor
        with a grad_fn in the recorded output of this notebook).
    """
    dwl = debiased_whittle.debiased_whittle_likelihood()

    # --- Configuration ---
    TAPERING_FUNC = dwl.cgn_hamming  # Hamming taper
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    LAT_COL, LON_COL = 0, 1
    VAL_COL = 2  # Spatially differenced value
    TIME_COL = 3

    # One slice of rows per unique time stamp.
    unique_times = torch.unique(subsetted_aggregated_day[:, TIME_COL])
    time_slices_list = [
        subsetted_aggregated_day[subsetted_aggregated_day[:, TIME_COL] == t_val]
        for t_val in unique_times
    ]

    # --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
    print("Pre-computing J-vector (Hamming taper)...")
    J_vec, n1, n2, p_time, taper_grid = dwl.generate_Jvector_tapered(
        time_slices_list,
        tapering_func=TAPERING_FUNC,
        lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
        device=DEVICE
    )

    I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)
    taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

    # --- 2. Natural-scale values -> phi parameterisation ---
    init_phi2 = 1.0 / init_range_lon
    init_phi1 = init_sigmasq * init_phi2
    init_phi3 = (init_range_lon / init_range_lat)**2
    init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

    initial_params_values = [
        np.log(init_phi1),    # [0] log_phi1
        np.log(init_phi2),    # [1] log_phi2
        np.log(init_phi3),    # [2] log_phi3
        np.log(init_phi4),    # [3] log_phi4
        init_advec_lat,       # [4] advec_lat (NOT log)
        init_advec_lon,       # [5] advec_lon (NOT log)
        np.log(init_nugget)   # [6] log_nugget
    ]

    print(f"Starting with FIXED params (raw log-scale): {[round(value, 4) for value in initial_params_values]}")

    # Create the parameters on the same device as the pre-computed spectra so
    # the loss never has to combine tensors living on different devices.
    params_list = [
        Parameter(torch.tensor([val], dtype=torch.float32, device=DEVICE))
        for val in initial_params_values
    ]

    # --- 3. Single loss evaluation ---
    dwnll = dwl.whittle_likelihood_loss_tapered(
        params=torch.cat(params_list),
        I_sample=I_sample,
        n1=n1,
        n2=n2,
        p_time=p_time,
        taper_autocorr_grid=taper_autocorr_grid,
        delta1=delta_lat,
        delta2=delta_lon
    )
    return dwnll

# Evaluate the loss on the spatially filtered day. `subsetted_aggregated_day` is
# produced by the cell that calls `db.generate_spatially_filtered_days`, so that
# cell has to be executed before this one.
likelihood_wrapper(subsetted_aggregated_day)

    


Using device: cpu
Pre-computing J-vector (Hamming taper)...
Starting with FIXED params (raw log-scale): [4.2042, 1.6348, 0.4721, -3.7632, 0.0218, -0.1689, -1.3984]


tensor(53177.4688, grad_fn=<SubBackward0>)

In [None]:
# Cell-level evaluation of the tapered Whittle loss at a fixed parameter vector.
# All intermediate results are left in the notebook namespace.
dwl = debiased_whittle.debiased_whittle_likelihood()

# --- Run settings ---
DAY_TO_RUN, NUM_RUNS, EPOCHS = 1, 1, 2000
TAPERING_FUNC = dwl.cgn_hamming  # Hamming taper
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# Grid spacings forwarded to the loss as delta1 / delta2.
DELTA_LAT = 0.044
DELTA_LON = 0.063

# Column layout of the data tensor (column 2 holds the spatially differenced value).
LAT_COL, LON_COL, VAL_COL, TIME_COL = 0, 1, 2, 3
lr = 0.01

# --- Split the day into one slice per unique time stamp ---
cur_df = subsetted_aggregated_day
unique_times = torch.unique(cur_df[:, TIME_COL])
time_slices_list = [
    cur_df[cur_df[:, TIME_COL] == stamp]
    for stamp in unique_times
]

# --- 1. Pre-compute J-vector, taper grid and taper autocorrelation ---
print("Pre-computing J-vector (Hamming taper)...")
J_vec, n1, n2, p, taper_grid = dwl.generate_Jvector_tapered(
    time_slices_list,
    tapering_func=TAPERING_FUNC,
    lat_col=LAT_COL,
    lon_col=LON_COL,
    val_col=VAL_COL,
    device=DEVICE,
)
I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)
taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

# --- 2. Natural-scale starting values ---
init_sigmasq, init_nugget = 13.059, 0.247
init_range_lat, init_range_lon, init_range_time = 0.154, 0.195, 1.28
init_advec_lat, init_advec_lon = 0.0218, -0.1689

# Re-parameterisation in terms of phi1..phi4.
init_phi2 = 1.0 / init_range_lon
init_phi1 = init_sigmasq * init_phi2
init_phi3 = (init_range_lon / init_range_lat)**2
init_phi4 = (init_range_lon / init_range_time)**2   # (range_lon / range_time)^2

# Order: log_phi1, log_phi2, log_phi3, log_phi4, advec_lat, advec_lon, log_nugget
# (the two advection terms are NOT log-transformed).
initial_params_values = [
    np.log(init_phi1),
    np.log(init_phi2),
    np.log(init_phi3),
    np.log(init_phi4),
    init_advec_lat,
    init_advec_lon,
    np.log(init_nugget),
]

print(f"Starting with FIXED params (raw log-scale): {[round(p, 4) for p in initial_params_values]}")

# One single-element float32 Parameter per entry.
params_list = [
    Parameter(torch.tensor([value], dtype=torch.float32))
    for value in initial_params_values
]

# --- 3. Single loss evaluation ---
dwnll = dwl.whittle_likelihood_loss_tapered(
    params=torch.cat(params_list),
    I_sample=I_sample,
    n1=n1,
    n2=n2,
    p_time=p,
    taper_autocorr_grid=taper_autocorr_grid,
    delta1=DELTA_LAT,
    delta2=DELTA_LON,
)

print(dwnll)

Using device: cpu
Pre-computing J-vector (Hamming taper)...
Pre-computing sample periodogram...
Pre-computing Hamming taper autocorrelation...
Starting with FIXED params (raw log-scale): [4.2042, 1.6348, 0.4721, -3.7632, 0.0218, -0.1689, -1.3984]
tensor(53177.4688, grad_fn=<SubBackward0>)


Optimization with Adam

In [None]:
# =========================================================================
# 6. Main Execution Script (7-parameter multivariate model, Adam optimizer)
# =========================================================================
# Relies on `dwl` (likelihood object) and `db` (preprocessing object) created
# by earlier cells.
if __name__ == '__main__':
    start_time = time.time()

    # --- Configuration ---
    DAY_TO_RUN = 1
    TAPERING_FUNC = dwl.cgn_hamming # Use Hamming taper
    NUM_RUNS = 1
    EPOCHS = 2000
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # NOTE(review): the grid spacings are defined here but are not passed to
    # `dwl.run_full_tapered` below — confirm the trainer uses matching values.
    DELTA_LAT, DELTA_LON = 0.044, 0.063

    LAT_COL, LON_COL = 0, 1
    VAL_COL = 2 # Spatially differenced value
    TIME_COL = 3
    lr = 0.01

    # --- Load Spatially Differenced Data ---
    processed_df = db.generate_spatially_filtered_days(0,5,123,133)

    # Raise instead of calling exit(): inside Jupyter, exit() shuts the kernel down.
    if DAY_TO_RUN > len(processed_df) or DAY_TO_RUN <= 0:
        raise RuntimeError(f"Error: DAY_TO_RUN ({DAY_TO_RUN}) out of bounds.")

    cur_df = processed_df

    # Build the per-time slices from THIS cell's data rather than relying on a
    # `time_slices_list` left in the kernel by a previously executed cell.
    unique_times = torch.unique(cur_df[:, TIME_COL])
    time_slices_list = [cur_df[cur_df[:, TIME_COL] == t_val] for t_val in unique_times]

    # --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
    print("Pre-computing J-vector (Hamming taper)...")
    J_vec, n1, n2, p, taper_grid = dwl.generate_Jvector_tapered(
        time_slices_list,
        tapering_func=TAPERING_FUNC,
        lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
        device=DEVICE
    )

    if J_vec is None or J_vec.numel() == 0 or n1 == 0 or n2 == 0 or p == 0:
        raise RuntimeError(f"Error: J-vector generation failed for Day {DAY_TO_RUN}.")

    print("Pre-computing sample periodogram...")
    I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)

    print("Pre-computing Hamming taper autocorrelation...")
    taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

    if torch.isnan(I_sample).any() or torch.isinf(I_sample).any():
        raise RuntimeError("Error: NaN/Inf in sample periodogram.")
    if torch.isnan(taper_autocorr_grid).any() or torch.isinf(taper_autocorr_grid).any():
        raise RuntimeError("Error: NaN/Inf in taper autocorrelation.")

    print(f"Data grid: {n1}x{n2}, {p} time points. J-vector, Periodogram, Taper Autocorr on {DEVICE}.")

    # --- 2. Optimization Loop ---
    # The three lists are kept index-aligned: entry i always belongs to run i,
    # with a None / inf placeholder when that run produced no loss.
    all_final_results = []
    all_final_losses = []
    all_epochs_run = []

    for i in range(NUM_RUNS):
        print(f"\n{'='*30} Initialization Run {i+1}/{NUM_RUNS} {'='*30}")

        # --- 7-PARAMETER initialization ---
        # Alternative starting values kept for reference:
        #   init_sigmasq = 15.0, init_range_lat = 0.66, init_range_lon = 0.7,
        #   init_nugget = 1.5, init_beta = 0.1 (temporal range ratio),
        #   init_advec_lat = 0.02, init_advec_lon = -0.08
        init_sigmasq   = 13.059
        init_range_lat = 0.154
        init_range_lon = 0.195
        init_nugget    = 0.247
        init_range_time = 1.28
        init_advec_lat = 0.0218
        init_advec_lon = -0.1689

        # Natural scale -> phi parameterisation.
        init_phi2 = 1.0 / init_range_lon
        init_phi1 = init_sigmasq * init_phi2
        init_phi3 = (init_range_lon / init_range_lat)**2
        init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

        initial_params_values = [
            np.log(init_phi1),    # [0] log_phi1
            np.log(init_phi2),    # [1] log_phi2
            np.log(init_phi3),    # [2] log_phi3
            np.log(init_phi4),    # [3] log_phi4
            init_advec_lat,       # [4] advec_lat (NOT log)
            init_advec_lon,       # [5] advec_lon (NOT log)
            np.log(init_nugget)   # [6] log_nugget
        ]

        print(f"Starting with FIXED params (raw log-scale): {[round(value, 4) for value in initial_params_values]}")

        params_list = [
            Parameter(torch.tensor([val], dtype=torch.float32))
            for val in initial_params_values
        ]

        optimizer = torch.optim.Adam(params_list, lr=lr)

        # Halve the learning rate when the loss has not improved for 10 epochs.
        # NOTE(review): `verbose` is deprecated in recent PyTorch releases —
        # confirm against the installed version before upgrading.
        scheduler = ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.5,
            patience=10, # Wait 10 epochs for improvement
            verbose=True
        )

        print(f"Starting optimization run {i+1} on device {DEVICE} (Hamming, 7-param ST kernel)...")

        nat_params_str, phi_params_str, raw_params_str, loss, epochs_run = dwl.run_full_tapered(
            params_list=params_list,
            optimizer=optimizer,
            scheduler=scheduler,
            I_sample=I_sample,
            n1=n1, n2=n2, p=p,
            taper_autocorr_grid=taper_autocorr_grid,
            epochs=EPOCHS,
            device=DEVICE
        )

        all_epochs_run.append(epochs_run)
        if loss is not None:
            all_final_results.append((nat_params_str, phi_params_str, raw_params_str))
            all_final_losses.append(loss)
        else:
            # Placeholder keeps all_final_results aligned with all_final_losses.
            all_final_results.append(None)
            all_final_losses.append(float('inf'))

    print(f"\n\n{'='*25} Overall Result from Run {'='*25} {'='*25}")

    valid_losses = [l for l in all_final_losses if l is not None and l != float('inf')]

    if not valid_losses:
        print(f"The run failed or resulted in an invalid loss for Day {DAY_TO_RUN}.")
    else:
        best_loss = min(valid_losses)
        best_run_index = all_final_losses.index(best_loss)
        best_results = all_final_results[best_run_index]

        # Report the epoch count of the best run, not of whichever run came last.
        print(f"Best Run Loss: {best_loss} (after {all_epochs_run[best_run_index]} epochs)")
        print(f"Final Parameters (Natural Scale): {best_results[0]}")
        print(f"Final Parameters (Phi Scale)    : {best_results[1]}")
        print(f"Final Parameters (Raw Log Scale): {best_results[2]}")

    end_time = time.time()
    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")

Optimization with L-BFGS

In [8]:

# L-BFGS fit of the 7-parameter model. Relies on `db` (preprocessing object)
# created by an earlier cell.
dwl = debiased_whittle.debiased_whittle_likelihood()
if __name__ == '__main__':
    start_time = time.time()

    # --- Configuration ---
    DAY_TO_RUN = 1
    TAPERING_FUNC = dwl.cgn_hamming # Use Hamming taper
    NUM_RUNS = 1
    MAX_STEPS = 20 # L-BFGS usually converges in far fewer steps
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # NOTE(review): the grid spacings are defined here but are not passed to
    # `dwl.run_lbfgs_tapered` below — confirm the trainer uses matching values.
    DELTA_LAT, DELTA_LON = 0.044, 0.063

    LAT_COL, LON_COL = 0, 1
    VAL_COL = 2 # Spatially differenced value
    TIME_COL = 3

    # --- Load Spatially Differenced Data ---
    processed_df = db.generate_spatially_filtered_days(0,5,123,133)

    # Raise instead of calling exit(): inside Jupyter, exit() shuts the kernel down.
    if DAY_TO_RUN > len(processed_df) or DAY_TO_RUN <= 0:
        raise RuntimeError(f"Error: DAY_TO_RUN ({DAY_TO_RUN}) out of bounds.")

    cur_df = processed_df

    if cur_df.numel() == 0 or cur_df.shape[1] <= max(LAT_COL, LON_COL, VAL_COL, TIME_COL):
        raise RuntimeError(f"Error: Data for Day {DAY_TO_RUN} is empty or invalid.")

    # One slice of rows per unique time stamp.
    unique_times = torch.unique(cur_df[:, TIME_COL])
    time_slices_list = [cur_df[cur_df[:, TIME_COL] == t_val] for t_val in unique_times]

    # --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
    print("Pre-computing J-vector (Hamming taper)...")

    # The number of time points is named `p_time` here (not `p`).
    J_vec, n1, n2, p_time, taper_grid = dwl.generate_Jvector_tapered(
        time_slices_list,
        tapering_func=TAPERING_FUNC,
        lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
        device=DEVICE
    )

    if J_vec is None or J_vec.numel() == 0 or n1 == 0 or n2 == 0 or p_time == 0:
        raise RuntimeError(f"Error: J-vector generation failed for Day {DAY_TO_RUN}.")

    print("Pre-computing sample periodogram...")
    I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)

    print("Pre-computing Hamming taper autocorrelation...")
    taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

    if torch.isnan(I_sample).any() or torch.isinf(I_sample).any():
        raise RuntimeError("Error: NaN/Inf in sample periodogram.")
    if torch.isnan(taper_autocorr_grid).any() or torch.isinf(taper_autocorr_grid).any():
        raise RuntimeError("Error: NaN/Inf in taper autocorrelation.")

    print(f"Data grid: {n1}x{n2}, {p_time} time points. J-vector, Periodogram, Taper Autocorr on {DEVICE}.")

    # --- 2. Optimization Loop ---
    # The three lists are kept index-aligned: entry i always belongs to run i,
    # with a None / inf placeholder when that run produced no loss.
    all_final_results = []
    all_final_losses = []
    all_steps_run = []

    for i in range(NUM_RUNS):
        print(f"\n{'='*30} Initialization Run {i+1}/{NUM_RUNS} {'='*30}")

        # --- 7-PARAMETER initialization ---
        # Alternative starting values kept for reference:
        #   init_sigmasq = 15.0, init_range_lat = 0.66, init_range_lon = 0.7,
        #   init_nugget = 1.5, init_beta = 0.1 (temporal range ratio),
        #   init_advec_lat = 0.02, init_advec_lon = -0.08
        init_sigmasq   = 13.059
        init_range_lat = 0.154
        init_range_lon = 0.195
        init_nugget    = 0.247
        init_range_time = 1.28
        init_advec_lat = 0.0218
        init_advec_lon = -0.1689

        # Natural scale -> phi parameterisation.
        init_phi2 = 1.0 / init_range_lon
        init_phi1 = init_sigmasq * init_phi2
        init_phi3 = (init_range_lon / init_range_lat)**2
        # Chosen to match the spatial-temporal distance formula:
        init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

        initial_params_values = [
            np.log(init_phi1),    # [0] log_phi1
            np.log(init_phi2),    # [1] log_phi2
            np.log(init_phi3),    # [2] log_phi3
            np.log(init_phi4),    # [3] log_phi4
            init_advec_lat,       # [4] advec_lat (NOT log)
            init_advec_lon,       # [5] advec_lon (NOT log)
            np.log(init_nugget)   # [6] log_nugget
        ]

        print(f"Starting with FIXED params (raw log-scale): {[round(value, 4) for value in initial_params_values]}")

        params_list = [
            Parameter(torch.tensor([val], dtype=torch.float32))
            for val in initial_params_values
        ]

        # Nugget lower bound and its log (approx -2.9957).
        # NOTE(review): neither value is used anywhere in this cell — confirm
        # whether the bound is meant to be enforced by the trainer.
        NUGGET_LOWER_BOUND = 0.05
        LOG_NUGGET_LOWER_BOUND = np.log(NUGGET_LOWER_BOUND)

        # L-BFGS with strong-Wolfe line search.
        optimizer = torch.optim.LBFGS(
            params_list,
            lr=1.0,           # Initial step length for line search
            max_iter=20,      # Iterations per step
            history_size=100,
            line_search_fn="strong_wolfe", # Often more robust
            tolerance_grad=1e-5
        )

        print(f"Starting optimization run {i+1} on device {DEVICE} (Hamming, 7-param ST kernel, L-BFGS)...")

        nat_params_str, phi_params_str, raw_params_str, loss, steps_run = dwl.run_lbfgs_tapered(
            params_list=params_list,
            optimizer=optimizer,
            I_sample=I_sample,
            n1=n1, n2=n2, p_time=p_time,
            taper_autocorr_grid=taper_autocorr_grid,
            max_steps=MAX_STEPS,
            device=DEVICE
        )

        all_steps_run.append(steps_run)
        if loss is not None:
            all_final_results.append((nat_params_str, phi_params_str, raw_params_str))
            all_final_losses.append(loss)
        else:
            # Placeholder keeps all_final_results aligned with all_final_losses.
            all_final_results.append(None)
            all_final_losses.append(float('inf'))

    print(f"\n\n{'='*25} Overall Result from Run {'='*25} {'='*25}")

    valid_losses = [l for l in all_final_losses if l is not None and l != float('inf')]

    if not valid_losses:
        print(f"The run failed or resulted in an invalid loss for Day {DAY_TO_RUN}.")
    else:
        best_loss = min(valid_losses)
        best_run_index = all_final_losses.index(best_loss)
        best_results = all_final_results[best_run_index]

        # Report the step count of the best run, not of whichever run came last.
        print(f"Best Run Loss: {best_loss} (after {all_steps_run[best_run_index]} steps)")
        print(f"Final Parameters (Natural Scale): {best_results[0]}")
        print(f"Final Parameters (Phi Scale)    : {best_results[1]}")
        print(f"Final Parameters (Raw Log Scale): {best_results[2]}")

    end_time = time.time()
    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")

Using device: cpu
Pre-computing J-vector (Hamming taper)...
Pre-computing sample periodogram...
Pre-computing Hamming taper autocorrelation...
Data grid: 56x158, 8 time points. J-vector, Periodogram, Taper Autocorr on cpu.

Starting with FIXED params (raw log-scale): [4.2042, 1.6348, 0.4721, -3.7632, 0.0218, -0.1689, -1.3984]
Starting optimization run 1 on device cpu (Hamming, 7-param ST kernel, L-BFGS)...
--- Step 1/20 ---
 Loss: 6.010791 | Max Grad: 6.445593e-02
  Params (Raw Log): log_phi1: 4.1671, log_phi2: 1.7765, log_phi3: 2.4568, log_phi4: -3.1983, advec_lat: 0.0066, advec_lon: -0.1505, log_nugget: -1.1856
--- Step 2/20 ---
 Loss: 3.877474 | Max Grad: 2.119631e-03
  Params (Raw Log): log_phi1: 3.8108, log_phi2: 1.4086, log_phi3: 2.7349, log_phi4: -3.0026, advec_lat: 0.0101, advec_lon: -0.1671, log_nugget: 0.1676
--- Step 3/20 ---
 Loss: 3.846543 | Max Grad: 2.119631e-03
  Params (Raw Log): log_phi1: 3.8108, log_phi2: 1.4086, log_phi3: 2.7349, log_phi4: -3.0026, advec_lat: 0.0101