In [1]:
# Standard libraries
import sys
# Add your custom path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
import logging
import argparse # Argument parsing

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle
import torch
import torch.optim as optim
import copy                    # clone tensor
import time

# Custom imports
import GEMS_TCO
from GEMS_TCO import kernels
from GEMS_TCO import data_preprocess 
from GEMS_TCO import kernels_new 
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import load_data
from GEMS_TCO import alg_optimization, alg_opt_Encoder
from GEMS_TCO import configuration as config

from typing import Optional, List, Tuple
from pathlib import Path
import typer
import json
from json import JSONEncoder

from GEMS_TCO.data_loader import load_data2

In [2]:
from pathlib import Path
# Assuming 'config' and 'load_data' class are defined and imported elsewhere

# --- Parameters derived from your framework ---
v: float = 0.5
space: List[str] = ['4', '4']
days: List[str] = ['0', '31']
mm_cond_number: int = 20
# --- End of framework parameters ---

lat_lon_resolution = [int(s) for s in space]
days_s_e = [int(d) for d in days]
days_list = list(range(days_s_e[0], days_s_e[1]))

# These values were not in the framework, so they remain as set in your snippet
years = ['2024']
month_range = [7] 

# Assuming 'config' is available in your environment
output_path = input_path = Path(config.mac_estimates_day_path)

## load ozone data from amarel
data_load_instance = load_data2(config.mac_data_load_path)

# Call the function using the variables from the framework
df_map, ord_mm, nns_map = data_load_instance.load_maxmin_ordered_data_bymonthyear(
lat_lon_resolution=lat_lon_resolution, 
mm_cond_number=mm_cond_number,
years_=years, 
months_=month_range,
lat_range=[0.0, 5.0],      # <-- Add this
lon_range=[123.0, 133.0]   # <-- Add this
)

Subsetting data to lat: [0.0, 5.0], lon: [123.0, 133.0]


In [3]:
# List to hold the aggregated tensor for each day
daily_aggregated_tensors = [] 

# List to hold the map (dict) of hourly data for each day
daily_hourly_maps = []        

for day_index in range(31):
    # Calculate the start and end indices for the 8 hours of data
    hour_start_index = day_index * 8
    #hour_end_index = (day_index + 1) * 8
    hour_end_index = day_index*8 + 1
    hour_indices = [hour_start_index, hour_end_index]
    
    # Load the data for the current day
    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
        df_map, 
        hour_indices, 
        ord_mm=None,  
        dtype=torch.float 
    )
    
    # Append the day's data to their respective lists
    daily_aggregated_tensors.append(day_aggregated_tensor)
    daily_hourly_maps.append(day_hourly_map) 

print(daily_aggregated_tensors[0].shape)
#print(daily_hourly_maps[0])

torch.Size([1120, 4])


In [4]:
instance1 = kernels.vecchia_experiment(0.5, daily_hourly_maps[0], daily_aggregated_tensors[0], nns_map, mm_cond_number, nheads=10)

a = [21.303, 1.307, 1.563, 0.022, -0.144, 0.198, 4.769]

early_stop_vecc_5000= [2.8770e+01, 9.6e-01, 1.08e+00, 6.6789e-07, 1.13e-06, 00,1.91e+00]
scheduler_vecc_5000= [30.823, 1.325, 2.30, 5.191e-06, -9.755e-07, 0.0, 3.17]

a = early_stop_vecc_5000 #  2174.9120
a = [2.9527e+01, 1.3223e+00, 2.2772e+00, 1.3499e-05, 5.9777e-06, 0.0000e+00,
        3.2343e+00] # 2108.4802


params = torch.tensor(a, dtype=torch.float64, requires_grad=True)
cov_map = instance1.cov_structure_saver(params, instance1.matern_cov_anisotropy_v05)  
instance1.vecchia_oct22( params, instance1.matern_cov_anisotropy_v05, cov_map )


tensor(2108.4801, dtype=torch.float64, grad_fn=<AddBackward0>)

In [5]:
v = 0.5 # smooth
mm_cond_number = 20
nheads = 1120
#nheads = 1230
lr = 0.01
step = 30
gamma_par = 0.5
epochs = 1000

# Optimization adams

In [None]:
import torch
import numpy as np
import time
# (Import your fit_vecchia_adams class and other dependencies here)
# from kernels_new import fit_vecchia_adams 

# --- 1. Load Data (Assumed to be done) ---
# daily_hourly_maps = ...
# daily_aggregated_tensors = ...
# nns_map = ...
# v, mm_cond_number, nheads = 0.5, 20, 10
# lat_lon_resolution = [1.0, 1.0] # Example
# lr, step, gamma_par, epochs = 0.01, 40, 0.5, 100 # Example
# device = 'cuda' if torch.cuda.is_available() else 'cpu'


# --- 2. Run optimization loop over pre-loaded data ---
day_indices = [0] 
for day_idx in day_indices:  

    daily_hourly_map = daily_hourly_maps[day_idx]
    daily_aggregated_tensor = daily_aggregated_tensors[day_idx]

    # --- Correct Parameter Initialization ---
    # This block maps your model's physical parameters (sigmasq, range)
    # to the 'phi' parameterization used by your 'log_reparam' covariance function.
    
    # Define initial *model* parameters
    init_sigmasq   = 30.0
    init_range_lat = 0.66 
    init_range_lon = 0.7 
    init_advec_lat = 0
    init_advec_lon = 0
    init_beta      = 0
    init_nugget    = 1.5
    
    # Map model parameters to the 'phi' reparameterization
    init_phi2 = 1.0 / init_range_lon                # [1] phi2 = 1 / range_lon
    init_phi1 = init_sigmasq * init_phi2            # [0] phi1 = sigmasq * phi2 = sigmasq / range_lon
    init_phi3 = (init_range_lon / init_range_lat)**2  # [2] phi3 = (range_lon / range_lat)^2
    init_phi4 = init_beta**2                      # [3] phi4 = beta^2
    
    # Initialize the parameter list by taking the log(),
    # because the covariance function uses torch.exp().
    # 
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'

    # ðŸ’¥ CRITICAL FIX: Use dtype=torch.float64 to match your model's internal type.
    params_list = [
        torch.tensor([np.log(init_phi1)],      requires_grad=True, dtype=torch.float64, device=device_str ), # [0] log(phi1)
        torch.tensor([np.log(init_phi2)],      requires_grad=True, dtype=torch.float64, device=device_str ), # [1] log(phi2)
        torch.tensor([np.log(init_phi3)],      requires_grad=True, dtype=torch.float64, device=device_str ), # [2] log(phi3)
        torch.tensor([np.log(init_phi4)],      requires_grad=True, dtype=torch.float64, device=device_str ), # [3] log(phi4)
        torch.tensor([init_advec_lat],         requires_grad=True, dtype=torch.float64, device=device_str ), # [4] advec_lat (linear)
        torch.tensor([init_advec_lon],         requires_grad=True, dtype=torch.float64, device=device_str ), # [5] advec_lon (linear)
        torch.tensor([np.log(init_nugget)],    requires_grad=True, dtype=torch.float64, device=device_str )  # [6] log(nugget)
    ]

    # --- Define learning rates and parameter groups ---
    lr_slow, lr_fast = 0.005, 0.01 
    slow_indices = [ 3, 4, 5] # ranges, anisotropy, advection, beta, nugget
    fast_indices = [0,1,2,6]                 # sigmasq (via phi1)
    
    param_groups = [
        {'params': [params_list[idx] for idx in slow_indices], 'lr': lr_slow, 'name': 'slow_group'},
        {'params': [params_list[idx] for idx in fast_indices], 'lr': lr_fast, 'name': 'fast_group'}
    ]

    # --- Print Job Info ---
    # ðŸ’¥ FIX: Changed 'day' to 'day_idx'
    res_calc = (113 // lat_lon_resolution[0]) * (158 // lat_lon_resolution[0]) 
    print(f'\n--- Starting Day {day_idx+1} (2024-07-{day_idx+1}) ---')
    print(f'Data size per day: { res_calc }, smooth: {v}')
    print(f'mm_cond_number: {mm_cond_number},\ninitial parameters: \n {params_list}')
            
    # --- Instantiate the Correct Class ---
    # ðŸ’¥ FIX: Use the 'fit_vecchia_adams' class you provided
    model_instance = kernels_new.fit_vecchia_adams(
            smooth = v,
            input_map = daily_hourly_map,        # ðŸ’¥ FIX: Use correct loop variable
            aggregated_data = daily_aggregated_tensor, # ðŸ’¥ FIX: Use correct loop variable
            nns_map = nns_map,
            mm_cond_number = mm_cond_number,
            nheads = nheads
        )

    start_time = time.time()
    
    # --- Call the Correct Optimizer Method ---
    # ðŸ’¥ FIX: Method name is 'set_optimizer'
    optimizer, scheduler = model_instance.set_optimizer(
            param_groups,     
            lr=lr,            
            betas=(0.9, 0.99), 
            eps=1e-8, 
            step_size=step, 
            gamma=gamma_par
        )

    # --- Call the Correct Fit Method ---
    # ðŸ’¥ FIX: Method name is 'fit_vecc_scheduler_oct23'
    # ðŸ’¥ FIX: Pass the model's *actual* covariance function.
    # (This assumes you've fixed the 'raw_params' -> 'params' TypeError 
    # in the 'SpatioTemporalModel' base class, as recommended previously).
    out, epoch_ran = model_instance.fit_vecc_scheduler_oct23(
            params_list,
            optimizer,
            scheduler, 
            model_instance.matern_cov_aniso_STABLE_log_reparam, # Pass the *actual* method
            epochs=epochs
        )

    end_time = time.time()
    epoch_time = end_time - start_time
    
    # ðŸ’¥ FIX: Changed 'day' to 'day_idx'
    print(f"Day {day_idx+1} optimization finished in {epoch_time:.2f}s over {epoch_ran+1} epochs.")
    print(f"Day {day_idx+1} final results: {out}")

  torch.tensor([np.log(init_phi4)],      requires_grad=True, dtype=torch.float64, device=device_str ), # [3] log(phi4)



--- Starting Day 1 (2024-07-1) ---
Data size per day: 1092, smooth: 0.5
mm_cond_number: 20,
initial parameters: 
 [tensor([3.7579], dtype=torch.float64, requires_grad=True), tensor([0.3567], dtype=torch.float64, requires_grad=True), tensor([0.1177], dtype=torch.float64, requires_grad=True), tensor([-inf], dtype=torch.float64, requires_grad=True), tensor([1.0000e-08], dtype=torch.float64, requires_grad=True), tensor([0.], dtype=torch.float64, requires_grad=True), tensor([0.4055], dtype=torch.float64, requires_grad=True)]
--- Epoch 1 / Loss: 1814.081401 ---
  Param 0: Value=3.7579, Grad=113.39924778492197
  Param 1: Value=0.3567, Grad=-27.078598333934362
  Param 2: Value=0.1177, Grad=2.1541809744317857
  Param 3: Value=-inf, Grad=0.0
  Param 4: Value=0.0000, Grad=5.684341886080801e-13
  Param 5: Value=0.0000, Grad=1.1937117960769683e-12
  Param 6: Value=0.4055, Grad=11.37839477560694
  Max Abs Grad: 1.133992e+02
------------------------------
--- Epoch 11 / Loss: 1801.280129 ---
  Param

In [8]:
a = [3.3570243666454123, 0.7059960074175937, 0.6296135066755064, 0, -9.65072669686697e-07, -2.9337593169557848e-06, 0.48825588183811414 ]

def cal(a):
    day_indices = [0] 
    for day_idx in day_indices:  

        daily_hourly_map = daily_hourly_maps[day_idx]
        daily_aggregated_tensor = daily_aggregated_tensors[day_idx]


        # Initial parameters (full vector 'a' is for reference)

        # NEW: Define params as a list of 1-element tensors (one per parameter)
        params_list = [
            torch.tensor([val], dtype=torch.float64, requires_grad=True) for val in a
        ]

        model_instance = kernels_new.fit_vecchia_adams(
                smooth = v,
                input_map = daily_hourly_map,        # ðŸ’¥ FIX: Use correct loop variable
                aggregated_data = daily_aggregated_tensor, # ðŸ’¥ FIX: Use correct loop variable
                nns_map = nns_map,
                mm_cond_number = mm_cond_number,
                nheads = nheads
            )
        
        bb= model_instance.full_likelihood( params_list, daily_aggregated_tensor,daily_aggregated_tensor[:,2], model_instance.matern_cov_aniso_STABLE_log_reparam)
        return bb

cal(a)

tensor(1779.9191, dtype=torch.float64, grad_fn=<MulBackward0>)

# LBFGS

In [7]:

from kernels_new import fit_vecchia_lbfgs 


max_lbfgs_steps = 50 # Max steps for the L-BFGS optimizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'


# --- 2. Run optimization loop over pre-loaded data ---
day_indices = [0] 
for day_idx in day_indices:  

    daily_hourly_map = daily_hourly_maps[day_idx]
    daily_aggregated_tensor = daily_aggregated_tensors[day_idx]

    # --- Correct Parameter Initialization ---
    # ... (Parameter initialization code remains the same) ...
    init_sigmasq   = 30.0
    init_range_lat = 0.66 
    init_range_lon = 0.7 
    init_advec_lat = 0
    init_advec_lon = 0
    init_beta      = 0
    init_nugget    = 1.5
    
    init_phi2 = 1.0 / init_range_lon
    init_phi1 = init_sigmasq * init_phi2
    init_phi3 = (init_range_lon / init_range_lat)**2
    init_phi4 = init_beta**2
    
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'

    params_list = [
        torch.tensor([np.log(init_phi1)],      requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([np.log(init_phi2)],      requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([np.log(init_phi3)],      requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([np.log(init_phi4)],      requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([init_advec_lat],         requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([init_advec_lon],         requires_grad=True, dtype=torch.float64, device=device_str ),
        torch.tensor([np.log(init_nugget)],    requires_grad=True, dtype=torch.float64, device=device_str )
    ]

    # --- ðŸ’¥ FIX: Define a SINGLE parameter group for L-BFGS ---
    # L-BFGS does not support per-parameter options or different LRs.
    # Pass all parameters in one list.
    param_groups = [{'params': params_list}]
    # --- End Fix ---

    # --- Print Job Info ---
    res_calc = (113 // lat_lon_resolution[0]) * (158 // lat_lon_resolution[0]) 
    print(f'\n--- Starting Day {day_idx+1} (2024-07-{day_idx+1}) ---')
    print(f'Data size per day: { res_calc }, smooth: {v}')
    print(f'mm_cond_number: {mm_cond_number},\ninitial parameters: \n {params_list}')
            
    # --- 1. Instantiate the L-BFGS Class ---
    model_instance = kernels_new.fit_vecchia_lbfgs(
            smooth = v,
            input_map = daily_hourly_map,
            aggregated_data = daily_aggregated_tensor, 
            nns_map = nns_map,
            mm_cond_number = mm_cond_number,
            nheads = nheads
        )

    start_time = time.time()
    
    # --- 2. Call the L-BFGS Optimizer Method ---
    # The 'param_groups' variable now correctly contains only one group.
    optimizer = model_instance.set_optimizer(
            param_groups,     
            lr=1.0,            # Standard initial step length for L-BFGS
            max_iter=10,       # Line search iterations per step
            tolerance_grad=1e-4
        )

    # --- 3. Call the L-BFGS Fit Method ---
    out, steps_ran = model_instance.fit_vecc_lbfgs(
            params_list,
            optimizer,
            model_instance.matern_cov_aniso_STABLE_log_reparam, # Pass the *actual* method
            max_steps=max_lbfgs_steps
        )

    end_time = time.time()
    epoch_time = end_time - start_time
    
    # --- 4. Updated Print Statement ---
    print(f"Day {day_idx+1} optimization finished in {epoch_time:.2f}s over {steps_ran+1} steps.")
    print(f"Day {day_idx+1} final results: {out}")

  torch.tensor([np.log(init_phi4)],      requires_grad=True, dtype=torch.float64, device=device_str ),



--- Starting Day 1 (2024-07-1) ---
Data size per day: 1092, smooth: 0.5
mm_cond_number: 20,
initial parameters: 
 [tensor([3.7579], dtype=torch.float64, requires_grad=True), tensor([0.3567], dtype=torch.float64, requires_grad=True), tensor([0.1177], dtype=torch.float64, requires_grad=True), tensor([-inf], dtype=torch.float64, requires_grad=True), tensor([0.], dtype=torch.float64, requires_grad=True), tensor([0.], dtype=torch.float64, requires_grad=True), tensor([0.4055], dtype=torch.float64, requires_grad=True)]
--- Starting L-BFGS Optimization ---
--- Step 1/50 / Loss: 2203.453133 ---
  Param 0: Value=2.3953, Grad=29.797448955188386
  Param 1: Value=-1.1863, Grad=6.795424181454637
  Param 2: Value=-0.2210, Grad=13.80491538480854
  Param 3: Value=-inf, Grad=0.0
  Param 4: Value=-0.0000, Grad=4.3834380569762743e-13
  Param 5: Value=0.0000, Grad=4.4221570849600766e-13
  Param 6: Value=1.5276, Grad=-4.824017268116008
  Max Abs Grad: 2.979745e+01
------------------------------
--- Step 2/

In [None]:
a = [53.56, 17.09, 8.19, 0, 0, 0, 6.08]



# fix time only

adam

In [7]:
import torch
import numpy as np
import time
# (Import your fit_vecchia_adams class and other dependencies here)
# from kernels_new import fit_vecchia_adams 

# --- 1. Load Data (Assumed to be done) ---
# daily_hourly_maps = ...
# daily_aggregated_tensors = ...
# nns_map = ...
# v, mm_cond_number, nheads = 0.5, 20, 10
# lat_lon_resolution = [1.0, 1.0] # Example
# lr, step, gamma_par, epochs = 0.01, 40, 0.5, 100 # Example
# device = 'cuda' if torch.cuda.is_available() else 'cpu'


nheads=300
# --- 2. Run optimization loop over pre-loaded data ---
day_indices = [0] 
for day_idx in day_indices:  

    daily_hourly_map = daily_hourly_maps[day_idx]
    daily_aggregated_tensor = daily_aggregated_tensors[day_idx]

    # --- Correct Parameter Initialization ---
    init_sigmasq   = 25
    init_range_lat = 0.66 
    init_range_lon = 0.7 
    init_advec_lat = 0  # Will be fixed
    init_advec_lon = 0      # Will be fixed
    init_beta      = 0      # Will be fixed
    init_nugget    = 1.5
    
    # Map model parameters to the 'phi' reparameterization
    init_phi2 = 1.0 / init_range_lon                # [1] phi2 = 1 / range_lon
    init_phi1 = init_sigmasq * init_phi2            # [0] phi1 = sigmasq * phi2 = sigmasq / range_lon
    init_phi3 = (init_range_lon / init_range_lat)**2  # [2] phi3 = (range_lon / range_lat)^2
    
    # ðŸ’¥ FIX: Add epsilon to prevent log(0) since init_beta is 0
    init_phi4 = init_beta**2 # + 1e-9                 # [3] phi4 = beta^2
    
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'

    # ðŸ’¥ FIX: Set requires_grad=False for fixed temporal parameters
    params_list = [
        # --- Spatial & Nugget Parameters (Optimized) ---
        torch.tensor([np.log(init_phi1)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [0] log(phi1)
        torch.tensor([np.log(init_phi2)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [1] log(phi2)
        torch.tensor([np.log(init_phi3)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [2] log(phi3)
        
        # --- Temporal Parameters (Fixed) ---
        torch.tensor([np.log(init_phi4)],      requires_grad=False, dtype=torch.float64, device=device_str ), # [3] log(phi4)
        torch.tensor([init_advec_lat],         requires_grad=False, dtype=torch.float64, device=device_str ), # [4] advec_lat (linear)
        torch.tensor([init_advec_lon],         requires_grad=False, dtype=torch.float64, device=device_str ), # [5] advec_lon (linear)
        
        # --- Spatial & Nugget Parameters (Optimized) ---
        torch.tensor([np.log(init_nugget)],    requires_grad=True,  dtype=torch.float64, device=device_str )  # [6] log(nugget)
    ]

    # --- ðŸ’¥ FIX: Define parameter groups for *optimizable* parameters only ---
    lr_slow, lr_fast = 0.005, 0.02
    
    # These are the indices of the parameters we still want to optimize
    fast_indices = [0, 1, 2, 6] 
    
    param_groups = [
        # The 'slow_group' is removed as it only contained fixed parameters
        {'params': [params_list[idx] for idx in fast_indices], 'lr': lr_fast, 'name': 'fast_group'}
    ]
    # --- End Fix ---

    # --- Print Job Info ---
    res_calc = (113 // lat_lon_resolution[0]) * (158 // lat_lon_resolution[0]) 
    print(f'\n--- Starting Day {day_idx+1} (2024-07-{day_idx+1}) ---')
    print(f'Data size per day: { res_calc }, smooth: {v}')
    print(f'mm_cond_number: {mm_cond_number},\ninitial parameters: \n {params_list}')
            
    # --- Instantiate the Correct Class ---
    model_instance = kernels_new.fit_vecchia_adams(
            smooth = v,
            input_map = daily_hourly_map,
            aggregated_data = daily_aggregated_tensor,
            nns_map = nns_map,
            mm_cond_number = mm_cond_number,
            nheads = nheads
        )

    start_time = time.time()
    
    # --- Call the Correct Optimizer Method ---
    # 'param_groups' now correctly contains only the 4 optimizable parameters
    optimizer, scheduler = model_instance.set_optimizer(
            param_groups,     
            lr=lr,            
            betas=(0.9, 0.99), 
            eps=1e-8, 
            step_size=step, 
            gamma=gamma_par
        )

    # --- Call the Correct Fit Method ---
    # We still pass the *full* params_list, as the NLL function
    # needs all 7 values (even the fixed ones).
    out, epoch_ran = model_instance.fit_vecc_scheduler_oct23(
            params_list,
            optimizer,
            scheduler, 
            model_instance.matern_cov_aniso_STABLE_log_reparam, 
            epochs=epochs
        )

    end_time = time.time()
    epoch_time = end_time - start_time
    
    print(f"Day {day_idx+1} optimization finished in {epoch_time:.2f}s over {epoch_ran+1} epochs.")
    print(f"Day {day_idx+1} final results: {out}")

    # --- ðŸ’¥ START: Print Original Parameter Values ðŸ’¥ ---
    
    # 'out' is the list returned from the fit function: [raw_param_0, ..., raw_param_6, final_loss]
    # We extract just the raw parameters by slicing off the last element (the loss).
    final_raw_params_list = out[:-1] 
    
    # Use the helper function to convert them
    interpretable_params = model_instance._convert_raw_params_to_interpretable(final_raw_params_list)
    
    # Print the raw list first
    print(f"Day {day_idx+1} final raw results (params + loss): {out}")
    
    # Pretty-print the interpretable dictionary
    print(f"Day {day_idx+1} final interpretable parameters:")
    if interpretable_params:
        for key, val in interpretable_params.items():
            # The :10s aligns the keys for readability
            print(f'    {key:10s}: {val:.6f}')
    # --- ðŸ’¥ END ðŸ’¥ ---

  torch.tensor([np.log(init_phi4)],      requires_grad=False, dtype=torch.float64, device=device_str ), # [3] log(phi4)



--- Starting Day 1 (2024-07-1) ---
Data size per day: 1092, smooth: 0.5
mm_cond_number: 20,
initial parameters: 
 [tensor([3.5756], dtype=torch.float64, requires_grad=True), tensor([0.3567], dtype=torch.float64, requires_grad=True), tensor([0.1177], dtype=torch.float64, requires_grad=True), tensor([-inf], dtype=torch.float64), tensor([0.], dtype=torch.float64), tensor([0.], dtype=torch.float64), tensor([0.4055], dtype=torch.float64, requires_grad=True)]
--- Epoch 1 / Loss: 1955.397753 ---
  Param 0: Value=3.5756, Grad=194.8455366654497
  Param 1: Value=0.3567, Grad=-146.05639413948617
  Param 2: Value=0.1177, Grad=5.137603578596103
  Param 3: Value=-inf, Grad=N/A
  Param 4: Value=0.0000, Grad=N/A
  Param 5: Value=0.0000, Grad=N/A
  Param 6: Value=0.4055, Grad=16.35748364033666
  Max Abs Grad: 1.948455e+02
------------------------------
--- Epoch 11 / Loss: 1900.151211 ---
  Param 0: Value=3.3803, Grad=95.30642370266075
  Param 1: Value=0.5542, Grad=-94.57488273295881
  Param 2: Value=

In [10]:
from kernels_new import fit_vecchia_lbfgs 
import numpy as np # Make sure numpy is imported
import torch # Make sure torch is imported

max_lbfgs_steps = 10 # Max steps for the L-BFGS optimizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# (Assuming other variables like lat_lon_resolution, v, mm_cond_number, nheads are defined)

# --- 2. Run optimization loop over pre-loaded data ---
day_indices = [0] 
for day_idx in day_indices:  

    daily_hourly_map = daily_hourly_maps[day_idx]
    daily_aggregated_tensor = daily_aggregated_tensors[day_idx]

    # --- Correct Parameter Initialization ---
    init_sigmasq   = 30.0
    init_range_lat = 1
    init_range_lon = 1.5 
    init_advec_lat = 0.0 # Will be fixed
    init_advec_lon = 0.0 # Will be fixed
    init_beta      = 0.0 # Will be fixed
    init_nugget    = 1.5
    
    init_phi2 = 1.0 / init_range_lon
    init_phi1 = init_sigmasq * init_phi2
    init_phi3 = (init_range_lon / init_range_lat)**2
    
    # ðŸ’¥ 1. Add epsilon to prevent log(0) when init_beta is 0
    init_phi4 = init_beta**2 + 1e-9 
    
    device_str = 'cuda' if torch.cuda.is_available() else 'cpu'

    # ðŸ’¥ 2. Fix temporal parameters by setting requires_grad=False
    params_list = [
        # --- Spatial Parameters (Optimized) ---
        torch.tensor([np.log(init_phi1)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [0] log(phi1)
        torch.tensor([np.log(init_phi2)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [1] log(phi2)
        torch.tensor([np.log(init_phi3)],      requires_grad=True,  dtype=torch.float64, device=device_str ), # [2] log(phi3)
        
        # --- Temporal Parameters (Fixed) ---
        torch.tensor([np.log(init_phi4)],      requires_grad=False, dtype=torch.float64, device=device_str ), # [3] log(phi4)
        torch.tensor([init_advec_lat],         requires_grad=False, dtype=torch.float64, device=device_str ), # [4] advec_lat
        torch.tensor([init_advec_lon],         requires_grad=False, dtype=torch.float64, device=device_str ), # [5] advec_lon
        
        # --- Nugget (Optimized) ---
        torch.tensor([np.log(init_nugget)],    requires_grad=True,  dtype=torch.float64, device=device_str )  # [6] log(nugget)
    ]

    # --- ðŸ’¥ 3. Create a list of *only* the parameters to be optimized ---
    params_to_optimize = [p for p in params_list if p.requires_grad]
    
    # Pass only the optimizable parameters to L-BFGS
    param_groups = [{'params': params_to_optimize}]
    # --- End Fix ---

    # --- Print Job Info ---
    res_calc = (113 // lat_lon_resolution[0]) * (158 // lat_lon_resolution[0]) 
    print(f'\n--- Starting Day {day_idx+1} (2024-07-{day_idx+1}) ---')
    print(f'Data size per day: { res_calc }, smooth: {v}')
    print(f'mm_cond_number: {mm_cond_number},\ninitial parameters: \n {params_list}')
            
    # --- 1. Instantiate the L-BFGS Class ---
    model_instance = kernels_new.fit_vecchia_lbfgs(
            smooth = v,
            input_map = daily_hourly_map,
            aggregated_data = daily_aggregated_tensor, 
            nns_map = nns_map,
            mm_cond_number = mm_cond_number,
            nheads = nheads
        )

    start_time = time.time()
    
    # --- 2. Call the L-BFGS Optimizer Method ---
    # The 'param_groups' variable now correctly contains only the 4 active parameters.
    optimizer = model_instance.set_optimizer(
            param_groups,     
            lr=1.0,            # Standard initial step length for L-BFGS
            max_iter=10,       # Line search iterations per step
            tolerance_grad=1e-4
        )

    # --- 3. Call the L-BFGS Fit Method ---
    # We still pass the *full* params_list here, because the NLL function
    # (matern_cov_aniso_STABLE_log_reparam) needs all 7 values.
    out, steps_ran = model_instance.fit_vecc_lbfgs(
            params_list,
            optimizer,
            model_instance.matern_cov_aniso_STABLE_log_reparam, # Pass the *actual* method
            max_steps=max_lbfgs_steps
        )

    end_time = time.time()
    epoch_time = end_time - start_time
    
    # --- 4. Updated Print Statement ---
    print(f"Day {day_idx+1} optimization finished in {epoch_time:.2f}s over {steps_ran+1} steps.")
    print(f"Day {day_idx+1} final results: {out}")


--- Starting Day 1 (2024-07-1) ---
Data size per day: 1092, smooth: 0.5
mm_cond_number: 20,
initial parameters: 
 [tensor([2.9957], dtype=torch.float64, requires_grad=True), tensor([-0.4055], dtype=torch.float64, requires_grad=True), tensor([0.8109], dtype=torch.float64, requires_grad=True), tensor([-20.7233], dtype=torch.float64), tensor([0.], dtype=torch.float64), tensor([0.], dtype=torch.float64), tensor([0.4055], dtype=torch.float64, requires_grad=True)]
--- Starting L-BFGS Optimization ---
--- Step 1/10 / Loss: 1962.098613 ---
  Param 0: Value=3.0123, Grad=0.12637566556945573
  Param 1: Value=0.6197, Grad=-0.030526065336653363
  Param 2: Value=0.7247, Grad=0.030299899082774542
  Param 3: Value=-20.7233, Grad=N/A
  Param 4: Value=0.0000, Grad=N/A
  Param 5: Value=0.0000, Grad=N/A
  Param 6: Value=0.6786, Grad=-0.04972887213368495
  Max Abs Grad: 1.263757e-01
------------------------------
--- Step 2/10 / Loss: 1877.625261 ---
  Param 0: Value=3.0000, Grad=2.2874680952827964e-05
  

In [33]:

a = [2.9999559388023678, 0.6150577787926361, 0.8039117855445436, 0, 0.0, 0.0, 0.7166534901520966]

#a = [3.1850261248518965, 0.7307329198588871, 0.7895547147699365, 0, 0.0, 0.0, 0.27780223854803404]
cal(a)

tensor(1781.4919, dtype=torch.float64, grad_fn=<MulBackward0>)