In [1]:
# Standard libraries
import sys
# Add your custom path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
import os
import logging
import argparse # Argument parsing

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle
import torch
import torch.optim as optim
import copy                    # clone tensor
import time

# Custom imports

from GEMS_TCO import kernels_reparam_space_time_gpu as kernels_reparam_space_time
from GEMS_TCO import kernels_reparam_space_time_gpu_copy_dummy_013126 as kernels_reparam_space_time
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import alg_optimization, BaseLogger
from GEMS_TCO import kernels_columns as kernels_reparam_space_time_gpu_col
from typing import Optional, List, Tuple
from pathlib import Path
import typer
import json
from json import JSONEncoder
from GEMS_TCO import configuration as config
from GEMS_TCO.data_loader import load_data2, exact_location_filter
from GEMS_TCO import debiased_whittle
from torch.nn import Parameter

Load daily data applying max-min ordering

In [2]:
space: List[str] = ['1', '1']
lat_lon_resolution = [int(s) for s in space]
mm_cond_number: int = 8
years = ['2024']
month_range = [7] 

output_path = input_path = Path(config.mac_estimates_day_path)
data_load_instance = load_data2(config.mac_data_load_path)

#lat_range_input = [1, 3]
#lon_range_input = [125.0, 129.0]

lat_range_input=[-3,2]      
lon_range_input=[121, 131] 

df_map, ord_mm, nns_map = data_load_instance.load_maxmin_ordered_data_bymonthyear(
lat_lon_resolution=lat_lon_resolution, 
mm_cond_number=mm_cond_number,
years_=years, 
months_=month_range,

lat_range=lat_range_input,   
lon_range=lon_range_input
  
)

In [4]:
daily_aggregated_tensors_dw = [] 
daily_hourly_maps_dw = []      

daily_aggregated_tensors_vecc = [] 
daily_hourly_maps_vecc = []   


for day_index in range(31):
    hour_start_index = day_index * 8
    
    hour_end_index = (day_index + 1) * 8
    #hour_end_index = day_index*8 + 1
    hour_indices = [hour_start_index, hour_end_index]

    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
    df_map, 
    hour_indices, 
    ord_mm= None,  # or just omit it
    dtype=torch.float64, # or just omit it 
    keep_ori=False  #keep_exact_loc
    )

    daily_aggregated_tensors_dw.append( day_aggregated_tensor )
    daily_hourly_maps_dw.append( day_hourly_map )

    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
    df_map, 
    hour_indices, 
    ord_mm= ord_mm,  # or just omit it
    dtype=torch.float64, # or just omit it 
    keep_ori=False  #keep_exact_loc
    )

    daily_aggregated_tensors_vecc.append( day_aggregated_tensor )
    daily_hourly_maps_vecc.append( day_hourly_map )
print(daily_aggregated_tensors_vecc[0].shape)
#print(daily_hourly_maps[0])
nn = daily_aggregated_tensors_vecc[0].shape[0]

torch.Size([145008, 4])


In [5]:
v=0.5
mm_cond_number= 8
nheads = 300
patience, factor = 5, 0.5


In [7]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# Global L-BFGS Settings
LBFGS_LR = 1.0
LBFGS_MAX_STEPS = 3      # 10 to 20  
LBFGS_HISTORY_SIZE = 100   
LBFGS_MAX_EVAL = 100       # line search from 50 to 80
       
DELTA_LAT, DELTA_LON = 0.044, 0.063 
LAT_COL, LON_COL, VAL_COL, TIME_COL = 0, 1, 2, 3

days_list = [0]

# --- 2. Run optimization loop over pre-loaded data ---

for day_idx in days_list:  # 0-based

    print(f'\n{"="*40}')
    print(f'--- Starting Processing for Day {day_idx+1} (2024-07-{day_idx+1}) ---')
    print(f'{"="*40}')

    # Assuming data access is correct
    daily_hourly_map_dw = daily_hourly_maps_dw[day_idx]
    daily_aggregated_tensor_dw = daily_aggregated_tensors_dw[day_idx]

    daily_hourly_map_vecc = daily_hourly_maps_vecc[day_idx]
    daily_aggregated_tensor_vecc = daily_aggregated_tensors_vecc[day_idx]

    if isinstance(daily_aggregated_tensor_vecc, torch.Tensor):
        daily_aggregated_tensor_vecc = daily_aggregated_tensor_vecc.to(DEVICE)


    init_sigmasq   = 13.059
    init_range_lat = 0.2
    init_range_lon = 0.25
    init_advec_lat = 0.0218
    init_range_time = 1.5
    init_advec_lon = -0.1689
    init_nugget    = 0.247

    #init_sigmasq   = 13.059
    #init_range_lat = 0.154 
    #init_range_lon = 0.195
    #init_advec_lat = 1e-8
    #init_range_time = 1e-8
    #init_advec_lon = 1e-8
    #init_nugget    = 0.247
    
    
    # Map model parameters to the 'phi' reparameterization
    init_phi2 = 1.0 / init_range_lon                # 1/range_lon
    init_phi1 = init_sigmasq * init_phi2            # sigmasq / range_lon
    init_phi3 = (init_range_lon / init_range_lat)**2  # (range_lon / range_lat)^2
    init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

    # Create Initial Parameters (Float64, Requires Grad)
    initial_vals = [np.log(init_phi1), np.log(init_phi2), np.log(init_phi3), 
                    np.log(init_phi4), init_advec_lat, init_advec_lon, np.log(init_nugget)]

    params_list = [
        torch.tensor([val], requires_grad=True, dtype=torch.float64, device=DEVICE)
        for val in initial_vals
    ]

    # --- ðŸ’¥ Instantiate the GPU Batched Class ---
    # NOTE: Ensure fit_vecchia_lbfgs is the NEW class we defined
    model_instance = kernels_reparam_space_time.fit_vecchia_lbfgs(
            smooth = v,
            input_map = daily_hourly_map_vecc,
            aggregated_data = daily_aggregated_tensor_vecc,
            nns_map = nns_map,
            mm_cond_number = mm_cond_number,
            nheads = nheads
        )

    model_instance = kernels_reparam_space_time_gpu_col.fit_vecchia_lbfgs(
        smooth=v,
        #input_map=daily_hourly_maps_vecc_sim[0],
        #aggregated_data= daily_aggregated_tensors_vecc_sim[0],

        input_map=daily_hourly_maps_dw[2],
        aggregated_data= daily_aggregated_tensors_dw[2],

        nns_map=None,
        mm_cond_number=mm_cond_number,
        #nheads=nheads
    )

    # --- ðŸ’¥ Set L-BFGS Optimizer ---
    optimizer_vecc = model_instance.set_optimizer(
                params_list,     
                lr=LBFGS_LR,            
                max_iter=LBFGS_MAX_EVAL,        
                history_size=LBFGS_HISTORY_SIZE 
            )

    print(f"\n--- Starting Phase 2: Vecchia Optimization (Day {day_idx+1}) ---")
    start_time = time.time()
    
    # --- ðŸ’¥ Call the Batched Fit Method ---
    # REMOVED: model_instance.matern_cov_aniso_STABLE_log_reparam
    out, steps_ran = model_instance.fit_vecc_lbfgs(
            params_list,
            optimizer_vecc,
            # covariance_function argument is GONE
            max_steps=LBFGS_MAX_STEPS, 
            grad_tol=1e-7
        )


    end_time = time.time()
    epoch_time = end_time - start_time
    
    print(f"Vecchia Optimization finished in {epoch_time:.2f}s. Results: {out}")


Using device: cpu

--- Starting Processing for Day 1 (2024-07-1) ---

--- Starting Phase 2: Vecchia Optimization (Day 1) ---
ðŸš€ Pre-computing (ULTRA DENSE Stencil: ~120 Neighbors)...
   Searching Neighbors (Stencil Candidates: 123)...
âœ… Precompute Done. Unique Patterns: 136
   Total Time: 80.37s
--- Starting Optimization (Dense Stencil) ---
Step 1 | Loss: 1.170152 | Max Grad: 0.00e+00
Final Params: {'sigma_sq': nan, 'range_lon': nan, 'range_lat': nan, 'range_time': nan, 'advec_lat': nan, 'advec_lon': nan, 'nugget': nan}
Vecchia Optimization finished in 89.56s. Results: [nan, nan, nan, nan, nan, nan, nan]


In [None]:
------------------------------
Final Interpretable Params: {'sigma_sq': 14.387504858621533, 'range_lon': 0.3585894747751284, 'range_lat': 0.3357524042214466, 'range_time': 1.7848548701311417, 'advec_lat': -0.0015716791121009894, 'advec_lon': -0.1651791561224459, 'nugget': 0.24991957193620096}
Vecchia Optimization finished in 85.17s. Results: [3.6919371808652652, 1.025577069119908, 0.13160842747121995, -3.209828351594799, -0.0015716791121009894, -0.1651791561224459, -1.386616125135576, 0.906564767775755]

irr
Final Interpretable Params: {'sigma_sq': 12.290298790449642, 'range_lon': 0.22907577626387454, 'range_lat': 0.19658119557610815, 'range_time': 1.000909212965491, 'advec_lat': 0.009248779511964886, 'advec_lon': -0.03267178143742675, 'nugget': 0.30640488716410186}
Vecchia Optimization finished in 78.75s. Results: [3.982512664402337, 1.4737024294478556, 0.30595458906718426, -2.9492224586592135, 0.009248779511964886, -0.03267178143742675, -1.1828478909684212, 1.0670711507791684]

reg
inal Interpretable Params: {'sigma_sq': 12.107425169564506, 'range_lon': 0.214359670488606, 'range_lat': 0.1817041045625628, 'range_time': 0.9627164808377048, 'advec_lat': 0.00974767142388243, 'advec_lon': -0.02991866611184063, 'nugget': 2.388788114152505e-07}
Vecchia Optimization finished in 150.05s. Results: [4.033918886317142, 1.5400999715436163, 0.3305514850281794, -3.004207297179707, 0.00974767142388243, -0.02991866611184063, -15.247309478824095, 1.1081462496002819]

day 3 
```mm:8, nheads:0 3.3min + sin + cos```

Final Interpretable Params: {'sigma_sq': 5.729729026727045, 'range_lon': 0.0036587431849657167, 'range_lat': 0.0012094699144324097, 'range_time': 0.03315069403922441, 'advec_lat': 0.0006871610223819161, 'advec_lon': 0.7768481448824361, 'nugget': 0.4047672260885547}
Vecchia Optimization finished in 206.37s. Results: [7.356303821961211, 5.610635582627295, 2.2138750399607745, -4.407887912768792, 0.0006871610223819161, 0.7768481448824361, -0.9044431275065533, 1.3942282339249539]

```mm:8, nheads:0 3.3min ```

Final Interpretable Params: {'sigma_sq': 13.52176155987271, 'range_lon': 0.14804554772619838, 'range_lat': 0.1144343090263059, 'range_time': 1.2325348810769514, 'advec_lat': -0.041359274162119944, 'advec_lon': -0.2538392052470705, 'nugget': 0.9374975398710232}
Vecchia Optimization finished in 205.28s. Results: [4.514535652675534, 1.9102352976581036, 0.515038086528947, -4.238616450537956, -0.041359274162119944, -0.2538392052470705, -0.06454114527858945, 1.4484173299536962]

```mm:8, nheads:5 4min ```

```mm:8, nheads:30 4min ```
Final Interpretable Params: {'sigma_sq': 13.669364586514357, 'range_lon': 0.15083264731249288, 'range_lat': 0.11662213059603312, 'range_time': 1.2627057882308315, 'advec_lat': -0.04101681075075879, 'advec_lon': -0.25248042684726085, 'nugget': 0.9611024244021281}
Vecchia Optimization finished in 204.66s. Results: [4.50674152004807, 1.8915843527250762, 0.5144637421408024, -4.249682444376359, -0.04101681075075879, -0.25248042684726085, -0.039674294627593726, 1.4485796682053353]


```mm:15, nheads:300 8min ```

Final Interpretable Params: {'sigma_sq': 15.987012980327377, 'range_lon': 0.19554905112684073, 'range_lat': 0.1520424726426302, 'range_time': 1.7346813870286135, 'advec_lat': -0.04175957765041507, 'advec_lon': -0.24798155593579485, 'nugget': 1.2705783726841615}

``` debiased whittle ```
Best Run Loss: 5.757 (after 4 steps)
Final Parameters (Natural Scale): sigmasq: 17.4925, range_lat: 0.1538, range_lon: 0.1915, range_time: 1.6396, advec_lat: -0.0530, advec_lon: -0.2803, nugget: 1.5813
Final Parameters (Phi Scale)    : phi1: 91.3249, phi2: 5.2208, phi3: 1.5503, phi4: 0.0136, advec_lat: -0.0530, advec_lon: -0.2803, nugget: 1.5813
Final Parameters (Raw Log Scale): log_phi1: 4.5144, log_phi2: 1.6527, log_phi3: 0.4384, log_phi4: -4.2942, advec_lat: -0.0530, advec_lon: -0.2803, log_nugget: 0.4583


day2

```mm:8, nheads:0 4min ```

Final Interpretable Params: {'sigma_sq': 8.366564121506674, 'range_lon': 0.1549748061133354, 'range_lat': 0.11153205307539416, 'range_time': 1.0734237460346854, 'advec_lat': -0.00623351472675741, 'advec_lon': -0.24543605291094764, 'nugget': 1.6400890271121227}
Vecchia Optimization finished in 227.53s. Results: [3.988736017479325, 1.8644927164784315, 0.6579010827747122, -3.870692038510152, -0.00623351472675741, -0.24543605291094764, 0.49475052518720436, 1.3441941337277519]

```mm:8, nheads:5 4min ```

Final Interpretable Params: {'sigma_sq': 8.423172104750531, 'range_lon': 0.15700473815159813, 'range_lat': 0.11301047728007582, 'range_time': 1.0894565926043982, 'advec_lat': -0.005855656240283135, 'advec_lon': -0.24334669105231177, 'nugget': 1.6523327736126328}
Vecchia Optimization finished in 329.65s. Results: [3.982465786618215, 1.851479294779673, 0.6575909009911699, -3.87431665562769, -0.005855656240283135, -0.24334669105231177, 0.502188091624637, 1.3442811993310593]


```mm:8, nheads:10 5min ```

Final Interpretable Params: {'sigma_sq': 8.434624560416324, 'range_lon': 0.15742622658981514, 'range_lat': 0.11331541468641343, 'range_time': 1.092870211035363, 'advec_lat': -0.005776271785214885, 'advec_lon': -0.2429149020688287, 'nugget': 1.6548849040774298}
Vecchia Optimization finished in 262.90s. Results: [3.9811435383315907, 1.8487983330555655, 0.657563470385232, -3.8752115791546777, -0.005776271785214885, -0.2429149020688287, 0.5037314620466141, 1.3442947956599645]

```mm:15 nheads:300 ```

Final Interpretable Params: {'sigma_sq': 12.138675796215404, 'range_lon': 0.2853604451458316, 'range_lat': 0.20648779278033955, 'range_time': 2.2060040135925485, 'advec_lat': 0.001727014542248498, 'advec_lon': -0.22293591604378923, 'nugget': 2.087514406427404}

Vecchia Optimization finished in 953.12s. Results: [3.7503988799281887, 1.2540021779879755, 0.6470236104112204, -4.090369836431771, 0.001727014542248498, -0.22293591604378923, 0.7359740790016484, 1.3452577850426004]


========================================
--- Starting Processing for Day 1 (2024-07-1) ---
========================================

```mm:7 nheads:0 5 min ```

Final Interpretable Params: {'sigma_sq': 10.569605281314743, 'range_lon': 0.14200359882303407, 'range_lat': 0.11161968489461344, 'range_time': 0.8715661717453963, 'advec_lat': 0.0242886130403664, 'advec_lon': -0.15954080680753288, 'nugget': 6.138438989319248e-07}
Vecchia Optimization finished in 1053.64s. Results: [4.309885333762307, 1.951902877877842, 0.48150995753408793, -3.628878779078998, 0.0242886130403664, -0.15954080680753288, -14.30352517739001, 1.240425409122721]

```mm:12, nheads:0 5min ```

Final Interpretable Params: {'sigma_sq': 10.488076410034935, 'range_lon': 0.14073044003890806, 'range_lat': 0.11039052166423442, 'range_time': 0.835231003054122, 'advec_lat': 0.02363100822784711, 'advec_lon': -0.16357402304597604, 'nugget': 6.728098082283848e-07}
Vecchia Optimization finished in 414.83s. Results: [4.3111480230549795, 1.9609089911476811, 0.4856440243355976, -3.561724098239924, 0.02363100822784711, -0.16357402304597604, -14.211803150171008, 1.240218304649 4564]

```mm:8, nheads:0 5min ```

------------------------------
Final Interpretable Params: {'sigma_sq': 10.488076417803729, 'range_lon': 0.14073044016388103, 'range_lat': 0.1103905217342916, 'range_time': 0.835231004195405, 'advec_lat': 0.023631008151489937, 'advec_lon': -0.1635740224244672, 'nugget': 6.727973102027563e-07}
Vecchia Optimization finished in 222.84s. Results: [4.311148022907675, 1.9609089902596504, 0.4856440248423989, -3.5617240991967183, 0.023631008151489937, -0.1635740224244672, -14.211821726210712, 1.240218304649 459]

```mm:8, nheads:10 5min ```
Final Interpretable Params: {'sigma_sq': 10.558430388457724, 'range_lon': 0.14179260996001697, 'range_lat': 0.1112165306452909, 'range_time': 0.8446228888601491, 'advec_lat': 0.023328002993195814, 'advec_lon': -0.16258995452127456, 'nugget': 4.637894119371829e-07}
Vecchia Optimization finished in 1152.10s. Results: [4.310314411953833, 1.9533897821800137, 0.4857729383037368, -3.56904949113498, 0.023328002993195814, -0.16258995452127456, -14.583835241326524, 1.2403595466586055]

```mm:8, nheads:80 5min ```

Final Interpretable Params: {'sigma_sq': 11.006086403532715, 'range_lon': 0.14860068457265108, 'range_lat': 0.11650340473006518, 'range_time': 0.9077858958066443, 'advec_lat': 0.021852814885628166, 'advec_lon': -0.15862268076456867, 'nugget': 7.109280446538787e-07}
Vecchia Optimization finished in 292.76s. Results: [4.304940969085704, 1.9064925398944794, 0.48668448271634956, -3.6194916282784857, 0.021852814885628166, -0.15862268076456867, -14.156694615282996, 1.2410108632965844]

```mm:8, nheads:200 4min ```

Final Interpretable Params: {'sigma_sq': 12.156526241674566, 'range_lon': 0.16620409318999774, 'range_lat': 0.1301807912751779, 'range_time': 1.077543051896279, 'advec_lat': 0.020739697904739146, 'advec_lon': -0.16128425389734136, 'nugget': 2.436005746521788e-06}
Vecchia Optimization finished in 240.63s. Results: [4.292404933576785, 1.7945387687653933, 0.48858464754993547, -3.738444532638281, 0.020739697904739146, -0.16128425389734136, -12.925150849120314, 1.2417995126905985]

```mm:8, nheads:400 4min ```

------------------------------
Final Interpretable Params: {'sigma_sq': 13.08618055898487, 'range_lon': 0.18072835038101287, 'range_lat': 0.14158038290565716, 'range_time': 1.1895069654662571, 'advec_lat': 0.02010820528296312, 'advec_lon': -0.16111328907142078, 'nugget': 0.005886870024760588}
Vecchia Optimization finished in 230.72s. Results: [4.282316955935752, 1.7107602017089585, 0.4882548888256034, -3.7685982164921903, 0.02010820528296312, -0.16111328907142078, -5.1350308275285474, 1.2419952634078875]

```mm:15 nheads 300 12 min```
Final Interpretable Params: {'sigma_sq': 12.768798755341088, 'range_lon': 0.17558306854693478, 'range_lat': 0.13749362701220857, 'range_time': 1.1605600864511632, 'advec_lat': 0.020485081623863546, 'advec_lon': -0.16411004529108067, 'nugget': 9.430158191707344e-06}


Vecchia Optimization finished in 691.05s. Results: [4.286647620915056, 1.7396430230054256, 0.48906937789270644, -3.7770914894896723, 0.020485081623863546, -0.16411004529108067, -11.57159768609298, 1.2419278421000428]


