In [1]:
# Configuration
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"

# --- Standard Libraries ---
import sys
import os
import json
import time
import copy
import cmath
import pickle
import logging
import argparse

# Path configuration (only run once)
sys.path.append(gems_tco_path)

# --- Third-Party Libraries ---
from pathlib import Path
from typing import Optional, List, Tuple, Dict, Any, Callable
from json import JSONEncoder

# Data manipulation and analysis
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
import typer

# Torch and Numerical Libraries
import torch
import torch.optim as optim
import torch.fft
import torch.nn.functional as F
from torch.nn import Parameter
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
import matplotlib.pyplot as plt 

# --- Custom (GEMS_TCO) Imports ---
import GEMS_TCO
from GEMS_TCO import kernels_reparam_space_time 
from GEMS_TCO import data_preprocess, data_preprocess as dmbh
from GEMS_TCO import orderings as _orderings 

from GEMS_TCO import alg_optimization, alg_opt_Encoder
from GEMS_TCO import configuration as config
from GEMS_TCO.data_loader import load_data2
from GEMS_TCO import debiased_whittle

Load monthly data

In [23]:
space: List[str] = ['1', '1']
lat_lon_resolution = [int(s) for s in space]
mm_cond_number: int = 8
years = ['2024']
month_range = [7] 

output_path = input_path = Path(config.mac_estimates_day_path)
data_load_instance = load_data2(config.mac_data_load_path)

lat_range_input = [1, 3]
lon_range_input = [125.0, 129.0]
lon_range_input = [129.0, 133.0]

#lat_range_input=[0,5]      
#lon_range_input=[123, 133.0] 

df_map, ord_mm, nns_map = data_load_instance.load_maxmin_ordered_data_bymonthyear(
lat_lon_resolution=lat_lon_resolution, 
mm_cond_number=mm_cond_number,
years_=years, 
months_=month_range,

lat_range=lat_range_input,   
lon_range=lon_range_input

)

#days: List[str] = ['0', '31']
#days_s_e = [int(d) for d in days]
#days_list = list(range(days_s_e[0], days_s_e[1]))

Subsetting data to lat: [1, 3], lon: [129.0, 133.0]


In [24]:
daily_aggregated_tensors_dw = [] 
daily_hourly_maps_dw = []      

daily_aggregated_tensors_vecc = [] 
daily_hourly_maps_vecc = []   


for day_index in range(31):
    hour_start_index = day_index * 8
    hour_end_index = (day_index + 1) * 8
    #hour_end_index = day_index*8 + 1
    hour_indices = [hour_start_index, hour_end_index]

    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
    df_map, 
    hour_indices, 
    ord_mm= None,  # or just omit it
    dtype=torch.float64, # or just omit it 
    keep_ori=False  #keep_exact_loc
    )

    daily_aggregated_tensors_dw.append( day_aggregated_tensor )
    daily_hourly_maps_dw.append( day_hourly_map )

    day_hourly_map, day_aggregated_tensor = data_load_instance.load_working_data(
    df_map, 
    hour_indices, 
    ord_mm= ord_mm,  # or just omit it
    dtype=torch.float64, # or just omit it 
    keep_ori=False  #keep_exact_loc
    )

    daily_aggregated_tensors_vecc.append( day_aggregated_tensor )
    daily_hourly_maps_vecc.append( day_hourly_map )
print(daily_aggregated_tensors_vecc[0].shape)
#print(daily_hourly_maps[0])
nn = daily_aggregated_tensors_vecc[0].shape[0]

torch.Size([23552, 4])


# 

If you want to compute likleihood on small 8 boxes,
then use 

# 1-3 125-129

1-3 125 129
day1

23184
 full likelihood: 27497.49,
 vecchia: 27512.18, 
 whittle de-biased: 1124.4
 full likelihood: 27516.94,
 vecchia: 27522.4, 
 whittle de-biased: 1120.5
 full likelihood: 27502.56,
 vecchia: 27514.28, 
 whittle de-biased: 1092.36
 full likelihood: 27506.84,
 vecchia: 27517.13, 
 whittle de-biased: 1099.41
 full likelihood: 27665.1,
 vecchia: 27676.64, 
 whittle de-biased: 1566.3

 mac line search 80
 23184
 full likelihood: 27506.87,
 vecchia: 27516.98, 
 whittle de-biased: 1100.3


 day2   

 23184
 full likelihood: 31667.88,
 vecchia: 31737.68, 
 whittle de-biased: 2833.55
 full likelihood: 31820.87,
 vecchia: 31869.75, 
 whittle de-biased: 2392.34
 full likelihood: 31672.01,
 vecchia: 31737.81, 
 whittle de-biased: 2846.26
 full likelihood: 31667.68,
 vecchia: 31736.62, 
 whittle de-biased: 2849.41
 full likelihood: 31667.56,
 vecchia: 31736.63, 
 whittle de-biased: 2849.82
 full likelihood: 31914.3,
 vecchia: 31955.94, 
 whittle de-biased: 2579.62


 3-5 129 133

 day1

 23040
 full likelihood: 25851.96,
 vecchia: 25906.67, 
 whittle de-biased: 1357.27
 full likelihood: 25825.77,
 vecchia: 25884.58, 
 whittle de-biased: 1265.2
 full likelihood: 25840.32,
 vecchia: 25893.1, 
 whittle de-biased: 1307.71
 full likelihood: 25839.27,
 vecchia: 25892.9, 
 whittle de-biased: 1307.29
 full likelihood: 25947.66,
 vecchia: 26020.97, 
 whittle de-biased: 1231.85

 mac_ slow version line search 80
 full likelihood: 25839.6,
 vecchia: 25893.36, 
 whittle de-biased: 1308.45



 day2

 23040
 full likelihood: 33063.62,
 vecchia: 33222.27, 
 whittle de-biased: 3189.93
 full likelihood: 33205.76,
 vecchia: 33280.34, 
 whittle de-biased: 2452.0
 full likelihood: 33073.54,
 vecchia: 33226.61, 
 whittle de-biased: 3214.84
 full likelihood: 33067.24,
 vecchia: 33226.19, 
 whittle de-biased: 3204.52
 full likelihood: 33066.99,    
 vecchia: 33226.04, 
 whittle de-biased: 3204.76
 full likelihood: 33146.59,
 vecchia: 33267.35, 
 whittle de-biased: 2215.82





day 2
3-5 129 -133
23040
 full likelihood: 26988.86,
 vecchia: 26872.44, 
 whittle de-biased: 3189.93
 full likelihood: 26866.2,
 vecchia: 26814.83, 
 whittle de-biased: 2988.98
 full likelihood: 26993.54,
 vecchia: 26876.0, 
 whittle de-biased: 3204.76
 full likelihood: 26993.56,
 vecchia: 26875.93, 
 whittle de-biased: 3204.64
 full likelihood: 26597.35,
 vecchia: 26625.36, 
 whittle de-biased: 2215.82


 3-5 124 128

 22680
 full likelihood: 30780.73,
 vecchia: 30517.97, 
 whittle de-biased: 11573.27
 full likelihood: 30610.24,     adams amarel
 vecchia: 30445.04, 
 whittle de-biased: 11388.29
 full likelihood: 30790.4,
 vecchia: 30521.77, 
 whittle de-biased: 11591.01
 full likelihood: 30790.61,
 vecchia: 30521.85, 
 whittle de-biased: 11591.22
 full likelihood: 30538.36,     lbfgs amarel (more accurate, slow setting)
 vecchia: 30483.29, 
 whittle de-biased: 11476.03

In [4]:
print(nn)
day_idx = 0
lat_range=lat_range_input 
lon_range=lon_range_input

day1_va = [4.22817, 1.664023, 0.481917, -3.77204, 0.02213, -0.16318, -1.737487]
day1_va_amarel = [4.2766, 1.6846, 0.5049, -3.6748, 0.01975, -0.15765, -14.722]
day1_vl = [4.2866, 1.7396, 0.4891, -3.777, 0.02048, -0.16411, -12.05573]
day1_vl_amarel =[4.2843, 1.7136, 0.4887, -3.7712, 0.0202, -0.1616, -14.7220]
day1_dwl = [4.2739, 1.8060, 0.7948, -3.3599, 0.0223, -0.1672, -11.8381]


day1_dwl2 = [4.8141, 2.1179, 0.5037, -8.3823, 0.0120, 0.0077, 1.8768]


day1_vl_mac2 = [4.2824, 1.7108, 0.4883, -3.7686, 0.0201, -0.1611, -5.1617]


day2_va = [3.7634, 1.2864, 0.6458, -4.05860, 0.001777, -0.22191, 0.7242916]
day2_va_amarel = [3.8929, 1.4547, 0.6384, -3.9767, 0.0002, -0.2129, 0.5708]
day2_vl = [3.7503, 1.2538, 0.6472, -4.09016, 0.001728, -0.222897, 0.73606]
day2_vl2 = [3.7440, 1.2168, 0.6473, -4.0569, 0.00105, -0.22017, 0.74023]
day2_vl3 = [3.7440, 1.2160, 0.6473, -4.0566, 0.0011, -0.2202, 0.7403]
day2_vl4 = [3.7440, 1.2167, 0.64726, -4.05665, 0.001068, -0.220179, 0.740284]
day2_dwl =[4.1200, 1.6540, 0.8909, -3.4966, -0.0263, -0.2601, -0.0986]

day3_va = [4.61865, 1.86892, 0.54694, -4.21337, -0.04020, -0.24562, -0.7427]
day3_va_amarel = [4.4692, 1.6762, 0.4894, -4.25829, -0.0370, -0.24052, 0.0924]
day3_vl = [4.39425, 1.60585, 0.50261, -4.30459, -0.03894, -0.2451, 0.26052]
day2 = [ day2_va, day2_va_amarel, day2_vl3, day2_vl4, day2_dwl]

day4_va = [4.1117, 1.6978, 0.7622, -4.0126, 0.028246, -0.14168, -0.27482]
day4_vl = [3.962231, 1.4687, 0.7822, -4.0332, 0.03072, -0.14823, 0.0994]
day4_dwl = [3.9351, 1.8070, 1.0980, -3.5154, 0.0214, -0.1712, -0.5348]

#day1 = [day1_va, day1_va_amarel, day1_vl, day1_vl_amarel, day1_dwl, day1_dwl2,day1_vl_mac2]
day3 = [day3_va, day3_va_amarel, day3_vl]
day1 = [day1_dwl2]
day4 = [ day4_dwl]
day2 = [day2_va, day2_va_amarel, day2_vl3, day2_vl4, day2_dwl]

for i,model_params in enumerate(day1):
    instance = debiased_whittle.full_vecc_dw_likelihoods(daily_aggregated_tensors_vecc, daily_hourly_maps_vecc, day_idx=day_idx, params_list=model_params, lat_range=lat_range, lon_range=lon_range)
    v = 0.5
    nheads = 300
    instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)

    model_params = [torch.tensor(x, dtype=torch.float64) for x in model_params]

    res = instance.likelihood_wrapper(model_params, instance.model_instance.matern_cov_aniso_STABLE_log_reparam, daily_aggregated_tensors_dw, daily_hourly_maps_dw)
    #print(res)
    print(f' full likelihood: {torch.round(res[0]*nn, decimals=2)},\n vecchia: {torch.round(res[1]*nn, decimals=2)}, \n whittle de-biased: {torch.round(res[2], decimals = 2)}')

23184
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
 full likelihood: 35799.27,
 vecchia: 35313.67, 
 whittle de-biased: 9220.28


day1 dwl

23184
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
 full likelihood: 27665.1,
 vecchia: 27667.72, 
 whittle de-biased: -8017.94

23184
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
 full likelihood: 35799.27,
 vecchia: 35313.67, 
 whittle de-biased: 9220.28

Four days 1 to 3  125 to 129

In [21]:
#day1_va = [3.8107, 1.4085, 2.7349, -3.0026, 0.0101, -0.1671, 0.1676] #epoch more than 200
lat_range=lat_range_input 
lon_range=lon_range_input

day1_va = [4.22817, 1.664023, 0.481917, -3.77204, 0.02213, -0.16318, -1.737487]
day1_vl = [4.2866, 1.7396, 0.4891, -3.777, 0.02048, -0.16411, -12.05573]
day1_dwl = [4.2739, 1.8060, 0.7948, -3.3599, 0.0223, -0.1672, -11.8381]

day2_va = [3.7634, 1.2864, 0.6458, -4.05860, 0.001777, -0.22191, 0.7242916]
day2_vl = [3.7503, 1.2538, 0.6472, -4.09016, 0.001728, -0.222897, 0.73606]
day2_dwl =[4.1200, 1.6540, 0.8909, -3.4966, -0.0263, -0.2601, -0.0986]

day3_va = [4.61865, 1.86892, 0.54694, -4.21337, -0.04020, -0.24562, -0.7427]
day3_vl = [4.4038, 1.6321, 0.50344, -4.3653, -0.0417, -0.2480, 0.2393]
day3_dwl = [4.0950, 1.6663, 0.6876, -3.3118, -0.0500, -0.2666, -0.5033]

day4_va = [4.1117, 1.6978, 0.7622, -4.0126, 0.028246, -0.14168, -0.27482]
day4_vl = [3.962231, 1.4687, 0.7822, -4.0332, 0.03072, -0.14823, 0.0994]
day4_dwl = [3.9351, 1.8070, 1.0980, -3.5154, 0.0214, -0.1712, -0.5348]
day4_dwl2 = [4.1875, 1.9465, 0.2492, -3.9739, 0.0146, -0.2040, -0.8567]




day1 = [day1_va, day1_vl, day1_dwl]
day2 = [day2_va, day2_vl, day2_dwl]
day3 = [day3_va, day3_vl, day3_dwl]
day4 = [day4_va, day4_vl, day4_dwl, day4_dwl2]

#days = [day1, day2, day3, day4]
days = [day4]
v = 0.5
nheads = 300



for day_idx, day in enumerate(days):
    for i,model_params in enumerate(day):
        instance = debiased_whittle.full_vecc_dw_likelihoods(daily_aggregated_tensors_vecc, daily_hourly_maps_vecc, day_idx=day_idx, params_list=model_params, lat_range=lat_range, lon_range=lon_range)

        instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)
        model_params = [torch.tensor(x, dtype=torch.float64) for x in model_params]
        res = instance.likelihood_wrapper(model_params, instance.model_instance.matern_cov_aniso_STABLE_log_reparam, daily_aggregated_tensors_dw, daily_hourly_maps_dw)
        #res = instance.likelihood_wrapper(daily_aggregated_tensors_dw, daily_hourly_maps_dw)
        if i==0:
            print(f'Vecchia Estimate using Adams')
        elif i==1:
            print(f'Vecchia estimate using L-BFGS')
        else:
            print(f'debiased_whittle estimate using L-BFGS')
        
        print(f' full likelihood: {torch.round(res[0]*nn, decimals=2)},\n vecchia: {torch.round(res[1]*nn, decimals=2)}, \n whittle de-biased: {torch.round(res[2], decimals = 2)}')
    print("-----")



#instance = debiased_whittle.full_vecc_dw_likelihoods(daily_aggregated_tensors, daily_hourly_maps, day_idx=0, params_list=a)
#v = 0.5
#nheads = 300
#instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)
#res = instance.likelihood_wrapper()
#res


Pre-computing Vecchia batches...
Pre-computed 20784 batches.
Vecchia Estimate using Adams
 full likelihood: 27784.07,
 vecchia: 27770.61, 
 whittle de-biased: 6023.03
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
Vecchia estimate using L-BFGS
 full likelihood: 27808.64,
 vecchia: 27783.2, 
 whittle de-biased: 6062.49
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
debiased_whittle estimate using L-BFGS
 full likelihood: 27702.8,
 vecchia: 27701.96, 
 whittle de-biased: 6415.19
Pre-computing Vecchia batches...
Pre-computed 20784 batches.
debiased_whittle estimate using L-BFGS
 full likelihood: 27431.5,
 vecchia: 27436.37, 
 whittle de-biased: 5975.43
-----


In [None]:
#day1_va = [3.8107, 1.4085, 2.7349, -3.0026, 0.0101, -0.1671, 0.1676] #epoch more than 200
lat_range=lat_range_input 
lon_range=lon_range_input

day1_va = [4.22817, 1.664023, 0.481917, -3.77204, 0.02213, -0.16318, -1.737487]
day1_vl = [4.2866, 1.7396, 0.4891, -3.777, 0.02048, -0.16411, -12.05573]
day1_dwl = [4.2739, 1.8060, 0.7948, -3.3599, 0.0223, -0.1672, -11.8381]

day2_va = [3.7634, 1.2864, 0.6458, -4.05860, 0.001777, -0.22191, 0.7242916]
day2_vl = [3.7503, 1.2538, 0.6472, -4.09016, 0.001728, -0.222897, 0.73606]
day2_dwl =[4.1200, 1.6540, 0.8909, -3.4966, -0.0263, -0.2601, -0.0986]

day3_va = [4.61865, 1.86892, 0.54694, -4.21337, -0.04020, -0.24562, -0.7427]
day3_vl = [4.4038, 1.6321, 0.50344, -4.3653, -0.0417, -0.2480, 0.2393]
day3_dwl = [4.0950, 1.6663, 0.6876, -3.3118, -0.0500, -0.2666, -0.5033]

day4_va = [4.1117, 1.6978, 0.7622, -4.0126, 0.028246, -0.14168, -0.27482]
day4_vl = [3.962231, 1.4687, 0.7822, -4.0332, 0.03072, -0.14823, 0.0994]
day4_dwl = [3.9351, 1.8070, 1.0980, -3.5154, 0.0214, -0.1712, -0.5348]
day4_dwl2 = [4.1875, 1.9465, 0.2492, -3.9739, 0.0146, -0.2040, -0.8567]




day1 = [day1_va, day1_vl, day1_dwl]
day2 = [day2_va, day2_vl, day2_dwl]
day3 = [day3_va, day3_vl, day3_dwl]
day4 = [day4_va, day4_vl, day4_dwl, day4_dwl2]

#days = [day1, day2, day3, day4]
days = [day4]
v = 0.5
nheads = 300

for day_idx, day in enumerate(days):
    for i,model_params in enumerate(day):
        instance = debiased_whittle.full_vecc_dw_likelihoods(daily_aggregated_tensors_vecc, daily_hourly_maps_vecc, day_idx=day_idx, params_list=model_params, lat_range=lat_range, lon_range=lon_range)

        instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)
        model_params = [torch.tensor(x, dtype=torch.float64) for x in model_params]
        res = instance.likelihood_wrapper(model_params, instance.model_instance.matern_cov_aniso_STABLE_log_reparam, daily_aggregated_tensors_dw, daily_hourly_maps_dw)
        #res = instance.likelihood_wrapper(daily_aggregated_tensors_dw, daily_hourly_maps_dw)
        if i==0:
            print(f'Vecchia Estimate using Adams')
        elif i==1:
            print(f'Vecchia estimate using L-BFGS')
        else:
            print(f'debiased_whittle estimate using L-BFGS')
        
        print(f' full likelihood: {torch.round(res[0]*nn, decimals=2)},\n vecchia: {torch.round(res[1]*nn, decimals=2)}, \n whittle de-biased: {torch.round(res[2], decimals = 2)}')
    print("-----")



#instance = debiased_whittle.full_vecc_dw_likelihoods(daily_aggregated_tensors, daily_hourly_maps, day_idx=0, params_list=a)
#v = 0.5
#nheads = 300
#instance.initiate_model_instance_vecchia(v, nns_map, mm_cond_number, nheads)
#res = instance.likelihood_wrapper()
#res

Pre-computing Vecchia batches...
Pre-computed 21152 batches.
Vecchia Estimate using Adams
 full likelihood: 31058.52,
 vecchia: 31021.1, 
 whittle de-biased: 6539.2
Pre-computing Vecchia batches...
Pre-computed 21152 batches.
Vecchia estimate using L-BFGS
 full likelihood: 31145.22,
 vecchia: 31070.0, 
 whittle de-biased: 6640.06
Pre-computing Vecchia batches...
Pre-computed 21152 batches.
debiased_whittle estimate using L-BFGS
 full likelihood: 31256.6,
 vecchia: 31318.44, 
 whittle de-biased: 7426.94
Pre-computing Vecchia batches...
Pre-computed 21152 batches.
debiased_whittle estimate using L-BFGS
 full likelihood: 31276.31,
 vecchia: 31284.23, 
 whittle de-biased: 6375.48
-----


difference data


In [15]:
a = [11.0474, 0.0623, 0.2445, 1.0972, 0.0101, -0.1671, 1.1825]
day = 0 # 0 index
lat_range= [0,5]
lon_range= [123.0, 133.0]
#lat_range= [1,3]
#lon_range= [125, 129.0]
db = debiased_whittle.debiased_whittle_preprocess(daily_aggregated_tensors_dw, daily_hourly_maps_dw, day_idx=day, params_list=a, lat_range=lat_range, lon_range=lon_range)


subsetted_aggregated_day = db.generate_spatially_filtered_days(0,5,123,133)
print(subsetted_aggregated_day.shape)
N2= subsetted_aggregated_day.shape[0]
print(N2)
subsetted_aggregated_day[:20]

torch.Size([142832, 4])
142832


tensor([[ 4.0000e-03,  1.2303e+02,  2.9422e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2309e+02,  1.9636e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2316e+02, -1.3187e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2322e+02, -3.1683e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2328e+02, -5.4924e-01,  2.1000e+01],
        [ 4.0000e-03,  1.2335e+02,  1.7212e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2341e+02,  2.1317e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2347e+02,  2.3966e-01,  2.1000e+01],
        [ 4.0000e-03,  1.2353e+02,  3.0116e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2360e+02, -5.0000e-01,  2.1000e+01],
        [ 4.0000e-03,  1.2366e+02, -3.3742e-01,  2.1000e+01],
        [ 4.0000e-03,  1.2372e+02, -1.5392e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2379e+02,  3.5266e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2385e+02, -1.3805e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2391e+02,  9.2229e-01,  2.1000e+01],
        [ 4.0000e-03,  1.2398e+02, -8.0870e-01,  2.1000e+01],
        

likelihood calculation

In [8]:
dwl = debiased_whittle.debiased_whittle_likelihood()

# --- Configuration ---
DAY_TO_RUN = 1
TAPERING_FUNC = dwl.cgn_hamming # Use Hamming taper
NUM_RUNS = 1
EPOCHS = 2000
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

DELTA_LAT, DELTA_LON = 0.044, 0.063 

LAT_COL, LON_COL = 0, 1
VAL_COL = 2 # Spatially differenced value
TIME_COL = 3
lr = 0.01 

cur_df = subsetted_aggregated_day
unique_times = torch.unique(cur_df[:, TIME_COL])
time_slices_list = [cur_df[cur_df[:, TIME_COL] == t_val] for t_val in unique_times]

# --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
print("Pre-computing J-vector (Hamming taper)...")
J_vec, n1, n2, p, taper_grid = dwl.generate_Jvector_tapered( 
    time_slices_list,
    tapering_func=TAPERING_FUNC, 
    lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
    device=DEVICE
)

I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)
taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)


init_sigmasq   = 13.059
init_range_lat = 0.154 
init_range_lon = 0.195 
init_nugget    = 0.247
init_range_time = 1.28
init_advec_lat = 0.0218
init_advec_lon = -0.1689


init_phi2 = 1.0 / init_range_lon
init_phi1 = init_sigmasq * init_phi2
init_phi3 = (init_range_lon / init_range_lat)**2
init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

initial_params_values = [
    np.log(init_phi1),    # [0] log_phi1
    np.log(init_phi2),    # [1] log_phi2
    np.log(init_phi3),    # [2] log_phi3
    np.log(init_phi4),    # [3] log_phi4
    init_advec_lat,       # [4] advec_lat (NOT log)
    init_advec_lon,       # [5] advec_lon (NOT log)
    np.log(init_nugget)   # [6] log_nugget
]

initial_params_values =[4.2739, 1.8060, 0.7948, -3.3599, 0.0223, -0.1672, -11.8381]
print(f"Starting with FIXED params (raw log-scale): {[round(p, 4) for p in initial_params_values]}")

params_list = [
    Parameter(torch.tensor([val], dtype=torch.float64))
    for val in initial_params_values
]

dwnll = dwl.whittle_likelihood_loss_tapered(
    params=torch.cat(params_list),
    I_sample=I_sample,
    n1=n1,
    n2=n2,
    p_time=p,
    taper_autocorr_grid=taper_autocorr_grid,
    delta1=DELTA_LAT,
    delta2=DELTA_LON
)

dwnll2 = dwl.whittle_likelihood_loss_tapered_sum(
    params=torch.cat(params_list),
    I_sample=I_sample,
    n1=n1,
    n2=n2,
    p_time=p,
    taper_autocorr_grid=taper_autocorr_grid,
    delta1=DELTA_LAT,
    delta2=DELTA_LON
)

print(dwnll* n1* n2, dwnll2)

Using device: cpu
Pre-computing J-vector (Hamming taper)...
Starting with FIXED params (raw log-scale): [4.2739, 1.806, 0.7948, -3.3599, 0.0223, -0.1672, -11.8381]
tensor(36212.0890, dtype=torch.float64, grad_fn=<MulBackward0>) (tensor(36252.7646, dtype=torch.float64, grad_fn=<SumBackward0>), 113, 158)


debiased whittle optimization adams

In [None]:
# =========================================================================
# 6. Main Execution Script (ðŸ’¥ 7-PARAM MULTIVARIATE ðŸ’¥)
# =========================================================================

dwl = debiased_whittle.debiased_whittle_likelihood()
if __name__ == '__main__':
    start_time = time.time()

    # --- Configuration ---
    DAY_TO_RUN = 1
    TAPERING_FUNC = dwl.cgn_hamming # Use Hamming taper
    NUM_RUNS = 1
    EPOCHS = 200
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    DELTA_LAT, DELTA_LON = 0.044, 0.063 

    LAT_COL, LON_COL = 0, 1
    VAL_COL = 2 # Spatially differenced value
    TIME_COL = 3
    lr = 0.1 

    # --- Load Spatially Differenced Data ---

    cur_df = subsetted_aggregated_day
    

    # --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
    print("Pre-computing J-vector (Hamming taper)...")
    J_vec, n1, n2, p, taper_grid = dwl.generate_Jvector_tapered( 
        time_slices_list,
        tapering_func=TAPERING_FUNC, 
        lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
        device=DEVICE
    )

    if J_vec is None or J_vec.numel() == 0 or n1 == 0 or n2 == 0 or p == 0:
       print(f"Error: J-vector generation failed for Day {DAY_TO_RUN}.")
       exit()
       
    print("Pre-computing sample periodogram...")
    I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)

    print("Pre-computing Hamming taper autocorrelation...")
    taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

    if torch.isnan(I_sample).any() or torch.isinf(I_sample).any():
        print("Error: NaN/Inf in sample periodogram.")
        exit()
    if torch.isnan(taper_autocorr_grid).any() or torch.isinf(taper_autocorr_grid).any():
        print("Error: NaN/Inf in taper autocorrelation.")
        exit()

    print(f"Data grid: {n1}x{n2}, {p} time points. J-vector, Periodogram, Taper Autocorr on {DEVICE}.")

    # --- 2. Optimization Loop ---
    all_final_results = []
    all_final_losses = []

    for i in range(NUM_RUNS):
        print(f"\n{'='*30} Initialization Run {i+1}/{NUM_RUNS} {'='*30}")

        # --- 7-PARAMETER initialization ---
        ''' 
        init_sigmasq   = 15.0
        init_range_lat = 0.66 
        init_range_lon = 0.7 
        init_nugget    = 1.5
        init_beta      = 0.1  # Temporal range ratio
        init_advec_lat = 0.02
        init_advec_lon = -0.08
        '''
        init_sigmasq   = 13.059
        init_range_lat = 0.154 
        init_range_lon = 0.195 
        init_nugget    = 1.247
        init_range_time = 1.28
        init_advec_lat = 0.0218
        init_advec_lon = -0.1689


        
        init_phi2 = 1.0 / init_range_lon
        init_phi1 = init_sigmasq * init_phi2
        init_phi3 = (init_range_lon / init_range_lat)**2
        init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

        initial_params_values = [
            np.log(init_phi1),    # [0] log_phi1
            np.log(init_phi2),    # [1] log_phi2
            np.log(init_phi3),    # [2] log_phi3
            np.log(init_phi4),    # [3] log_phi4
            init_advec_lat,       # [4] advec_lat (NOT log)
            init_advec_lon,       # [5] advec_lon (NOT log)
            np.log(init_nugget)   # [6] log_nugget
        ]
        
        print(f"Starting with FIXED params (raw log-scale): {[round(p, 4) for p in initial_params_values]}")

        params_list = [
            Parameter(torch.tensor([val], dtype=torch.float32))
            for val in initial_params_values
        ]


        optimizer = torch.optim.Adam(params_list, lr=lr)

        # --- ðŸ’¥ REVISED: Use Plateau Scheduler ðŸ’¥ ---
        scheduler = ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.5,
            patience=10, # Wait 10 epochs for improvement
            verbose=True
        )
        # --- END REVISION ---

        print(f"Starting optimization run {i+1} on device {DEVICE} (Hamming, 7-param ST kernel)...")

        nat_params_str, phi_params_str, raw_params_str, loss, epochs_run = dwl.run_full_tapered(
            params_list=params_list,
            optimizer=optimizer,
            scheduler=scheduler,
            I_sample=I_sample,
            n1=n1, n2=n2, p_time=p,
            taper_autocorr_grid=taper_autocorr_grid, 
            epochs=EPOCHS,
            device=DEVICE
        )
        
        if loss is not None:
            all_final_results.append((nat_params_str, phi_params_str, raw_params_str))
            all_final_losses.append(loss)
        else:
            all_final_losses.append(float('inf'))

    # --- ðŸ’¥ REVISED: Corrected f-string ðŸ’¥ ---
    print(f"\n\n{'='*25} Overall Result from Run {'='*25} {'='*25}")
    # --- END REVISION ---
    
    valid_losses = [l for l in all_final_losses if l is not None and l != float('inf')]

    if not valid_losses:
        print(f"The run failed or resulted in an invalid loss for Day {DAY_TO_RUN}.")
    else:
        best_loss = min(valid_losses)
        best_run_index = all_final_losses.index(best_loss)
        best_results = all_final_results[best_run_index]
        
        print(f"Best Run Loss: {best_loss} (after {epochs_run} epochs)")
        print(f"Final Parameters (Natural Scale): {best_results[0]}")
        print(f"Final Parameters (Phi Scale)    : {best_results[1]}")
        print(f"Final Parameters (Raw Log Scale): {best_results[2]}")

    end_time = time.time()
    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")

debiased whittle optimization lbfgs

In [16]:

dwl = debiased_whittle.debiased_whittle_likelihood()
if __name__ == '__main__':
    start_time = time.time()

    # --- Configuration ---
    DAY_TO_RUN = 4 # data is decided above
    TAPERING_FUNC = dwl.cgn_hamming # Use Hamming taper
    NUM_RUNS = 1
    MAX_STEPS = 20 # L-BFGS usually converges in far fewer steps
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    DELTA_LAT, DELTA_LON = 0.044, 0.063 

    LAT_COL, LON_COL = 0, 1
    VAL_COL = 2 # Spatially differenced value
    TIME_COL = 3


    cur_df =subsetted_aggregated_day
    
    if cur_df.numel() == 0 or cur_df.shape[1] <= max(LAT_COL, LON_COL, VAL_COL, TIME_COL):
        print(f"Error: Data for Day {DAY_TO_RUN} is empty or invalid.")
        exit()

    unique_times = torch.unique(cur_df[:, TIME_COL])
    time_slices_list = [cur_df[cur_df[:, TIME_COL] == t_val] for t_val in unique_times]

    # --- 1. Pre-compute J-vector, Taper Grid, and Taper Autocorrelation ---
    print("Pre-computing J-vector (Hamming taper)...")
    
    # --- ðŸ’¥ REVISED: Renamed 'p' to 'p_time' ðŸ’¥ ---
    J_vec, n1, n2, p_time, taper_grid = dwl.generate_Jvector_tapered( 
        time_slices_list,
        tapering_func=TAPERING_FUNC, 
        lat_col=LAT_COL, lon_col=LON_COL, val_col=VAL_COL,
        device=DEVICE
    )

    if J_vec is None or J_vec.numel() == 0 or n1 == 0 or n2 == 0 or p_time == 0:
       print(f"Error: J-vector generation failed for Day {DAY_TO_RUN}.")
       exit()
       
    print("Pre-computing sample periodogram...")
    I_sample = dwl.calculate_sample_periodogram_vectorized(J_vec)

    print("Pre-computing Hamming taper autocorrelation...")
    taper_autocorr_grid = dwl.calculate_taper_autocorrelation_fft(taper_grid, n1, n2, DEVICE)

    if torch.isnan(I_sample).any() or torch.isinf(I_sample).any():
        print("Error: NaN/Inf in sample periodogram.")
        exit()
    if torch.isnan(taper_autocorr_grid).any() or torch.isinf(taper_autocorr_grid).any():
        print("Error: NaN/Inf in taper autocorrelation.")
        exit()

    print(f"Data grid: {n1}x{n2}, {p_time} time points. J-vector, Periodogram, Taper Autocorr on {DEVICE}.")
    # --- END REVISION ---

    # --- 2. Optimization Loop ---
    all_final_results = []
    all_final_losses = []

    for i in range(NUM_RUNS):
        print(f"\n{'='*30} Initialization Run {i+1}/{NUM_RUNS} {'='*30}")

        # --- 7-PARAMETER initialization ---
        ''' 
        init_sigmasq   = 15.0
        init_range_lat = 0.66 
        init_range_lon = 0.7 
        init_nugget    = 1.5
        init_beta      = 0.1  # Temporal range ratio
        init_advec_lat = 0.02
        init_advec_lon = -0.08
        '''
        init_sigmasq   = 13.059
        init_range_lat = 0.154 
        init_range_lon = 0.195
        init_advec_lat = 0.0218
        init_range_time = 0.7
        init_advec_lon = -0.1689
        init_nugget    = 0.247

        init_phi2 = 1.0 / init_range_lon
        init_phi1 = init_sigmasq * init_phi2
        init_phi3 = (init_range_lon / init_range_lat)**2
        # Change needed to match the spatial-temporal distance formula:
        init_phi4 = (init_range_lon / init_range_time)**2      # (range_lon / range_time)^2

        initial_params_values = [
            np.log(init_phi1),    # [0] log_phi1
            np.log(init_phi2),    # [1] log_phi2
            np.log(init_phi3),    # [2] log_phi3
            np.log(init_phi4),    # [3] log_phi4
            init_advec_lat,       # [4] advec_lat (NOT log)
            init_advec_lon,       # [5] advec_lon (NOT log)
            np.log(init_nugget)   # [6] log_nugget
        ]
        
        print(f"Starting with FIXED params (raw log-scale): {[round(p, 4) for p in initial_params_values]}")

        params_list = [
            Parameter(torch.tensor([val], dtype=torch.float64))
            for val in initial_params_values
        ]

        # Helper to define the boundary globally for clarity
        NUGGET_LOWER_BOUND = 0.05
        LOG_NUGGET_LOWER_BOUND = np.log(NUGGET_LOWER_BOUND) # Approx -2.9957

        # --- ðŸ’¥ REVISED: Use L-BFGS Optimizer ðŸ’¥ ---
        optimizer = torch.optim.LBFGS(
            params_list,
            lr=1.0,           # Initial step length for line search
            max_iter=20,      # Iterations per step
            history_size=100,
            line_search_fn="strong_wolfe", # Often more robust
            tolerance_grad=1e-5
        )
        # --- END REVISION ---

        print(f"Starting optimization run {i+1} on device {DEVICE} (Hamming, 7-param ST kernel, L-BFGS)...")
        
        # --- ðŸ’¥ REVISED: Call L-BFGS trainer, pass p_time ðŸ’¥ ---
        nat_params_str, phi_params_str, raw_params_str, loss, steps_run = dwl.run_lbfgs_tapered(
            params_list=params_list,
            optimizer=optimizer,
            I_sample=I_sample,
            n1=n1, n2=n2, p_time=p_time,
            taper_autocorr_grid=taper_autocorr_grid, 
            max_steps=MAX_STEPS,
            device=DEVICE
        )
        # --- END REVISION ---
        
        if loss is not None:
            all_final_results.append((nat_params_str, phi_params_str, raw_params_str))
            all_final_losses.append(loss)
        else:
            all_final_losses.append(float('inf'))

    print(f"\n\n{'='*25} Overall Result from Run {'='*25} {'='*25}")
    
    valid_losses = [l for l in all_final_losses if l is not None and l != float('inf')]

    if not valid_losses:
        print(f"The run failed or resulted in an invalid loss for Day {DAY_TO_RUN}.")
    else:
        best_loss = min(valid_losses)
        best_run_index = all_final_losses.index(best_loss)
        best_results = all_final_results[best_run_index]
        
        print(f"Best Run Loss: {best_loss} (after {steps_run} steps)")
        print(f"Final Parameters (Natural Scale): {best_results[0]}")
        print(f"Final Parameters (Phi Scale)    : {best_results[1]}")
        print(f"Final Parameters (Raw Log Scale): {best_results[2]}")

    end_time = time.time()
    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")

Using device: cpu
Pre-computing J-vector (Hamming taper)...
Pre-computing sample periodogram...
Pre-computing Hamming taper autocorrelation...
Data grid: 113x158, 8 time points. J-vector, Periodogram, Taper Autocorr on cpu.

Starting with FIXED params (raw log-scale): [4.2042, 1.6348, 0.4721, -2.5562, 0.0218, -0.1689, -1.3984]
Starting optimization run 1 on device cpu (Hamming, 7-param ST kernel, L-BFGS)...
--- Step 1/20 ---
 Loss: 1.254618 | Max Grad: 2.150713e-02
  Params (Raw Log): log_phi1: 4.2025, log_phi2: 1.9605, log_phi3: 0.2627, log_phi4: -3.9885, advec_lat: 0.0152, advec_lon: -0.2036, log_nugget: -0.9763
--- Step 2/20 ---
 Loss: 1.163201 | Max Grad: 6.975980e-06
  Params (Raw Log): log_phi1: 4.1875, log_phi2: 1.9465, log_phi3: 0.2492, log_phi4: -3.9739, advec_lat: 0.0146, advec_lon: -0.2040, log_nugget: -0.8567
--- Step 3/20 ---
 Loss: 1.163044 | Max Grad: 6.975980e-06
  Params (Raw Log): log_phi1: 4.1875, log_phi2: 1.9465, log_phi3: 0.2492, log_phi4: -3.9739, advec_lat: 0.01