In [25]:
# for path in sys.path:
#   print(path)

import sys
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)

import logging
import argparse # Argument parsing
import math
from collections import defaultdict
import concurrent
from concurrent.futures import ThreadPoolExecutor  # Importing specific executor for clarity
import time

# Data manipulation and analysis
import pandas as pd
import numpy as np

# Nearest neighbor search
import sklearn
from sklearn.neighbors import BallTree

# Special functions and optimizations
from scipy.special import gamma, kv  # Bessel function and gamma function
from scipy.stats import multivariate_normal  # Simulation
from scipy.optimize import minimize
from scipy.spatial.distance import cdist  # For space and time distance
from scipy.spatial import distance  # Find closest spatial point
from scipy.optimize import differential_evolution

# Plotting and visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Type hints
from typing import Callable, Union, Tuple

# Add your custom path
# sys.path.append("/cache/home/jl2815/tco")

# Custom imports

from GEMS_TCO import orbitmap 
from GEMS_TCO import kernels 
from GEMS_TCO import evaluate
from GEMS_TCO import orderings as _orderings

import pickle
import torch
import torch.optim as optim
import copy                    # clone tensor

In [76]:
# Thinning strides: the data-load cell keeps every 4th unique latitude and
# every 4th unique longitude of the reference grid.
lat_lon_resolution = [4, 4]

# Neighbour count passed as `max_nn` to the nearest-neighbour search and as
# `mm_cond_number` to the likelihood / fitting objects.
mm_cond_number = 10

# Placeholder parameter vector; the later cells shown in this notebook assign
# their own `params` before using it.
params = [20, 8.25, 5.25, 0.2, 0.5, 5]

# [start, stop) positions into the sorted list of hourly keys that are turned
# into the analysis tensors.
idx_for_datamap = [0, 8]

Data load

In [77]:


# Load one reference dictionary whose grid defines the spatial coordinates to keep.
# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files.
# NOTE(review): the data root is an absolute, machine-specific path; consider
# moving it to the configuration cell.
# filepath = "C:/Users/joonw/TCO/GEMS_data/data_2023/sparse_cen_map23_01.pkl"
GEMS_DATA_DIR = "/Users/joonwonlee/Documents/GEMS_DATA"
REFERENCE_KEY = 'y23m01day01_hm02:12'
# Constant subtracted from 'Hours_elapsed' before rounding.
# NOTE(review): presumably re-bases the time axis near zero -- confirm its origin.
HOURS_ELAPSED_OFFSET = 477700

filepath = f"{GEMS_DATA_DIR}/pickle_2023/coarse_cen_map23_01.pkl"
with open(filepath, 'rb') as pickle_file:
    # NOTE(review): the variable name says "24" but the file read here is the 2023 map.
    coarse_dict_24_1 = pickle.load(pickle_file)

# Look the reference frame up with .get() and fail loudly if it is missing.
# (Previously the key was indexed directly first, so the None check could
# never trigger, and it only printed instead of stopping.)
sample_key = coarse_dict_24_1.get(REFERENCE_KEY)
if sample_key is None:
    raise KeyError(f"Key '{REFERENCE_KEY}' not found in the dictionary.")
sample_df = sample_key

# Keep every rho_lat-th unique latitude and every rho_lon-th unique longitude.
# { (20,20):(5,1), (5,5):(20,40) }
rho_lat = lat_lon_resolution[0]
rho_lon = lat_lon_resolution[1]
lat_n = sample_df['Latitude'].unique()[::rho_lat]
lon_n = sample_df['Longitude'].unique()[::rho_lon]

lat_number = len(lat_n)
lon_number = len(lon_n)

# Restrict every hourly frame to the thinned grid, keyed by "<year>_<month>_<original key>".
coarse_dicts = {}

years = ['2024']
for year in years:
    for month in range(7, 8):  # only month 7 is loaded; widen the range for more months
        # filepath = f"C:/Users/joonw/TCO/GEMS_data/data_{year}/sparse_cen_map{year[2:]}_{month:02d}.pkl"
        filepath = f"{GEMS_DATA_DIR}/pickle_{year}/coarse_cen_map{year[2:]}_{month:02d}.pkl"
        with open(filepath, 'rb') as pickle_file:
            loaded_map = pickle.load(pickle_file)
        # The file is closed at this point; filtering only needs the loaded dictionary.
        for key, tmp_df in loaded_map.items():
            coarse_filter = (tmp_df['Latitude'].isin(lat_n)) & (tmp_df['Longitude'].isin(lon_n))
            coarse_dicts[f"{year}_{month:02d}_{key}"] = tmp_df[coarse_filter].reset_index(drop=True)


key_idx = sorted(coarse_dicts)
if not key_idx:
    raise ValueError("coarse_dicts is empty")

# Use the first hour for coordinates (the analysis assumes all hours share one grid).
data_for_coord = coarse_dicts[key_idx[0]]
x1 = data_for_coord['Longitude'].values
y1 = data_for_coord['Latitude'].values
coords1 = np.stack((x1, y1), axis=-1)


# instance = orbitmap.MakeOrbitdata(data_for_coord, lat_s=5, lat_e=10, lon_s=110, lon_e=120)
# s_dist = cdist(coords1, coords1, 'euclidean')
# ord_mm, _ = instance.maxmin_naive(s_dist, 0)

# ord_mm is used below as a positional row ordering for every hourly frame.
ord_mm = _orderings.maxmin_cpp(coords1)
data_for_coord = data_for_coord.iloc[ord_mm].reset_index(drop=True)
coords1_reordered = np.stack((data_for_coord['Longitude'].values, data_for_coord['Latitude'].values), axis=-1)
# nns_map = instance.find_nns_naive(locs=coords1_reordered, dist_fun='euclidean', max_nn=mm_cond_number)
nns_map = _orderings.find_nns_l2(locs=coords1_reordered, max_nn=mm_cond_number)


# Validate the requested window of hourly keys before building any tensors.
start_idx, stop_idx = idx_for_datamap[0], idx_for_datamap[1]
if stop_idx > len(key_idx):
    raise IndexError(
        f"idx_for_datamap={idx_for_datamap} exceeds the {len(key_idx)} available keys"
    )
selected_keys = [key_idx[i] for i in range(start_idx, stop_idx)]
if not selected_keys:
    raise ValueError(f"idx_for_datamap={idx_for_datamap} selects no keys")

# Single pass: shift/round the time column, apply the ordering, then keep
#   * one float tensor per hour (first four columns) in analysis_data_map, and
#   * the reordered frame, so all hours can be concatenated once afterwards
#     instead of growing a DataFrame inside the loop.
analysis_data_map = {}
ordered_frames = []
for hour_key in selected_keys:
    hourly = coarse_dicts[hour_key].copy()
    hourly['Hours_elapsed'] = np.round(hourly['Hours_elapsed'] - HOURS_ELAPSED_OFFSET)
    hourly = hourly.iloc[ord_mm].reset_index(drop=True)

    analysis_data_map[hour_key] = torch.from_numpy(hourly.iloc[:, :4].to_numpy()).float()
    ordered_frames.append(hourly)

# Stack all selected hours row-wise and keep the first four columns as a float tensor.
aggregated_data = pd.concat(ordered_frames, axis=0).iloc[:, :4].to_numpy()
aggregated_data = torch.from_numpy(aggregated_data).float()  # Convert NumPy to Tensor


instance = kernels.likelihood_function(smooth=0.5, input_map=analysis_data_map, aggregated_data=aggregated_data, nns_map=nns_map, mm_cond_number=mm_cond_number)


In [72]:
# Parameter vector at which both objectives are evaluated.
# Earlier candidates that used to be assigned here and were immediately
# overwritten (kept for reference only):
#   [21.8, 1.09, 1.17, 0.2, .2, 0.5, 1]
#   [52.627, 4, 5.685, 6.77e-2, -4.19e-3, 0.0585, 3.143]  # 50x8 lr=0.01  24.42 1.92, 1.92, 0.001, -0.045, -.237, 3.34
params = torch.tensor(
    [51.79, 3.894, 4.135, -2.08e-2, -7.71e-2, 0.061, 3.5],
    requires_grad=True,
)

torch_smooth = torch.tensor(0.5, dtype=torch.float32)

instance = kernels.likelihood_function(
    smooth=torch_smooth,
    input_map=analysis_data_map,
    aggregated_data=aggregated_data,
    nns_map=nns_map,
    mm_cond_number=mm_cond_number,
)

# Objective on the full stacked data: all four columns go in as the first data
# argument and column 2 as the second.
# NOTE(review): column 2 is presumably the response variable -- confirm in kernels.
full_out = instance.full_likelihood(
    params,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
    instance.matern_cov_anisotropy_v05,
)
print(full_out)

# Vecchia-style objective at the same parameters, for side-by-side comparison.
vecchia_out = instance.vecchia_like_local_computer(params, instance.matern_cov_anisotropy_v05)
print(vecchia_out)

# `out0` keeps the value it had at the end of the original cell.
out0 = vecchia_out

tensor(2542.7729, grad_fn=<MulBackward0>)
tensor(2640.7676, grad_fn=<AddBackward0>)


Optimization full likelihood

In [15]:
# Size check: display the dimensions of the stacked data tensor.
aggregated_data.shape

torch.Size([10000, 4])

## 200 x 8

lr 0.001 without scheduler  same as lr, step_size, gamma  0.01 40 0.5  (9.8s)

 Loss: 2549.066650390625, full Parameters: [ 2.48777485e+01  2.05998826e+00  2.16013098e+00  2.20775465e-03
 -7.89414570e-02  1.05411254e-01  3.75236106e+00]

 lr 0.01  step size 40  betas 0.9 , 0.8 gamma 0.9  30 s

  Loss: 2547.1728515625, full Parameters: [ 2.7377291e+01  2.2077193e+00  2.3204505e+00  1.0307773e-03
 -8.0311157e-02  9.8579854e-02  3.6677265e+00]

 lr 0.01  step size 10 betas 0.9 , 0.8 gamma 0.9  30 s
  Loss: 2548.87841796875, full Parameters: [ 2.5092268e+01  2.0689390e+00  2.1694989e+00  2.0285936e-03
 -7.9028614e-02  1.0501490e-01  3.7373385e+00]
Training full likelihood complete.   11.8 s

 lr 0.01  step size 20 betas 0.9 , 0.8 gamma 0.9  30 s
 Loss: 2548.15283203125, full Parameters: [ 2.59814014e+01  2.12175608e+00  2.22699022e+00  1.73025124e-03
 -7.93599486e-02  1.02427535e-01  3.70715070e+00]



lr 0.01  step size 20 beta 0.9 0.99 gamma 0.9
 Loss: 2548.18603515625, full Parameters: [ 2.5938652e+01  2.1110108e+00  2.2155209e+00  1.5893303e-03
 -7.9482891e-02  1.0297947e-01  3.6958976e+00]
 21.6

lr 0.01  step size 20 beta 0.9 0.8 gamma 0.9
 Loss: 2548.15283203125, full Parameters: [ 2.59814014e+01  2.12175608e+00  2.22699022e+00  1.73025124e-03
 -7.93599486e-02  1.02427535e-01  3.70715070e+00]
 22.9 s

lr 0.01  step size 10 beta 0.9 0.99 gamma 0.9
Loss: 2548.95361328125, full Parameters: [ 2.5118145e+01  1.9827319e+00  2.0768294e+00  1.0898338e-03
 -8.0070712e-02  1.1034889e-01  3.5647078e+00]


## 1250 x 8

1250* 8 55m using constant learning rate 0.0001 
Loss: 14068.798828125, full Parameters: [ 2.46198387e+01  1.61719894e+00  1.76454413e+00  8.55297223e-03
 -1.08275235e-01  1.28809512e-01  2.80795789e+00]

1250* 8 10m 32s
lr 0.01  step size 40 beta 0.9 0.8 gamma 0.9
  Loss: 14068.1953125, full Parameters: [ 2.5030930e+01  1.6107724e+00  1.7573007e+00  8.8407323e-03
 -1.0820019e-01  1.2936097e-01  2.7430327e+00]
Training full likelihood complete.

9m 33s
lr 0.01  step size 20 beta 0.9 0.8 gamma 0.9
 Loss: 14068.29296875, full Parameters: 
 [ 2.4933689e+01  1.6009743e+00  1.7502663e+00  9.2404895e-03 -1.0737537e-01  1.2953614e-01 
  2.7420275e+00]
Training full likelihood complete.

#### High-resolution data might benefit from a larger step size: high-resolution data often provides
#### more stable gradients, so a larger step size is less likely to cause significant fluctuations.
14m 41.8s
lr 0.01  step size 10 beta 0.9 0.99 gamma 0.9

FINAL STATE: Epoch 199, 
 Loss: 14068.8828125, full Parameters: 
 [ 2.4707581e+01  1.6489888e+00  1.7993137e+00  8.4043797e-03 -1.0836436e-01  1.2655504e-01  
 2.8416286e+00]

#### beta 0.9 0.99 might be too conservative for high resolution data
13m 44.8s
lr 0.01  step size 20 beta 0.9 0.99 gamma 0.9

 Loss: 14068.318359375, full Parameters: [ 2.4938175e+01  1.6203119e+00  1.7678342e+00  8.6686825e-03
 -1.0813228e-01  1.2845081e-01  2.7731323e+00]


18m
lr 0.01  step size 40 beta 0.9 0.99 gamma 0.9

 Loss: 14067.970703125, full Parameters: [ 2.5205673e+01  1.6159834e+00  1.7630767e+00  8.7957922e-03
 -1.0802399e-01  1.2862283e-01  2.7390635e+00]

9m 52s
lr 0.01  step size 20 beta 0.9 0.8 gamma 0.9

Loss: 14068.29296875, full Parameters: [ 2.4933689e+01  1.6009743e+00  1.7502663e+00  9.2404895e-03
 -1.0737537e-01  1.2953614e-01  2.7420275e+00]
Training full likelihood complete.

In [81]:
# Starting values for the fit; requires_grad=True makes the tensor a leaf that
# gradients are accumulated on.
params = torch.tensor(
    [24.42, 1.92, 1.92, 0.001, -0.045, 0.237, 3.34],
    requires_grad=True,
)

# Fitting object built from the per-hour tensors, the stacked tensor and the
# neighbour map prepared in the data-load cell.
fit_inputs = dict(
    smooth=0.5,
    input_map=analysis_data_map,
    aggregated_data=aggregated_data,
    nns_map=nns_map,
    mm_cond_number=mm_cond_number,
)
instance = kernels.model_fitting(**fit_inputs)

# optimizer_fun2 returns an (optimizer, scheduler) pair.
# NOTE(review): step_size / gamma presumably configure a step-decay learning-rate
# schedule -- confirm in kernels.model_fitting.
# Earlier alternative: optim.Adam([params], lr=0.01)
optim_settings = dict(lr=0.01, betas=(0.9, 0.8), eps=1e-8, step_size=20, gamma=0.9)
optimizer, scheduler = instance.optimizer_fun2(params, **optim_settings)

# Run the full-likelihood training loop for at most 3000 epochs.
instance.run_full2(params, optimizer, scheduler, epochs=3000)



Epoch 1, Gradients: [   5.967201   64.56529    25.027222  -61.038574  203.27737  1465.2336
   56.982613]
 Loss: 14257.3193359375, Parameters: [ 2.442e+01  1.920e+00  1.920e+00  1.000e-03 -4.500e-02  2.370e-01
  3.340e+00]
Epoch 101, Gradients: [ -0.7736554   -0.61901855  -0.24597168  -5.3624268  -12.07843
  71.16577      2.4958801 ]
 Loss: 14068.568359375, Parameters: [ 2.4761600e+01  1.5730878e+00  1.7161615e+00  8.2207927e-03
 -1.0916295e-01  1.3317481e-01  2.7271605e+00]
Converged at epoch 129
Epoch 130, Gradients: [ -1.2897807   -1.5251465    0.8886719   -0.49572754   1.3125
 -29.372559    -0.845871  ]
 Loss: 14068.29296875, full Parameters: [ 2.4933689e+01  1.6009743e+00  1.7502663e+00  9.2404895e-03
 -1.0737537e-01  1.2953614e-01  2.7420275e+00]
FINAL STATE: Epoch 130, Gradients: [ -1.2897807   -1.5251465    0.8886719   -0.49572754   1.3125
 -29.372559    -0.845871  ]
 Loss: 14068.29296875, full Parameters: [ 2.4933689e+01  1.6009743e+00  1.7502663e+00  9.2404895e-03
 -1.0737537e

In [67]:
# Starting values for the fit; requires_grad=True makes the tensor a leaf that
# gradients are accumulated on.
params = torch.tensor(
    [24.42, 1.92, 1.92, 0.001, -0.045, 0.237, 3.34],
    requires_grad=True,
)

# Fitting object built from the per-hour tensors, the stacked tensor and the
# neighbour map prepared in the data-load cell.
fit_inputs = dict(
    smooth=0.5,
    input_map=analysis_data_map,
    aggregated_data=aggregated_data,
    nns_map=nns_map,
    mm_cond_number=mm_cond_number,
)
instance = kernels.model_fitting(**fit_inputs)

# Optimizer only (no scheduler is returned or passed on in this variant).
# Earlier alternative: optim.Adam([params], lr=0.01)
optim_settings = dict(lr=0.001, betas=(0.9, 0.99), eps=1e-8)
optimizer = instance.optimizer_fun(params, **optim_settings)

# Run the full-likelihood training loop for at most 3000 epochs.
instance.run_full(params, optimizer, epochs=3000)

Epoch 1, Gradients: [  0.5812483  19.981602   11.618286    2.1514587  18.508026  447.89014
   5.896081 ]
 Loss: 2588.835693359375, Parameters: [ 2.442e+01  1.920e+00  1.920e+00  1.000e-03 -4.500e-02  2.370e-01
  3.340e+00]
Epoch 101, Gradients: [-4.4301248e-01  5.5735703e+00  2.6599121e-01  9.8266602e-03
  8.8864136e-01  1.9039636e+02  1.7605927e+00]
 Loss: 2552.7041015625, Parameters: [ 2.4379133e+01  1.8349186e+00  1.8445122e+00 -1.1280666e-03
 -7.4676104e-02  1.4577185e-01  3.2494757e+00]
Epoch 201, Gradients: [-8.6255789e-01 -8.7886429e-01 -1.7917480e+00 -1.6784668e-04
 -2.0629883e-02  3.6184082e+00 -1.7865100e+00]
 Loss: 2550.16357421875, Parameters: [ 2.45150394e+01  1.82015252e+00  1.87993288e+00 -4.56443377e-04
 -8.19397792e-02  1.22905836e-01  3.27331400e+00]
Epoch 301, Gradients: [-8.4441233e-01 -1.4291420e+00 -1.0267334e+00 -1.8310547e-03
 -7.8430176e-03  3.1687012e+00 -1.4634323e+00]
 Loss: 2549.85693359375, Parameters: [ 2.4625595e+01  1.8435745e+00  1.9267030e+00 -4.14884

Optimization vecchia

In [7]:

# Starting values for the fit; requires_grad=True makes the tensor a leaf that
# gradients are accumulated on.
params = torch.tensor(
    [24.42, 1.92, 1.92, 0.001, -0.045, 0.237, 3.34],
    requires_grad=True,
)

# Fitting object built from the per-hour tensors, the stacked tensor and the
# neighbour map prepared in the data-load cell.
fit_inputs = dict(
    smooth=0.5,
    input_map=analysis_data_map,
    aggregated_data=aggregated_data,
    nns_map=nns_map,
    mm_cond_number=mm_cond_number,
)
instance = kernels.model_fitting(**fit_inputs)

# Optimizer only (no scheduler is returned or passed on in this variant).
# Earlier alternative: optim.Adam([params], lr=0.01)
optim_settings = dict(lr=0.01, betas=(0.9, 0.8), eps=1e-8)
optimizer = instance.optimizer_fun(params, **optim_settings)

# Run the Vecchia (local) training loop for at most 3000 epochs.
instance.run_vecc_local(params, optimizer, epochs=3000)

Epoch 1, Gradients: [ -1.6359289  -2.443027   -1.6558454  -4.108972  -14.154725  320.9304
   5.662271 ]
 Loss: 2711.34814453125, Parameters: [ 2.442e+01  1.920e+00  1.920e+00  1.000e-03 -4.500e-02  2.370e-01
  3.340e+00]
Epoch 101, Gradients: [-3.3657918  -0.401838   -0.08449596  0.57027197  0.6423551   4.046658
 -5.713063  ]
 Loss: 2667.8994140625, Parameters: [25.402588    2.8637505   2.692839    0.10317959 -0.02825812  0.07111356
  4.0243974 ]
Epoch 201, Gradients: [-2.5897667   0.04656863  0.18996137 -0.7142588  -0.55480295 -0.16142726
  0.3011552 ]
 Loss: 2661.966796875, Parameters: [ 2.6416969e+01  3.6419423e+00  3.2790620e+00  1.6556214e-01
 -2.6265675e-02 -5.8514145e-03  5.1216049e+00]
Epoch 301, Gradients: [-2.4217558  -0.01603395  0.03121904  0.59986     0.29007858  0.2992233
  0.04264249]
 Loss: 2659.416748046875, Parameters: [ 2.7421131e+01  3.5703635e+00  3.2777872e+00  1.7882232e-01
 -2.6097119e-02  2.6434304e-03  4.9496832e+00]
Epoch 401, Gradients: [-2.2370813e+00  1.90