In [2]:
import sys
import os
# gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
# sys.path.append(gems_tco_path)

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle 

import GEMS_TCO
from GEMS_TCO import kernels 
from GEMS_TCO import orderings as _orderings
from GEMS_TCO import load_data_local_computer

import torch
from collections import defaultdict

import torch
from torch.func import grad, hessian, jacfwd, jacrev
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import copy                    # clone tensor

# Summary

Two options: 1. torch.autograd 2. torch.func (recommended for both gradients and hessians)

Observations:
- To track gradients, the ```sqrt()``` has to be removed from the distance function and applied inside the covariance function instead, i.e. ```sqrt(distance function output)```.   

- If dtypes don't match, neither autograd nor torch.func can track Hessians, so cast everything consistently with ```.to(torch.float64)```, e.g. ```aggregated_data[:, :4].to(torch.float64)```.   
In fact, with ```float32``` the autograd derivative can differ from the analytical derivative by ```0.001 ~ 0.004```. 

That is, the difference is on the order of one-thousandth.

- For Hessians, torch.func is recommended; ```torch.autograd.functional.hessian(compute_loss, params)``` does not work here.   

- There seems to be a nontrivial difference between the float32 and float64 settings. 

# LOAD estimates for July 2024

In [3]:
# ---- Selection settings shared by the cells below ----
lat_lon_resolution = [8, 8]
day = 1
mm_cond_number = 20

years = ['2024']
month_range = [7, 8]
# Index window passed to load_working_data_byday: [8*(day-1), 8*day].
idx_for_datamap = [8 * (day - 1), 8 * day]

# ---- Load the data through the GEMS_TCO loader ----
instance = load_data_local_computer()
month_map, ord_mm, nns_map = instance.load_mm20k_data_bymonthyear(
    lat_lon_resolution=lat_lon_resolution,
    mm_cond_number=mm_cond_number,
    years_=years,
    months_=month_range,
)
analysis_data_map, aggregated_data = instance.load_working_data_byday(
    month_map,
    ord_mm,
    nns_map,
    idx_for_datamap=idx_for_datamap,
)

# ---- Read the previously saved parameter estimates ----
# NOTE(review): absolute, machine-specific path; the notebook only runs where it exists.
input_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/Exercises/st_model/estimates"
output_filename = 'vecchia_inter_estimates_1250_july24.csv'
output_csv_path = os.path.join(input_path, output_filename)

df = pd.read_csv(output_csv_path)
df.head()

Unnamed: 0,sigmasq,range_lat,range_lon,advec_lat,advec_lon,beta,nugget,loss
0,24.793444,1.584529,1.718248,0.009089,-0.107299,0.131038,2.717239,14068.529297
1,24.424301,1.997055,1.942683,0.043588,-0.072679,0.137124,1.513148,12357.71582
2,26.009497,1.215236,1.558868,0.023392,-0.150548,0.19985,2.890678,14948.140625
3,24.701347,1.612308,1.82296,-0.164069,-0.237443,0.131595,3.636499,14786.204102
4,22.598671,2.901185,3.722327,-0.011729,-0.152072,0.072866,2.397249,12096.261719


# Gradients and hessians sanity check

In [15]:

nheads = 10
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads)

# Evaluation point: first row of the estimates table without its last column
# (the displayed table shows `loss` as the last column).
params = torch.tensor(df.iloc[0, :-1].values, dtype=torch.float64, requires_grad=True)
print(f'input parameters: {params}')

# Cast the data to float64 once, outside compute_loss: gradcheck and the
# gradient transforms call compute_loss repeatedly, and the cast does not
# depend on `params`.
full_inputs_f64 = aggregated_data[:, :4].to(torch.float64)
full_col2_f64 = aggregated_data[:, 2].to(torch.float64)


def compute_loss(params):
    """Return the full likelihood evaluated at `params`.

    Calls `instance.full_likelihood` with the first four columns of
    `aggregated_data`, its column 2, and `instance.matern_cov_anisotropy_v05`,
    all data cast to float64 so dtypes match the float64 `params`.
    """
    return instance.full_likelihood(params, full_inputs_f64, full_col2_f64, instance.matern_cov_anisotropy_v05)


# Option 1: gradient via torch.autograd.grad (returns a tuple of tensors)
grad_f = torch.autograd.grad(compute_loss(params), params)
print(f' the gradient: {grad_f}')

# Option 2: gradient via torch.func.grad (returns a tensor)
grad_function = torch.func.grad(compute_loss)
gradient = grad_function(params)
print(f' the gradient: {gradient}')

# Values noted from an earlier run:
#[  0.9324, -43.9642, -35.9082,  59.9937, -17.1091, -76.0932,  -0.6668]
# NOTE(review): these differ from the gradient printed by this cell; confirm which setup produced them.

# Numerical check of the autograd gradient; displayed as the cell result.
torch.autograd.gradcheck(compute_loss, params, atol=1e-9, rtol=1e-6)


input parameters: tensor([ 2.4793e+01,  1.5845e+00,  1.7182e+00,  9.0885e-03, -1.0730e-01,
         1.3104e-01,  2.7172e+00], dtype=torch.float64, requires_grad=True)
 the gradient: (tensor([  -3.5293,    3.5674,   -3.6402,   16.3591,   81.4576, -450.7034,
         -23.6871], dtype=torch.float64),)
 the gradient: tensor([  -3.5293,    3.5674,   -3.6402,   16.3591,   81.4576, -450.7034,
         -23.6871], dtype=torch.float64, grad_fn=<AddBackward0>)


True

## gradient(vecc) * hessian (full) * gradient (vecc)

In [29]:
# Parameter vector at which the statistics are evaluated.
# (Previously tried: [24.42, 1.92, 1.92, 0.001, -0.045, 0.237, 3.34])
params = [ 27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65]   #200
params = torch.tensor(params, dtype=torch.float64, requires_grad=True)

nheads = 10
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads)

# Statistic using vecchia_b1 as the Vecchia likelihood
o1 = instance.vecc_ghg_statistic(params, instance.full_likelihood, instance.vecchia_b1, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])
print(o1)
# Statistic using vecchia_b2 as the Vecchia likelihood
o2 = instance.vecc_ghg_statistic(params, instance.full_likelihood, instance.vecchia_b2, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])
print(o2)

# Repeat the vecchia_b2 statistic with a conditioning number of 10.
# The value lives in its own name so the notebook-wide `mm_cond_number`
# set in the loading cell is not overwritten for the cells that follow.
reduced_cond_number = 10
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, reduced_cond_number, nheads)
instance.vecc_ghg_statistic(params, instance.full_likelihood, instance.vecchia_b2, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])


tensor(16.2136, dtype=torch.float64, grad_fn=<DotBackward0>)
tensor(18.0869, dtype=torch.float64, grad_fn=<DotBackward0>)


tensor(19.9679, dtype=torch.float64, grad_fn=<DotBackward0>)

### Now compare statistics

In [7]:
# Deep copy of the data map; the values of `reordered_dict` come from this copy.
copy_analysis_map = copy.deepcopy(analysis_data_map)
# Same keys as the original map with positions 3/4 and 6/7 swapped.
key_order = [0, 1, 2, 4, 3, 5, 7, 6]
keys = list(analysis_data_map.keys())
reordered_dict = dict(
    (keys[position], copy_analysis_map[keys[position]]) for position in key_order
)

params = torch.tensor(
    [27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65],   #200
    dtype=torch.float64,
    requires_grad=True,
)
nheads = 200
instance = kernels.vecchia_experiment(
    0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads
)

# Full likelihood and full statistic at `params`
fl = instance.full_likelihood(
    params,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
    instance.matern_cov_anisotropy_v05,
)
fs = instance.full_ghg_statistic(
    params,
    instance.full_likelihood,
    instance.matern_cov_anisotropy_v05,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
)

print(fl.item())

print(fs.item())
  

cond_number of hessian 2816516.366634098
3940.2817598903225
-59.56466834318


In [5]:
nheads = 200
params = torch.tensor(
    [27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65],   #200
    dtype=torch.float64,
    requires_grad=True,
)

# --- Original key order ---
instance = kernels.vecchia_experiment(
    0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads
)

ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}')

ll2 = instance.vecchia_interpolation_1to6(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll2}')

# --- Reordered keys: positions 3/4 and 6/7 swapped ---
key_order = [0, 1, 2, 4, 3, 5, 7, 6]
keys = list(analysis_data_map.keys())
reordered_dict = dict(
    (keys[position], copy_analysis_map[keys[position]]) for position in key_order
)

instance = kernels.vecchia_experiment(
    0.5, reordered_dict, aggregated_data, nns_map, mm_cond_number, nheads
)
ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}')

# NOTE(review): a second ordering is set up here but never evaluated in this cell.
key_order = [0, 1, 2, 4, 3, 7, 5, 6]
keys = list(copy_analysis_map.keys())




mm_cond_number: 20 likelihood: 3961.533734087265
mm_cond_number: 20 likelihood: 3946.0588701555866
mm_cond_number: 20 likelihood: 3957.1965567747948


In [6]:
nheads = 300
instance = kernels.vecchia_experiment(
    0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads
)

# --- vecchia_b2, original key order ---
o1 = instance.vecc_ghg_statistic(
    params,
    instance.full_likelihood,
    instance.vecchia_b2,
    instance.matern_cov_anisotropy_v05,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
)

ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')

# --- vecchia_interpolation_1to6, original key order ---
o1 = instance.vecc_ghg_statistic(
    params,
    instance.full_likelihood,
    instance.vecchia_interpolation_1to6,
    instance.matern_cov_anisotropy_v05,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
)

ll = instance.vecchia_interpolation_1to6(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')

# --- vecchia_b2, keys reordered (positions 3/4 and 6/7 swapped) ---
key_order = [0, 1, 2, 4, 3, 5, 7, 6]
keys = list(copy_analysis_map.keys())
reordered_dict = dict(
    (keys[position], copy_analysis_map[keys[position]]) for position in key_order
)

instance = kernels.vecchia_experiment(
    0.5, reordered_dict, aggregated_data, nns_map, mm_cond_number, nheads
)

o1 = instance.vecc_ghg_statistic(
    params,
    instance.full_likelihood,
    instance.vecchia_b2,
    instance.matern_cov_anisotropy_v05,
    aggregated_data[:, :4],
    aggregated_data[:, 2],
)
ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')



mm_cond_number: 20 likelihood: 3938.502954147818, statistic:-58.767466763382814
mm_cond_number: 20 likelihood: 3936.1105450710866, statistic:-58.611760282523576
mm_cond_number: 20 likelihood: 3940.224792674919, statistic:-58.56296216905197


In [None]:
# Full likelihood and full statistic at the current `params`
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads)
fl = instance.full_likelihood(params, aggregated_data[:, :4], aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
fs = instance.full_ghg_statistic(params, instance.full_likelihood, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])

# Vecchia statistic and likelihood with a conditioning number of 10.
# The value lives in its own name so the notebook-wide `mm_cond_number`
# is not overwritten for the cells that follow.
reduced_cond_number = 10

instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, reduced_cond_number, nheads)

o1 = instance.vecc_ghg_statistic(params, instance.full_likelihood, instance.vecchia_b2, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])
ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
print(f'mm_cond_number: {reduced_cond_number} likelihood: {ll}, statistic:{o1}')

Vary the size of the conditioning set.

In [10]:
# Baseline: full likelihood and full statistic at the current `params`
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, mm_cond_number, nheads)
fl = instance.full_likelihood(params, aggregated_data[:, :4], aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
fs = instance.full_ghg_statistic(params, instance.full_likelihood, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])

print(f'full likelihood: {fl}, full statistic: {fs}')

# Sweep the conditioning number from 5 to 29. The loop variable is passed
# directly, so the notebook-wide `mm_cond_number` is not left at the last
# swept value once the loop finishes.
# NOTE(review): the recorded output prints the same 'cond_number of hessian'
# on every iteration, which suggests the full-likelihood Hessian is recomputed
# each time; consider caching it if vecc_ghg_statistic allows.
for i in range(5, 30):
    instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data, nns_map, i, nheads)

    o1 = instance.vecc_ghg_statistic(params, instance.full_likelihood, instance.vecchia_b2, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4], aggregated_data[:, 2])
    ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05)
    print(f'mm_cond_number: {i} likelihood: {ll}, statistic:{o1}')



cond_number of hessian 189194.83871267262
full likelihood: 2547.258276245673, full statistic: 5.100717330882949
cond_number of hessian 189194.83871267262
mm_cond_number: 5 likelihood: 2571.7202011676554, statistic:4.810536260238044
cond_number of hessian 189194.83871267262
mm_cond_number: 6 likelihood: 2569.9784416212933, statistic:4.106628202046433
cond_number of hessian 189194.83871267262
mm_cond_number: 7 likelihood: 2567.640680723155, statistic:4.602141260135241
cond_number of hessian 189194.83871267262
mm_cond_number: 8 likelihood: 2567.6782831405317, statistic:5.079131623977824
cond_number of hessian 189194.83871267262
mm_cond_number: 9 likelihood: 2568.0001926920804, statistic:5.343090709089482
cond_number of hessian 189194.83871267262
mm_cond_number: 10 likelihood: 2566.523313914881, statistic:5.518904441534436
cond_number of hessian 189194.83871267262
mm_cond_number: 11 likelihood: 2566.7857736093847, statistic:5.4051722056787765
cond_number of hessian 189194.83871267262
mm_co