In [1]:
import sys
import os
# gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
# sys.path.append(gems_tco_path)

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle 

import GEMS_TCO
from GEMS_TCO import kernels 
from GEMS_TCO import orderings as _orderings
from GEMS_TCO import load_data

import torch
from collections import defaultdict

import torch
from torch.func import grad, hessian, jacfwd, jacrev
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import copy                    # clone tensor

from pathlib import Path
import json
from json import JSONEncoder

# Summary

Two options: 1. torch.autograd 2. torch.func (recommended for both gradients and hessians)

Observations:
- In order to track gradients, ```sqrt()``` in distance function has to be removed and put ```sqrt(distance function output)``` in covariance function.   

- If dtypes don't match, both autograd and torch.func cannot track hessians, so consider ```.to(torch.float64)``` so ``` aggregated_data[:,:4].torch.float64()```   
for the consistency.
Actually, it turns out that if I use ```float32```, then autograd derivative can be different from analytical derivative by ```0.001 ~ 0.004```. 

the difference is on the order of one-thousandth 

- For hessians, torch.func is recommended. ``` torch.autograd.functional.hessian(compute_loss, params)``` this doesn't work.   

- It seems there is nontrivial difference between float32 and float64 settings. 

# LOAD estimates for July 2024

In [2]:
lat_lon_resolution = [6,6]
day = 7
mm_cond_number = 10

years = ['2024']
month_range =[7,8]
idx_for_datamap= [ 8*(day-1),8*day]

input_path = Path("/Users/joonwonlee/Documents/GEMS_DATA/")  # mac
instance = load_data(input_path)
month_map, ord_mm, nns_map= instance.load_mm20k_data_bymonthyear( lat_lon_resolution= lat_lon_resolution, mm_cond_number=mm_cond_number,years_=years, months_=month_range)
analysis_data_map, aggregated_data = instance.load_working_data_byday( month_map, ord_mm, nns_map, idx_for_datamap=idx_for_datamap)


'''  
input_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/Exercises/st_model/estimates"
output_filename = 'vecchia_inter_estimates_1250_july24.csv'
output_csv_path = os.path.join(input_path, output_filename)

df = pd.read_csv(output_csv_path)
'''
input_filename = "full_v15_1250.0.csv"
input_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/Exercises/st_model/estimates"
input_filepath = os.path.join(input_path, input_filename)
df_full_v15 = pd.read_csv(input_filepath)
df_full_v15 = df_full_v15.iloc[:,5:13]

df = df_full_v15


# Gradients and hessians sanity check

In [5]:

nheads =10
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)

# Convert parameters to a tensor with requires_grad=True
params = torch.tensor(df.iloc[0, :-1].values, dtype=torch.float64, requires_grad=True)
print(f'input parameters: {params}')

# Define the function to compute the loss
def compute_loss(params):
    return instance.full_likelihood(params, aggregated_data[:, :4].to(torch.float64), aggregated_data[:, 2].to(torch.float64), instance.matern_cov_anisotropy_v05)
    # return instance.vecchia_interpolation_1to6(params, instance.matern_cov_ani, 35)
    
# Compute the first derivative using torch.func.grad
grad_f = torch.autograd.grad(compute_loss(params), params)
print(f' the gradient: {grad_f}')

grad_function = torch.func.grad(compute_loss)
gradient = grad_function(params)
print(f' the gradient: {gradient}')

#[  0.9324, -43.9642, -35.9082,  59.9937, -17.1091, -76.0932,  -0.6668]
torch.autograd.gradcheck(compute_loss, params, atol=1e-9, rtol=1e-6)


input parameters: tensor([ 2.7345e+01,  4.9205e-01,  5.3007e-01,  4.8692e-03, -1.1557e-01,
         4.2872e-01,  4.4720e+00], dtype=torch.float64, requires_grad=True)
 the gradient: (tensor([   2.0183, -183.8441, -204.7172,    1.4672,  -70.0683,  225.2526,
          10.5151], dtype=torch.float64),)
 the gradient: tensor([   2.0183, -183.8441, -204.7172,    1.4672,  -70.0683,  225.2526,
          10.5151], dtype=torch.float64, grad_fn=<AddBackward0>)


True

## gradient(vecc) * hessian (full) * gradient (vecc)

In [3]:
# params = [24.42, 1.92, 1.92, 0.001, -0.045, 0.237, 3.34]

df =df_full_v15
# Convert parameters to a tensor with requires_grad=True
params = torch.tensor(df.iloc[0, :-1].values, dtype=torch.float64, requires_grad=True)

# params = [ 27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65]   #200
# params = torch.tensor(params, dtype=torch.float64, requires_grad=True)
  

nheads =10
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)

cov_map =  instance.cov_structure_saver(params, instance.matern_cov_anisotropy_v15)
o1, o2 = instance.full_ghg_statistic(params,instance.full_likelihood,instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])
print(o1, o2)



eigenvalues, eigenvectors = torch.linalg.eig(o1)

# Print the results
print("Eigenvalues:", eigenvalues)
print("Eigenvectors:", eigenvectors)



cond_number of hessian 4126.64438029086
tensor([[ 2.1724e-01, -9.4483e+00, -6.2820e+00,  5.3410e+00, -6.2668e+00,
          1.5015e+01, -1.9855e-01],
        [-9.4483e+00,  1.9296e+03,  3.1761e+02,  6.8744e+01, -1.1437e+02,
         -5.7432e+01, -8.0936e+00],
        [-6.2820e+00,  3.1761e+02,  2.2000e+03,  1.5277e+02,  4.5844e+02,
          4.0103e+01,  2.1065e+01],
        [ 5.3410e+00,  6.8744e+01,  1.5277e+02,  5.1892e+03, -2.4477e+02,
          5.7972e+02,  9.7752e+00],
        [-6.2668e+00, -1.1437e+02,  4.5844e+02, -2.4477e+02,  1.3294e+03,
         -2.2632e+02, -1.2681e+01],
        [ 1.5015e+01, -5.7432e+01,  4.0103e+01,  5.7972e+02, -2.2632e+02,
          7.0536e+01,  1.2236e+00],
        [-1.9855e-01, -8.0936e+00,  2.1065e+01,  9.7752e+00, -1.2681e+01,
          1.2236e+00, -6.8866e-01]], dtype=torch.float64,
       grad_fn=<ViewBackward0>) tensor(4126.6444, dtype=torch.float64, grad_fn=<SqueezeBackward1>)
Eigenvalues: tensor([ 5.2796e+03+0.j,  2.5089e+03+0.j,  1.8782e+03+0.

### I prefer looking at likelihoods only

In [None]:
nheads = 200
params = [ 27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65]   #200
params = torch.tensor(params, dtype=torch.float64, requires_grad=True)

instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)

mm_cond_number=10
fl= instance.full_likelihood(params, aggregated_data[:, :4],aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
print(fl)

ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05 )
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}')

ll2 = instance.vecchia_interpolation_1to6(params, instance.matern_cov_anisotropy_v05 )
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll2}')

key_order = [0,1,2,4,3,5,7,6]
keys = list(analysis_data_map.keys())
reordered_dict = {keys[key]: analysis_data_map[keys[key]] for key in key_order}
instance = kernels.vecchia_experiment(0.5, reordered_dict, aggregated_data,nns_map,mm_cond_number, nheads)


ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05 )
print(f'mm_cond_number: {mm_cond_number} likelihood: {ll}')

### Now compare statistics

In [None]:
copy_analysis_map = copy.deepcopy(analysis_data_map)
key_order = [0,1,2,4,3,5,7,6]
keys = list(analysis_data_map.keys())
reordered_dict = {keys[key]: copy_analysis_map[keys[key]] for key in key_order}


params = [ 27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65]   #200
params = torch.tensor(params, dtype=torch.float64, requires_grad=True)
nheads =200
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
fl= instance.full_likelihood(params, aggregated_data[:, :4],aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
fs = instance.full_ghg_statistic(params,instance.full_likelihood, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])

print(fl.item())
print(fs.item())
  

cond_number of hessian 2816516.366634098
3940.2817598903225
-59.56466834318


In [None]:
lat_lon_resolution = [8,8]
params = [ 27.25, 2.18, 2.294, 4.099e-4, -0.07915, 0.0999, 3.65]   #200
params = torch.tensor(params, dtype=torch.float64, requires_grad=True)


for day in range(1,15):
    # day = 7
    mm_cond_number = 20

    years = ['2024']
    month_range =[7,8]
    idx_for_datamap= [ 8*(day-1),8*day]

    instance = load_data_local_computer()
    month_map, ord_mm, nns_map= instance.load_mm20k_data_bymonthyear( lat_lon_resolution= lat_lon_resolution, mm_cond_number=mm_cond_number,years_=years, months_=month_range)
    analysis_data_map, aggregated_data = instance.load_working_data_byday( month_map, ord_mm, nns_map, idx_for_datamap=idx_for_datamap)

    nheads = 20
    instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)

    fl= instance.full_likelihood(params, aggregated_data[:, :4],aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
    
    fs = instance.full_ghg_statistic(params,instance.full_likelihood, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])

    print(fl.item())

    print(fs.item())

    o1= instance.vecc_ghg_statistic(params,instance.full_likelihood, instance.vecchia_b2,instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])

    ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05 )
    print(f'vecchia_b2 mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')

    o1= instance.vecc_ghg_statistic(params,instance.full_likelihood, instance.vecchia_interpolation_1to6,instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])

    ll = instance.vecchia_interpolation_1to6(params, instance.matern_cov_anisotropy_v05 )
    print(f'vecchia_interpolation mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')


    key_order = [0,1,2,4,3,5,7,6]
    keys = list(analysis_data_map.keys())
    reordered_dict = {keys[key]: analysis_data_map[keys[key]] for key in key_order}

    instance = kernels.vecchia_experiment(0.5, reordered_dict, aggregated_data,nns_map,mm_cond_number, nheads)

    o1= instance.vecc_ghg_statistic(params,instance.full_likelihood, instance.vecchia_b2, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])
    ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05 )
    print(f'vecchia_b2_reordered mm_cond_number: {mm_cond_number} likelihood: {ll}, statistic:{o1}')

    print( f'day {day} above')


cond_number of hessian 2816516.366634098
3940.2817598903225
-59.56466834318
vecchia_b2 mm_cond_number: 20 likelihood: 3975.3121026623235, statistic:-43.993833612555065
vecchia_interpolation mm_cond_number: 20 likelihood: 3898.6010621925916, statistic:-29.24946918728037
vecchia_b2_reordered mm_cond_number: 20 likelihood: 3972.5613997053033, statistic:-33.000113088118
day 1 above
cond_number of hessian 212182.97911473023
3663.07630645092
299.06449546243493
vecchia_b2 mm_cond_number: 20 likelihood: 3678.227239185976, statistic:306.403761341758
vecchia_interpolation mm_cond_number: 20 likelihood: 3593.9613722562485, statistic:346.83343565608084
vecchia_b2_reordered mm_cond_number: 20 likelihood: 3678.0105807485916, statistic:305.79937533188473
day 2 above
cond_number of hessian 183073.7950892095
4343.463121533729
83.91664846163812
vecchia_b2 mm_cond_number: 20 likelihood: 4367.746971329721, statistic:56.015318528607715
vecchia_interpolation mm_cond_number: 20 likelihood: 4295.084667816446,

vary the size of conditioning set

In [10]:
instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
fl= instance.full_likelihood(params, aggregated_data[:, :4],aggregated_data[:, 2], instance.matern_cov_anisotropy_v05)
fs = instance.full_ghg_statistic(params,instance.full_likelihood, instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])

print(f'full likelihood: {fl}, full statistic: {fs}')
for i in range(5,30):
    mm_cond_number = i
    instance = kernels.vecchia_experiment(0.5, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
    
    o1= instance.vecc_ghg_statistic(params,instance.full_likelihood, instance.vecchia_b2,instance.matern_cov_anisotropy_v05, aggregated_data[:, :4],aggregated_data[:, 2])
    ll = instance.vecchia_b2(params, instance.matern_cov_anisotropy_v05 )
    print(f'mm_cond_number: {i} likelihood: {ll}, statistic:{o1}')



cond_number of hessian 189194.83871267262
full likelihood: 2547.258276245673, full statistic: 5.100717330882949
cond_number of hessian 189194.83871267262
mm_cond_number: 5 likelihood: 2571.7202011676554, statistic:4.810536260238044
cond_number of hessian 189194.83871267262
mm_cond_number: 6 likelihood: 2569.9784416212933, statistic:4.106628202046433
cond_number of hessian 189194.83871267262
mm_cond_number: 7 likelihood: 2567.640680723155, statistic:4.602141260135241
cond_number of hessian 189194.83871267262
mm_cond_number: 8 likelihood: 2567.6782831405317, statistic:5.079131623977824
cond_number of hessian 189194.83871267262
mm_cond_number: 9 likelihood: 2568.0001926920804, statistic:5.343090709089482
cond_number of hessian 189194.83871267262
mm_cond_number: 10 likelihood: 2566.523313914881, statistic:5.518904441534436
cond_number of hessian 189194.83871267262
mm_cond_number: 11 likelihood: 2566.7857736093847, statistic:5.4051722056787765
cond_number of hessian 189194.83871267262
mm_co