In [2]:
import sys
import os
# when python interpreter is different, add path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
import matplotlib.pyplot as plt

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle 
from collections import defaultdict

from pathlib import Path
import time
import json
from json import JSONEncoder

# Special functions and optimizations
from typing import Callable, Union, Tuple
from scipy.spatial.distance import cdist  # For space and time distance
from scipy.special import gamma, kv  # Bessel function and gamma function
from scipy.interpolate import splrep, splev

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchcubicspline import natural_cubic_spline_coeffs, NaturalCubicSpline

import GEMS_TCO
from GEMS_TCO import kernels 
from GEMS_TCO import orderings as _orderings
from GEMS_TCO import load_data

from GEMS_TCO import configuration as config


In [2]:
# conda activate faiss_env
# Runs GEMS_TCO/mymac_config.py with the faiss_env interpreter, passing --space "20,20" and --days "0,31".
# NOTE(review): both the interpreter path and the script path are absolute and machine-specific;
# adjust them before running on another machine.

!/opt/anaconda3/envs/faiss_env/bin/python /Users/joonwonlee/Documents/GEMS_TCO-1/src/GEMS_TCO/mymac_config.py --space "20,20" --days "0,31"



In [3]:
# Run configuration. These names are read as globals by later cells.
lat_lon_resolution = [3,3]
years = ['2024']
month_range =[7,8]
nheads = 200
mm_cond_number = 10 
v= 1.0

data_load_instance = load_data(config.mac_data_load_path)
df = data_load_instance.read_pickle(config.mac_estimates_day_path,config.mac_full_day_v05_pickle)
# Bound to `data_map` rather than `map`, so the builtin map() is not shadowed
# for the remainder of the kernel session.
data_map, ord_mm, nns_map= data_load_instance.load_mm20k_data_bymonthyear( lat_lon_resolution= lat_lon_resolution, mm_cond_number=mm_cond_number,years_=years, months_=month_range)

for day in range(1,2):
    # Initial parameters: every column except the last of row `day-1` in the estimates table.
    params = list(df.iloc[day-1][:-1])
    params = torch.tensor(params, dtype=torch.float64, requires_grad=True)
    # Each axis is thinned by its own resolution entry (the second factor previously reused index 0).
    print(f'2024-07-{day+1}, data size per day: { (200/lat_lon_resolution[0])*(100/lat_lon_resolution[1]) }, smooth: {v}')
    print(f'mm_cond_number: {mm_cond_number},\ninitial parameters: \n {params}')

    # Index window [8*day, 8*(day+1)) handed to the loader
    # (presumably 8 time steps per day -- confirm against load_working_data_byday).
    idx_for_datamap= [ 8*(day),8*(day+1)]
    analysis_data_map, aggregated_data = data_load_instance.load_working_data_byday( data_map, ord_mm, nns_map, idx_for_datamap= idx_for_datamap)

2024-07-2, data size per day: 2222.2222222222226, smooth: 1.0
mm_cond_number: 10,
initial parameters: 
 tensor([ 2.4793e+01,  1.5845e+00,  1.7182e+00,  9.0885e-03, -1.0730e-01,
         1.3104e-01,  2.7172e+00], dtype=torch.float64, requires_grad=True)


likelihood 

In [100]:
# Compare the spline-based full likelihood with the exact full likelihood
# for the same parameters and data.
coarse_factor = 100
# mm_cond_number comes from the configuration cell instead of a duplicated literal,
# so the spline and the exact model below always use the same value.
spline_instance = kernels.spline(epsilon = 1e-17, params=params, coarse_factor=coarse_factor, k=3, smooth = 0.5, input_map= analysis_data_map, aggregated_data= aggregated_data, nns_map=nns_map, mm_cond_number=mm_cond_number)
distances, non_zero_indices = spline_instance.precompute_coords_anisotropy(params, spline_instance.aggregated_data, spline_instance.aggregated_data)
# flat_distances = distances.flatten()
# spline_instance.max_distance = torch.max(distances).clone().detach()
# spline_instance.max_distance_len = len(flat_distances)
# spline_instance.spline_object = spline_instance.fit_cubic_spline(params)


full_ll = spline_instance.full_likelihood_using_spline( params, distances)

instance_2 = kernels.vecchia_experiment(1.0, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
exact_ll = instance_2.full_likelihood(params, aggregated_data[:,:4], aggregated_data[:,2], instance_2.matern_cov_anisotropy_v05)
# 12663.4804
print(exact_ll, full_ll)

: 

# debug error when high resolution cov_1d returns nans

resolution 3,3
10,000:   total diff   1.66       5.01e-9
100,000                868        2.6e-6

resolution 4,4  (160000**2/(10000**2)  1/256 from original)
#coarse_factor 5: error; coarse_factor 10: okay
coarse_factor 100 took 18 sec       sum diff 0.167   1.67e-9
coarse_factor 1000 okay difference elementwise ( sum diff 0.2831, 2.83e-9 )
coarse_factor 10,000        sum difference 1.45 (   1.45/10000**2= 1.5e-8  )

resolution 6,6
100:     sum:0.028  1.315e-9
1000:   sum: 0.0314   1.47e-9
10000:  sum: -124    5.82e-6

resolution 10,10

coarse_factor 100     sum diff 0.02     8.5e-9
coarse_factor 1000    sum diff  -13.8154   -5.39e-6

coarse_factor 10,000  sum diff 3793
coarse_factor 100,000 began to show difference at 10-4

resolution 20,20
coarse_factor 100     sum diff 5.729   3.57e-5
coarse_factor 1000    sum diff  200

In [9]:
# Build a kernels.spline instance with coarse_factor=1000 and smooth=1.0, then compute the
# pairwise distances of its aggregated_data against itself for the current `params`.
# NOTE(review): mm_cond_number is hard-coded to 10 here instead of using the `mm_cond_number`
# variable from the configuration cell.
coarse_factor = 1000
spline_instance = kernels.spline(epsilon = 1e-17, params=params, coarse_factor=coarse_factor, k=3, smooth = 1.0, input_map= analysis_data_map, aggregated_data= aggregated_data, nns_map=nns_map, mm_cond_number=10)
distances, non_zero_indices = spline_instance.precompute_coords_anisotropy(params, spline_instance.aggregated_data, spline_instance.aggregated_data)

In [10]:
# Spline-based covariance matrix: evaluate the fitted spline at every entry of `distances`,
# restore the matrix layout, scale by sigmasq (first parameter) and add the nugget
# (last parameter) on the diagonal.
# NOTE(review): relies on spline_instance.spline_object already being set -- presumably by the
# kernels.spline constructor; confirm.
cov_1d =spline_instance.spline_object.evaluate(distances)
sigmasq, _, _, _, _, _, nugget = params
cov_matrix = cov_1d.reshape(distances.shape)
cov_matrix = cov_matrix * sigmasq
cov_matrix = cov_matrix + torch.eye(cov_matrix.shape[0], dtype=torch.float64) * nugget 
cov_matrix

: 

In [6]:
# Reference covariance matrix from matern_cov_anisotropy_kv, evaluated on instance_2's
# aggregated data against itself with the current `params`.
# NOTE(review): the name `out` is reassigned by several later cells; a more specific name
# would make the comparison cells safer to re-run.
instance_2 = kernels.vecchia_experiment(1.0, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
out = instance_2.matern_cov_anisotropy_kv(params, instance_2.aggregated_data, instance_2.aggregated_data)
out

tensor([[27.5107,  2.3007,  2.3007,  ...,  4.7549,  8.3482,  5.4595],
        [ 2.3007, 27.5107,  0.2492,  ...,  3.6172,  2.6496,  9.8358],
        [ 2.3007,  0.2492, 27.5107,  ...,  0.2775,  0.5529,  0.6937],
        ...,
        [ 4.7549,  3.6172,  0.2775,  ..., 27.5107, 16.2836,  5.7856],
        [ 8.3482,  2.6496,  0.5529,  ..., 16.2836, 27.5107,  5.7856],
        [ 5.4595,  9.8358,  0.6937,  ...,  5.7856,  5.7856, 27.5107]],
       dtype=torch.float64, grad_fn=<AddBackward0>)

In [49]:
# Shape of the spline-based covariance matrix.
cov_matrix.shape

torch.Size([18224, 18224])

In [7]:
# Signed sum of the elementwise differences between cov_matrix and out.
# NOTE(review): positive and negative errors cancel in a signed sum;
# (cov_matrix - out).abs().sum() would measure the total approximation error instead.
torch.sum ( cov_matrix-out )


tensor(863.4873, dtype=torch.float64, grad_fn=<SumBackward0>)

In [8]:
# Same signed sum divided by the number of matrix entries, i.e. the mean signed difference.
torch.sum(cov_matrix-out)/ cov_matrix.shape[0]**2

tensor(2.6000e-06, dtype=torch.float64, grad_fn=<DivBackward0>)

optimization

In [None]:
# Optimise `params` with the optimizer and StepLR-style settings built by spline_instance
# (lr=0.02, step_size=100, gamma=0.2), for at most 1500 epochs.
# NOTE(review): this uses whichever `spline_instance` was created last in the kernel, and
# `params` is presumably updated in place -- re-running continues from the previous result.
# spline_instance = kernels.spline(epsilon = 1e-17, params=params, coarse_factor=5, k=3, smooth = 0.5, input_map= analysis_data_map, aggregated_data= aggregated_data, nns_map=nns_map, mm_cond_number=10)

print(params)
# spline_instance = kernels.spline(epsilon = 1e-17, coarse_factor=5, k=3, smooth = 0.5, input_map= analysis_data_map, aggregated_data= aggregated_data, nns_map=nns_map, mm_cond_number=10)
# optimizer, scheduler =  instance.optimizer_fun(params, lr= 0.01 , betas=(0.9, 0.99), eps=1e-8, step_size= 5, gamma=0.1)    
optimizer, scheduler = spline_instance.optimizer_fun(params, lr=0.02, betas=(0.9, 0.99), eps=1e-8, step_size=100, gamma=0.2)  
out, epoch = spline_instance.run_full(params, optimizer,scheduler, epochs=1500)


# Saved files below

In [9]:
class spline:
    """Matern-type covariance approximated with a natural cubic spline, plus full-likelihood fitting.

    The covariance is evaluated exactly (via the Bessel function ``kv``) on an evenly
    spaced grid of distances; a natural cubic spline through those values is then
    evaluated at every pairwise distance.

    Note:
        ``compute_cov`` and ``compute_full_nll`` read the notebook globals
        ``instance_2`` and ``aggregated_data``. Both must be defined before these
        methods are called.
    """

    def __init__(self, epsilon, coarse_factor, k, smooth):
        """Store the spline settings.

        Args:
            epsilon: Smallest distance on the knot grid (first spline knot).
            coarse_factor: Number of pairwise distances per spline knot; the knot
                count is ``len(flat_distances) // coarse_factor``.
            k: Stored on the instance; not read by any method of this class.
            smooth: Smoothness passed to ``kv`` and ``gamma``; stored as a float64 tensor.
        """
        self.smooth = torch.tensor(smooth, dtype= torch.float64)
        self.k = k
        self.coarse_factor = coarse_factor
        self.epsilon = epsilon

    def compute_cov(self, params):
        """Build the spline-approximated covariance matrix for ``params``.

        Args:
            params: Seven-element tensor; the first entry is used as ``sigmasq`` and the
                last as ``nugget``. The whole tensor is forwarded to
                ``instance_2.precompute_coords_anisotropy``.

        Returns:
            Matrix with the same shape as the pairwise distance matrix, with the
            nugget added on the diagonal.
        """
        sigmasq, _, _, _, _, _, nugget = params
        distances, _ = instance_2.precompute_coords_anisotropy(params, aggregated_data[:,:4],aggregated_data[:,:4])

        # Knot grid: evenly spaced from epsilon to the largest pairwise distance.
        flat_distances = distances.flatten()
        fit_distances = torch.linspace(self.epsilon, torch.max(flat_distances), len(flat_distances) // self.coarse_factor)

        # Exact covariance at the knots. The square root is taken before calling kv,
        # so `distances` presumably holds squared scaled distances -- confirm against
        # precompute_coords_anisotropy.
        non_zero_indices = fit_distances != 0
        out = torch.zeros_like(fit_distances, dtype= torch.float64)

        if torch.any(non_zero_indices):
            tmp = kv(self.smooth, torch.sqrt(fit_distances[non_zero_indices])).double().clone()
            out[non_zero_indices] = (sigmasq * (2**(1-self.smooth)) / gamma(self.smooth) *
                                    (torch.sqrt(fit_distances[non_zero_indices]) ) ** self.smooth *
                                    tmp)
        # kv is not evaluated at distance zero; the value used there is sigmasq.
        out[~non_zero_indices] = sigmasq

        # Fit the spline through the knot values and evaluate it at every pairwise distance.
        coeffs = natural_cubic_spline_coeffs(fit_distances, out.unsqueeze(1))
        fitted_spline = NaturalCubicSpline(coeffs)
        out = fitted_spline.evaluate(distances)
        out = out.reshape(distances.shape)
        out += torch.eye(out.shape[0], dtype=torch.float64) * nugget 
        return out

    def full_likelihood(self,params: torch.Tensor, input_np: torch.Tensor, y: torch.Tensor, cov_matrix) -> torch.Tensor:
        """Negative log-likelihood (without the additive constant) under ``cov_matrix``.

        The mean is ``X @ beta`` where ``X`` is the first two columns of ``input_np``
        and ``beta`` is the generalised-least-squares estimate.

        Args:
            params: Not used here; kept so the signature matches the other likelihood functions.
            input_np: 2-D tensor of inputs; only its first two columns are read.
            y: 1-D response tensor.
            cov_matrix: Square covariance matrix matching the length of ``y``.

        Returns:
            Scalar tensor ``0.5 * (log|cov| + r^T cov^{-1} r)`` with ``r = y - X @ beta``.
        """
        # The sign of the determinant is not checked, so a non-positive-definite
        # matrix does not raise here.
        _, log_det = torch.slogdet(cov_matrix)

        locs = input_np[:, :2]
        n_loc = locs.shape[1]

        # Single linear solve for all right-hand sides: cov^{-1} [X | y].
        solved = torch.linalg.solve(cov_matrix, torch.cat([locs, y.unsqueeze(1)], dim=1))
        cov_inv_locs = solved[:, :n_loc]
        cov_inv_y = solved[:, n_loc]

        # GLS estimate: beta = (X^T cov^{-1} X)^{-1} X^T cov^{-1} y
        beta = torch.linalg.solve(torch.matmul(locs.T, cov_inv_locs), torch.matmul(locs.T, cov_inv_y))

        y_mu = y - torch.matmul(locs, beta)

        # cov^{-1} (y - X beta) = cov^{-1} y - (cov^{-1} X) beta, so no further solve is needed.
        quad_form = torch.matmul(y_mu, cov_inv_y - torch.matmul(cov_inv_locs, beta))

        neg_log_lik = 0.5 * (log_det + quad_form)
        return neg_log_lik

    def compute_full_nll(self, params, covariance_function):
        """Evaluate ``covariance_function(params)`` and return the resulting negative log-likelihood.

        Reads the notebook global ``aggregated_data``: its first four columns are passed
        as inputs and column 2 as the response.
        """
        cov_mat = covariance_function(params) 
        nll = self.full_likelihood( params,aggregated_data[:,:4], aggregated_data[:,2], cov_mat)
        return nll

    def optimizer_fun(self, params, lr=0.01, betas=(0.9, 0.8), eps=1e-8, step_size=40, gamma=0.5):
        """Create an Adam optimizer over ``params`` and a StepLR scheduler for it.

        Returns:
            ``(optimizer, scheduler)``.
        """
        optimizer = torch.optim.Adam([params], lr=lr, betas=betas, eps=eps)
        # Multiply the learning rate by `gamma` every `step_size` scheduler steps.
        scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        return optimizer, scheduler

    def run_full(self, params, optimizer, scheduler,  covariance_function, epochs=10 ):
        """Minimise the full negative log-likelihood with the given optimizer and scheduler.

        Stops early once the loss changes by less than 1e-4 between consecutive epochs.

        Args:
            params: Leaf tensor being optimised (updated in place by ``optimizer``).
            optimizer: Optimizer built over ``params``.
            scheduler: Learning-rate scheduler stepped once per epoch.
            covariance_function: Callable ``params -> covariance matrix``.
            epochs: Maximum number of epochs; must be at least 1.

        Returns:
            ``(values, epoch)`` where ``values`` is the parameter list followed by the
            last loss, and ``epoch`` is the zero-based index of the last epoch run.
            The loss was computed before the final optimizer step, while the
            parameters are the values after it.

        Raises:
            ValueError: If ``epochs`` is smaller than 1 (no loss would be available to report).
        """
        if epochs < 1:
            raise ValueError(f"epochs must be at least 1, got {epochs}")

        prev_loss= float('inf')
        tol = 1e-4  # Convergence tolerance on the change in loss
        for epoch in range(epochs):
            optimizer.zero_grad()

            loss = self.compute_full_nll(params, covariance_function)
            loss.backward()

            # Progress report every 10th epoch
            if epoch % 10 == 0:
                print(f'Epoch {epoch+1}, Gradients: {params.grad.numpy()}\n Loss: {loss.item()}, Parameters: {params.detach().numpy()}')

            optimizer.step()
            scheduler.step()

            if abs(prev_loss - loss.item()) < tol:
                print(f"Converged at epoch {epoch}")
                print(f'Epoch {epoch+1}, : Loss: {loss.item()}, \n vecc Parameters: {params.detach().numpy()}')
                break

            prev_loss = loss.item()
        print(f'FINAL STATE: Epoch {epoch+1}, Loss: {loss.item()}, \n vecc Parameters: {params.detach().numpy()}')
        return params.detach().numpy().tolist() + [ loss.item()], epoch



test

# Train a model

In [None]:
print(params)

# Fit `params` with the notebook-defined `spline` class for at most 100 epochs.
# instance_2 has to exist before run_full is called: spline.compute_cov reads it
# (together with aggregated_data) as a global.
# NOTE(review): instance_2 is built with 1.0 as its first argument while the spline uses
# smooth=0.5 -- confirm this mismatch is intended.
instance_2 = kernels.vecchia_experiment(1.0, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
instance = spline( epsilon = 1e-8, coarse_factor = 4, k=3, smooth= 0.5)
# optimizer, scheduler =  instance.optimizer_fun(params, lr= 0.01 , betas=(0.9, 0.99), eps=1e-8, step_size= 5, gamma=0.1)    
optimizer, scheduler = instance.optimizer_fun(params, lr=0.03, betas=(0.9, 0.99), eps=1e-8, step_size=100, gamma=0.9)  
out, epoch = instance.run_full(params, optimizer,scheduler, instance.compute_cov, epochs=100)


In [16]:
# Evaluate the fitted spline at every pairwise distance and restore the matrix layout.
# NOTE(review): `splinenn` is only defined in a cell further down the notebook, so this cell
# fails on a fresh kernel when the notebook is run top to bottom.
out1 = splinenn.evaluate(distances)
out1 = out1.reshape(distances.shape)

distances

tensor([[0.0000, 2.8198, 2.0805,  ..., 1.4813, 0.5896, 1.7424],
        [2.8198, 0.0000, 9.7185,  ..., 3.6351, 3.4548, 4.7177],
        [2.0805, 9.7185, 0.0000,  ..., 4.4068, 2.6395, 4.0108],
        ...,
        [1.4813, 3.6351, 4.4068,  ..., 0.0000, 0.8485, 0.0821],
        [0.5896, 3.4548, 2.6395,  ..., 0.8485, 0.0000, 1.0949],
        [1.7424, 4.7177, 4.0108,  ..., 0.0821, 1.0949, 0.0000]],
       dtype=torch.float64, grad_fn=<AddBackward0>)

In [17]:
# Step-by-step version of spline.compute_cov, used to compare the spline-approximated
# covariance (out1) with matern_cov_anisotropy_kv (out2) on the same data.
smooth = 0.5

instance_2 = kernels.vecchia_experiment(smooth, analysis_data_map, aggregated_data,nns_map,mm_cond_number, nheads)
# NOTE(review): `instance` is created with epsilon=1e-15 and coarse_factor=2, but the knot grid
# below uses the local epsilon=1e-8 and coarse_factor=4. `instance` is only used for
# full_likelihood at the end of the cell, which does not read those settings.
instance = spline( epsilon = 1e-15, coarse_factor = 2, k=3, smooth= smooth)

distances, non_zero_indices = instance_2.precompute_coords_anisotropy(params, aggregated_data[:,:4],aggregated_data[:,:4])

flat_distances = distances.flatten()
sigmasq, range_lat, range_lon, advec_lat, advec_lon, beta, nugget = params
epsilon = 1e-8
coarse_factor = 4

# Knot grid: evenly spaced from epsilon to the largest pairwise distance.
fit_distances = torch.linspace(epsilon, torch.max(flat_distances), len(flat_distances) // coarse_factor)
print(fit_distances.shape)
# Compute the covariance for non-zero distances
# (this reassigns non_zero_indices, discarding the value returned by precompute_coords_anisotropy)
non_zero_indices = fit_distances != 0
out = torch.zeros_like(fit_distances, dtype= torch.float64)

if torch.any(non_zero_indices):
    tmp = kv(smooth, torch.sqrt(fit_distances[non_zero_indices])).double().clone()
    out[non_zero_indices] = (sigmasq * (2**(1-smooth)) / gamma(smooth) *
                            (torch.sqrt(fit_distances[non_zero_indices]) ) ** smooth *
                            tmp)
    
# Knots at distance zero (if any) take the value sigmasq.
out[~non_zero_indices] = sigmasq

print(out.shape)

# Compute spline coefficients
coeffs = natural_cubic_spline_coeffs(fit_distances, out.unsqueeze(1))

# Create spline object
splinenn = NaturalCubicSpline(coeffs)

# Interpolate using the spline
out1 = splinenn.evaluate(distances)
out1 = out1.reshape(distances.shape)
# Add the nugget on the diagonal.
out1 += torch.eye(out1.shape[0], dtype=torch.float64) * nugget 

print(out1)
# Reference covariance for the same parameters and data.
out2 = instance_2.matern_cov_anisotropy_kv(params, aggregated_data[:,:4],aggregated_data[:,:4])


print(out2)
# Negative log-likelihood under the spline-approximated covariance (displayed as the cell result).
instance.full_likelihood( params,aggregated_data[:,:4], aggregated_data[:,2], out1)


torch.Size([40000])
torch.Size([40000])
tensor([[24.8247,  4.3205,  5.4749,  ...,  6.8587, 10.7482,  6.1878],
        [ 4.3205, 24.8247,  1.0255,  ...,  3.4418,  3.6106,  2.6395],
        [ 5.4749,  1.0255, 24.8247,  ...,  2.8387,  4.5628,  3.1264],
        ...,
        [ 6.8587,  3.4418,  2.8387,  ..., 24.8247,  9.2207, 17.3917],
        [10.7482,  3.6106,  4.5628,  ...,  9.2207, 24.8247,  8.1353],
        [ 6.1878,  2.6395,  3.1264,  ..., 17.3917,  8.1353, 24.8247]],
       dtype=torch.float64, grad_fn=<AsStridedBackward0>)
tensor([[24.8270,  4.3205,  5.4749,  ...,  6.8587, 10.7482,  6.1878],
        [ 4.3205, 24.8270,  1.0255,  ...,  3.4418,  3.6106,  2.6395],
        [ 5.4749,  1.0255, 24.8270,  ...,  2.8387,  4.5628,  3.1264],
        ...,
        [ 6.8587,  3.4418,  2.8387,  ..., 24.8270,  9.2207, 17.3917],
        [10.7482,  3.6106,  4.5628,  ...,  9.2207, 24.8270,  8.1353],
        [ 6.1878,  2.6395,  3.1264,  ..., 17.3917,  8.1353, 24.8270]],
       dtype=torch.float64, grad_f

tensor(601.6160, dtype=torch.float64, grad_fn=<MulBackward0>)