In [1]:
# Standard libraries
import sys
# Add your custom path
gems_tco_path = "/Users/joonwonlee/Documents/GEMS_TCO-1/src"
sys.path.append(gems_tco_path)
import logging
import argparse # Argument parsing

# Data manipulation and analysis
import pandas as pd
import numpy as np
import pickle
import torch
import torch.optim as optim
import copy                    # clone tensor
import time

# Custom imports
import GEMS_TCO
from GEMS_TCO import kernels

from GEMS_TCO import kernels 
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import load_data
from GEMS_TCO import alg_optimization, alg_opt_Encoder
from GEMS_TCO import configuration as config

from typing import Optional, List, Tuple
from pathlib import Path
import typer
import json
from json import JSONEncoder

from GEMS_TCO import configuration as config
from GEMS_TCO import data_preprocess as dmbh

import os
from sklearn.neighbors import BallTree


import time



SyntaxError: invalid decimal literal (__init__.py, line 25)

### Full Lieklihood using raw data

In [6]:
import pickle
import os
# Assume your 'config' object is available
# import config
from GEMS_TCO import kernels 
from GEMS_TCO import orderings as _orderings 
from GEMS_TCO import load_data
from GEMS_TCO import alg_optimization, alg_opt_Encoder
from GEMS_TCO import configuration as config

# --- 1. Configuration ---
# Specify the year and month you want to load
YEAR_TO_LOAD = 2024
MONTH_TO_LOAD = 7

# Use the same base path as your saving script
BASE_PATH = config.mac_data_load_path

# --- 2. Construct the File Path ---
# This must exactly match the naming convention from your saving script
month_str = f"{MONTH_TO_LOAD:02d}"
pickle_path = os.path.join(BASE_PATH, f'pickle_{YEAR_TO_LOAD}')
filename = f"coarse_cen_map_without_decrement_latitude{str(YEAR_TO_LOAD)[2:]}_{month_str}.pkl"
filepath_to_load = os.path.join(pickle_path, filename)

print(f"Attempting to load data from: {filepath_to_load}")

# --- 3. Load the Data ---
try:
    with open(filepath_to_load, 'rb') as pickle_file:
        # Use pickle.load() to read the data from the file
        loaded_coarse_map = pickle.load(pickle_file)
    
    print("\nData loaded successfully! ✅")
    
    # --- 4. Verify the Loaded Data ---
    # The loaded data is a dictionary. Let's inspect it.
    print(f"Type of loaded data: {type(loaded_coarse_map)}")
    if isinstance(loaded_coarse_map, dict):
        print(f"Number of entries (hours) in the map: {len(loaded_coarse_map)}")
        # Print the first 5 keys to see what they look like
        first_five_keys = list(loaded_coarse_map.keys())[:5]
        print(f"Example keys: {first_five_keys}")
        
        # You can now access the data for a specific hour, for example:
        # first_hour_data = loaded_coarse_map[first_five_keys[0]]
        # print(f"\nData for first hour is a tensor of shape: {first_hour_data.shape}")

except FileNotFoundError:
    print(f"\nError: File not found. Please check if the file exists at the specified path.")
except Exception as e:
    print(f"\nAn error occurred: {e}")


print(loaded_coarse_map['y24m07day01_hm00:53']['Longitude'].nunique())
print(loaded_coarse_map['y24m07day01_hm00:53']['Latitude'].nunique())

import GEMS_TCO
load_data_instance = GEMS_TCO.load_data('')

df_day_aggregated_list = []
df_day_map_list = []
for i in range(31):
    cur_map, cur_df =load_data_instance.load_working_data_byday_wo_mm(loaded_coarse_map,[i*8, (i+1)*8])
    df_day_aggregated_list.append( cur_df )
    df_day_map_list.append( cur_map )

Attempting to load data from: /Users/joonwonlee/Documents/GEMS_DATA/pickle_2024/coarse_cen_map_without_decrement_latitude24_07.pkl

Data loaded successfully! ✅
Type of loaded data: <class 'dict'>
Number of entries (hours) in the map: 248
Example keys: ['y24m07day01_hm00:53', 'y24m07day01_hm01:53', 'y24m07day01_hm02:53', 'y24m07day01_hm03:53', 'y24m07day01_hm04:49']
270
273


In [7]:
import torch
from typing import Callable

# =========================================================================
# 1. Helper Function for Subsetting
# =========================================================================

def subset_by_area(input_tensor: torch.Tensor) -> torch.Tensor:
    """
    Subsets a tensor to the specified lat/lon area.
    Latitude between 0 and 5.
    Longitude between 123 and 133.
    """
    # Assumes columns are [lat, lon, value, time]
    lat_col, lon_col = 0, 1
    lat_mask = (input_tensor[:, lat_col] >= 0) & (input_tensor[:, lat_col] <= 5)
    lon_mask = (input_tensor[:, lon_col] >= 123) & (input_tensor[:, lon_col] <= 133)
    
    df_sub = input_tensor[lat_mask & lon_mask].clone()
    return df_sub

a = df_day_aggregated_list[0].shape[0]/8
hour_n = 2
print(f'will look at {hour_n} hours of data ')
a = int(a*hour_n)
# Use the first tensor from your data list
raw_data = df_day_aggregated_list[0][:a].to(torch.float64) # Ensure data is float64 for precision

# --- 2. Subset the data to the desired area ---
print(f"Original data shape: {raw_data.shape}")
subset_data = subset_by_area(raw_data)
print(f"Subset data shape:   {subset_data.shape}")

will look at 2 hours of data 
Original data shape: torch.Size([147420, 4])
Subset data shape:   torch.Size([36252, 4])


In [8]:
# =========================================================================
# 2. Covariance and Likelihood Functions
# =========================================================================

def custom_distance_matrix(U: torch.Tensor, V: torch.Tensor) -> torch.Tensor:
    """Efficient distance computation with broadcasting."""
    spatial_diff = torch.norm(U[:, :2].unsqueeze(1) - V[:, :2].unsqueeze(0), dim=2)
    temporal_diff = torch.abs(U[:, 2].unsqueeze(1) - V[:, 2].unsqueeze(0))
    distance = (spatial_diff**2 + temporal_diff**2)
    return distance

def precompute_coords_anisotropy(params: torch.Tensor, y_data: torch.Tensor, x_data: torch.Tensor) -> torch.Tensor:
    """Pre-computes transformed coordinates and the distance matrix."""
    sigmasq, range_lat, range_lon, advec_lat, advec_lon, beta, nugget = params

    if y_data is None or x_data is None:
        raise ValueError("Both y_data and x_data must be provided.")

    # Assumes columns are [lat, lon, value, time]
    x1, y1, t1 = x_data[:, 0], x_data[:, 1], x_data[:, 3]
    x2, y2, t2 = y_data[:, 0], y_data[:, 1], y_data[:, 3]

    spat_coord1 = torch.stack(((x1 - advec_lat * t1) / range_lat, (y1 - advec_lon * t1) / range_lon), dim=-1)
    spat_coord2 = torch.stack(((x2 - advec_lat * t2) / range_lat, (y2 - advec_lon * t2) / range_lon), dim=-1)

    U = torch.cat((spat_coord1, (beta * t1).reshape(-1, 1)), dim=1)
    V = torch.cat((spat_coord2, (beta * t2).reshape(-1, 1)), dim=1)

    distance = custom_distance_matrix(U, V)
    return distance

def matern_cov_anisotropy_v05(params: torch.Tensor, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Computes the Matérn covariance with v=0.5 (exponential)."""
    sigmasq, range_lat, range_lon, advec_lat, advec_lon, beta, nugget = params
    
    distance = precompute_coords_anisotropy(params, x, y)
    out = torch.zeros_like(distance)

    non_zero_indices = distance != 0
    if torch.any(non_zero_indices):
        out[non_zero_indices] = sigmasq * torch.exp(-torch.sqrt(distance[non_zero_indices]))
    out[~non_zero_indices] = sigmasq

    # Add nugget/jitter only to the diagonal of the main covariance matrix
    if torch.equal(x, y):
        out += torch.eye(out.shape[0], dtype=out.dtype) * nugget 
    return out
           
def full_likelihood(params: torch.Tensor, input_data: torch.Tensor, response: torch.Tensor, covariance_function: Callable) -> torch.Tensor:
    """Calculates the full Gaussian negative log-likelihood."""
    cov_matrix = covariance_function(params=params, y=input_data, x=input_data)
    sign, log_det = torch.slogdet(cov_matrix)

    # The design matrix 'X' in GLM, here just the spatial locations
    locs = input_data[:, :2]

    # Compute beta (trend coefficients)
    tmp1 = torch.matmul(locs.T, torch.linalg.solve(cov_matrix, locs))
    tmp2 = torch.matmul(locs.T, torch.linalg.solve(cov_matrix, response))
    beta_coeffs = torch.linalg.solve(tmp1, tmp2)

    # Compute the mean and residuals
    mu = torch.matmul(locs, beta_coeffs)
    y_mu = response - mu

    # Compute the quadratic form
    quad_form = torch.matmul(y_mu, torch.linalg.solve(cov_matrix, y_mu))

    # Compute the negative log likelihood
    neg_log_lik = 0.5 * (log_det + quad_form)
    return neg_log_lik 



In [None]:
# =========================================================================
# 3. Main Execution Block
# =========================================================================
if __name__ == '__main__':
    # --- 1. Define your parameters and load your data ---
    
    # Example parameters (on their natural scale)
    #a = [25.0, 3.0, 4.0, 0.02, -0.08, 0.02, 3.01] # 24125. 50580
    #a = [20.89, 1.04, 1.337, 0.040, -0.178, 0.195, 4.498] # 47385
    a = [28.6847, 0.9147, 5.0289, 0.1551, 0.6344, 0.0, 4.1061] # 24608. 62513
    a = [12.9046, 6.2184, 4.3523, -0.0724, -0.2941, -0.262, 5.4445]
    a = [5.3553, 2.9051, 3.4829, -0.4144, -0.2813, -0.2952, 4.2593]
    
    a = [4.4833, 3.1014, 3.7344, -0.6867, 0.08, 0.2891, 4.8089]
    #a = [19.89, 1.04, 1.337, 0.04, -0.178, 0.195, 4.498]
    a = [24.7512, 1.0101, 1.5151, -0.03, -0.06, 0.03, 2.0201] # large data
    a = [24.7512, 1.0101, 1.5151, -0.03, -0.06, 0.03, 2.0201] # small data
    a = [9.442, 2.5688, 3.8532, -0.0, -0.0012, 0.0, 0.7356] # inde 3 components
    a = [48.9713, 0.4105, 0.6803, 0.0, -0.49, 0.0, 2.0]
    a = [300.6555, 0.4139, 0.4649, -1.2799, -0.1826, 1.0906, 2.0] #v1.5
    a = [34.1987, 0.6579, 1.9012, 0.002, 0.2631, 0.0003, 1.6811]
    a = [23.9528, 1.0246, 1.73, 0.244, 0.1445, 0.0, 1.7276]
    a = [30.2594, 0.665, 1.8981, 0.0, 0.1317, -0.0, 1.9785]
    a = [45.1402, 0.6299, 0.7308, -0.0003, -0.0151, 0.0, 7.8922]
    a = [24.214204843325692, 0.7605639325477755, 1.3559509025375993, 0.04455842873488032, -0.14632970126172773, 0.16969297486942467, 3.8711802967371067]
    # above 23536
    a = [33.5371, 0.5245, 2.3767, -0.0, 0.2104, -0.0, 1.638] # 24650 78378

    a = [22.892229690601923, 0.798430619739265, 1.9505592698122576, 5.977196685598199e-06, 4.443502918276306e-06, 0.0, 1.8082124041763596]
    # above 24129
    a =[ 29.7507942474575, 0.9895617352853928, 1.0668105024738395, 0.03646879682204495, -0.15565418931819383, 0.17944566056749772, 1.8907510307884106]
    # above 23051. from 5000 vecchia    # 2 45718

    a = [29.89872140748479, 1.1529714369768411, 1.7862356661866714, 0.03927727761515986, -0.15656505052873793, 0.1320573870050866, 4.130349222670235]
    # above 23583  from 1250 vecchia
    
    #a = [29.776, 0.6619, 0.7052, 0.0, 0.0, 0.0, 1.3839]. # 22965. 
    params = torch.tensor(a, dtype=torch.float64)

    # ⚠️ ASSUMPTION: 'df_day_aggregated_list' is loaded and available here.
    # For example:
    # with open("path_to_your_data.pkl", 'rb') as f:
    #     df_day_aggregated_list = pickle.load(f)
    
    a = df_day_aggregated_list[0].shape[0]/8
    hour_n = 1
    a = int(a*hour_n)
    # Use the first tensor from your data list
    raw_data = df_day_aggregated_list[0][:a].to(torch.float64) # Ensure data is float64 for precision

    # --- 2. Subset the data to the desired area ---
    print(f"Original data shape: {raw_data.shape}")
    subset_data = subset_by_area(raw_data)
    print(f"Subset data shape:   {subset_data.shape}")

    # --- 3. Calculate the full likelihood on the subset ---
    if subset_data.shape[0] > 0:
        # The 'response' is the ozone column (index 2) of the subsetted data
        response_y = subset_data[:, 2]

        neg_log_lik_result = full_likelihood(
            params=params, 
            input_data=subset_data, 
            response=response_y, 
            covariance_function=matern_cov_anisotropy_v05
        )

        print(f"\nCalculated Negative Log Likelihood: {neg_log_lik_result.item():.4f}")
    else:
        print("\nNo data points found in the specified area.")

# 24125

Original data shape: torch.Size([73710, 4])
Subset data shape:   torch.Size([18126, 4])

Calculated Negative Log Likelihood: 23583.9029


In [46]:
print(f'Variance of data {torch.var(df_day_aggregated_list[0][:,2])}')
print(f'Estimated SQ + Nugget = {20.89+4.498}')



print(f'\n\nEast to West wind speed { round(1.337*0.178*111/3600 *1000,4)} (m/s) ')
print(f'North to South Wind speed  { round(1.04*0.04*111/3600*1000,4)} (m/s)')

print(f'\n\nEast to West wind speed  { round(0.072*0.0151*111/3600*1000,4)} (m/s)')

Variance of data 31.390108108520508
Estimated SQ + Nugget = 25.388


East to West wind speed 7.3379 (m/s) 
North to South Wind speed  1.2827 (m/s)


East to West wind speed  0.0335 (m/s)


In [1]:
2.43*-0.155988*111/3600*1000

-11.6874009

# Full Likelihood

Vecchia Parameters Used (raw data without differencing, but demeaned, regulargrid by center matching) 
```[21.303, 1.307, 1.563, 0.022, -0.144, 0.198, 4.769]``` 
### 1 hour of data 18126x1
Calculated Negative Log Likelihood: ```23849.8334``
if 21.303 --> 20.303:
Calculated Negative Log Likelihood: ```23857.1187```

### 2 hours of data 18126x2
Calculated Negative Log Likelihood: ```47732.5073```.  ## 47732 ~= 2*238550, so it scales well


Whittle Parameters Used (Once differenced in space [[-2 1][1 0 ]]) + hamming tapering 
```[31.2594, 0.665, 1.8981, 0.0, 0.1317, -0.0, 1.9785]```

### 1 hour of data 18126x1
Calculated Negative Log Likelihood: ```23642```
if 31.2594 --> 30.2594:
Calculated Negative Log Likelihood: ```23669```

### 2 hours of data 18126x2
Calculated Negative Log Likelihood: ```70664``` ## 70664> 23642*2, does not scale well


Whittle Parameters Used (Once differenced in space [[-2 1][1 0 ]]) + once difference in time (two stage differencing)
```[45.1402, 0.6299, 0.7308, -0.0003, -0.0151, 0.0, 7.8922]```

### 1 hour of data 18126x1
Calculated Negative Log Likelihood: ```26766```
if 45.7499-->44.7499:
Calculated Negative Log Likelihood: ```26726``` ## nll actually dereased? when the variance parameter is 1 unit away from local optimal

### 2 hours of data 18126x2
Calculated Negative Log Likelihood: ```56130``` ## difference in time seems to help capturing temporal structure



# Whittle Likelihood on 8 hours of data

-> Whittle Likelihood ('Vecchia Optimized' Params): 
```[21.303, 1.307, 1.563, 0.022, -0.144, 0.198, 4.769]```, negative log likelihood:   ```58326.332```
if 21.303 --> 20.303    ```57998.410```

-> Whittle Likelihood ('Whittle Optimized' Params): 
```[31.2594, 0.665, 1.8981, 0.0, 0.1317, -0.0, 1.9785]```  , negative log likelihood:  ```41604.566```
if 31.2594 --> 30.2594    ```41623.910```


# Vecchia Likelihood on 8 hours of data

-> Vecchia Likelihood ('Vecchia Optimized' Params): 
```[21.303, 1.307, 1.563, 0.022, -0.144, 0.198, 4.769]```, negative log likelihood:   ```64351```
if 21.303 --> 20.303    ```64384```

-> Vecchia Likelihood ('Whittle Optimized' Params): 
```[31.2594, 0.665, 1.8981, 0.0, 0.1317, -0.0, 1.9785]```  , negative log likelihood:  ```71865```
if 31.2594 --> 30.2594    ```72132```






### Full likelihood using spatially differenced data

In [6]:


import torch
import numpy as np
import torch.nn.functional as F
import os
import pickle

# Assume GEMS_TCO is a custom class/module you have available
# from your_project import GEMS_TCO

# =========================================================================
# 1. Helper Functions
# =========================================================================

def subset_tensor(df_tensor: torch.Tensor) -> torch.Tensor:
    """Subsets a tensor to a specific lat/lon range."""
    #lat_mask = (df_tensor[:, 0] >= -5) & (df_tensor[:, 0] <= 6.3)
    #lon_mask = (df_tensor[:, 1] >= 118) & (df_tensor[:, 1] <= 134.2)
    lat_mask = (df_tensor[:, 0] >= 0) & (df_tensor[:, 0] <= 5)
    lon_mask = (df_tensor[:, 1] >= 123) & (df_tensor[:, 1] <= 133)

    df_sub = df_tensor[lat_mask & lon_mask].clone()
    return df_sub

def apply_first_difference_2d_tensor(df_tensor: torch.Tensor) -> torch.Tensor:
    """
    Applies a 2D first-order difference filter using convolution.
    This approximates Z(s) = [X(s+d_lat) - X(s)] + [X(s+d_lon) - X(s)].
    """
    if df_tensor.size(0) == 0:
        return torch.empty(0, 4)

    # 1. Get grid dimensions and validate
    unique_lats = torch.unique(df_tensor[:, 0])
    unique_lons = torch.unique(df_tensor[:, 1])
    lat_count, lon_count = unique_lats.size(0), unique_lons.size(0)

    if df_tensor.size(0) != lat_count * lon_count:
        raise ValueError("Tensor size does not match grid dimensions. Must be a complete grid.")
    if lat_count < 2 or lon_count < 2:
        return torch.empty(0, 4)

    # 2. Reshape data and define the correct kernel
    ozone_data = df_tensor[:, 2].reshape(1, 1, lat_count, lon_count)
    
    # ✅ CORRECT KERNEL: This kernel results in the standard first-order difference:
    # Z(i,j) = X(i+1,j) + X(i,j+1) - 2*X(i,j)
    # Note: F.conv2d in PyTorch actually performs cross-correlation. To get a true
    # convolution result, the kernel would need to be flipped. However, for a 
    # forward difference operator, defining the kernel for cross-correlation is more direct.
    # The kernel below is designed for cross-correlation to achieve the desired differencing.
    diff_kernel = torch.tensor([[[[-2., 1.],
                                  [ 1., 0.]]]], dtype=torch.float32)

    # 3. Apply convolution (which acts as cross-correlation)
    filtered_grid = F.conv2d(ozone_data, diff_kernel, padding='valid').squeeze()

    # 4. Determine coordinates for the new, smaller grid
    # The new grid corresponds to the anchor points of the kernel
    new_lats = unique_lats[:-1]
    new_lons = unique_lons[:-1]

    # 5. Reconstruct the output tensor
    new_lat_grid, new_lon_grid = torch.meshgrid(new_lats, new_lons, indexing='ij')
    filtered_values = filtered_grid.flatten()
    time_value = df_tensor[0, 3].repeat(filtered_values.size(0))

    new_tensor = torch.stack([
        new_lat_grid.flatten(),
        new_lon_grid.flatten(),
        filtered_values,
        time_value
    ], dim=1)
    
    return new_tensor

# =========================================================================
# 2. Data Loading (Unchanged)
# =========================================================================
# ⚠️ NOTE: You must define these variables
# mac_data_path = "..."
# year = 2022
# month_str = "01"
# class GEMS_TCO: # Placeholder
#     def load_data(self, path): return self
#     def load_working_data_byday_wo_mm(self, data, indices):
#         return {'key': torch.randn(100, 4)}, torch.randn(100, 4)
mac_data_path = config.mac_data_load_path
year = 2024
pickle_path = os.path.join(mac_data_path, f'pickle_{year}')
output_filename = f"coarse_cen_map_without_decrement_latitude{str(year)[2:]}_{month_str}.pkl"
output_filepath = os.path.join(pickle_path, output_filename)
print(f"Loading data from: {output_filepath}")
with open(output_filepath, 'rb') as pickle_file:
    cbmap_ori = pickle.load(pickle_file)

load_data_instance = GEMS_TCO.load_data('')
df_day_map_list = []
for i in range(31): # Adjust if necessary
    cur_map, _ = load_data_instance.load_working_data_byday_wo_mm(cbmap_ori, [i*8, (i+1)*8])
    df_day_map_list.append(cur_map)
print(f"Loaded {len(df_day_map_list)} days of raw data.")

# =========================================================================
# 3. Main Processing Loop (Unchanged)
# =========================================================================
spatially_filtered_days = []
for day_idx, day_map in enumerate(df_day_map_list):
    tensors_to_aggregate = []
    for key, tensor in day_map.items():
        subsetted = subset_tensor(tensor)
        if subsetted.size(0) > 0:
            try:
                diff_applied = apply_first_difference_2d_tensor(subsetted)
                if diff_applied.size(0) > 0:
                    tensors_to_aggregate.append(diff_applied)
            except ValueError as e:
                print(f"Skipping data chunk on day {day_idx+1} due to error: {e}")

    if tensors_to_aggregate:
        aggregated_day_tensor = torch.cat(tensors_to_aggregate, dim=0)
        spatially_filtered_days.append(aggregated_day_tensor)

# =========================================================================
# 4. Verification (Unchanged)
# =========================================================================
print("\n--- Results ---")
print(f"Number of final spatially-differenced day tensors: {len(spatially_filtered_days)}")
if spatially_filtered_days:
    # Save the processed data for the next script
    processed_output_path = "spatial_first_difference_data.pkl"
    with open(processed_output_path, 'wb') as f:
        pickle.dump(spatially_filtered_days, f)
    print(f"Processed data saved to {processed_output_path}")

    print(f"\nShape of the first final tensor: {spatially_filtered_days[0].shape}")
    print("First final tensor head:")
    print(spatially_filtered_days[0][:5])
else:
    print("\nNo final differenced tensors were created.")

Loading data from: /Users/joonwonlee/Documents/GEMS_DATA/pickle_2024/coarse_cen_map_without_decrement_latitude24_07.pkl
Loaded 31 days of raw data.

--- Results ---
Number of final spatially-differenced day tensors: 31
Processed data saved to spatial_first_difference_data.pkl

Shape of the first final tensor: torch.Size([142832, 4])
First final tensor head:
tensor([[ 4.0000e-03,  1.2303e+02,  2.9422e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2309e+02,  1.9636e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2316e+02, -1.3187e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2322e+02, -3.1683e+00,  2.1000e+01],
        [ 4.0000e-03,  1.2328e+02, -5.4922e-01,  2.1000e+01]])


In [8]:
# =========================================================================
# 3. Main Execution Block (Adjusted to use only the first two hours)
# =========================================================================
if __name__ == '__main__':
    
    # --- 1. Load the processed data ---
    processed_output_path = "spatial_first_difference_data.pkl"
    print(f"Loading processed data from: {processed_output_path}")

    try:
        with open(processed_output_path, 'rb') as f:
            # 💡 Loading your spatially differenced data
            spatially_filtered_days = pickle.load(f)
    except FileNotFoundError:
        print(f"Error: Processed data file not found at {processed_output_path}. Ensure the differencing script ran and saved the data.")
        exit()
    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        exit()

    # --- 2. Define parameters and select data subset ---
    
    # Parameters: [sigmasq, range_lat, range_lon, advec_lat, advec_lon, beta, nugget]
    # Using the new parameters provided in your prompt
    a = [19.89, 1.04, 1.337, 0.040, -0.178, 0.195, 4.498] # 47385
    #a = [28.6847, 0.9147, 5.0289, 0.1551, 0.6344, 0.0, 4.1061] 
    a = [12.9046, 6.2184, 4.3523, -0.0724, -0.2941, -0.262, 5.4445]
    a = [20.7046, 1.952, 4.1366, -1.0769, -0.3244, 0.1074, 3.9057]
    a = [11.0973, 3.6585, 4.6663, 0.3542, -0.7808, -0.0019, 3.0497]
    params = torch.tensor(a, dtype=torch.float64)
    
    # Select the first day's differenced tensor
    raw_data_day1 = spatially_filtered_days[0].to(torch.float64) 
    
    # --- 3. Subset by Area and Time ---
    
    # Apply the spatial area subset first
    subset_data_area = subset_by_area(raw_data_day1)
    
    print(f"Total points after spatial subsetting: {subset_data_area.shape[0]}")

    # --- Time-based Filtering (First Two Hours) ---
    # The 'time' column is the 4th column (index 3).
    # Since the original data was loaded in 8-hour chunks per day, and the time indices 
    # typically represent the chunk number or the time within the day, let's examine 
    # the time indices in the subsetted data.
    
    # Find the unique time indices in the subsetted data
    unique_times = torch.unique(subset_data_area[:, 3], sorted=True)
    hour_n = 1
    if len(unique_times) >= hour_n:
        # Select the first two unique time indices
        time_limit = unique_times[: hour_n]
        
        # Create a mask for data points where the time index matches one of the first two times
        time_mask = torch.isin(subset_data_area[:, 3], time_limit)
        subset_data = subset_data_area[time_mask]
        
        print(f"Using time indices: {time_limit.tolist()}")
    else:
        # Fallback if there aren't two unique time indices
        print("Warning: Less than two unique time indices available. Using all data after spatial subset.")
        subset_data = subset_data_area
        
    print(f"Final subset data shape for likelihood (time-limited): {subset_data.shape}")

    # --- 4. Calculate the full likelihood ---
    N = subset_data.shape[0]
    if N > 2000:
        print(f"\n🛑 WARNING: The current data size ({N} points) will be extremely slow for NLL calculation ($\mathcal{{O}}(N^3)$).")
        print("         The previous limit of 1000 was for performance. Proceeding may take a long time or fail due to memory.")
        # Proceed with caution or add a user confirmation/exit here
        
    if N > 3: 
        # The 'response' is the differenced ozone column (index 2)
        response_y = subset_data[:, 2]

        neg_log_lik_result = full_likelihood(
            params=params, 
            input_data=subset_data, 
            response=response_y, 
            covariance_function=matern_cov_anisotropy_v05
        )

        print(f"\nCalculated Negative Log Likelihood: {neg_log_lik_result.item():.4f}")
        print(f"Parameters used: {a}")
    else:
        print("\nNot enough data points found in the specified area after filtering/sampling.")



Loading processed data from: spatial_first_difference_data.pkl
Total points after spatial subsetting: 142832
Using time indices: [21.0]
Final subset data shape for likelihood (time-limited): torch.Size([17854, 4])

         The previous limit of 1000 was for performance. Proceeding may take a long time or fail due to memory.

Calculated Negative Log Likelihood: 64440.0133
Parameters used: [11.0973, 3.6585, 4.6663, 0.3542, -0.7808, -0.0019, 3.0497]


Parameters used: [28.6847, 0.9147, 5.0289, 0.1551, 0.6344, 0.0, 4.1061]
### 1
Calculated Negative Log Likelihood: 51301.4845

### 2
Calculated Negative Log Likelihood: 100719.2866

Parameters used: [27.6847, 0.9147, 5.0289, 0.1551, 0.6344, 0.0, 4.1061]
### 1
Calculated Negative Log Likelihood: 51382.0330


###################################


Parameters used: [20.89, 1.04, 1.337, 0.04, -0.178, 0.195, 4.498]
### 1
Calculated Negative Log Likelihood: 47928.9610

Parameters used: [19.89, 1.04, 1.337, 0.04, -0.178, 0.195, 4.498]
### 1
Calculated Negative Log Likelihood: 48065.0109

### 2
Calculated Negative Log Likelihood: 93039.9350


this is estimate from 3d once differencing filter using larger data set
Parameters used: [12.9046, 6.2184, 4.3523, -0.0724, -0.2941, -0.262, 5.4445]
### 1
Calculated Negative Log Likelihood: 46520.4330



# 3d first differenced data

In [5]:
import torch
import numpy as np
import torch.nn.functional as F

# =========================================================================
# 1. Helper Function (UNCHANGED)
# =========================================================================

def subset_tensor(df_tensor: torch.Tensor) -> torch.Tensor:
    """
    Subsets a tensor to a specific lat/lon range.
    Columns are assumed to be [lat, lon, ozone, time].
    """
    lat_mask = (df_tensor[:, 0] >= 0) & (df_tensor[:, 0] <= 5)
    lon_mask = (df_tensor[:, 1] >= 123) & (df_tensor[:, 1] <= 133)
    return df_tensor[lat_mask & lon_mask].clone()

# =========================================================================
# 2. CORRECTED 3D Differencing Function
# =========================================================================

def apply_first_difference_3d(day_tensor: torch.Tensor) -> torch.Tensor:
    """
    Applies a 3D first-difference filter to calculate the gradient (rate of change)
    simultaneously across time, latitude, and longitude.

    Args:
        day_tensor: A tensor for a single day with columns [lat, lon, ozone, time].

    Returns:
        A tensor with columns [lat, lon, time, grad_t, grad_lat, grad_lon].
    """
    if day_tensor.numel() == 0:
        return torch.empty(0, 6)

    # 1. Map long-format data to a dense 3D grid
    unique_lats = torch.unique(day_tensor[:, 0])
    unique_lons = torch.unique(day_tensor[:, 1])
    unique_times = torch.unique(day_tensor[:, 3])
    
    T, H, W = len(unique_times), len(unique_lats), len(unique_lons)
    if T < 2 or H < 2 or W < 2:
        return torch.empty(0, 6)

    lat_map = {lat.item(): i for i, lat in enumerate(unique_lats)}
    lon_map = {lon.item(): i for i, lon in enumerate(unique_lons)}
    time_map = {time.item(): i for i, time in enumerate(unique_times)}

    ozone_grid = torch.zeros(T, H, W, dtype=torch.float32)
    for row in day_tensor:
        lat, lon, ozone, time = row
        t_idx, h_idx, w_idx = time_map[time.item()], lat_map[lat.item()], lon_map[lon.item()]
        ozone_grid[t_idx, h_idx, w_idx] = ozone
    
    # Reshape for conv3d: (N, C_in, D, H, W) -> (1, 1, Time, Lat, Lon)
    ozone_grid = ozone_grid.unsqueeze(0).unsqueeze(0)

    # 2. Define 3D kernels for first difference Z(i) - Z(i-1) along each axis
    kernel_t = torch.tensor([-1., 1.], dtype=torch.float32).reshape(1, 1, 2, 1, 1)   # D-axis (Time)
    kernel_lat = torch.tensor([-1., 1.], dtype=torch.float32).reshape(1, 1, 1, 2, 1) # H-axis (Lat)
    kernel_lon = torch.tensor([-1., 1.], dtype=torch.float32).reshape(1, 1, 1, 1, 2) # W-axis (Lon)

    # 3. Apply 3D convolution to get gradient components
    # Output shapes: grad_t (T-1, H, W), grad_lat (T, H-1, W), grad_lon (T, H, W-1)
    grad_t = F.conv3d(ozone_grid, kernel_t, padding='valid').squeeze()
    grad_lat = F.conv3d(ozone_grid, kernel_lat, padding='valid').squeeze()
    grad_lon = F.conv3d(ozone_grid, kernel_lon, padding='valid').squeeze()

    # 4. Align gradient grids to the common (T-1, H-1, W-1) shape
    # This aligns the anchor point (Lat[i], Lon[j], Time[k]) for all three derivatives.
    
    # grad_t is (T-1, H, W). We slice H -> H-1 and W -> W-1
    grad_t_common = grad_t[:, :-1, :-1] 

    # grad_lat is (T, H-1, W). We slice T -> T-1 and W -> W-1
    grad_lat_common = grad_lat[:-1, :, :-1]

    # grad_lon is (T, H, W-1). We slice T -> T-1 and H -> H-1
    grad_lon_common = grad_lon[:-1, :-1, :]

    # 5. Create new coordinate grids (T-1, H-1, W-1)
    # The new coordinates correspond to the point *after* the difference (i.e., X(i)-X(i-1) is anchored at i)
    new_times = unique_times[1:] # T-1 coordinates (e.g., hour 2 to 8)
    new_lats = unique_lats[1:]   # H-1 coordinates
    new_lons = unique_lons[1:]   # W-1 coordinates
    
    time_grid, lat_grid, lon_grid = torch.meshgrid(new_times, new_lats, new_lons, indexing='ij')

    # 6. Flatten and stack
    final_tensor = torch.stack([
        lat_grid.flatten(),
        lon_grid.flatten(),
        time_grid.flatten(),
        grad_t_common.flatten(),
        grad_lat_common.flatten(),
        grad_lon_common.flatten()
    ], dim=1)
    
    return final_tensor

# =========================================================================
# 3. Main Processing Loop (UNCHANGED logic, requires df_day_map_list)
# =========================================================================

# Assuming df_day_map_list is pre-loaded and sorted by date
all_gradient_tensors = []
# NOTE: df_day_map_list must be defined and loaded for this loop to run correctly.
# For demonstration purposes, we assume it is loaded as requested.
# for day_map in df_day_map_list: 
#     # Aggregate all data for one day and apply the initial spatial subset
#     tensors_for_day = [subset_tensor(t) for t in day_map.values() if t.numel() > 0]

#     if not tensors_for_day:
#         continue
        
#     full_day_tensor = torch.cat(tensors_for_day, dim=0)
    
#     # Apply the unified 3D differencing function to the day's data
#     gradient_tensor = apply_first_difference_3d(full_day_tensor)
    
#     if gradient_tensor.size(0) > 0:
#         all_gradient_tensors.append(gradient_tensor)


# --- Verification ---
# print(f"Number of final gradient tensors (one per day): {len(all_gradient_tensors)}")

# if all_gradient_tensors:
#     print("\nShape of the first day's gradient tensor:", all_gradient_tensors[0].shape)
#     print("Columns: [lat, lon, time, grad_t, grad_lat, grad_lon]")
#     print("Head of the first gradient tensor:")
#     print(all_gradient_tensors[0][:5])

In [None]:
# =========================================================================
# 3. Main Execution Block (Adjusted to use only the first two hours)
# =========================================================================
if __name__ == '__main__':
    
    # --- 1. Load the processed data ---
    # CHANGED: Using 'all_gradient_tensors.pkl' as requested.
    processed_output_path = "all_gradient_tensors.pkl"
    print(f"Loading processed data from: {processed_output_path}")

    try:
        with open(processed_output_path, 'rb') as f:
            # 💡 Loading your spatially differenced data
            spatially_filtered_days = pickle.load(f)
    except FileNotFoundError:
        print(f"Error: Processed data file not found at {processed_output_path}. Ensure the differencing script ran and saved the data.")
        exit()
    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        exit()

    # --- 2. Define parameters and select data subset ---
    
    # Parameters: [sigmasq, range_lat, range_lon, advec_lat, advec_lon, beta, nugget]
    # Using the new parameters provided in your prompt
    a = [19.89, 1.04, 1.337, 0.040, -0.178, 0.195, 4.498] # 47385
    #a = [28.6847, 0.9147, 5.0289, 0.1551, 0.6344, 0.0, 4.1061] 
    #a = [12.9046, 6.2184, 4.3523, -0.0724, -0.2941, -0.262, 5.4445]
    #a = [20.7046, 1.952, 4.1366, -1.0769, -0.3244, 0.1074, 3.9057]
    a = [5.2406, 1.4667, 1.3835, -0.5644, -0.6237, 0.5911, 0.0698]
    params = torch.tensor(a, dtype=torch.float64)
    
    # Select the first day's differenced tensor
    raw_data_day1 = spatially_filtered_days[0].to(torch.float64) 
    
    # --- 3. Subset by Area and Time ---
    
    # Apply the spatial area subset first
    subset_data_area = subset_by_area(raw_data_day1)
    
    print(f"Total points after spatial subsetting: {subset_data_area.shape[0]}")

    # --- Time-based Filtering (First Two Hours) ---
    # The 'time' column is the 4th column (index 3).
    # Since the original data was loaded in 8-hour chunks per day, and the time indices 
    # typically represent the chunk number or the time within the day, let's examine 
    # the time indices in the subsetted data.
    
    # Find the unique time indices in the subsetted data
    unique_times = torch.unique(subset_data_area[:, 3], sorted=True)
    hour_n = 1
    if len(unique_times) >= hour_n:
        # Select the first two unique time indices
        time_limit = unique_times[: hour_n]
        
        # Create a mask for data points where the time index matches one of the first two times
        time_mask = torch.isin(subset_data_area[:, 3], time_limit)
        subset_data = subset_data_area[time_mask]
        
        print(f"Using time indices: {time_limit.tolist()}")
    else:
        # Fallback if there aren't two unique time indices
        print("Warning: Less than two unique time indices available. Using all data after spatial subset.")
        subset_data = subset_data_area
        
    print(f"Final subset data shape for likelihood (time-limited): {subset_data.shape}")

    # --- 4. Calculate the full likelihood ---
    N = subset_data.shape[0]
    if N > 2000:
        print(f"\n🛑 WARNING: The current data size ({N} points) will be extremely slow for NLL calculation ($\mathcal{{O}}(N^3)$).")
        print("         The previous limit of 1000 was for performance. Proceeding may take a long time or fail due to memory.")
        # Proceed with caution or add a user confirmation/exit here
        
    if N > 3: 
        # The 'response' is the differenced ozone column (index 2)
        response_y = subset_data[:, 2]

        neg_log_lik_result = full_likelihood(
            params=params, 
            input_data=subset_data, 
            response=response_y, 
            covariance_function=matern_cov_anisotropy_v05
        )

        print(f"\nCalculated Negative Log Likelihood: {neg_log_lik_result.item():.4f}")
        print(f"Parameters used: {a}")
    else:
        print("\nNot enough data points found in the specified area after filtering/sampling.")

Loading processed data from: all_gradient_tensors.pkl
Error: Processed data file not found at all_gradient_tensors.pkl. Ensure the differencing script ran and saved the data.




NameError: name 'spatially_filtered_days' is not defined

: 

In [None]:
Calculated Negative Log Likelihood: 53734.1218
Parameters used: [20.7046, 1.952, 4.1366, -1.0769, -0.3244, 0.1074, 3.9057]