### Interpolate Bias Obtained at 0.25 Degree to 0.01 Degree Using Kriging Interpolation

In [None]:
%pip install pykrige

In [7]:
import os 
import rasterio
from pykrige.ok import OrdinaryKriging
from rasterio.mask import mask
from shapely.geometry import box, Point, Polygon
import geopandas as gpd
import numpy as np

def create_bounds(src_bounds, dst_shape, dst_transform):
    rows, cols = dst_shape
    mask = np.zeros((rows, cols), dtype=bool)
    
    xs = np.linspace(dst_transform[2], 
                    dst_transform[2] + cols * dst_transform[0], 
                    cols)
    ys = np.linspace(dst_transform[5], 
                    dst_transform[5] + rows * dst_transform[4], 
                    rows)
    boundary = box(src_bounds.left, src_bounds.bottom,
                  src_bounds.right, src_bounds.top).buffer(0.1)
    
    X, Y = np.meshgrid(xs, ys)
    points = np.column_stack((X.ravel(), Y.ravel()))
    
    batch_size = 10000
    
    for i in range(0, len(points), batch_size):
        batch_points = points[i:i+batch_size]
        batch_results = [boundary.contains(Point(x, y)) for x, y in batch_points]
        mask.ravel()[i:i+batch_size] = batch_results
    
    return mask

def interpolate_tiff(input_dir, output_dir, variogram_model='spherical', max_points=1000):
    os.makedirs(output_dir, exist_ok=True)
    
    dst_crs = 'EPSG:4326'
    dst_bounds = (76.5, 28.31, 77.5, 28.9)
    width = int((dst_bounds[2] - dst_bounds[0]) / 0.01)
    height = int((dst_bounds[3] - dst_bounds[1]) / 0.01)
    dst_shape = (height, width)
    
    dst_transform = rasterio.transform.from_bounds(
        dst_bounds[0], dst_bounds[1], 
        dst_bounds[2], dst_bounds[3], 
        dst_shape[1], dst_shape[0]
    )
    
    dst_y, dst_x = np.mgrid[dst_bounds[1]:dst_bounds[3]:dst_shape[0]*1j,
                           dst_bounds[0]:dst_bounds[2]:dst_shape[1]*1j]
    
    tiff_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.tif')])
    
    for file in tiff_files:
        try:
            input_path = os.path.join(input_dir, file)
            output_path = os.path.join(output_dir, file)
            
            with rasterio.open(input_path) as src:
                src_data = src.read(1)
                nodata_value = src.nodata if src.nodata is not None else -9999
                src_mask = src_data != nodata_value
                
                profile = src.profile.copy()
                profile.update({
                    'crs': dst_crs,
                    'transform': dst_transform,
                    'width': dst_shape[1],
                    'height': dst_shape[0],
                    'nodata': nodata_value,
                })
                
                rows, cols = src_data.shape
                src_y, src_x = np.mgrid[src.bounds.bottom:src.bounds.top:rows*1j,
                                      src.bounds.left:src.bounds.right:cols*1j]
                
                valid_mask = (src_mask & 
                            ~np.isnan(src_data) & 
                            ~np.isinf(src_data) &
                            (src_data != nodata_value))
                
                x_valid = src_x[valid_mask].flatten()
                y_valid = src_y[valid_mask].flatten()
                z_valid = src_data[valid_mask].flatten()
                
                if len(z_valid) < 3:
                    print(f"Warning: Too few valid points ({len(z_valid)}) in {file}")
                    continue
                
                print(f"Number of valid points: {len(z_valid)}")
                print(f"X range: {np.min(x_valid):.2f} to {np.max(x_valid):.2f}")
                print(f"Y range: {np.min(y_valid):.2f} to {np.max(y_valid):.2f}")
                print(f"Z range: {np.min(z_valid):.2f} to {np.max(z_valid):.2f}")
                
                valid_min = np.min(z_valid)
                valid_max = np.max(z_valid)
                
                if len(z_valid) > max_points:
                    idx = np.random.choice(len(z_valid), max_points, replace=False)
                    x_valid = x_valid[idx]
                    y_valid = y_valid[idx]
                    z_valid = z_valid[idx]
                
                try:
                    boundary_mask = create_bounds(src.bounds, dst_shape, dst_transform)
                    
                    ok = OrdinaryKriging(
                        x_valid, y_valid, z_valid,
                        variogram_model=variogram_model,
                        verbose=False,
                        enable_plotting=False
                    )
                    
                    dst_array, _ = ok.execute('grid', dst_x[0, :], dst_y[:, 0])
                    
                    dst_array[~boundary_mask] = nodata_value
                    
                    valid_pixels = (dst_array != nodata_value) & ~np.isnan(dst_array)
                    if np.any(valid_pixels):
                        dst_array[valid_pixels] = np.clip(
                            dst_array[valid_pixels],
                            valid_min,
                            valid_max
                        )
                        
                        with rasterio.open(output_path, 'w', **profile) as dst:
                            dst.write(dst_array.astype(profile['dtype']), 1)
                            dst.update_tags(
                                interpolation_method=f'ordinary_kriging_{variogram_model}',
                                original_value_range=f"{valid_min:.2f} to {valid_max:.2f}",
                                valid_points_count=str(len(z_valid))
                            )
                        print(f"Successfully processed {file}")
                        print(f"Original value range: {valid_min:.2f} to {valid_max:.2f}")
                        print(f"Output value range: {np.min(dst_array[valid_pixels]):.2f} "
                              f"to {np.max(dst_array[valid_pixels]):.2f}")
                    else:
                        print(f"Warning: No valid pixels in output after masking for {file}")
                        
                except Exception as e:
                    print(f"Error in kriging for {file}: {str(e)}")
                    continue
        except Exception as e:
            print(f"Error processing {file}: {str(e)}")
            continue

def main():
    input_dir = r"/home/stormej/dev/rainscale/data/bias/bias_0.25"
    output_dir = r"/home/stormej/dev/rainscale/data/bias/bias_kriging_0.01"
    
    interpolate_tiff(input_dir, output_dir, variogram_model='spherical', max_points=1000)

main()

Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -3.32 to 45.95
Successfully processed bias_2000_02.tif
Original value range: -3.32 to 45.95
Output value range: 6.48 to 6.48
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -12.33 to -1.15
Successfully processed bias_2000_03.tif
Original value range: -12.33 to -1.15
Output value range: -11.80 to -4.02
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -15.45 to -2.04
Successfully processed bias_2000_04.tif
Original value range: -15.45 to -2.04
Output value range: -15.26 to -5.02
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -21.97 to 0.76
Successfully processed bias_2000_05.tif
Original value range: -21.97 to 0.76
Output value range: -16.43 to -0.96
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -19.28 to 63.18
Successfully processed bias_2000_06.tif
Original va

### Interpolate Bias Obtained at 0.25 Degree to 0.1 Degree Using Kriging Interpolation

In [5]:
import os 
import rasterio
from pykrige.ok import OrdinaryKriging
from rasterio.mask import mask
from shapely.geometry import box, Point, Polygon
import geopandas as gpd
import numpy as np

def create_bounds(src_bounds, dst_shape, dst_transform):
    rows, cols = dst_shape
    mask = np.zeros((rows, cols), dtype=bool)
    
    xs = np.linspace(dst_transform[2], 
                    dst_transform[2] + cols * dst_transform[0], 
                    cols)
    ys = np.linspace(dst_transform[5], 
                    dst_transform[5] + rows * dst_transform[4], 
                    rows)
    boundary = box(src_bounds.left, src_bounds.bottom,
                  src_bounds.right, src_bounds.top).buffer(0.1)
    
    X, Y = np.meshgrid(xs, ys)
    points = np.column_stack((X.ravel(), Y.ravel()))
    
    batch_size = 10000
    
    for i in range(0, len(points), batch_size):
        batch_points = points[i:i+batch_size]
        batch_results = [boundary.contains(Point(x, y)) for x, y in batch_points]
        mask.ravel()[i:i+batch_size] = batch_results
    
    return mask

def interpolate_tiff(input_dir, output_dir, variogram_model='spherical', max_points=1000):
    os.makedirs(output_dir, exist_ok=True)

    
    dst_crs = 'EPSG:4326'
    dst_bounds = (76.5, 28.31, 77.5, 28.9)
    width = int((dst_bounds[2] - dst_bounds[0]) / 0.1)  # 0.1 degree per pixel
    height = int((dst_bounds[3] - dst_bounds[1]) / 0.1)  # 0.1 degree per pixel
    dst_shape = (height, width)
    
    dst_transform = rasterio.transform.from_bounds(
        dst_bounds[0], dst_bounds[1], 
        dst_bounds[2], dst_bounds[3], 
        dst_shape[1], dst_shape[0]
    )
    
    dst_y, dst_x = np.mgrid[dst_bounds[1]:dst_bounds[3]:dst_shape[0]*1j,
                           dst_bounds[0]:dst_bounds[2]:dst_shape[1]*1j]
    
    tiff_files = sorted([f for f in os.listdir(input_dir) if f.endswith('.tif')])
    
    for file in tiff_files:
        try:
            input_path = os.path.join(input_dir, file)
            output_path = os.path.join(output_dir, file)
            
            with rasterio.open(input_path) as src:
                src_data = src.read(1)
                nodata_value = src.nodata if src.nodata is not None else -9999
                src_mask = src_data != nodata_value
                
                profile = src.profile.copy()
                profile.update({
                    'crs': dst_crs,
                    'transform': dst_transform,
                    'width': dst_shape[1],
                    'height': dst_shape[0],
                    'nodata': nodata_value,
                })
                
                rows, cols = src_data.shape
                src_y, src_x = np.mgrid[src.bounds.bottom:src.bounds.top:rows*1j,
                                      src.bounds.left:src.bounds.right:cols*1j]
                
                valid_mask = (src_mask & 
                            ~np.isnan(src_data) & 
                            ~np.isinf(src_data) &
                            (src_data != nodata_value))
                
                x_valid = src_x[valid_mask].flatten()
                y_valid = src_y[valid_mask].flatten()
                z_valid = src_data[valid_mask].flatten()
                
                if len(z_valid) < 3:
                    print(f"Warning: Too few valid points ({len(z_valid)}) in {file}")
                    continue
                
                print(f"Number of valid points: {len(z_valid)}")
                print(f"X range: {np.min(x_valid):.2f} to {np.max(x_valid):.2f}")
                print(f"Y range: {np.min(y_valid):.2f} to {np.max(y_valid):.2f}")
                print(f"Z range: {np.min(z_valid):.2f} to {np.max(z_valid):.2f}")
                
                valid_min = np.min(z_valid)
                valid_max = np.max(z_valid)
                
                if len(z_valid) > max_points:
                    idx = np.random.choice(len(z_valid), max_points, replace=False)
                    x_valid = x_valid[idx]
                    y_valid = y_valid[idx]
                    z_valid = z_valid[idx]
                
                try:
                    boundary_mask = create_bounds(src.bounds, dst_shape, dst_transform)
                    
                    ok = OrdinaryKriging(
                        x_valid, y_valid, z_valid,
                        variogram_model=variogram_model,
                        verbose=False,
                        enable_plotting=False
                    )
                    
                    dst_array, _ = ok.execute('grid', dst_x[0, :], dst_y[:, 0])
                    
                    dst_array[~boundary_mask] = nodata_value
                    
                    valid_pixels = (dst_array != nodata_value) & ~np.isnan(dst_array)
                    if np.any(valid_pixels):
                        dst_array[valid_pixels] = np.clip(
                            dst_array[valid_pixels],
                            valid_min,
                            valid_max
                        )
                        
                        with rasterio.open(output_path, 'w', **profile) as dst:
                            dst.write(dst_array.astype(profile['dtype']), 1)
                            dst.update_tags(
                                interpolation_method=f'ordinary_kriging_{variogram_model}',
                                original_value_range=f"{valid_min:.2f} to {valid_max:.2f}",
                                valid_points_count=str(len(z_valid))
                            )
                        print(f"Successfully processed {file}")
                        print(f"Original value range: {valid_min:.2f} to {valid_max:.2f}")
                        print(f"Output value range: {np.min(dst_array[valid_pixels]):.2f} "
                              f"to {np.max(dst_array[valid_pixels]):.2f}")
                    else:
                        print(f"Warning: No valid pixels in output after masking for {file}")
                        
                except Exception as e:
                    print(f"Error in kriging for {file}: {str(e)}")
                    continue
        except Exception as e:
            print(f"Error processing {file}: {str(e)}")
            continue

def main():
    input_dir = r"/home/stormej/dev/rainscale/data/bias/bias_0.25"
    output_dir = r"/home/stormej/dev/rainscale/data/bias/bias_kriging_0.1"
    
    interpolate_tiff(input_dir, output_dir, variogram_model='spherical', max_points=1000)

main()

Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -3.32 to 45.95
Successfully processed bias_2000_02.tif
Original value range: -3.32 to 45.95
Output value range: 6.48 to 6.48
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -12.33 to -1.15
Successfully processed bias_2000_03.tif
Original value range: -12.33 to -1.15
Output value range: -10.15 to -4.06
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -15.45 to -2.04
Successfully processed bias_2000_04.tif
Original value range: -15.45 to -2.04
Output value range: -14.11 to -5.05
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -21.97 to 0.76
Successfully processed bias_2000_05.tif
Original value range: -21.97 to 0.76
Output value range: -16.44 to -2.03
Number of valid points: 15
X range: 76.38 to 77.62
Y range: 28.15 to 29.05
Z range: -19.28 to 63.18
Successfully processed bias_2000_06.tif
Original va