In [None]:
%pip install rioxarray
%pip install numpy gdal==$(gdal-config --version)
%pip install scipy
%pip install matplotlib
%pip install geopandas
%pip install imdlib
%pip install xarray

### Preprocess IMD GRD to Tiff

In [2]:
import imdlib as imd
start_yr = 2000
end_yr = 2023
variable = 'rain'
# Specify the directory where you want to download and store data
input_dir = r"/home/stormej/dev/varsha/data/rain/rain_grd"
data = imd.open_data(variable, start_yr, end_yr, fn_format='yearwise', file_dir=input_dir)
ds = data.get_xarray()

In [None]:
import os
import numpy as np
import rioxarray
import pandas as pd
from shapely.geometry import box

output_dir = r"/home/stormej/dev/rainscale/data/rain/rain_tif"
os.makedirs(output_dir, exist_ok=True)

delhi_bounds = {
    'min_lon': 76.5,  # Western boundary of Delhi
    'max_lon': 77.5,  # Eastern boundary of Delhi
    'min_lat': 28.3,  # Southern boundary of Delhi
    'max_lat': 28.9   # Northern boundary of Delhi
}

if ds.rain.isnull().all().item() or (ds.rain == -999.0).all().item():
    raise ValueError("Source data appears to be invalid - all values are NaN or -999.0")

ds_delhi = ds.sel(
    lat=slice(delhi_bounds['min_lat'], delhi_bounds['max_lat']),
    lon=slice(delhi_bounds['min_lon'], delhi_bounds['max_lon'])
)

# Resample to ensure output shape is (3,5)
new_lat = np.linspace(delhi_bounds['min_lat'], delhi_bounds['max_lat'], 3)
new_lon = np.linspace(delhi_bounds['min_lon'], delhi_bounds['max_lon'], 5)
ds_delhi = ds_delhi.interp(lat=new_lat, lon=new_lon)

# Verify the shape
print(f"Data shape (lat, lon): {ds_delhi.rain.isel(time=0).shape}")
assert ds_delhi.rain.isel(time=0).shape == (3, 5), "Output shape is not (3, 5)"

ds_delhi.rio.write_crs("EPSG:4326", inplace=True)

valid_files = 0
invalid_files = 0

for time_step in ds_delhi.time.values:
    dt = pd.to_datetime(time_step)
    date_str = dt.strftime("%Y-%m-%d")
    day = ds_delhi.sel(time=time_step)
    
    rain_data = day.rain.values
    if np.all(rain_data == -999.0) or np.all(np.isnan(rain_data)):
        print(f"Skipping {date_str} as all values are invalid or NaN")
        invalid_files += 1
        continue
    
    day = day.where(day.rain != -999.0)
    
    if day.rain.isnull().all().item():
        print(f"Skipping {date_str} as all values became NaN after filtering")
        invalid_files += 1
        continue
    
    output_file = os.path.join(output_dir, f"rain_{date_str}.tif")
    
    try:
        day.rain.rio.to_raster(
            output_file,
            driver='GTiff',
            nodata=np.nan,
            dtype='float32',
            crs='EPSG:4326',
        )

        if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
            if dt.day == 1:
                print(f"Created: {output_file}")
            valid_files += 1
        else:
            print(f"Warning: Empty file created for {date_str}")
            invalid_files += 1
    except Exception as e:
        print(f"Error writing {date_str}: {str(e)}")
        invalid_files += 1
        continue

print(f"Processing complete. Valid files: {valid_files}, Invalid/skipped: {invalid_files}")

Data shape (lat, lon): (3, 5)
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-01-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-02-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-03-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-04-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-05-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-06-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-07-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-08-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-09-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-10-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-11-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/rain_2000-12-01.tif
Created: /home/stormej/dev/rainscale/data/rain/rain_tif/ra

### Convert Daily IMD Data to Monthly IMD Data

In [4]:
import os
import numpy as np
import pandas as pd

output_dir = r"/home/stormej/dev/rainscale/data/rain/rain_tif_monthly"
os.makedirs(output_dir, exist_ok=True)

ds_clean = ds_delhi.where(ds_delhi.rain != -999.0)

for year_num in range(2000, 2023):
    print(f"Processing year: {year_num}")
    
    year_data = ds_clean.sel(time=slice(f"{year_num}-01-01", f"{year_num}-12-31"))
    
    for month_num in range(1, 13):
        print(f"Processing month: {month_num}")
        
        month_data = year_data.sel(time=year_data.time.dt.month == month_num)
        
        if len(month_data.time) == 0:
            print(f"No data for month {month_num} in year {year_num}")
            continue
        
        monthly_sum = month_data.rain.sum(dim='time', skipna=True)
        
        monthly_sum = monthly_sum.rio.write_crs("EPSG:4326", inplace=True)
        
        output_file = os.path.join(output_dir, f"monthly_rain_{year_num}_{month_num:02d}.tif")
        
        monthly_sum.rio.to_raster(output_file)

print(f"All monthly files created successfully.")

Processing year: 2000
Processing month: 1
Processing month: 2
Processing month: 3
Processing month: 4
Processing month: 5
Processing month: 6
Processing month: 7
Processing month: 8
Processing month: 9
Processing month: 10
Processing month: 11
Processing month: 12
Processing year: 2001
Processing month: 1
Processing month: 2
Processing month: 3
Processing month: 4
Processing month: 5
Processing month: 6
Processing month: 7
Processing month: 8
Processing month: 9
Processing month: 10
Processing month: 11
Processing month: 12
Processing year: 2002
Processing month: 1
Processing month: 2
Processing month: 3
Processing month: 4
Processing month: 5
Processing month: 6
Processing month: 7
Processing month: 8
Processing month: 9
Processing month: 10
Processing month: 11
Processing month: 12
Processing year: 2003
Processing month: 1
Processing month: 2
Processing month: 3
Processing month: 4
Processing month: 5
Processing month: 6
Processing month: 7
Processing month: 8
Processing month: 9
Pro