In [None]:
import os
from dotenv import load_dotenv

import climpred
import xarray as xr
import xesmf as xe
import numpy as np
import pandas as pd
import regionmask
import geopandas as gp
from climpred import HindcastEnsemble
from datetime import datetime


In [None]:
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Directory that holds the input NetCDF files and receives the outputs.
# (The original `os,getenv(...)` built a tuple from `os` and an undefined
# name `getenv`, which raises NameError; attribute access is what was meant.)
data_path = os.getenv("data_path")
if not data_path:
    raise RuntimeError(
        "Environment variable 'data_path' is not set; define it in .env "
        "or in the environment before running this notebook."
    )

# Later cells build file names by direct concatenation (f'{data_path}file.nc'),
# so make sure the directory ends with a path separator. A value that already
# ends with one is returned unchanged.
data_path = os.path.join(data_path, "")

## Steps in the Code

1. **Environment Setup:**
   - Loads environment variables, including the data path, using dotenv.
   
2. **Data Retrieval and Preprocessing (SPI-3):**
   - Opens the SPI-3 forecast dataset (`ea_seas51_spi3_xclim_20240306.nc`) and selects the region of interest based on latitude and longitude bounds.
   - Renames longitude and latitude dimensions to `lon` and `lat` respectively.
   - Opens the SPI-3 observed dataset (`ea_chrips_spi3_xclim_20240306.nc`) and selects the region of interest.
   - Sets up containers for regridding data.
   - Iterates over lead times, performs bilinear regridding of forecast SPI-3 data to match the observed grid, and collects each regridded dataset in a list.
   - Concatenates regridded forecast SPI-3 data along the lead dimension and saves it.
   - Saves observed SPI-3 data.
   
3. **Data Retrieval and Preprocessing (SPI-4):**
   - Opens the SPI-4 forecast dataset (`ea_seas51_spi4_xclim_20240306.nc`) and selects the region of interest based on latitude and longitude bounds.
   - Renames longitude and latitude dimensions to `lon` and `lat` respectively.
   - Opens the SPI-4 observed dataset (`ea_chrips_spi4_xclim_20240306.nc`) and selects the region of interest.
   - Sets up containers for regridding data.
   - Iterates over lead times, performs bilinear regridding of forecast SPI-4 data to match the observed grid, and collects each regridded dataset in a list.
   - Concatenates regridded forecast SPI-4 data along the lead dimension and saves it.
   - Saves observed SPI-4 data.

## for SPI3

In [None]:
# Bounding box of the study region, in degrees.
llat=-2.5
llon=30.0
ulat=7.5
ulon=42.5

# Forecast SPI-3, clipped to the bounding box. The latitude slice runs from the
# upper to the lower bound, which only selects data when the latitude axis is
# stored north-to-south -- NOTE(review): confirm against the forecast file.
fcst = xr.open_dataset(os.path.join(data_path, 'ea_seas51_spi3_xclim_20240306.nc'))
kn_fc = fcst.sel(longitude=slice(llon, ulon), latitude=slice(ulat, llat))

# Extent actually covered by the forecast grid points inside the box.
fc_llon = kn_fc.longitude.min().values
fc_ulon = kn_fc.longitude.max().values
fc_llat = kn_fc.latitude.min().values
fc_ulat = kn_fc.latitude.max().values

# Observed SPI-3, clipped to the forecast extent. The slice is low-to-high,
# so it presumes an ascending observed latitude axis -- TODO confirm.
obs = xr.open_dataset(os.path.join(data_path, 'ea_chrips_spi3_xclim_20240306.nc'))
kn_obs = obs.sel(lon=slice(fc_llon, fc_ulon), lat=slice(fc_llat, fc_ulat))

# Target grid: the cell centres of the observed subset.
ds_out = xr.Dataset(
    {"lat": (["lat"], kn_obs['lat'].values, {"units": "degrees_north"}),
     "lon": (["lon"], kn_obs['lon'].values, {"units": "degrees_east"}),})

# The source and target grids are the same for every lead, so the regridding
# weights are computed once here and reused inside the loop instead of being
# rebuilt on each iteration.
fc_lonlat = kn_fc.rename({'longitude': 'lon', 'latitude': 'lat'})
regridder = xe.Regridder(fc_lonlat, ds_out, "bilinear")

# One regridded dataset per lead, in lead order.
cont_d = []
for fm in [0, 1, 2, 3, 4, 5]:
    lead_field = fc_lonlat["spi3"].sel(lead=fm)
    cont_d.append(regridder(lead_field, keep_attrs=True).to_dataset())

kn_fct = xr.concat(cont_d, dim='lead')

# Rename towards the `init` / `lead` naming (climpred's convention for
# hindcasts). `lead` already exists at this point (it was selected on and
# concatenated along above), and xarray's rename raises ValueError for a
# missing source name or an already-taken target name, so only the entries
# that can actually be applied are passed on.
wanted_names = {'time': 'init', 'forecastMonth': 'lead'}
existing_names = set(kn_fct.variables) | set(kn_fct.dims)
kn_fct = kn_fct.rename({
    old: new
    for old, new in wanted_names.items()
    if old in existing_names and new not in existing_names
})
kn_fct['lead'].attrs['units'] = 'months'

# Persist the regridded forecast and the matching observed subset.
kn_fct.to_netcdf(os.path.join(data_path, 'kn_fct_spi3.nc'))
kn_obs.to_netcdf(os.path.join(data_path, 'kn_obs_spi3.nc'))

## for SPI4

In [None]:
# Forecast SPI-4, clipped to the study region. llat/llon/ulat/ulon are the
# bounding-box values assigned in the SPI-3 cell. The latitude slice runs from
# the upper to the lower bound, which only selects data when the latitude axis
# is stored north-to-south -- NOTE(review): confirm against the forecast file.
fcst = xr.open_dataset(os.path.join(data_path, 'ea_seas51_spi4_xclim_20240306.nc'))
kn_fc = fcst.sel(longitude=slice(llon, ulon), latitude=slice(ulat, llat))

# Extent actually covered by the forecast grid points inside the box.
fc_llon = kn_fc.longitude.min().values
fc_ulon = kn_fc.longitude.max().values
fc_llat = kn_fc.latitude.min().values
fc_ulat = kn_fc.latitude.max().values

# Observed SPI-4, clipped to the forecast extent. The slice is low-to-high,
# so it presumes an ascending observed latitude axis -- TODO confirm.
obs = xr.open_dataset(os.path.join(data_path, 'ea_chrips_spi4_xclim_20240306.nc'))
kn_obs = obs.sel(lon=slice(fc_llon, fc_ulon), lat=slice(fc_llat, fc_ulat))

# Target grid: the cell centres of the observed subset.
ds_out = xr.Dataset(
    {"lat": (["lat"], kn_obs['lat'].values, {"units": "degrees_north"}),
     "lon": (["lon"], kn_obs['lon'].values, {"units": "degrees_east"}),})

# The source and target grids are the same for every lead, so the regridding
# weights are computed once here and reused inside the loop instead of being
# rebuilt on each iteration.
fc_lonlat = kn_fc.rename({'longitude': 'lon', 'latitude': 'lat'})
regridder = xe.Regridder(fc_lonlat, ds_out, "bilinear")

# One regridded dataset per lead, in lead order.
cont_d = []
for fm in [0, 1, 2, 3, 4, 5]:
    lead_field = fc_lonlat["spi4"].sel(lead=fm)
    cont_d.append(regridder(lead_field, keep_attrs=True).to_dataset())

kn_fct = xr.concat(cont_d, dim='lead')

# Rename towards the `init` / `lead` naming (climpred's convention for
# hindcasts). `lead` already exists at this point (it was selected on and
# concatenated along above), and xarray's rename raises ValueError for a
# missing source name or an already-taken target name, so only the entries
# that can actually be applied are passed on.
wanted_names = {'time': 'init', 'forecastMonth': 'lead'}
existing_names = set(kn_fct.variables) | set(kn_fct.dims)
kn_fct = kn_fct.rename({
    old: new
    for old, new in wanted_names.items()
    if old in existing_names and new not in existing_names
})
kn_fct['lead'].attrs['units'] = 'months'

# Persist the regridded forecast and the matching observed subset.
kn_fct.to_netcdf(os.path.join(data_path, 'kn_fct_spi4.nc'))
kn_obs.to_netcdf(os.path.join(data_path, 'kn_obs_spi4.nc'))