In [1]:
import subprocess
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import xarray as xr

In [2]:
# Download the CMAP monthly precipitation estimate (prcp_est) from the IRI Data
# Library and open it. The URL selects Y 12..37 / X 32..60 and requests the time
# axis as "days since 1960-01-01".
cmap_url = "https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.Merged_Analysis/.monthly/.latest/.ver1/.prcp_est/X/-180/1/179/GRID/Y/-90/1/90/GRID/Y/12/37/RANGE/X/32/60/RANGE/T/(days%20since%201960-01-01)/streamgridunitconvert/data.nc"
cmap_path = 'download/cmap_precip.nc'

print(cmap_url)
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as if it were the NetCDF file; --create-dirs creates the output directory
# when it is missing; check=True turns a non-zero exit into CalledProcessError so
# a failed download stops here rather than surfacing later in open_dataset.
# NOTE(review): '-k' disables TLS certificate verification -- kept from the
# original command; drop it if the server certificate validates.
subprocess.run(
    ['curl', '--fail', '--create-dirs', '-b', 'cookies.txt', '-k', cmap_url, '-o', cmap_path],
    check=True,
)

cmap = xr.open_dataset(cmap_path, decode_times=True)
# Snap every timestamp to the first day of its month: format each date with the
# day replaced by "01", then parse the strings back into datetimes.
new_dates = pd.to_datetime(cmap['T'].values, format="%d-%m-%Y").strftime("01-%m-%Y")
cmap['T'] = pd.to_datetime(new_dates, format="%d-%m-%Y")




https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.Merged_Analysis/.monthly/.latest/.ver1/.prcp_est/X/-180/1/179/GRID/Y/-90/1/90/GRID/Y/12/37/RANGE/X/32/60/RANGE/T/(days%20since%201960-01-01)/streamgridunitconvert/data.nc


In [3]:
# Show the opened dataset (bare last expression -> notebook rich display).
cmap

In [12]:
# Annual-mean precipitation: average the monthly fields within each calendar
# year, then rename the resulting 'year' dimension back to 'T' so downstream
# code keeps using the same time-axis name.
monthly_prcp = cmap['prcp_est']
cmap_annual = monthly_prcp.groupby('T.year').mean('T').rename({'year': 'T'})


In [24]:
# Standardized Precipitation Index (SPI) of the annual-mean precipitation,
# computed independently for every grid cell:
#   1. cells with no data, or that are too dry in the climatology window,
#      are skipped and stay NaN;
#   2. a Pearson type III distribution is fitted to the cell's annual series;
#   3. the fitted CDF, adjusted for the fraction of exact zeros, is mapped
#      through the inverse standard-normal CDF.
# NOTE(review): the distribution is fitted to the full record; the climatology
# window is only used for the dryness screen -- confirm this is intended.
CLIMO_FIRST_YEAR, CLIMO_LAST_YEAR = 1991, 2020
# presumably 1 mm/month expressed in mm/day -- TODO confirm prcp_est units
DRY_THRESHOLD = 1 / 30
# Skip a cell when more than this fraction of climatology years is below
# DRY_THRESHOLD.
MAX_DRY_FRACTION = 0.1
# Keep the CDF strictly inside (0, 1) so norm.ppf never returns +/-inf.
CDF_MIN, CDF_MAX = 0.0001, 0.9999

x_range = cmap_annual.X.values
y_range = cmap_annual.Y.values
# After groupby('T.year') the T coordinate holds integer years, so the
# climatology window is selected by comparing integers; slicing that axis with
# date strings such as '1991-01-01' does not pick the intended years.
years = cmap_annual['T'].values

T_size = len(years)
spi_storage = np.full((T_size, len(y_range), len(x_range)), np.nan)  # Pre-fill with NaNs

climo_mask = (years >= CLIMO_FIRST_YEAR) & (years <= CLIMO_LAST_YEAR)

# Extract the data once as a (T, Y, X) array instead of calling .sel() for
# every grid cell inside the loop.
annual_values = cmap_annual.transpose('T', 'Y', 'X').values

for i in range(len(y_range)):
    for j in range(len(x_range)):
        series = annual_values[:, i, j]
        valid = series[np.isfinite(series)]
        if valid.size == 0:
            # Nothing to fit: the cell keeps its NaN pre-fill.
            continue

        # Dryness screen on the climatology years only.
        climo = series[climo_mask]
        if climo.size and np.count_nonzero(climo < DRY_THRESHOLD) / climo.size > MAX_DRY_FRACTION:
            continue

        # Fit on finite values only (scipy's fit rejects non-finite data); the
        # CDF is still evaluated on the full series so missing years stay NaN.
        skew, loc, scale = sp.stats.pearson3.fit(valid)
        cdf_values = sp.stats.pearson3.cdf(series, skew=skew, loc=loc, scale=scale)

        # Mixed distribution: q is the observed probability of an exact zero.
        q = np.count_nonzero(valid == 0) / valid.size
        cdf_values = cdf_values * (1 - q) + q
        cdf_values = np.clip(cdf_values, CDF_MIN, CDF_MAX)

        # Convert CDF to SPI using the inverse normal distribution (PPF).
        spi_storage[:, i, j] = sp.stats.norm.ppf(cdf_values)

# Wrap the SPI values in a DataArray sharing the annual series' coordinates.
spi = xr.DataArray(spi_storage, dims=['T', 'Y', 'X'],
                   coords={'T': cmap_annual['T'], 'Y': y_range, 'X': x_range})



In [26]:
# Set to False to skip writing the SPI field to disk.
save = True
if save:
    spi_path = Path('data/spi12_cmap_1x1.nc')
    # Make sure the output directory exists before writing, so a fresh checkout
    # without a data/ folder does not fail at the very last step.
    spi_path.parent.mkdir(parents=True, exist_ok=True)
    spi.to_netcdf(spi_path)