In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import rioxarray as rxr
import scipy.stats as stats
import geopandas as gpd
import seaborn as sns

In [2]:
# Open the gridded rainfall NetCDF file with rioxarray; the path is relative to
# the notebook's working directory.
# NOTE(review): the file name suggests IMD 0.25-degree data for 1979-2023 — confirm.
raster_ds = rxr.open_rasterio("RF_IMD0.25_1979_2023.nc")

In [3]:
# Load the state-boundary shapefile and keep only the geometry of the row whose
# "name" is "Kerala"; that geometry is what the raster is clipped to.
# The path is written as a raw string: "\d" and "\I" are not valid escape
# sequences, so the plain literal only worked by accident and triggers a
# SyntaxWarning on Python 3.12+. The resulting path value is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory.
india = gpd.read_file(r"D:\daily rainfall data (imd)\India%3A_State_Boundary_2021_\India%3A_State_Boundary_2021_.shp")
kerala = india[india["name"] == "Kerala"]
kerala_geom = kerala.geometry
kerala_geom

1    POLYGON ((74.99675 12.78919, 74.99718 12.78918...
Name: geometry, dtype: geometry

In [4]:
# Tag the raster with the EPSG:4326 CRS (this modifies raster_ds in place),
# clip it to the Kerala geometry, and convert the clipped result to a dataset.
# NOTE(review): assumes the shapefile geometry is also in EPSG:4326 — confirm.
raster_ds.rio.write_crs("EPSG:4326", inplace=True)
kerala_clipped = raster_ds.rio.clip(kerala_geom)
klds = kerala_clipped.to_dataset()
klds

In [5]:
# Replace negative RAINFALL values with NaN; values where the condition is
# False are kept unchanged.
# NOTE(review): negative values are presumably a missing-data sentinel — confirm.
bad_data_condition = klds.data_vars["RAINFALL"] < 0
klds = klds.where(~bad_data_condition, np.nan)
klds

In [6]:
# Keep only the time steps whose month is 6, 7, 8 or 9 (June-September, "JJAS").
jjas_dataset = klds.sel(TIME=klds["TIME.month"].isin(range(6,10)))
jjas_dataset

In [7]:
# Flatten the JJAS dataset into a table and drop every row that contains a NaN.
# to_dataframe() already returns a pandas DataFrame, so it is not wrapped in
# another pd.DataFrame(...) call.
jjas_df = jjas_dataset.to_dataframe().dropna()
jjas_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RAINFALL,spatial_ref
TIME,y,x,Unnamed: 3_level_1,Unnamed: 4_level_1
1979-06-01 00:00:00,12.50,75.00,0.000000,0
1979-06-01 00:00:00,12.50,75.25,0.176546,0
1979-06-01 00:00:00,12.25,75.25,0.297413,0
1979-06-01 00:00:00,12.25,75.50,0.548583,0
1979-06-01 00:00:00,12.00,75.50,0.425613,0
...,...,...,...,...
2023-09-30 00:00:00,9.00,76.75,42.400002,0
2023-09-30 00:00:00,9.00,77.00,41.456497,0
2023-09-30 00:00:00,8.75,76.75,44.043106,0
2023-09-30 00:00:00,8.75,77.00,18.729004,0


In [8]:
# Discard the spatial index levels so rows are indexed by TIME only, then keep
# just the entries whose rainfall is not exactly zero.
jjas = (
    jjas_df
    .reset_index(level=['x', 'y'], drop=True)
    .loc[lambda frame: frame['RAINFALL'] != 0]
)
jjas

Unnamed: 0_level_0,RAINFALL,spatial_ref
TIME,Unnamed: 1_level_1,Unnamed: 2_level_1
1979-06-01 00:00:00,0.176546,0
1979-06-01 00:00:00,0.297413,0
1979-06-01 00:00:00,0.548583,0
1979-06-01 00:00:00,0.425613,0
1979-06-01 00:00:00,0.100656,0
...,...,...
2023-09-30 00:00:00,42.400002,0
2023-09-30 00:00:00,41.456497,0
2023-09-30 00:00:00,44.043106,0
2023-09-30 00:00:00,18.729004,0


In [9]:
# 95th and 99th percentiles of the non-zero JJAS rainfall values, each computed
# with its own np.percentile call.
perc95, perc99 = (np.percentile(jjas['RAINFALL'], q) for q in (95, 99))
print(f'95th percentile: {perc95}, 99th percentile: {perc99}')

95th percentile: 61.89989738464355, 99th percentile: 106.52830734252915


In [10]:
def coeff_var(df, threshold, start_year=1979, end_year=2023):
    """Compute the yearly coefficient of variation of rainfall above a threshold.

    For every year from ``start_year`` to ``end_year`` (inclusive) the
    ``RAINFALL`` values strictly greater than ``threshold`` are selected and
    ``std / mean`` is computed with the pandas defaults (so ``std`` is the
    sample standard deviation).

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``RAINFALL`` column and an index exposing ``.year``
        (e.g. a ``DatetimeIndex``).
    threshold : float
        Only values strictly greater than this are used.
    start_year, end_year : int
        Inclusive range of years to report; defaults reproduce 1979-2023.

    Returns
    -------
    list
        One entry per year, in order. The entry is NaN when the year has no
        value above the threshold, or when ``std`` is undefined (a single
        value), so the list always stays aligned with the years.

    Side effects
    ------------
    Prints the years with no qualifying values and the years whose result
    is NaN.
    """
    exceed = df.loc[df['RAINFALL'] > threshold, 'RAINFALL']
    # Loop-invariant: the year of every remaining sample.
    sample_years = exceed.index.year

    cv = []
    empty_years = []
    nan_years = []
    for year in range(start_year, end_year + 1):
        values = exceed[sample_years == year]
        if values.empty:
            empty_years.append(year)
            ratio = np.nan
        else:
            # With a single value the sample std is NaN, which propagates
            # into the ratio; that year is then reported as a NaN year.
            ratio = values.std() / values.mean()
        cv.append(ratio)
        # NaN never compares equal (or unequal in a useful way) to anything,
        # so it has to be detected with np.isnan rather than == / !=.
        if np.isnan(ratio):
            nan_years.append(year)

    # Years are plain Python ints so the message looks the same regardless of
    # how the installed NumPy version renders its integer scalars.
    print(f"empty years: {empty_years}, nan years: {nan_years}")
    return cv


In [11]:
# Yearly coefficient of variation of the JJAS rainfall values above 150
# (same units as the RAINFALL column).
coeff_var(jjas, 150)

empty years: [2011, 2012, 2021], nan years: [2002, 2011, 2012, 2015, 2021]


[0.18226899706973954,
 0.09327695742646969,
 0.25173515282296827,
 0.21015170894237603,
 0.30357376764665167,
 0.2630834247819538,
 0.19086425358214953,
 0.1892934353669674,
 0.049319702160250785,
 0.12126738606887151,
 0.28780403416356304,
 0.21374824063876127,
 0.13582655067786564,
 0.3546911832148557,
 0.07508472742462631,
 0.20651672721031605,
 0.10481075965222156,
 0.09871552547150318,
 0.1109824059178796,
 0.13282716756303453,
 0.3339247335560158,
 0.1612789687711367,
 0.10242572632611467,
 nan,
 0.1255584266769585,
 0.11671460483720153,
 0.15391499551461577,
 0.1668048936995869,
 0.22052919595239387,
 0.10708493278442424,
 0.15003979837476963,
 0.0442959047323486,
 nan,
 nan,
 0.16103792689793653,
 0.06331733429785598,
 nan,
 0.03443208062736003,
 0.25669341966920123,
 0.1920603681563388,
 0.22278740664574948,
 0.21143498028961027,
 nan,
 0.0696083524409827,
 0.08707832631170763]

In [12]:
def nan_year(cv_list, start_year=1979):
    """Return the years whose entry in ``cv_list`` is NaN.

    Parameters
    ----------
    cv_list : sequence of float
        One value per consecutive year; ``cv_list[0]`` belongs to
        ``start_year``. Any length is accepted.
    start_year : int, default 1979
        Year that the first entry corresponds to.

    Returns
    -------
    list of int
        The matching years in ascending order, as plain Python ints.
    """
    return [
        start_year + offset
        for offset, value in enumerate(cv_list)
        if np.isnan(value)
    ]

In [13]:
# List the years whose coefficient of variation is NaN for the 150 threshold.
# This calls coeff_var again, so its summary line is printed once more.
nan_year(coeff_var(jjas, 150))

empty years: [2011, 2012, 2021], nan years: [2002, 2011, 2012, 2015, 2021]


[2002, 2011, 2012, 2015, 2021]