In [1]:
#!pip install geopandas
#!pip install imdlib
#!pip install rasterio

In [2]:
import geopandas as gpd
import imdlib as imd
import xarray as xr
import numpy as np
import pandas as pd

In [3]:
file_name = r"C:\Users\Kanishk Goyal\OneDrive - IIT Kanpur\Desktop\Prof. Hamim Zafar\Data\Shape file for Districts India\output.shp"
gdf = gpd.read_file(file_name)

In [4]:
statename = 'Uttar Pradesh'
my = gdf[gdf.statename == statename]

In [5]:
dist_list = pd.Series(my['distname'])
pd.set_option('display.max_columns', None)
dist_list_values = dist_list.tolist()

In [7]:
start_yr = 1966
end_yr = 2022

variable_tmin = 'tmin' # other options are ('tmin'/ 'tmax')
variable_tmax = 'tmax'
variable_rain = 'rain'

data_tmin = imd.get_data(variable_tmin, start_yr, end_yr, fn_format='yearwise')
data_tmax = imd.get_data(variable_tmax, start_yr, end_yr, fn_format='yearwise')
data_rain = imd.get_data(variable_rain, start_yr, end_yr, fn_format='yearwise')
ds_tmin = data_tmin.get_xarray() # Converting to xarray is also required for SLICING, for a date.
ds_tmax = data_tmax.get_xarray() # Converting to xarray is also required for SLICING, for a date.
ds_rain = data_rain.get_xarray() # Converting to xarray is also required for SLICING, for a date.

Downloading: mintemp for year 1966


Downloading: mintemp for year 1967
Downloading: mintemp for year 1968
Downloading: mintemp for year 1969
Downloading: mintemp for year 1970
Downloading: mintemp for year 1971
Downloading: mintemp for year 1972
Downloading: mintemp for year 1973
Downloading: mintemp for year 1974
Downloading: mintemp for year 1975
Downloading: mintemp for year 1976
Downloading: mintemp for year 1977
Downloading: mintemp for year 1978
Downloading: mintemp for year 1979
Downloading: mintemp for year 1980
Downloading: mintemp for year 1981
Downloading: mintemp for year 1982
Downloading: mintemp for year 1983
Downloading: mintemp for year 1984
Downloading: mintemp for year 1985
Downloading: mintemp for year 1986
Downloading: mintemp for year 1987
Downloading: mintemp for year 1988
Downloading: mintemp for year 1989
Downloading: mintemp for year 1990
Downloading: mintemp for year 1991
Downloading: mintemp for year 1992
Downloading: mintemp for year 1993
Downloading: mintemp for year 1994
Downloading: mintemp

In [8]:
def mask_avg(xarray_,district_shape):
    import rasterio
    import rasterio.features
    import numpy as np
    lon_values = xarray_.lon.values
    lat_values = xarray_.lat.values
    lon_spacing = lon_values[1] - lon_values[0]
    lat_spacing = lat_values[1] - lat_values[0]
    transform = rasterio.transform.from_origin(lon_values[0], lat_values[-1], lon_spacing, lat_spacing)

    mask = rasterio.features.geometry_mask(district_shape, out_shape=(xarray_.lat.size, xarray_.lon.size),
                                       transform=transform, invert=True)

    masked_xarray = xarray_.where(mask)

    for var_name in masked_xarray.data_vars:
        alpha = np.nanmean(masked_xarray[var_name])
    
    return alpha

In [9]:
def values_closest(xarray_,district_shape,type):
    import geopandas as gpd
    import xarray as xr
    ds = xarray_
    centroid = district_shape.centroid
    a = centroid.x
    b = centroid.y 
    a = list(a)
    b = list(b)
    lat = b[0]
    lon = a[0]

    idxs = (np.abs(ds.lon - lon) + np.abs(ds.lat - lat)).argmin(dim = ["lat", "lon"])
    values_ = ds[type].isel(lat=idxs['lat'].item(), lon=idxs['lon'].item()).values
    
    return values_

In [10]:
def avg_value(year,district_shape):

    list_march_rain = []
    list_april_rain = []
    list_march_tmin = []
    list_april_tmin = []
    list_march_tmax = []
    list_april_tmax = []

    #print(1)

    for i in range(31):
        time_str = f'{year}-03-{i+1:02d}'
        d_tmin_march = ds_tmin.sel(time=time_str)
        d_tmax_march = ds_tmax.sel(time=time_str)
        d_rain_march = ds_rain.sel(time=time_str)
        list_march_tmin.append(d_tmin_march)
        list_march_tmax.append(d_tmax_march)
        list_march_rain.append(d_rain_march)
    
    for i, ds in enumerate(list_march_tmin):
        list_march_tmin[i] = ds.where(ds < 99, np.nan)
    for i, ds in enumerate(list_march_tmax):
        list_march_tmax[i] = ds.where(ds < 99, np.nan)  
    for i, ds in enumerate(list_march_rain):
        list_march_rain[i] = ds.where(ds != -999, np.nan) 

    march_tmin = xr.concat(list_march_tmin, dim='dataset').mean(dim='dataset')
    march_tmax = xr.concat(list_march_tmax, dim='dataset').mean(dim='dataset')
    march_rain = xr.concat(list_march_rain, dim='dataset').mean(dim='dataset')
    
    tmin_march = mask_avg(march_tmin,district_shape)
    tmax_march = mask_avg(march_tmax,district_shape)
    rain_march = mask_avg(march_rain,district_shape)

    for j in range(30):
        time_str = f'{year}-04-{j+1:02d}'
        d_tmin_april = ds_tmin.sel(time=time_str)
        d_tmax_april = ds_tmax.sel(time=time_str)
        d_rain_april = ds_rain.sel(time=time_str)
        list_april_tmin.append(d_tmin_april)
        list_april_tmax.append(d_tmax_april)
        list_april_rain.append(d_rain_april)
    
    for i, ds in enumerate(list_april_tmin):
        list_april_tmin[i] = ds.where(ds < 99, np.nan)
    for i, ds in enumerate(list_april_tmax):
        list_april_tmax[i] = ds.where(ds < 99, np.nan)  
    for i, ds in enumerate(list_april_rain):
        list_april_rain[i] = ds.where(ds != -999, np.nan) 

    april_tmin = xr.concat(list_april_tmin, dim='dataset').mean(dim='dataset')
    april_tmax = xr.concat(list_april_tmax, dim='dataset').mean(dim='dataset')
    april_rain = xr.concat(list_april_rain, dim='dataset').mean(dim='dataset')
    
    tmin_april = mask_avg(april_tmin,district_shape)
    tmax_april = mask_avg(april_tmax,district_shape)
    rain_april = mask_avg(april_rain,district_shape)

    #print(2)



    return tmin_march,tmax_march,rain_march, tmin_april,tmax_april,rain_april

In [11]:
def closest(year,district_shape):

    import geopandas as gpd
    import xarray as xr
    
    list_march_rain = []
    list_april_rain = []
    list_march_tmin = []
    list_april_tmin = []
    list_march_tmax = []
    list_april_tmax = []


    for i in range(31):
        time_str = f'{year}-03-{i+1:02d}'
        d_tmin_march = ds_tmin.sel(time=time_str)
        d_tmax_march = ds_tmax.sel(time=time_str)
        d_rain_march = ds_rain.sel(time=time_str)
        list_march_tmin.append(d_tmin_march)
        list_march_tmax.append(d_tmax_march)
        list_march_rain.append(d_rain_march)
    
    for i, ds in enumerate(list_march_tmin):
        list_march_tmin[i] = ds.where(ds < 99, np.nan)
    for i, ds in enumerate(list_march_tmax):
        list_march_tmax[i] = ds.where(ds < 99, np.nan)  
    for i, ds in enumerate(list_march_rain):
        list_march_rain[i] = ds.where(ds != -999, np.nan) 

    march_tmin = xr.concat(list_march_tmin, dim='dataset').mean(dim='dataset')
    march_tmax = xr.concat(list_march_tmax, dim='dataset').mean(dim='dataset')
    march_rain = xr.concat(list_march_rain, dim='dataset').mean(dim='dataset')
    tmin_march = values_closest(march_tmin,district_shape,variable_tmin)
    tmax_march = values_closest(march_tmax,district_shape,variable_tmax)
    rain_march = values_closest(march_rain,district_shape,variable_rain)

    for j in range(30):
        time_str = f'{year}-04-{j+1:02d}'
        d_tmin_april = ds_tmin.sel(time=time_str)
        d_tmax_april = ds_tmax.sel(time=time_str)
        d_rain_april = ds_rain.sel(time=time_str)
        list_april_tmin.append(d_tmin_april)
        list_april_tmax.append(d_tmax_april)
        list_april_rain.append(d_rain_april)
    
    for i, ds in enumerate(list_april_tmin):
        list_april_tmin[i] = ds.where(ds < 99, np.nan)
    for i, ds in enumerate(list_april_tmax):
        list_april_tmax[i] = ds.where(ds < 99, np.nan)  
    for i, ds in enumerate(list_april_rain):
        list_april_rain[i] = ds.where(ds != -999, np.nan) 

    april_tmin = xr.concat(list_april_tmin, dim='dataset').mean(dim='dataset')
    april_tmax = xr.concat(list_april_tmax, dim='dataset').mean(dim='dataset')
    april_rain = xr.concat(list_april_rain, dim='dataset').mean(dim='dataset')
    
    tmin_april = values_closest(april_tmin,district_shape,'tmin')
    tmax_april = values_closest(april_tmax,district_shape,'tmax')
    rain_april = values_closest(april_rain,district_shape,'rain')


    return tmin_march,tmax_march,rain_march, tmin_april,tmax_april,rain_april

In [13]:
data = []
for district in dist_list_values:
    adf = my[my.distname==district]
    a_shape = adf.geometry

    for i in range(57):
        year = 1966 + i
        values = avg_value(year,a_shape)
        if not np.isnan(values[0]):
            row_data = {'Year': year, 'District':district, 'tmin_march': values[0], 'tmax_march': values[1], 'rain_march': values[2],
                    'tmin_april': values[3], 'tmax_april': values[4], 'rain_april': values[5]}
            data.append(row_data)
        else:
            values = closest(year,a_shape)
            row_data = {'Year': year, 'District':district, 'tmin_march': values[0], 'tmax_march': values[1], 'rain_march': values[2],
                    'tmin_april': values[3], 'tmax_april': values[4], 'rain_april': values[5]}
            data.append(row_data)
            
        #print(i)
df = pd.DataFrame(data)


  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid = district_shape.centroid

  centroid 

In [14]:
df

Unnamed: 0,Year,District,tmin_march,tmax_march,rain_march,tmin_april,tmax_april,rain_april
0,1966,Ambedkar Nagar,15.380322548650927,33.51032275538291,0.0,22.08166650136312,39.68033294677734,0.0
1,1967,Ambedkar Nagar,13.927096705282889,30.376774018810643,1.7050276840886762,20.926333395640054,36.87700030008952,0.7340999881426493
2,1968,Ambedkar Nagar,15.461935474026587,32.100645249889745,0.040495184160047965,20.814000129699707,37.47766672770182,0.0
3,1969,Ambedkar Nagar,17.11129016260947,35.44225791192824,0.0,22.21099999745687,39.36333338419596,0.41436629692713417
4,1970,Ambedkar Nagar,15.454193545926001,31.706129258678807,0.26042112227409114,21.85000009536743,39.08499972025553,0.0
...,...,...,...,...,...,...,...,...
4270,2018,Allahabad,20.802032,36.479344,0.012335,24.534117,39.073861,1.645486
4271,2019,Allahabad,21.23177,36.156461,0.005734,24.562214,40.184507,0.871625
4272,2020,Allahabad,20.505407,34.554676,1.250154,23.242434,38.432839,2.094432
4273,2021,Allahabad,20.174568,36.291604,0.356478,23.780184,38.636488,0.902982


In [15]:
df.to_csv(r"C:\Users\Kanishk Goyal\OneDrive - IIT Kanpur\Desktop\Prof. Hamim Zafar\Data\temp&rain\final.csv", index=False)