In [87]:
from netCDF4 import Dataset
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import geopandas as gpd

# Climate projections

In [52]:
#Climate projections (rsds)
#Extracts the rsds time series at the grid cells listed in coors_inds.csv and
#writes them, one column per cell, to rsds_test.csv

#Read needed indexes
df_coor = pd.read_csv('coors_inds.csv')
#Modify indexes so they address the sub-array cut out of the NetCDF file
df_coor['lat_ind-lat_ind_min'] = df_coor['lat_ind'] - df_coor['lat_ind'].min()
df_coor['lon_ind-lon_ind_min'] = df_coor['lon_ind'] - df_coor['lon_ind'].min()

#Bounding box (in grid indexes) that contains every requested cell
lat_ind_min = df_coor['lat_ind'].min()
lat_ind_max = df_coor['lat_ind'].max()
lon_ind_min = df_coor['lon_ind'].min()
lon_ind_max = df_coor['lon_ind'].max()

#Read netCDF file. Only the bounding box is loaded, and it is loaded inside the
#`with` block so the file handle is closed as soon as reading is done
with Dataset(r'data.nc') as data:
    rsds_cut = data.variables['rsds'][:, lat_ind_min:lat_ind_max+1, lon_ind_min:lon_ind_max+1]
    #lat/lon are 2-D coordinate arrays indexed as [lat_ind, lon_ind]
    lat_cut = data.variables['lat'][lat_ind_min:lat_ind_max+1, lon_ind_min:lon_ind_max+1]
    lon_cut = data.variables['lon'][lat_ind_min:lat_ind_max+1, lon_ind_min:lon_ind_max+1]

#Write rsds_cut into Dataframe
#NOTE(review): the period is hard-coded and assumes one time step per calendar day - confirm against the file
date_range = pd.date_range(start = '2006-01-01', end = '2010-12-31')
if len(date_range) != rsds_cut.shape[0]:
    raise ValueError(
        f'data.nc has {rsds_cut.shape[0]} time steps but the date range has {len(date_range)} days'
    )

#Collect all columns first and build the frame once. Columns are addressed by
#name, so the result does not depend on the column order of the CSV
rsds_columns = {}
for lat_off, lon_off in zip(df_coor['lat_ind-lat_ind_min'], df_coor['lon_ind-lon_ind_min']):
    lat_coor = round(float(lat_cut[lat_off, lon_off]), 3)
    lon_coor = round(float(lon_cut[lat_off, lon_off]), 3)
    rsds_columns[f'{lat_coor}_{lon_coor}'] = rsds_cut[:, lat_off, lon_off]
df = pd.DataFrame(rsds_columns, index = date_range)

df.to_csv('rsds_test.csv')



# ERA5_Land

In [94]:
#ERA5_Land. PRECIPITATION (tp)
#Builds one daily precipitation column (mm) per ERA5-Land node, keeps the nodes
#that fall inside the basin polygons and writes them to ERA5_Land/pcp_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block
#(faster than reading node by node), so the file handle is closed right after
with Dataset(r'netCDF/tp/adaptor.mars.internal-1627375828.403961-28083-10-198623cc-e22b-4c80-96d2-e5747a54029b.nc') as data:
    tp = data.variables['tp'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Convert m to mm for the whole grid at once
tp_mm = (tp * 1000).round(2)
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': tp_mm[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Keep only the 00:00 records and label each with the previous calendar day
#(presumably because the 00:00 value carries the accumulation of the day that just ended - verify)
df_pcp = df[df.index.hour == 0].copy()
df_pcp.index = df_pcp.index - timedelta(days=1)

#Clip the collected data to the outline of the basins of Ukraine
#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_pcp.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_pcp_cut = df_pcp[cols_ERA5_land]
df_pcp_cut.to_csv('ERA5_Land/pcp_era5_land.csv')

In [98]:
#ERA5_Land. SOLAR RADIATION (ssrd)
#Builds one daily solar radiation column (MJ/m2) per ERA5-Land node, keeps the nodes
#that fall inside the basin polygons and writes them to ERA5_Land/ssrd_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block,
#so the file handle is closed as soon as reading is done
with Dataset(r'netCDF/rsds/adaptor.mars.internal-1627380447.5173392-25370-3-0cadde8b-a272-4ace-af7b-3f44eba85ea2.nc') as data:
    ssrd = data.variables['ssrd'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Convert J/m2 to MJ/m2 for the whole grid at once
ssrd_mj = (ssrd * 0.000001).round(2)
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': ssrd_mj[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Keep only the 00:00 records and label each with the previous calendar day
#(presumably because the 00:00 value carries the accumulation of the day that just ended - verify)
df_ssrd = df[df.index.hour == 0].copy()
df_ssrd.index = df_ssrd.index - timedelta(days=1)

#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_ssrd.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_ssrd_cut = df_ssrd[cols_ERA5_land]
df_ssrd_cut.to_csv('ERA5_Land/ssrd_era5_land.csv')

In [100]:
#ERA5_Land. AIR TEMPERATURE (t2m)
#Builds one daily mean air temperature column (deg C) per ERA5-Land node, keeps the nodes
#that fall inside the basin polygons and writes them to ERA5_Land/t2m_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block,
#so the file handle is closed as soon as reading is done
with Dataset(r'netCDF/tmp/adaptor.mars.internal-1627397947.7911344-16200-12-142649c4-3484-4428-a305-b07909a16e7b.nc') as data:
    t2m = data.variables['t2m'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Convert K to Celsius for the whole grid at once
t2m_celsius = (t2m - 273.15).round(2)
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': t2m_celsius[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Daily mean of the sub-daily records
df_t2m = df.resample('D').mean().round(2)

#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_t2m.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_t2m_cut = df_t2m[cols_ERA5_land]
df_t2m_cut.to_csv('ERA5_Land/t2m_era5_land.csv')

11.498019992605716

In [None]:
#ERA5_Land. SNOW DEPTH (sde)
#Builds one daily mean snow depth column (mm, integer) per ERA5-Land node, keeps the nodes
#that fall inside the basin polygons and writes them to ERA5_Land/snow_depth_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block,
#so the file handle is closed as soon as reading is done
with Dataset(r'netCDF/snow_depth/adaptor.mars.internal-1627397929.140722-25027-12-b8cb5004-2d00-40fe-945d-9c40d2433e7b.nc') as data:
    sde = data.variables['sde'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Convert m to mm for the whole grid at once
sde_mm = (sde * 1000).round()
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': sde_mm[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Daily mean of the sub-daily records, as whole millimetres.
#The nullable 'Int64' dtype is used because a plain integer cast raises as
#soon as any daily value is missing (NaN)
df_sde = df.resample('D').mean().round().astype('Int64')

#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_sde.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_sde_cut = df_sde[cols_ERA5_land]
df_sde_cut.to_csv('ERA5_Land/snow_depth_era5_land.csv')

In [110]:
#ERA5_Land. WIND (u10, v10)
#Builds one daily mean wind speed column per ERA5-Land node from the u10/v10 components,
#keeps the nodes that fall inside the basin polygons and writes them to ERA5_Land/wind_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block,
#so the file handle is closed as soon as reading is done
with Dataset(r'netCDF/wind/adaptor.mars.internal-1627399456.3568714-16544-11-8dc58c07-2d21-4c72-92dd-2e086bc39e9a.nc') as data:
    u10 = data.variables['u10'][:,:,:]
    v10 = data.variables['v10'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Wind speed is the magnitude of the (u10, v10) vector, computed for the whole grid at once
w10 = ((u10**2 + v10**2)**0.5).round(1)
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': w10[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Daily mean of the sub-daily records
df_w10 = df.resample('D').mean().round(1)

#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_w10.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_w10_cut = df_w10[cols_ERA5_land]
df_w10_cut.to_csv('ERA5_Land/wind_era5_land.csv')

Unnamed: 0,51.0_30.0,51.0_30.1,51.0_30.2,51.0_30.3,51.0_30.4,51.0_30.5,51.0_30.6,51.0_30.7,51.0_30.8,51.0_30.9,...,50.0_30.1,50.0_30.2,50.0_30.3,50.0_30.4,50.0_30.5,50.0_30.6,50.0_30.7,50.0_30.8,50.0_30.9,50.0_31.0
1981-01-01,1,0,0,0,0,4,9,4,1,1,...,2,2,2,2,2,2,2,2,4,6
1981-01-02,4,3,3,2,1,5,10,5,2,2,...,3,3,2,2,2,2,2,2,4,6
1981-01-03,9,8,8,7,2,7,15,9,6,6,...,6,5,4,4,4,4,4,4,5,7
1981-01-04,34,33,32,26,7,18,41,35,32,32,...,25,24,23,22,21,20,19,19,20,21
1981-01-05,35,34,34,28,7,20,46,39,36,36,...,27,26,24,23,23,22,21,21,22,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1981-12-27,261,255,247,199,49,118,276,247,233,236,...,238,241,237,237,235,234,232,229,232,233
1981-12-28,257,251,243,195,48,116,271,242,229,232,...,235,237,233,233,231,230,229,226,228,229
1981-12-29,255,249,240,193,47,114,268,238,226,228,...,232,234,230,230,228,227,225,222,225,226
1981-12-30,241,234,224,179,44,110,257,224,209,212,...,208,210,206,205,203,202,201,198,203,206


In [121]:
#ERA5_Land. DEWPOINT TEMPERATURE (d2m)
#Builds one daily mean dewpoint temperature column (deg C) per ERA5-Land node, keeps the nodes
#that fall inside the basin polygons and writes them to ERA5_Land/d2m_era5_land.csv

#Read netCDF file. The arrays are loaded into memory inside the `with` block,
#so the file handle is closed as soon as reading is done
with Dataset(r'netCDF/tmp_dewpoint/adaptor.mars.internal-1627398082.5712051-19284-11-85f545f1-e949-4332-969c-376c92ee2305.nc') as data:
    d2m = data.variables['d2m'][:,:,:]
    lat = data.variables['latitude'][:]
    lon = data.variables['longitude'][:]
    time = data.variables['time'][:]

#Convert the time axis to timestamps
#NOTE(review): assumes the time variable holds hours since 1900-01-01 - confirm against its `units` attribute
timestamps = pd.to_timedelta(np.asarray(time), unit='h') + datetime(1900, 1, 1)

#Number of grid points along latitude and longitude
lat_nodes = len(lat)
lon_nodes = len(lon)
#Column labels are the node coordinates rounded to 0.1 degree
lat_labels = [round(float(v), 1) for v in lat]
lon_labels = [round(float(v), 1) for v in lon]

#Convert K to Celsius for the whole grid at once
d2m_celsius = (d2m - 273.15).round(2)
#Collect every node column first and build the frame once (avoids growing the
#frame column by column). netCDF4 returns masked arrays by default and pandas
#stores masked entries as NaN, so no sentinel replacement is needed afterwards
node_columns = {
    f'{lat_coor}_{lon_coor}': d2m_celsius[:,i,j]
    for i, lat_coor in enumerate(lat_labels)
    for j, lon_coor in enumerate(lon_labels)
}
df = pd.DataFrame(node_columns, index = timestamps)

#Daily mean of the sub-daily records
df_d2m = df.resample('D').mean().round(2)

#Create geodataframe of ERA5_Land nodes (coordinates are parsed back from the column names)
node_coords = [el.split('_') for el in df_d2m.columns.tolist()]
df_ERA5_land_nodes = pd.DataFrame(node_coords, columns=['lat', 'lon'])
gdf_ERA5_land_nodes = gpd.GeoDataFrame(
    df_ERA5_land_nodes, crs="EPSG:4326",
    geometry=gpd.points_from_xy(df_ERA5_land_nodes['lon'].astype(float),
                                df_ERA5_land_nodes['lat'].astype(float)))

#Define ERA5_Land nodes within UA basins
#NOTE(review): absolute local path - consider moving it to a configurable data directory
gdf_all_basins = gpd.read_file('C:/Users/User/Geospatial coding/GIS/Borders_basin.shp')
gdf_all_basins = gdf_all_basins.to_crs(epsg=4326)
#`predicate` replaces the `op` keyword, which newer geopandas releases no longer accept
ERA5_land_nodes_basins = gpd.sjoin(gdf_ERA5_land_nodes, gdf_all_basins, predicate='within')

#Define columns names within UA_basins
#A node matched by more than one polygon appears several times in the join, so
#the names are de-duplicated (order preserved) to avoid repeated output columns
names_in_basins = ERA5_land_nodes_basins['lat'].astype('str') + '_' + ERA5_land_nodes_basins['lon'].astype('str')
cols_ERA5_land = list(dict.fromkeys(names_in_basins))

df_d2m_cut = df_d2m[cols_ERA5_land]
df_d2m_cut.to_csv('ERA5_Land/d2m_era5_land.csv')

51.0_30.0    3.611507
51.0_30.1    3.602192
51.0_30.2    3.582466
51.0_30.3    3.566849
51.0_30.4    3.549041
               ...   
50.0_30.6    2.942466
50.0_30.7    2.968767
50.0_30.8    2.988219
50.0_30.9    2.976438
50.0_31.0    2.951233
Length: 121, dtype: float64