In [1]:
import pandas as pd

In [2]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


## Load prepared weather dataset

In [3]:
weather_data_url = "https://drive.google.com/file/d/1chmMsKkMnzE1Xi347Za9ZWyMtuOpTe3T/view?usp=sharing"
weather_data_download_url = 'https://drive.google.com/uc?id=' + weather_data_url.split('/')[-2]

weather_data_download_url

'https://drive.google.com/uc?id=1chmMsKkMnzE1Xi347Za9ZWyMtuOpTe3T'

In [4]:
weather = pd.read_csv(weather_data_download_url)

In [5]:
weather

Unnamed: 0,SCODE,eastBoundingCoordinate,northBoundingCoordinate,altitudeMinimum,YEAR,MONTH,MONTH_NORMALIZED,DAY,DAY_NORMALIZED,Date,TAXA,SUBST,Ref_SUBST,METHOD_CODE,LAB_METHOD,VALUE,UNIT,LISTSUB
0,,14442,47842,893,1995,10,10,4,4,19951004,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1359,°C,CORDEX
1,,14442,47842,893,1995,10,10,5,5,19951005,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1411,°C,CORDEX
2,,14442,47842,893,1995,10,10,6,6,19951006,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1356,°C,CORDEX
3,,14442,47842,893,1995,10,10,7,7,19951007,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1327,°C,CORDEX
4,,14442,47842,893,1995,10,10,8,8,19951008,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1368,°C,CORDEX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74193,,14442,47842,893,2016,12,12,28,28,20161228,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",32762,DEGREES,OWN
74194,,14442,47842,893,2016,12,12,29,29,20161229,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",29099,DEGREES,OWN
74195,,14442,47842,893,2016,12,12,30,30,20161230,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",30896,DEGREES,OWN
74196,,14442,47842,893,2016,12,12,31,31,20161231,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",25114,DEGREES,OWN


In [6]:
weather['time'] = pd.to_datetime(weather['Date'].astype(str), format='%Y%m%d')

In [7]:
weather

Unnamed: 0,SCODE,eastBoundingCoordinate,northBoundingCoordinate,altitudeMinimum,YEAR,MONTH,MONTH_NORMALIZED,DAY,DAY_NORMALIZED,Date,TAXA,SUBST,Ref_SUBST,METHOD_CODE,LAB_METHOD,VALUE,UNIT,LISTSUB,time
0,,14442,47842,893,1995,10,10,4,4,19951004,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1359,°C,CORDEX,1995-10-04
1,,14442,47842,893,1995,10,10,5,5,19951005,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1411,°C,CORDEX,1995-10-05
2,,14442,47842,893,1995,10,10,6,6,19951006,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1356,°C,CORDEX,1995-10-06
3,,14442,47842,893,1995,10,10,7,7,19951007,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1327,°C,CORDEX,1995-10-07
4,,14442,47842,893,1995,10,10,8,8,19951008,,tas,Near-Surface Air Temperature,METH.tas,Kroneis NTC,1368,°C,CORDEX,1995-10-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74193,,14442,47842,893,2016,12,12,28,28,20161228,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",32762,DEGREES,OWN,2016-12-28
74194,,14442,47842,893,2016,12,12,29,29,20161229,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",29099,DEGREES,OWN,2016-12-29
74195,,14442,47842,893,2016,12,12,30,30,20161230,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",30896,DEGREES,OWN,2016-12-30
74196,,14442,47842,893,2016,12,12,31,31,20161231,,WindDir,Wind direction,METH.WindDir,"Kroneis, 263PRH",25114,DEGREES,OWN,2016-12-31


In [8]:
# Define a custom function to combine columns
def compute_lat_mean(row):
    return float(str(row['eastBoundingCoordinate']).replace(",","."))
def compute_lon_mean(row):
    return float(str(row['northBoundingCoordinate']).replace(",","."))

def compute_value_mean(row):
    return float(str(row['VALUE']).replace(",","."))

In [9]:
# Apply the custom function to create a new column 'latitude_mean'
weather['eastBoundingCoordinate'] = weather.apply(compute_lat_mean, axis=1)

In [10]:
weather['northBoundingCoordinate'] = weather.apply(compute_lon_mean, axis=1)

In [11]:
weather['VALUE'] = weather.apply(compute_value_mean, axis=1)

### Create Cube and Save to native grid

In [14]:
import xarray as xr

In [15]:
weather["Ref_SUBST"].unique()

array(['Near-Surface Air Temperature',
       'Daily Maximum Near-Surface Air Temperature',
       'Daily Minimum Near-Surface Air Temperature', 'Precipitation',
       'Near-Surface Relative Humidity', 'Global radiation',
       'Duration of Sunshine', 'Surface Air Pressure',
       'Near-Surface Wind Speed', 'Wind direction'], dtype=object)

In [16]:
lst_array = []
for var in weather["Ref_SUBST"].unique():
    lst_array.append(weather.loc[weather['Ref_SUBST'] == var].rename(columns={"eastBoundingCoordinate": "lon", "northBoundingCoordinate": "lat"}).groupby(['time', 'lat', 'lon'])[['VALUE']].mean().rename(columns={"VALUE": var.replace(" ", "_")}).to_xarray())

xr.merge(lst_array).to_netcdf("weather.nc")

In [17]:
dset = xr.open_dataset("weather.nc")
dset

## MGRS 100m grid

In [18]:
import mgrs

In [19]:
# Define a custom function to combine columns
# Precision	Size of grid side
# 5	1m
# 4	10m
# 3	100m
# 2	1000m (1km)
# 1	10000m (10km)
# 0	100000m (100km)
def to_mgrs_compute(row):
    return mgrs.MGRS().toMGRS(row['northBoundingCoordinate'], row['eastBoundingCoordinate'],4)
# Define a custom function to combine columns
def from_mgrs_compute_lat(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[0]
# Define a custom function to combine columns
def from_mgrs_compute_lon(row):
    return mgrs.MGRS().toLatLon(row['MGRS'])[1]

In [20]:
# Apply the custom function to create a new column 'MGRS'
weather['MGRS'] = weather.apply(to_mgrs_compute, axis=1)

In [21]:
weather['lon'] = weather.apply(from_mgrs_compute_lon, axis=1)
weather['lat'] = weather.apply(from_mgrs_compute_lat, axis=1)

In [22]:
weather

Unnamed: 0,SCODE,eastBoundingCoordinate,northBoundingCoordinate,altitudeMinimum,YEAR,MONTH,MONTH_NORMALIZED,DAY,DAY_NORMALIZED,Date,...,Ref_SUBST,METHOD_CODE,LAB_METHOD,VALUE,UNIT,LISTSUB,time,MGRS,lon,lat
0,,14.442,47.842,893,1995,10,10,4,4,19951004,...,Near-Surface Air Temperature,METH.tas,Kroneis NTC,13.59,°C,CORDEX,1995-10-04,33TVN5824898890,14.441989,47.841999
1,,14.442,47.842,893,1995,10,10,5,5,19951005,...,Near-Surface Air Temperature,METH.tas,Kroneis NTC,14.11,°C,CORDEX,1995-10-05,33TVN5824898890,14.441989,47.841999
2,,14.442,47.842,893,1995,10,10,6,6,19951006,...,Near-Surface Air Temperature,METH.tas,Kroneis NTC,13.56,°C,CORDEX,1995-10-06,33TVN5824898890,14.441989,47.841999
3,,14.442,47.842,893,1995,10,10,7,7,19951007,...,Near-Surface Air Temperature,METH.tas,Kroneis NTC,13.27,°C,CORDEX,1995-10-07,33TVN5824898890,14.441989,47.841999
4,,14.442,47.842,893,1995,10,10,8,8,19951008,...,Near-Surface Air Temperature,METH.tas,Kroneis NTC,13.68,°C,CORDEX,1995-10-08,33TVN5824898890,14.441989,47.841999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74193,,14.442,47.842,893,2016,12,12,28,28,20161228,...,Wind direction,METH.WindDir,"Kroneis, 263PRH",327.62,DEGREES,OWN,2016-12-28,33TVN5824898890,14.441989,47.841999
74194,,14.442,47.842,893,2016,12,12,29,29,20161229,...,Wind direction,METH.WindDir,"Kroneis, 263PRH",290.99,DEGREES,OWN,2016-12-29,33TVN5824898890,14.441989,47.841999
74195,,14.442,47.842,893,2016,12,12,30,30,20161230,...,Wind direction,METH.WindDir,"Kroneis, 263PRH",308.96,DEGREES,OWN,2016-12-30,33TVN5824898890,14.441989,47.841999
74196,,14.442,47.842,893,2016,12,12,31,31,20161231,...,Wind direction,METH.WindDir,"Kroneis, 263PRH",251.14,DEGREES,OWN,2016-12-31,33TVN5824898890,14.441989,47.841999


## Convert to Xarray to get datacube

In [23]:
weather["Ref_SUBST"].unique()

array(['Near-Surface Air Temperature',
       'Daily Maximum Near-Surface Air Temperature',
       'Daily Minimum Near-Surface Air Temperature', 'Precipitation',
       'Near-Surface Relative Humidity', 'Global radiation',
       'Duration of Sunshine', 'Surface Air Pressure',
       'Near-Surface Wind Speed', 'Wind direction'], dtype=object)

In [24]:
lst_array = []
for var in weather["Ref_SUBST"].unique():
    lst_array.append(weather.loc[weather['Ref_SUBST'] == var].groupby(['time', 'lat', 'lon'])[['VALUE']].mean().rename(columns={"VALUE": var.replace(" ", "_")}).to_xarray())



In [25]:
import xarray as xr

In [26]:
xr.merge(lst_array).to_netcdf("weather_MGRS100.nc")

In [27]:
dset = xr.open_dataset("weather_MGRS100.nc")

In [28]:
dset

# Adding attributes: 
- See https://docs.xarray.dev/en/latest/generated/xarray.DataArray.assign_attrs.html