In [2]:
from netCDF4 import Dataset
from config import DATA_DOWNLOAD_URL
from shapely.geometry import Point
import geopandas
from shapely.prepared import prep
import json
import os
import numpy as np
from datetime import datetime, timedelta

In [None]:
# Open one sample year of the ERA5 VPD data so its layout can be inspected below.
sample_nc_path = f"{DATA_DOWNLOAD_URL}raw/ERA5_VPD_at_Tmax_2000.nc4"
file = Dataset(sample_nc_path)

In [44]:
# Show the dataset summary: global attributes, dimension sizes and variables.
file

<class 'netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    description: Original data from ERA5
    creation_date: 20210125T114114Z
    author: Carlo Montes, CIMMYT, c.montes@cgiar.org
    dimensions(sizes): lon(1440), lat(601), doy(366)
    variables(dimensions): float32 vpd(doy, lat, lon), float32 lon(lon), float32 lat(lat), float32 doy(doy)
    groups: 

In [None]:
# vpd is stored as (doy, lat, lon): vpd[d] is the (lat, lon) grid for
# day-of-year index d (0 to 365), not for a year.
# Indexing the variable directly reads a single day instead of loading the
# whole array with [:] just to look at one slice.
file.variables["vpd"][0].shape

(601, 1440)

In [None]:
#file.variables["vpd"][:][x] has shape (lat, lon), one row per latitude:
#latitude[0][.. .. .. .. longitude from -180 to 180 .. .. .. ..]
#latitude[1][.. .. .. .. longitude from -180 to 180 .. .. .. ..]
#...
#latitude[600][.. .. .. .. longitude from -180 to 180 .. .. .. ..]

In [None]:
#flattened data (for each day) is : [data[lat0, lon0], data[lat0, lon1], data[lat0, lon2], ... data[lat1, lon0], data[lat1, lon1], ..., data[lat600, lon1439]]

In [None]:
# Build one Point per grid cell together with its position in the flattened
# (lat-major, then lon) daily grid, so a point can later be mapped back to an
# index into data[day].flatten().
# The coordinate axes are read from the file once; reading "lon" inside the
# latitude loop would hit the dataset again on every iteration.
grid_lats = file.variables["lat"][:]
grid_lons = file.variables["lon"][:]

pointsDict = {}  # Point -> index in the flattened daily grid
points = []      # same points, in flattened-grid order
compteur = 0
for lat in grid_lats:
    for lon in grid_lons:
        point = Point(lon, lat)  # one object shared by the dict key and the list
        pointsDict[point] = compteur
        points.append(point)
        compteur += 1

In [18]:
# Load the polygon of each French department, cast the department code to an
# integer and order the rows by that code with a fresh 0..n-1 index.
geo = (
    geopandas.read_file("data/geojsonfrance_corse_20.json")
    .assign(code=lambda departments: departments["code"].astype(int))
    .sort_values(by="code")
    .reset_index(drop=True)
)

In [None]:
# Read the whole vpd variable of the currently open dataset into memory and
# replace masked entries with NaN so NaN-aware reductions can skip them.
# NOTE(review): the aggregation loop cell assigns `data` again for every raw
# file, so this value is only useful for interactive inspection.
data = file.variables["vpd"][:].filled(np.nan)

In [None]:
# Compute, for every department and every day of every raw VPD file, the mean
# VPD over the grid cells belonging to that department, and write one JSON
# file per department: a list of {"dep", "date", "vpd"} records.
#
# Requires data/coords/ to be populated first; those files are written by the
# `for _, dep in geo.iterrows()` cell of this notebook.
#
# Each raw file is opened exactly once (and closed again) and then reused for
# all departments, instead of being re-read once per department.
coords_dir = "data/coords/"
raw_dir = f"{DATA_DOWNLOAD_URL}raw/"
out_dir = f"{DATA_DOWNLOAD_URL}dailyDepMean/"
os.makedirs(out_dir, exist_ok=True)

# Load the flattened-grid indices of every department up front.
departments = []
for coords_file in sorted(os.listdir(coords_dir)):
    with open(f"{coords_dir}{coords_file}") as f:
        indexes_dep = json.load(f)
    departments.append(
        {
            "name": indexes_dep["name"],
            "cells": indexes_dep["valid_index"],
            "records": [],
        }
    )

# Sorted so that records are appended in a deterministic, chronological order
# (os.listdir gives no ordering guarantee).
for vpd_file in sorted(os.listdir(raw_dir)):
    # File names look like ERA5_VPD_at_Tmax_<year>.nc4; the year is the 5th
    # "_"-separated field, up to the first ".".
    year = int(vpd_file.split("_")[4].split(".")[0])

    # Local names on purpose: the notebook-level `file` and `data` variables
    # are left untouched. The context manager closes the dataset after the read.
    with Dataset(f"{raw_dir}{vpd_file}") as vpd_nc:
        vpd_days = vpd_nc.variables["vpd"][:].filled(np.nan)

    # One row per day, each row being that day's grid flattened in the same
    # order as data[day].flatten().
    flat_days = vpd_days.reshape(len(vpd_days), -1)

    # Entry i of the file is labelled Jan 1 of `year` plus i days.
    # NOTE(review): if a non-leap-year file also has 366 entries, the last
    # record ends up dated Jan 1 of the following year - confirm against the data.
    first_day = datetime(year, 1, 1)
    day_labels = [
        (first_day + timedelta(days=offset)).strftime("%Y-%m-%d")
        for offset in range(len(flat_days))
    ]

    for department in departments:
        cells = department["cells"]
        for day_label, flat_day in zip(day_labels, flat_days):  # for each day in the file
            if cells:  # at least one grid cell belongs to this department
                mean_vpd = np.nanmean(flat_day[cells])  # ignore NaN values
            else:
                mean_vpd = np.nan
            department["records"].append(
                {"dep": department["name"], "date": day_label, "vpd": float(mean_vpd)}
            )

# NaN means are serialised by json.dumps as the bare token NaN.
for department in departments:
    with open(f"{out_dir}{department['name']}.json", "w") as outfile:
        outfile.write(json.dumps(department["records"]))


In [39]:
# For every department, collect the flattened-grid indices of the grid points
# contained in its polygon and save them as data/coords/<code>-<nom>.json with
# the keys "name" and "valid_index". The aggregation cell that lists
# data/coords/ reads these files, so this cell has to be run before it.
os.makedirs("data/coords", exist_ok=True)
for _, dep in geo.iterrows():
    # A prepared geometry speeds up the repeated contains() tests.
    prepared = prep(dep["geometry"])
    # pointsDict already maps every grid point to its index, so no extra
    # membership test against the `points` list is needed (that test scanned
    # the whole list for every matched point and was always true).
    valid_indices = [
        index for point, index in pointsDict.items() if prepared.contains(point)
    ]
    res = {"name": dep["nom"], "valid_index": valid_indices}
    # Single quotes inside the f-string keep this valid before Python 3.12.
    with open(f"data/coords/{dep['code']}-{dep['nom']}.json", "w") as outfile:
        json.dump(res, outfile)