In [1]:
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.prepared import prep
import geopandas
from config import NDVI_DATA_URL
import os
from datetime import datetime, timedelta

In [2]:
# Build `points`: a lookup from every grid point (as a shapely Point(lon, lat))
# to the running index of that point in the latitude-major enumeration below.

# Open the reference file once and close it as soon as both coordinate arrays
# have been read into memory (previously it was opened twice and never closed).
with Dataset(f"{NDVI_DATA_URL}/yearly_subsample/1981.nc") as grid_ds:
    latitude = grid_ds.variables["latitude"][:]
    longitude = grid_ds.variables["longitude"][:]

data_index = 0
points = dict()
# Latitude is the outer loop and longitude the inner one, so the index grows
# fastest along longitude.
# NOTE(review): this presumably has to match how the NDVI grid is flattened
# later, i.e. NDVI laid out as (time, latitude, longitude) — confirm.
for lat in latitude:
    for lon in longitude:
        points[Point(lon, lat)] = data_index
        data_index += 1

In [3]:
# Read the French department geometries, cast the department code to an
# integer, order the rows by that code and renumber the index from zero.
geo = (
    geopandas.read_file("geojsonfrance_corse_20.json")
    .assign(code=lambda frame: frame["code"].astype(int))
    .sort_values(by="code")
    .reset_index(drop=True)
)

In [None]:
# Compute, for every day of every yearly NetCDF file, the mean NDVI over the
# grid points that fall inside each department, and dump the records to CSV.

# The "time" value of each day is added to this date as a number of days.
first_date = datetime(1981, 1, 1)

# Which grid points lie inside a department depends only on `geo` and
# `points`, not on the day or the file, so resolve it once up front instead of
# repeating the point-in-polygon search for every single day.
dep_grid_indices = []
for _, dep in geo.iterrows():
    prepared = prep(dep["geometry"])  # prepared geometry for repeated contains() calls
    inside = [index for point, index in points.items() if prepared.contains(point)]
    dep_grid_indices.append((dep["nom"], dep["code"], np.asarray(inside, dtype=np.intp)))

result = []
for file in sorted(os.listdir(f"{NDVI_DATA_URL}/yearly_subsample/")):
    # The context manager releases the NetCDF handle even if a day fails.
    with Dataset(f"{NDVI_DATA_URL}/yearly_subsample/{file}") as ds:
        data = ds.variables["NDVI"][:]
        days = data.shape[0]
        year = file[:4]  # the first four characters of the file name are used as the year
        print(year)
        for day in range(days):
            date = (first_date + timedelta(days=int(ds.variables["time"][day].data.item()))).strftime("%Y-%m-%d")
            # Flatten the day's grid and turn masked cells into NaN.
            daily_data = data[day].flatten().filled(np.nan)
            # A day without any valid value yields NaN for every department;
            # check this once per day rather than once per department.
            day_is_empty = bool(np.all(np.isnan(daily_data)))
            for nom, code, indices in dep_grid_indices:
                if day_is_empty or indices.size == 0:
                    # No data for the day, or no grid point inside the department.
                    mean_ndvi = np.nan
                else:
                    mean_ndvi = np.nanmean(daily_data[indices])
                result.append({"date": date, "departement": nom, "dep": code, "ndvi_mean": float(mean_ndvi)})
    # NOTE(review): `result` is never reset between files, so each {year}.csv
    # also contains the rows of all previously processed years. Kept as-is;
    # confirm whether per-year files were intended.
    df = pd.DataFrame(result)
    df.to_csv(f"{NDVI_DATA_URL}/{year}.csv")