In [1]:
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.prepared import prep
import geopandas
from config import NDVI_DATA_URL
import os
from datetime import datetime, timedelta

In [15]:
data_index = 0
points = dict()
latitude = Dataset(f"{NDVI_DATA_URL}/yearly_aggregated/1981.nc").variables["latitude"][:]
longitude = Dataset(f"{NDVI_DATA_URL}/yearly_aggregated/1981.nc").variables["longitude"][:]
#for every lat and lon, we make a dict of index POINT(lon, lat) and value the index of the data associated with this point
for lat in latitude:
    for lon in longitude:
        points[Point(lon, lat)] = data_index
        data_index +=1

In [14]:
geo = geopandas.read_file("geojsonfrance_corse_20.json") #read france departement geometries
geo["code"] = geo["code"].astype(int)
geo = geo.sort_values(by="code").reset_index(drop=True)

In [17]:
years = [year for year in os.listdir(f"{NDVI_DATA_URL}/")]
files = [f"{year}/{file}" for year in years for file in os.listdir(f"{NDVI_DATA_URL}/{year}") if file.endswith(".nc")]

In [None]:
result = []
for file in files:
    data = Dataset(f"{NDVI_DATA_URL}/{file}").variables["NDVI"][:]
    date = file[37:45] #get date from file name
    data_flattened = data.flatten() #flatten data
    for _, dep in geo.iterrows():
        prepared = prep(dep["geometry"]) #use prep for batch operations
        valid_points = []
        valid_points.extend(filter(prepared.contains, points)) #find POINTS in dep
        valid_indices = [points[point] for point in valid_points if point in points] #make a list of valid points that are in the dep
        ndvi = data_flattened[valid_indices]
        ndvi = ndvi.filled(np.nan)
        if not np.all(np.isnan(ndvi)): #check if full array is not nan
            mean_ndvi = np.nanmean(ndvi)
        else:
            mean_ndvi = np.nan

        result.append({"date": date, "departement": dep["nom"], "dep": dep["code"], "ndvi_mean": float(mean_ndvi)})
    break
result

[{'date': '19810624', 'departement': 'Ain', 'dep': 1, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Aisne', 'dep': 2, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Allier', 'dep': 3, 'ndvi_mean': nan},
 {'date': '19810624',
  'departement': 'Alpes-de-Haute-Provence',
  'dep': 4,
  'ndvi_mean': nan},
 {'date': '19810624',
  'departement': 'Hautes-Alpes',
  'dep': 5,
  'ndvi_mean': nan},
 {'date': '19810624',
  'departement': 'Alpes-Maritimes',
  'dep': 6,
  'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Ardèche', 'dep': 7, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Ardennes', 'dep': 8, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Ariège', 'dep': 9, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Aube', 'dep': 10, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Aude', 'dep': 11, 'ndvi_mean': nan},
 {'date': '19810624', 'departement': 'Aveyron', 'dep': 12, 'ndvi_mean': nan},
 {'date': '19810624',
  'departement': 

In [50]:
result = []
for file in os.listdir(f"{NDVI_DATA_URL}/yearly_aggregated/"):
    if file == "1981.nc":
        continue
    ds = Dataset(f"{NDVI_DATA_URL}/yearly_aggregated/{file}")
    data = ds.variables["NDVI"][:]
    days = data.shape[0]
    first_date = datetime(int(file[:4]), 1, 1)
    for day in range(days):
        date = (first_date + timedelta(days=int(ds.variables["time"][0].data.item()))).strftime("%Y-%m-%d")
        daily_flattened = data[day].flatten()
        daily_data = daily_flattened.filled(np.nan)
        for _, dep in geo.iterrows():
            if not np.all(np.isnan(daily_data)): #check if full array is not nan
                prepared = prep(dep["geometry"]) #use prep for batch operations
                valid_points = []
                valid_points.extend(filter(prepared.contains, points)) #find POINTS in dep
                valid_indices = [points[point] for point in valid_points if point in points] #make a list of valid points that are in the dep
                ndvi = daily_data[valid_indices]
                mean_ndvi = np.nanmean(daily_data)
            else:
                mean_ndvi = np.nan
            result.append({"date": date, "departement": dep["nom"], "dep": dep["code"], "ndvi_mean": float(mean_ndvi)})
        break
    break
result

[{'date': '1983-01-01',
  'departement': 'Ain',
  'dep': 1,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Aisne',
  'dep': 2,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Allier',
  'dep': 3,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Alpes-de-Haute-Provence',
  'dep': 4,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Hautes-Alpes',
  'dep': 5,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Alpes-Maritimes',
  'dep': 6,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Ardèche',
  'dep': 7,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Ardennes',
  'dep': 8,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Ariège',
  'dep': 9,
  'ndvi_mean': 0.06161925508376356},
 {'date': '1983-01-01',
  'departement': 'Aube',
  'dep': 10,
  'ndvi

In [6]:
Dataset(f"{NDVI_DATA_URL}/1981/AVHRR-Land_v005_AVH13C1_NOAA-07_19810711_c20170609200548.nc").variables["NDVI"][:]

masked_array(
  data=[[[--, --, --, ..., 0.0585, 0.1579, 0.1403],
         [--, --, --, ..., 0.11900000000000001, 0.11900000000000001,
          0.0956],
         [--, --, --, ..., 0.0641, 0.06620000000000001, 0.0734],
         ...,
         [0.18780000000000002, 0.18630000000000002, 0.1827, ..., --, --,
          --],
         [0.21130000000000002, 0.2126, 0.2281, ..., --, --, --],
         [0.23970000000000002, 0.3064, 0.2838, ..., --, --, --]]],
  mask=[[[ True,  True,  True, ..., False, False, False],
         [ True,  True,  True, ..., False, False, False],
         [ True,  True,  True, ..., False, False, False],
         ...,
         [False, False, False, ...,  True,  True,  True],
         [False, False, False, ...,  True,  True,  True],
         [False, False, False, ...,  True,  True,  True]]],
  fill_value=-9999)