In [5]:
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.prepared import prep
import geopandas
from config import NDVI_DATA_URL
import os
from datetime import datetime, timedelta

In [6]:
def check_bit(arr, satellite="AVHRR"):
    """Test QA flag words against the fixed bit pattern expected for a sensor.

    For the chosen sensor, only the bits selected by ``mask_fixed_bits`` are
    examined; a value passes when those bits are exactly ``expected_values``.
    Bits outside the mask are ignored.

    Parameters
    ----------
    arr : integer scalar or array (numpy / masked array)
        QA flag values.
    satellite : {"AVHRR", "VIIRS"}, default "AVHRR"
        Which sensor's QA bit layout to apply.

    Returns
    -------
    Boolean scalar/array with the same shape as ``arr``: True where the
    checked bits match the expected pattern.

    Raises
    ------
    ValueError
        If ``satellite`` is not a supported sensor name. (Previously a message
        was printed and ``""`` was returned, which only failed later with an
        unrelated error when the result was indexed or inverted.)
    """
    if satellite == "AVHRR":
        # bit mask 5.3 in doc : https://www.ncei.noaa.gov/pub/data/sds/cdr/CDRs/Normalized_Difference_Vegetation_Index/AVHRR/AlgorithmDescriptionAVHRR_01B-20b.pdf
        mask_fixed_bits = np.uint16(0b0001111111011110)  # 1 marks a bit that is checked
        expected_values = np.uint16(0b0000000010000000)  # required values for the checked bits
    elif satellite == "VIIRS":
        # bit mask 4.3 in doc : https://www.ncei.noaa.gov/pub/data/sds/cdr/CDRs/Normalized_Difference_Vegetation_Index/VIIRS/AlgorithmDescriptionVIIRS_01B-20b.pdf
        mask_fixed_bits = np.uint16(0b1000011101110111)  # 1 marks a bit that is checked
        expected_values = np.uint16(0b0000000001000000)  # required values for the checked bits
    else:
        raise ValueError(
            f"Bad argument for QA: unknown satellite {satellite!r} (expected 'AVHRR' or 'VIIRS')"
        )

    # Keep only the checked bits, then compare them with the required pattern.
    return (arr & mask_fixed_bits) == expected_values

In [7]:
# Build a lookup {Point(lon, lat): position}, numbering the grid points with
# latitude as the outer axis and longitude as the inner axis.
# NOTE(review): the positions are later used to index a flattened daily NDVI
# grid, which assumes that grid is laid out as (latitude, longitude) — confirm.
points = dict()

# Open the reference file once (it was previously opened twice and never
# closed); slicing with [:] copies the coordinates into memory, so they remain
# usable after the file is closed.
with Dataset(f"{NDVI_DATA_URL}/yearly_subsample/1981.nc") as grid_ds:
    latitude = grid_ds.variables["latitude"][:]
    longitude = grid_ds.variables["longitude"][:]

data_index = 0
for lat in latitude:
    for lon in longitude:
        points[Point(lon, lat)] = data_index
        data_index += 1

In [8]:
# Read the French departement geometries, cast the departement code to int,
# then order the rows by that code and renumber the index from 0.
departements_raw = geopandas.read_file("geojsonfrance_corse_20.json")
geo = (
    departements_raw
    .assign(code=departements_raw["code"].astype(int))
    .sort_values(by="code")
    .reset_index(drop=True)
)

In [None]:
# For every yearly NetCDF file, compute the daily mean NDVI of each departement
# (using only pixels whose QA bits pass check_bit) and write one CSV per year.
subsample_dir = f"{NDVI_DATA_URL}/yearly_subsample"
first_date = datetime(1981, 1, 1)  # "time" values are used as day offsets from this date
columns = ["date", "departement", "dep", "ndvi_mean"]

# The grid points and the departement geometries are the same for every day of
# every file, so the (slow) point-in-polygon search is done once per
# departement here instead of once per departement per day.
dep_grid_indices = []
for _, dep in geo.iterrows():
    prepared = prep(dep["geometry"])  # prepared geometry speeds up repeated contains() calls
    indices = [points[point] for point in filter(prepared.contains, points)]
    dep_grid_indices.append((dep["nom"], dep["code"], indices))

for file in sorted(os.listdir(subsample_dir)):
    if not file.endswith(".nc"):
        continue  # ignore checkpoints / stray files that are not yearly datasets
    year = file[:4]
    # Files from 2014 onwards are checked with the VIIRS QA layout, earlier ones with AVHRR.
    satellite = "VIIRS" if int(year) >= 2014 else "AVHRR"

    # Read everything needed into memory, then release the file handle.
    with Dataset(f"{subsample_dir}/{file}") as ds:
        data = ds.variables["NDVI"][:]
        qa = ds.variables["QA"][:]
        day_offsets = np.ma.getdata(ds.variables["time"][:])

    # True where the QA bits match the expected pattern; its inverse is the mask
    # (the original name `is_cloudy` suggested the opposite meaning).
    qa_ok = check_bit(qa, satellite=satellite)

    # Start from an empty list for each file: previously the list was shared
    # across files, so every year's CSV also contained all earlier years.
    result = []
    days = data.shape[0]
    print(year)
    for day in range(days):
        date = (first_date + timedelta(days=int(day_offsets[day]))).strftime("%Y-%m-%d")
        # Mask the pixels that fail the QA check, flatten, and turn masked pixels into NaN.
        daily_data = np.ma.masked_array(data[day], mask=~qa_ok[day]).flatten().filled(np.nan)
        day_has_data = not np.all(np.isnan(daily_data))
        for nom, code, indices in dep_grid_indices:
            mean_ndvi = np.nan
            if day_has_data and indices:
                ndvi = daily_data[indices]
                # Skip all-NaN selections explicitly: np.nanmean would return
                # NaN anyway but emit a RuntimeWarning for each of them.
                if not np.all(np.isnan(ndvi)):
                    mean_ndvi = np.nanmean(ndvi)
            result.append({"date": date, "departement": nom, "dep": code, "ndvi_mean": float(mean_ndvi)})

    # The index column is kept in the CSV because the refresh cell drops "Unnamed: 0" on read.
    df = pd.DataFrame(result, columns=columns)
    df.to_csv(f"{NDVI_DATA_URL}/no_cloud/{year}.csv")

In [None]:
# Single-file refresh for the current (partial) year: recompute the daily
# per-departement mean NDVI from the latest yearly file and append to the
# existing CSV only the days that are newer than its last recorded date.
year = "2025"
first_date = datetime(1981, 1, 1)  # "time" values are used as day offsets from this date
columns = ["date", "departement", "dep", "ndvi_mean"]

# Read everything needed into memory, then release the file handle.
with Dataset(f"{NDVI_DATA_URL}/yearly_subsample/{year}.nc") as ds:
    data = ds.variables["NDVI"][:]
    qa = ds.variables["QA"][:]
    day_offsets = np.ma.getdata(ds.variables["time"][:])

# True where the QA bits match the expected VIIRS pattern; its inverse is the
# mask (the original name `is_cloudy` suggested the opposite meaning).
qa_ok = check_bit(qa, satellite="VIIRS")

# The grid points and departement geometries do not change from day to day, so
# the point-in-polygon search is done once per departement, not once per day.
dep_grid_indices = []
for _, dep in geo.iterrows():
    prepared = prep(dep["geometry"])  # prepared geometry speeds up repeated contains() calls
    indices = [points[point] for point in filter(prepared.contains, points)]
    dep_grid_indices.append((dep["nom"], dep["code"], indices))

records = []
days = data.shape[0]
for day in range(days):
    date = (first_date + timedelta(days=int(day_offsets[day]))).strftime("%Y-%m-%d")
    # Mask the pixels that fail the QA check, flatten, and turn masked pixels into NaN.
    daily_data = np.ma.masked_array(data[day], mask=~qa_ok[day]).flatten().filled(np.nan)
    day_has_data = not np.all(np.isnan(daily_data))
    for nom, code, indices in dep_grid_indices:
        mean_ndvi = np.nan
        if day_has_data and indices:
            ndvi = daily_data[indices]
            # Skip all-NaN selections explicitly: np.nanmean would return NaN
            # anyway but emit a RuntimeWarning for each of them.
            if not np.all(np.isnan(ndvi)):
                mean_ndvi = np.nanmean(ndvi)
        records.append({"date": date, "departement": nom, "dep": code, "ndvi_mean": float(mean_ndvi)})
# Explicit columns keep the "date" column present even when the file has no days.
df = pd.DataFrame(records, columns=columns)

hist = pd.read_csv(f"{NDVI_DATA_URL}/no_cloud/{year}.csv").drop("Unnamed: 0", axis=1)
hist = hist.sort_values(by="date")

# Dates are ISO "YYYY-MM-DD" strings, so string comparison orders them chronologically.
# With an empty history there is no last date to compare against: keep every new row.
current = df if hist.empty else df[df["date"] > hist["date"].iloc[-1]]

result = pd.concat([hist, current])
result.to_csv(f"{NDVI_DATA_URL}/no_cloud/{year}.csv")

  mean_ndvi = np.nanmean(ndvi)


In [None]:
# Load the 1981 yearly subsample (NDVI values and QA flags) and evaluate the QA
# flags with the AVHRR bit pattern.
ds = Dataset(f"{NDVI_DATA_URL}/yearly_subsample/1981.nc")
data = ds.variables["NDVI"][:]
qa = ds.variables["QA"][:]
# check_bit returns True where the QA bits selected by the AVHRR mask equal the expected pattern.
# NOTE(review): despite the name `is_cloudy`, elsewhere in this notebook the check_bit result is
# inverted before being used as a mask, so True appears to mark pixels that are kept — confirm
# and consider renaming.
is_cloudy = check_bit(qa, satellite="AVHRR")