In [None]:
import pandas as pd
import geopandas as gpd
import xarray as xr
import datetime
import numpy as np
import os

In [None]:
def complete_and_fill_dates(df, start_date="2024-01-01", end_date="2024-12-31"):
    """
    Expand a wide, date-columned table to one column per day and fill the gaps.

    The column labels are parsed as dates and the columns are reindexed onto the
    daily range ``start_date``..``end_date`` (columns outside that range are
    dropped). Missing values in each row are then taken from the nearest
    available date; gaps before the first and after the last observation are
    filled with the first / last available value of the row.

    The input DataFrame is left untouched; a new DataFrame is returned.

    Parameters:
        df (pd.DataFrame): One row per series, one column per observation date.
            Column labels must be parseable by ``pd.to_datetime``.
        start_date (str): First day of the output range.
        end_date (str): Last day of the output range.

    Returns:
        pd.DataFrame: A new frame whose columns are the daily dates of the range.
    """
    # Work on a copy: assigning to ``df.columns`` directly would silently
    # rewrite the labels of the caller's DataFrame.
    dated = df.copy()
    dated.columns = pd.to_datetime(dated.columns)

    # One column per calendar day; days without an observation become NaN.
    full_date_range = pd.date_range(start=start_date, end=end_date, freq="D")
    dated = dated.reindex(columns=full_date_range)

    # Nearest-neighbour fill along the time axis (pandas delegates this to SciPy).
    dated = dated.interpolate(method="nearest", axis=1)

    # Fill whatever is still missing at the start (bfill) and at the end (ffill).
    return dated.bfill(axis=1).ffill(axis=1)

In [None]:
def preprocess_NDVI_italy(data_dir="data/NDVI_nc/Italia"):
    """
    Build a daily, exponentially rescaled NDVI table from the Italian NetCDF files.

    Every ``*.nc`` file in ``data_dir`` is read, its time series is expanded to
    one value per day with ``complete_and_fill_dates`` (using that function's
    default date range) and the per-file rows are stacked. The values are
    finally mapped through ``exp(x) / exp(1)``.

    Parameters:
        data_dir (str): Folder holding the ``<name>_ndvi_ts.nc`` files. The same
            folder is used both for listing and for opening the files.

    Returns:
        pd.DataFrame: One row per input file (labelled with the file name minus
            the ``_ndvi_ts.nc`` suffix) and one column per day. Empty when the
            folder contains no ``.nc`` file.
    """
    # Listing and opening must target the same folder; mixing a parent folder
    # for the listing with a sub-folder for the reads never matches real files.
    # Sorted so the row order does not depend on the file system.
    file_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".nc"))

    rows = []
    for name in file_names:
        name_com = name.replace('_ndvi_ts.nc', '')  # Row label for this file

        # The context manager releases the file handle once values are in memory.
        with xr.open_dataset(os.path.join(data_dir, name)) as data_set:
            dates = list(pd.to_datetime(data_set.t.values).strftime('%Y-%m-%d'))
            # assumes a single data variable that squeezes to a 1-D series along
            # ``t`` -- TODO confirm against the NetCDF layout
            ndvi_values = data_set.squeeze().to_array().values.flatten()

        # One-row frame: label as index, observation dates as columns.
        data = pd.DataFrame({name_com: ndvi_values}, index=dates).T
        rows.append(complete_and_fill_dates(data))

    if not rows:
        return pd.DataFrame()

    # Concatenate once instead of growing the frame inside the loop.
    data_final = pd.concat(rows, axis=0)

    # exp(x) / exp(1): a value of 1 maps to 1, lower values are compressed.
    return np.exp(data_final) / np.exp(1)

In [None]:
# Build the Italian NDVI table, relabel the Aosta Valley row with its
# two-language name, and export the result as CSV.
data_italy = preprocess_NDVI_italy().rename(
    index={"Vallée d’Aoste": "Valle d’Aosta/Vallée d’Aoste"}
)
data_italy.to_csv("data/NDVI_italia.csv")

In [None]:
def preprocess_NDVI_BELGIO(minimo = 0.10, massimo = 1, data_dir = "data/NDVI/Belgio"):
    """
    Build a daily, exponentially rescaled NDVI table from the Belgian NetCDF files.

    Every ``*.nc`` file in ``data_dir`` is read, its time series is expanded to
    one value per day with ``complete_and_fill_dates`` (using that function's
    default date range) and the per-file rows are stacked. The values are
    finally mapped through ``exp(x) / exp(1)``.

    Parameters:
        minimo (float): Currently unused; kept so existing calls keep working.
            NOTE(review): presumably a lower bound for a rescaling step that
            was never implemented -- confirm or remove.
        massimo (float): Currently unused; see ``minimo``.
        data_dir (str): Folder holding the ``<name>_ndvi_ts.nc`` files.

    Returns:
        pd.DataFrame: One row per input file (labelled with the file name minus
            the ``_ndvi_ts.nc`` suffix) and one column per day. Empty when the
            folder contains no ``.nc`` file.
    """
    # Sorted so the row order does not depend on the file system.
    file_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".nc"))

    rows = []
    for name in file_names:
        name_com = name.replace('_ndvi_ts.nc', '')  # Row label for this file

        # The context manager releases the file handle once values are in memory.
        with xr.open_dataset(os.path.join(data_dir, name)) as data_set:
            dates = list(pd.to_datetime(data_set.t.values).strftime('%Y-%m-%d'))
            # assumes a single data variable that squeezes to a 1-D series along
            # ``t`` -- TODO confirm against the NetCDF layout
            ndvi_values = data_set.squeeze().to_array().values.flatten()

        # One-row frame: label as index, observation dates as columns.
        data = pd.DataFrame({name_com: ndvi_values}, index=dates).T
        rows.append(complete_and_fill_dates(data))

    if not rows:
        return pd.DataFrame()

    # Concatenate once instead of growing the frame inside the loop.
    data_final = pd.concat(rows, axis=0)

    # exp(x) / exp(1): a value of 1 maps to 1, lower values are compressed.
    return np.exp(data_final) / np.exp(1)

In [None]:
# Build the Belgian NDVI table, relabel the Brussels row with its two-language
# name, and export the result as CSV.
# ``DataFrame.rename`` returns a new frame rather than modifying in place, so
# the result must be assigned; otherwise the CSV keeps the old label.
data_belgio = preprocess_NDVI_BELGIO().rename(
    index={"Arr. Brussel-Hoofdstad": "Arr. de Bruxelles-Capitale/Arr. Brussel-Hoofdstad"}
)
# NOTE(review): this file is written to the working directory, while the Italian
# table is written under data/ -- confirm the location is intentional.
data_belgio.to_csv("NDVI_belgio.csv")