In [None]:
# | default_exp create_climate_data

In [None]:
# | hide
from nbdev.showdoc import *
from fastcore.test import *
from fastcore.utils import *

In [None]:
# | export
import os
import numpy as np
import pandas as pd
import pandera as pa
from typing import Dict
from pathlib import Path
from pandera.typing import DataFrame, Series
from sureau_ecos_py.create_simulation_parameters import (
    create_simulation_parameters,
)
from sureau_ecos_py.create_modeling_options import create_modeling_options

In [None]:
# | export


# This class is created for validating the input climate dataframe
# If the data don't follow the structure in the class the function will fail
class ClimateDataValidation(pa.SchemaModel):
    """Schema for validating the climate data

    Every column is declared as a pandera `Field`; numeric bounds (`ge`/`le`)
    are inclusive. `Config.strict = True` makes validation reject any column
    that is not declared here.
    """

    # --- Date ------------------------------------------------------------------
    DATE: Series[np.datetime64] = pa.Field(description="date with format 1991/12/23")

    # --- Air temperature (no bounds enforced) ----------------------------------
    Tair_min: Series[float] = pa.Field(description="minimum air temperature of the day (degC)")
    Tair_max: Series[float] = pa.Field(description=" maximum air temperature of the day (degC)")
    Tair_mean: Series[float] = pa.Field(description="mean air temperature of the day (degC)")

    # --- Radiation and precipitation (must be >= 0) ----------------------------
    RG_sum: Series[float] = pa.Field(
        description="global radiation (MJ/m2)",
        ge=0,
    )
    PPT_sum: Series[float] = pa.Field(
        description="precipitation (mm)",
        ge=0,
    )

    # --- Relative humidity (must lie within 0-100) -----------------------------
    RHair_min: Series[float] = pa.Field(
        description="minimum relative humidity of the day (%)",
        ge=0,
        le=100,
    )
    # This is the only column declared with coerce=True
    RHair_max: Series[float] = pa.Field(
        description="maximum relative humidity of the day (%)",
        ge=0,
        le=100,
        coerce=True,
    )
    RHair_mean: Series[float] = pa.Field(
        description="mean relative humidity of the day (%)",
        ge=0,
        le=100,
    )

    # --- Wind (must be >= 0) ---------------------------------------------------
    WS_mean: Series[float] = pa.Field(
        description="mean wind speed of the day (m/s)",
        ge=0,
    )

    class Config:
        # Only the columns declared above are accepted
        strict = True


def _add_date_columns(climate_df: pd.DataFrame) -> pd.DataFrame:
    "Return a copy of `climate_df` with `day_of_year`, `day_of_month`, `month` and `year` columns derived from `DATE`"
    dates = pd.DatetimeIndex(climate_df["DATE"])

    # Index objects are assigned positionally (no label alignment), so this is
    # correct whatever the index of `climate_df` looks like
    return climate_df.assign(
        # Day of the year (from 1 to 366); kept as int64 like the former
        # per-row `pd.Period(...).day_of_year` implementation
        day_of_year=dates.dayofyear.astype("int64"),
        # Day of the month (from 1 to 31)
        day_of_month=dates.day,
        # Month (from 1 to 12)
        month=dates.month,
        # Year
        year=dates.year,
    )


def create_climate_data(
    simulation_parameters: Dict,  # Dictionary created using the `create_simulation_parameters` function
    modeling_options: Dict,  # Dictionary created using the `create_modeling_options` function
    file_path: Path,  # Path to the input CSV climate file. i.e. path/to/file/climate.csv
    sep: str = ";",  # CSV file separator can be ',' or ';'
) -> DataFrame[ClimateDataValidation]:
    """Create the climate DataFrame used to run SurEau-Ecos.

    Reads the input climate CSV, validates it against `ClimateDataValidation`
    and adds the `day_of_year`, `day_of_month`, `month` and `year` columns.

    * If `modeling_options["constant_climate"]` is False, only the rows whose
      year lies between `simulation_parameters["start_year_simulation"]` and
      `simulation_parameters["end_year_simulation"]` (both inclusive) are kept.
    * If it is True, the first row of the file is repeated for every day from
      January 1st of the start year to December 31st of the end year, and the
      result is returned with a fresh 0..n-1 index.

    Raises an `AssertionError` when `file_path` does not exist, when
    `simulation_parameters` or `modeling_options` is not a dictionary, when
    `constant_climate` is not a boolean, or when a constant climate is
    requested from a file without rows. Validation errors raised by
    `ClimateDataValidation.validate` are propagated unchanged.
    """

    # Assert parameters ---------------------------------------------------------

    # Make sure the file_path exist or is None.
    # NOTE: None passes this check but is rejected by pd.read_csv below
    assert file_path is None or os.path.exists(
        file_path
    ), f"Path: {file_path} not found, check presence or spelling"

    # Make sure that simulation_parameters and modeling_options are dictionaries
    assert isinstance(
        simulation_parameters, Dict
    ), f"simulation_parameters must be a dictionary not a {type(simulation_parameters)}"

    assert isinstance(
        modeling_options, Dict
    ), f"modeling_options must be a dictionary not a {type(modeling_options)}"

    # Make sure constant_climate is a boolean. Without this check any other
    # value would match neither branch and the function would return None
    constant_climate = modeling_options["constant_climate"]
    assert isinstance(
        constant_climate, (bool, np.bool_)
    ), f"modeling_options['constant_climate'] must be a boolean not a {type(constant_climate)}"

    # Read csv file  ------------------------------------------------------------

    climate_data = pd.read_csv(
        file_path, sep=sep, header=0, parse_dates=["DATE"], dayfirst=True
    )

    # Raise error if climate_data don't follow the ClimateDataValidation Schema.
    # Keep the validated frame so that the type coercions declared in the
    # schema are actually applied to the returned data
    climate_data = ClimateDataValidation.validate(climate_data)

    # Create climate data based on constant_climate parameter -------------------

    if not constant_climate:
        # Break DATE into year, month, day_of_year, day_of_month columns
        climate_data = _add_date_columns(climate_data)

        # Filter data based on start_year_simulation and end_year_simulation
        # parameters specified in simulation_parameters dictionary
        # (`between` is inclusive on both ends)
        climate_data = climate_data.loc[
            climate_data["year"].between(
                simulation_parameters["start_year_simulation"],
                simulation_parameters["end_year_simulation"],
            )
        ]

        print(
            f'{climate_data.shape[0]} days were selected in the input climate file, covering the period: {climate_data["year"].min()} - {climate_data["year"].max()}'
        )

        return climate_data

    # Constant climate ----------------------------------------------------------

    assert (
        not climate_data.empty
    ), f"Path: {file_path} contains no rows, a constant climate cannot be created"

    # One date per day of the simulated period. The bounds are built from
    # explicit year/month/day values to avoid ambiguous day/month parsing
    date_ref = pd.date_range(
        start=pd.Timestamp(
            year=int(simulation_parameters["start_year_simulation"]),
            month=1,
            day=1,
        ),
        end=pd.Timestamp(
            year=int(simulation_parameters["end_year_simulation"]),
            month=12,
            day=31,
        ),
        freq="D",
    )

    # Repeat the first row of climate_data once per date. The index is reset
    # so that every row has its own label instead of sharing the label of the
    # first row (duplicated labels made label-aligned assignments give every
    # row the value of the first date)
    constant_climate_data = climate_data.iloc[[0] * len(date_ref)].reset_index(
        drop=True
    )

    # Substitute the old dates with the new ones
    constant_climate_data = constant_climate_data.assign(DATE=date_ref)

    # Break DATE into year, month, day_of_year, day_of_month columns
    constant_climate_data = _add_date_columns(constant_climate_data)

    print(
        f'{constant_climate_data.shape[0]} days of the period: {constant_climate_data["year"].min()} - {constant_climate_data["year"].max()} have the same climatic conditions'
    )

    return constant_climate_data

## Create simulation parameters dictionary

In [None]:
# Example configuration: the start and end year are both 1991, so a single
# year is simulated.
# NOTE(review): the meaning of the remaining options is defined by
# `create_simulation_parameters`, which is not shown in this notebook.
simulation_parameters_dict = create_simulation_parameters(
    main_dir="./sample_data",
    start_year_simulation=1991,
    end_year_simulation=1991,
    output_type=None,
    output_path="./sample_data",
    overwrite=True,
    resolution_output="subdaily",
)

## Create modeling options dictionary

In [None]:
# Example modeling options. `constant_climate=False` is the only option read
# by `create_climate_data`: it selects the branch that filters the input file
# by year instead of repeating its first row.
# NOTE(review): the other options are interpreted by `create_modeling_options`,
# which is not shown in this notebook.
modeling_options_dict = create_modeling_options(
    time_step_for_evapo=2,
    reset_swc=True,
    avoid_water_soil_transfer=True,
    constant_climate=False,
    defoliation=True,
    soil_evapo=True,
    threshold_mortality=51,
    etp_formulation="pt",
    rn_formulation="linear",
    comp_options_for_evapo="custom",
    stomatal_reg_formulation="turgor",
    transpiration_model="jarvis",
    numerical_scheme="implicit",
    pedo_transfer_formulation="vg",
)

## Create climate dataset 

In [None]:
# Read the sample climate file, validate it and keep the rows whose year lies
# within the simulated period (1991 here). The call is the last expression of
# the cell, so the returned DataFrame is displayed as the cell output.
create_climate_data(
    modeling_options=modeling_options_dict,
    simulation_parameters=simulation_parameters_dict,
    file_path="./sample_data/climat_example.csv",
    sep=";",
)

365 days were selected in the input climate file, covering the period: 1991 - 1991


Unnamed: 0,DATE,Tair_min,Tair_max,Tair_mean,RG_sum,RHair_min,RHair_max,RHair_mean,WS_mean,PPT_sum,day_of_year,day_of_month,month,year
365,1991-01-01,2.6,7.3,4.95,3.20,95.160421,95.160421,95.160421,3.1,1.2,1,1,1,1991
366,1991-01-02,3.5,9.0,6.25,0.76,90.403122,95.000000,92.701561,5.8,9.8,2,2,1,1991
367,1991-01-03,8.5,10.7,9.60,1.16,92.731052,95.000000,93.865526,4.8,4.8,3,3,1,1991
368,1991-01-04,3.4,9.6,6.50,1.16,96.644009,96.644009,96.644009,2.6,5.0,4,4,1,1991
369,1991-01-05,2.3,6.3,4.30,3.25,96.915145,96.915145,96.915145,5.4,1.0,5,5,1,1991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,1991-12-27,4.1,6.6,5.35,2.23,85.326980,95.000000,90.163490,1.9,0.2,361,27,12,1991
726,1991-12-28,-1.5,5.9,2.20,4.19,96.915145,96.915145,96.915145,1.4,0.0,362,28,12,1991
727,1991-12-29,-2.6,4.0,0.70,4.72,96.915145,96.915145,96.915145,1.4,0.0,363,29,12,1991
728,1991-12-30,2.4,4.4,3.40,1.19,96.915145,96.915145,96.915145,1.8,0.0,364,30,12,1991
