In [1]:
# | default_exp create_climate_data

In [2]:
# | hide
from nbdev.showdoc import *
from fastcore.test import *
from fastcore.utils import *

In [3]:
# | export
import pandas as pd
from pathlib import Path
from typing import Dict
import os
import pandera as pa
from pandera.typing import DataFrame, Series
import numpy as np

from sureau_ecos_py.create_modeling_options import create_modeling_options
from sureau_ecos_py.create_simulation_parameters import create_simulation_parameters

In [4]:
# | export


# This class is created for validating the input climate dataframe
# If the data don't follow the structure in the class the function will fail
class ClimateDataValidation(pa.SchemaModel):
    """Schema for validating the climate data"""

    DATE: Series[np.datetime64] = pa.Field(description="date with format 1991/12/23")
    Tair_min: Series[float] = pa.Field(
        description="minimum air temperature of the day (degC)"
    )
    Tair_max: Series[float] = pa.Field(
        description=" maximum air temperature of the day (degC)"
    )
    Tair_mean: Series[float] = pa.Field(
        description="mean air temperature of the day (degC)"
    )
    RG_sum: Series[float] = pa.Field(ge=0, description="global radiation (MJ/m2)")

    PPT_sum: Series[float] = pa.Field(ge=0, description="precipitation (mm)")

    RHair_min: Series[float] = pa.Field(
        ge=0, le=100, description="minimum relative humidity of the day (%)"
    )
    RHair_max: Series[float] = pa.Field(
        ge=0,
        le=100,
        description="maximum relative humidity of the day (%)",
        coerce=True,
    )
    RHair_mean: Series[float] = pa.Field(
        ge=0, le=100, description="mean relative humidity of the day (%)"
    )
    WS_mean: Series[float] = pa.Field(
        ge=0, description="mean wind speed of the day (m/s)"
    )


@pa.check_types(lazy=True)
def create_climate_data(
    simulation_parameters: Dict,  # Dictionary created using the `create_simulation_parameters` function
    modeling_options: Dict,  # Dictionary created using the `create_modeling_options` function
    file_path: Path,  # Path to the input CSV climate file. i.e. path/to/file/climate.csv
    sep: str = ";",  # CSV file separator can be "," or ";"

) -> DataFrame[ClimateDataValidation]:
    """
    Create a climate data.frame to run SureauR.
    Read input climate data select the desired period and put it in the right format to run `run.SurEauR`
    Also check data consistency and input variables according to modeling options
    (see \code{create.modeling.options} and simulation parameters (see \code{create.simulation.parameters)
    """

    # Make sure that simulation_parameters and modeling_options are dictionaries -----------------------------
    assert isinstance(
        simulation_parameters, Dict
    ), f'simulation_parameters must be a dictionary not a {type(simulation_parameters)}'

    assert isinstance(
        modeling_options, Dict
    ), f'modeling_options must be a dictionary not a {type(modeling_options)}'


    # Read file if it exists and climateData not provided, error otherwise ------------------------------------

    if os.path.exists(file_path):
        try:
            climate_data = pd.read_csv(
                file_path, sep=sep, header=0, parse_dates=["DATE"], dayfirst=True
            )

        except pa.errors.SchemaErrors as err:
            print(err)
    else:
        print(f"file: {file_path}, does not exist, check presence or spelling")


    # Create climate data based on constant_climate parameter ------------------------------------------------

    if modeling_options['constant_climate'] is False:

        # Break DATE into year,month, day_of_year, day_of_month columns

        # Create function for extracting the day of the year (from 0 to 365)
        def get_day_of_year(date):
            return pd.Period(date, freq="H").day_of_year

        # Map function over each row and create new column
        climate_data["day_of_year"] = pd.DataFrame(
            map(get_day_of_year, climate_data["DATE"])
            )

        # Get the day of the month (from 1 to 31)
        climate_data["day_of_month"] = pd.DatetimeIndex(climate_data["DATE"]).day

        # Get the month (from 1 to 12)
        climate_data["month"] = pd.DatetimeIndex(climate_data["DATE"]).month

        # Get the year
        climate_data["year"] = pd.DatetimeIndex(climate_data["DATE"]).year

        # Filter data based on start_year_simulation and end_year_simulation parameters
        # specified in similation_parameters dictionary
        climate_data =  climate_data.loc[(climate_data['year'] >= simulation_parameters["start_year_simulation"]) &
                                (climate_data['year'] <= simulation_parameters["end_year_simulation"])]

        print(f'{climate_data.shape[0]} days were selected in the input climate file, covering the period: {climate_data["year"].min()} - {climate_data["year"].max()}')

        return climate_data


    if modeling_options['constant_climate'] is True:

        # Use a List Comprehension to create a sequence of dates with the format Day/Month/Year
        date_ref = [each_date.strftime('%d-%m-%Y') for each_date in pd.date_range(start= f'01/01/{simulation_parameters["start_year_simulation"]}',
                                                                                  end = f'31/12/{simulation_parameters["end_year_simulation"]}',
                                                                                  freq= 'D')]


        # Get the first row of the climate_data
        constant_climate_data = climate_data.loc[:0]

        # Repeat it based on the lenght of date_ref. This is done for creating a constant climate
        constant_climate_data = constant_climate_data.loc[constant_climate_data.index.repeat(len(date_ref))]

        # Substitute the old dates with the new ones
        constant_climate_data.DATE = pd.to_datetime(date_ref, format='%d-%m-%Y')

        # Break DATE into year,month, day_of_year, day_of_month columns -----------------------------------------------

        # Create function for extracting the day of the year (from 0 to 365)
        def get_day_of_year(date):
            return pd.Period(date, freq="H").day_of_year

        # Map function over each row and create new column
        constant_climate_data["day_of_year"] = pd.DataFrame(
            map(get_day_of_year, constant_climate_data["DATE"])
            )

        # Get the day of the month (from 1 to 31)
        constant_climate_data["day_of_month"] = pd.DatetimeIndex(constant_climate_data["DATE"]).day

        # Get the month (from 1 to 12)
        constant_climate_data["month"] = pd.DatetimeIndex(constant_climate_data["DATE"]).month

        # Get the year
        constant_climate_data["year"] = pd.DatetimeIndex(constant_climate_data["DATE"]).year

        print(f'{constant_climate_data.shape[0]} days of the period: {constant_climate_data["year"].min()} - {constant_climate_data["year"].max()} have the same climatic conditions' )

        return constant_climate_data



In [5]:
simulation_parameters_dict = create_simulation_parameters(
    main_dir="./test_folder",
    start_year_simulation=1991,
    end_year_simulation=1991,
    output_type="a",
    output_path="./test_folder",
    overwrite=True,
    resolution_output="subdaily",
)

In [6]:
modeling_options_dict = create_modeling_options(
    time_step_for_evapo=2,
    reset_swc=True,
    avoid_water_soil_transfer=True,
    constant_climate=True,
    defoliation=True,
    soil_evapo=True,
    threshold_mortality=51,
    etp_formulation="pt",
    rn_formulation="linear",
    comp_options_for_evapo="custom",
    stomatal_reg_formulation="turgor",
    transpiration_model="jarvis",
    numerical_scheme="implicit",
    pedo_transfer_formulation="vg",
)

In [7]:
create_climate_data(modeling_options=modeling_options_dict,
                    simulation_parameters=simulation_parameters_dict,
                    file_path="./test_folder/climat_example.csv",
                    sep=";")


365 days of the period: 1991 - 1991 have the same climatic conditions


Unnamed: 0,DATE,Tair_min,Tair_max,Tair_mean,RG_sum,RHair_min,RHair_max,RHair_mean,WS_mean,PPT_sum
0,1990-01-01,-4.4,-2.1,-3.25,1.95,96.915145,96.915145,96.915145,0.3,0.0
1,1990-01-02,-3.9,0.0,-1.95,3.22,96.915145,96.915145,96.915145,0.4,0.0
2,1990-01-03,-1.9,0.5,-0.70,1.03,96.915145,96.915145,96.915145,1.5,0.0
3,1990-01-04,-2.0,-0.4,-1.20,1.13,96.915145,96.915145,96.915145,1.2,0.0
4,1990-01-05,-0.7,2.8,1.05,1.14,96.537691,96.537691,96.537691,1.4,0.4
...,...,...,...,...,...,...,...,...,...,...
1091,1992-12-27,-5.8,0.4,-2.70,4.79,60.000000,93.000000,76.500000,2.4,0.0
1092,1992-12-28,-4.5,0.8,-1.85,5.08,49.000000,84.000000,66.500000,3.0,0.0
1093,1992-12-29,-6.3,0.3,-3.00,4.75,47.000000,77.000000,62.000000,2.7,0.0
1094,1992-12-30,-8.4,-1.1,-4.75,4.51,56.000000,91.000000,73.500000,1.1,0.0
