# Climate Profiles 2

### Step 0: Set-Up
Import the [climakitae](https://github.com/cal-adapt/climakitae) library and other dependencies.

In [3]:
from typing import Tuple
from typing import Any, Dict

import numpy as np
import pandas as pd
import xarray as xr
from tqdm.auto import tqdm  # Progress bar

import climakitae as ck
from climakitae.explore.standard_year_profile import get_climate_profile, export_profile_to_csv, retrieve_profile_data, _filter_by_ssp, _handle_approach_params
from climakitae.util.warming_levels import get_gwl_at_year
from climakitae.explore.typical_meteorological_year import TMY
from climakitae.core.data_interface import (
    get_data_options,
    get_subsetting_options,
    get_data,
)

import warnings
warnings.filterwarnings("ignore")

# import functions needed for testing
from climakitae.core.constants import UNSET
from climakitae.core.paths import VARIABLE_DESCRIPTIONS_CSV_PATH
from climakitae.explore.typical_meteorological_year import is_HadISD
from climakitae.explore.standard_year_profile import (
    _get_clean_standardyr_filename,
    _check_stations,
    _check_cached_area,
    _check_lat_lon,
    match_str_to_wl,
)
from climakitae.util.utils import read_csv_file
from climakitae.util.warming_levels import get_gwl_at_year

In [2]:
variable = "Air Temperature at 2m"
units = "degF"
qtile = 0.5
warming_levels = [1.5]
no_delta = True

# warming level window size
valid_window = 5
invalid_window_1 = 2
invalid_window_2 = 5.5

# station name options
station_list = [
    "Sacramento Executive Airport (KSAC)",
    "Santa Barbara Municipal Airport (KSBA)",
]
station_name = ["Sacramento Executive Airport (KSAC)"]
custom_name = ["Custom Station Name"]
mixed_list = [
    "Custom Station Name",
    "Santa Barbara Municipal Airport (KSBA)",
]
custom_list = [
    "Custom Name 1",
    "Custom Name 2",
]

# lat lon
# latitude = 34.4041
# longitude = -121.5160 # no data found for these lat and lon bounds!
latitude = 34.19966
longitude = -118.36543  # no data found for these lat and lon bounds!

# cached area
area_name = "Los Angeles County"

### Time-based file modification

Valid inputs to test
- "approach" = "Time" with a "centered_year" between 2015 and 20199
- no "warming_level" input

Invalid inputs to test
1. "approach" = "Time" with a "centered_year" between 2015 and 2099, with "warming_level" input
2. "approach" = "Time" and no "centered_year" provided
3. "centered_year" outside of 2015-2099
4. "approach" that is not "Time" or "Warming Level"

profile_selections = {
    "variable": variable,
    "resolution": "9 km",
    "q": qtile,
    #"warming_level": [1.5],
    "units": units,
    "no_delta": no_delta,
    # approach
    "approach": "Time",
    "centered_year": 2016,
    "time_profile_scenario": "SSP 3-7.0",
    # warming level window
    "warming_level_window": 5,
    # Location options -- uncomment based on your desired location type
    # "stations": station_name,  # uncomment for a weather station
    "latitude": (
        latitude - 0.02,
        latitude + 0.02,
    ),  # uncomment for a using a custom coordinate location
    "longitude": (
        longitude - 0.02,
        longitude + 0.02,
    ),  # uncomment for a custom coordinate location
    # "cached_area": area_name, # uncomment for a cached area
}

results in:

No data found in bounds. Data variable: t2
Skipping spatial subsetting.

In [16]:
profile_selections = {
    "variable": "Precipitation (total)",
    "resolution": "45 km",
    "q": qtile,
    # "warming_level": [1.5],
    "units": units,
    "no_delta": True,
    # approach
    "approach": "Time",
    "centered_year": 2016,
    "time_profile_scenario": "SSP 5-8.5",
    # warming level window
    "warming_level_window": 5,
    # Location options -- uncomment based on your desired location type
    # "stations": station_name,  # uncomment for a weather station
    # "latitude": (
    #     latitude - 0.02,
    #     latitude + 0.02,
    # ),  # uncomment for a using a custom coordinate location
    # "longitude": (
    #     longitude - 0.02,
    #     longitude + 0.02,
    # ),  # uncomment for a custom coordinate location
    "cached_area": area_name,  # uncomment for a cached area
}

In [17]:
profile = get_climate_profile(**profile_selections)

üìä Retrieving climate data...


Data retrieval:   0%|          | 0/1 [00:00<?, ?dataset/s]

You have chosen to produce a time-based Standard Year climate profile centered around 2016 and using scenario SSP 5-8.5. 
Standard year functionality for time-based profiles identifies the closest warming level at that centered year for either 
the input SSP scenario or default 'SSP 3-7.0' if no scenario input is provided. 
The corresponding global warming level for input centered year 2016 will now be determined and used to produce the profile.

Corresponding warming level for 'centered_year'=2016 is [1.14]. 
Now producing the Standard Year climate profile at this warming level.
Using default 'q': 0.5
‚öôÔ∏è  Computing climate profiles...
      üìä Processing 87,600 hours (10 years) of data
      üéØ Computing 50th percentile for each hour of year
      ‚öôÔ∏è Computing quantiles for 1 warming level(s) and 1 simulation(s)


      Computing profiles:   0%|          | 0/1 [00:00<?, ?combo/s]

      ‚úÖ Profile computation complete! Final shape: (365, 24)
         With index: Day of Year, columns: ['Hour', 'Simulation']
         Units: degF
   ‚úì No baseline subtraction requested, returning raw future profile


In [18]:
profile

Hour,1,2,3,4,5,6,7,8,9,10,...,15,16,17,18,19,20,21,22,23,24
Simulation,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,...,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585,CESM2-r11i1p1f1-ssp585
Day of Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Jan-01,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,3.793982e-07,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Jan-02,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Jan-03,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Jan-04,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000002,0.0,0.0,0.0,0.0,0.0
Jan-05,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Dec-27,0.0,0.000002,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Dec-28,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Dec-29,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
Dec-30,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [14]:
export_profile_to_csv(profile, **profile_selections)

How...I am so confused. Do SSPs correspond to certain models?

In [4]:
def retrieve_profile_data(**kwargs: Any) -> Tuple[xr.Dataset, xr.Dataset]:
    """
    Backend function for retrieving data needed for computing climate profiles.

    Reads in the full hourly data for the 8760 analysis, including all warming levels.

    Parameters
    ----------
    **kwargs : dict
        Keyword arguments for data selection. Allowed keys:
        - variable (Optional) : str, default "Air Temperature at 2m"
        - resolution (Optional) : str, default "3 km"
        - approach (Optional) : str, "Warming Level" or "Time"
        - centered (Optional) : int
        - warming_levels (Optional) : List[float], default [1.2]
        - warming_level_window (Optional): int in range [5,25]
        - cached_area (Optional) : str or List[str]
        - latitude (Optional) : float or tuple
        - longitude (Optional) : float or tuple
        - stations (Optional) : list[str], default None
        - units (Optional) : str, default "degF"
        - no_delta (optional) : bool, default False, if True, do not retrieve historical data, return raw future profile

    Returns
    -------
    Tuple[xr.Dataset, xr.Dataset]
        (historic_data, future_data, get_data_params) - Historical data at 1.2¬∞C warming,
        and future data at specified warming levels.
    Raises
    ------
    ValueError
        If invalid parameter keys are provided.

    Example
    -------
    >>> historic_data, future_data = retrieve_profile_data(
    ...     variable="Air Temperature at 2m",
    ...     resolution="45 km",
    ...     time_profile_scenario="SSP 2-4.5",
    ...     warming_level=[1.5, 2.0, 3.0],
    ...     units="degF"
    ... )

    >>> historic_data, future_data = retrieve_profile_data(
    ...     warming_level=[2.0]
    ... )

    Notes
    -----
    Historical data is always retrieved for warming level = 1.2¬∞C.
    Future data uses user-specified warming levels or defaults.

    The function prioritizes location parameters in the following order:
    1. cached_area
    2. latitude/longitude
    3. stations
    Each parameter will override the lower-priority ones if provided. So if cached_area
    is given, lat/lon and stations are ignored. If lat/lon are given, stations are
    ignored. If stations are given, they are used only if neither cached_area nor lat/lon
    are provided.

    If no location parameters are provided, a warning is issued about retrieving the
    entire CA dataset.
    """
    no_delta = kwargs.pop("no_delta", False)
    # Define allowed inputs with types and defaults
    # Compute units default separately to avoid runtime evaluation in dictionary
    units_default = (
        "degF"  # Default to degF if user hasn't specified both variable and units
        if kwargs.get("variable", None) is None and kwargs.get("units", None) is None
        else None  # otherwise default to None and let get_data decide
    )

    ALLOWED_INPUTS = {
        "variable": (str, "Air Temperature at 2m"),
        "resolution": (str, "3 km"),
        "approach": (str, "Warming Level"),
        "centered_year": (int, None),
        "time_profile_scenario": (str, "SSP 3-7.0"),
        "warming_level": (list, [1.2]),
        "warming_level_window": (int, None),
        "cached_area": ((str, list), None),
        "latitude": ((float, tuple), None),
        "longitude": ((float, tuple), None),
        "stations": (list, None),
        "units": (str, units_default),
    }

    # if the user does not enter warming level the analysis is a moot point
    # because the historical data is always at 1.2C
    REQUIRED_INPUTS = []
    for req in REQUIRED_INPUTS:
        if req not in kwargs:
            raise ValueError(f"Missing required input: '{req}'")

    # Validate input keys
    invalid_keys = set(kwargs.keys()) - set(ALLOWED_INPUTS.keys())
    if invalid_keys:
        raise ValueError(
            f"Invalid input(s): {list(invalid_keys)}. "
            f"Allowed inputs are: {list(ALLOWED_INPUTS.keys())}"
        )

    # Validate input types
    for key, value in kwargs.items():
        expected_type, _ = ALLOWED_INPUTS[key]
        # Handle union types (tuples of types)
        if isinstance(expected_type, tuple):
            if not isinstance(value, expected_type):
                type_names = [t.__name__ for t in expected_type]
                raise TypeError(
                    f"Parameter '{key}' must be of type {' or '.join(type_names)}, "
                    f"got {type(value).__name__}"
                )
        else:
            if not isinstance(value, expected_type):
                raise TypeError(
                    f"Parameter '{key}' must be of type {expected_type.__name__}, "
                    f"got {type(value).__name__}"
                )
        # check that warming_level_window is between 5 and 25
        if key == "warming_level_window":
            if value not in range(5, 26):
                raise ValueError(
                    f"Parameter '{key}' must be an integer between 5 and 25, "
                    f"got {value}"
                )
        # check that time_profile_scenario is within ["SSP 3-7.0", "SSP 2-4.5","SSP 5-8.5"]
        if key == "time_profile_scenario":
            if value not in ["SSP 3-7.0", "SSP 2-4.5", "SSP 5-8.5"]:
                raise ValueError(
                    f"Parameter '{key}' must be 'SSP 3-7.0', 'SSP 2-4.5', or 'SSP 5-8.5', "
                    f"received {value}."
                )

    # Validate and update approach parameters
    kwargs = _handle_approach_params(**kwargs)

    # Validate location parameters
    # the bahavior will be to use cached_area if provided
    # otherwise use lat/lon if provided
    # otherwise use stations if provided
    location_params = ["cached_area", "latitude", "longitude", "stations"]
    provided_location_params = [
        key for key in location_params if kwargs.get(key) is not None
    ]

    if "cached_area" in provided_location_params:
        # If cached_area is provided, unset lat/lon and stations
        if "latitude" in kwargs or "longitude" in kwargs:
            kwargs.pop("latitude", None)
            kwargs.pop("longitude", None)
            print("   ‚ö†Ô∏è  Note: Using cached_area, ignoring provided latitude/longitude")
        if "stations" in kwargs:
            kwargs.pop("stations", None)
            print("   ‚ö†Ô∏è  Note: Using cached_area, ignoring provided stations")
    elif (
        "latitude" in provided_location_params
        or "longitude" in provided_location_params
    ):
        # If lat/lon provided, unset stations
        if "stations" in kwargs:
            kwargs.pop("stations", None)
            print("   ‚ö†Ô∏è  Note: Using latitude/longitude, ignoring provided stations")
    elif "stations" in provided_location_params:
        # Stations provided - convert to lat/lon with buffer
        stations = kwargs.pop("stations")
        print(
            f"   üìç Converting {len(stations)} station(s) to lat/lon coordinates with ¬±0.02¬∞ buffer"
        )
        try:
            lat_bounds, lon_bounds = _convert_stations_to_lat_lon(stations, buffer=0.02)
            kwargs["latitude"] = lat_bounds
            kwargs["longitude"] = lon_bounds
            print(f"      Latitude range: {lat_bounds[0]:.4f} to {lat_bounds[1]:.4f}")
            print(f"      Longitude range: {lon_bounds[0]:.4f} to {lon_bounds[1]:.4f}")
        except ValueError as e:
            raise ValueError(f"Error converting stations to coordinates: {e}")
    else:
        # No location parameters provided - warn about entire CA dataset
        print(
            "   ‚ö†Ô∏è  WARNING: No location parameters provided (cached_area, latitude/longitude, or stations)"
        )
        print(
            "      The entire California dataset will be retrieved, which may be very large and slow."
        )
        print(
            "      Consider specifying a cached_area, lat/lon bounds, or specific stations for better performance."
        )

    # Set default parameters for data retrieval
    # Note: if stations were provided, they've been converted to lat/lon above
    get_data_params = {
        "variable": kwargs.get("variable", "Air Temperature at 2m"),
        "resolution": kwargs.get("resolution", "3 km"),
        "downscaling_method": "Dynamical",  # must be WRF, cannot be LOCA
        "timescale": "hourly",  # must be hourly for 8760 analysis
        "area_average": "Yes",
        "units": kwargs.get(
            "units",
            (
                "degF"  # Default to degF if user hasn't specified both variable and units
                if kwargs.get("variable", None) is None
                and kwargs.get("units", None) is None
                else None  # otherwise default to None and let get_data decide
            ),
        ),
        "approach": "Warming Level",
        "warming_level": [1.2],  # Historic global warming level
        "warming_level_window": kwargs.get(
            "warming_level_window", 15
        ),  # Use user input warming level window, if provided. Otherwise, default to 15.
        "cached_area": kwargs.get("cached_area", None),
        "latitude": kwargs.get("latitude", None),
        "longitude": kwargs.get("longitude", None),
    }

    historic_data = None
    if not no_delta:
        # Retrieve historical data at 1.2¬∞C warming level
        historic_data = get_data(**get_data_params)

    # Update with any user-provided parameters for future data retrieval
    get_data_params.update(kwargs)
    future_data = get_data(**get_data_params)


    return historic_data, future_data

In [7]:
# retrieve_params = {
#     "variable": variable,
#     "resolution": "3 km",
#     # "q": qtile,
#     #"warming_level": [1.13],
#     "units": units,
#     # "no_delta": False,
#     # # approach
#     "approach": "Time",
#     "centered_year": 2016,
#     "time_profile_scenario": "SSP 3-7.0",
#     # # warming level window
#     "warming_level_window": 5,
#     # # Location options -- uncomment based on your desired location type
#     # "stations": station_name,  # uncomment for a weather station
#     # "latitude": (
#     #     latitude - 0.02,
#     #     latitude + 0.02,
#     # ),  # uncomment for a using a custom coordinate location
#     # "longitude": (
#     #     longitude - 0.02,
#     #     longitude + 0.02,
#     # ),  # uncomment for a custom coordinate location
#     "cached_area": area_name,  # uncomment for a cached area
# }

retrieve_params = {
    'variable': 'Air Temperature at 2m',
    'resolution': '9 km',
    # "warming_level": [1.13],
    'units': 'degF',
    'no_delta': True,
    # approach
    "approach": "Time",
    "centered_year": 2016,
    #'time_profile_scenario': 'SSP 2-4.5',
    # warming level window
    "warming_level_window": 5,
    # Location options -- uncomment based on your desired location type
    # "stations": station_name,  # uncomment for a weather station
    # "latitude": (
    #     latitude - 0.02,
    #     latitude + 0.02,
    # ),  # uncomment for a using a custom coordinate location
    # "longitude": (
    #     longitude - 0.02,
    #     longitude + 0.02,
    # ),  # uncomment for a custom coordinate location
    "cached_area": area_name, # uncomment for a cached area
}

In [8]:
historic_data, future_data = retrieve_profile_data(**retrieve_params)

You have chosen to produce a time-based Standard Year climate profile centered around 2016 and using scenario SSP 3-7.0. 
Standard year functionality for time-based profiles identifies the closest warming level at that centered year for either 
the input SSP scenario or default 'SSP 3-7.0' if no scenario input is provided. 
The corresponding global warming level for input centered year 2016 will now be determined and used to produce the profile.

Corresponding warming level for 'centered_year'=2016 is [1.12]. 
Now producing the Standard Year climate profile at this warming level.


In [18]:
future_data

Unnamed: 0,Array,Chunk
Bytes,3.34 MiB,4.97 kiB
Shape,"(1, 87600, 10)","(1, 1273, 1)"
Dask graph,1490 chunks in 582 graph layers,1490 chunks in 582 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.34 MiB 4.97 kiB Shape (1, 87600, 10) (1, 1273, 1) Dask graph 1490 chunks in 582 graph layers Data type float32 numpy.ndarray",10  87600  1,

Unnamed: 0,Array,Chunk
Bytes,3.34 MiB,4.97 kiB
Shape,"(1, 87600, 10)","(1, 1273, 1)"
Dask graph,1490 chunks in 582 graph layers,1490 chunks in 582 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [23]:
assert isinstance(future_data,xr.DataArray)

In [20]:
filtered = _filter_by_ssp(future_data,scenario="SSP 2-4.5")

In [21]:
filtered

Unnamed: 0,Array,Chunk
Bytes,342.19 kiB,4.97 kiB
Shape,"(1, 87600, 1)","(1, 1273, 1)"
Dask graph,149 chunks in 583 graph layers,149 chunks in 583 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 342.19 kiB 4.97 kiB Shape (1, 87600, 1) (1, 1273, 1) Dask graph 149 chunks in 583 graph layers Data type float32 numpy.ndarray",1  87600  1,

Unnamed: 0,Array,Chunk
Bytes,342.19 kiB,4.97 kiB
Shape,"(1, 87600, 1)","(1, 1273, 1)"
Dask graph,149 chunks in 583 graph layers,149 chunks in 583 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [25]:
# Verify outcome: returns xr.Dataset
assert isinstance(filtered, xr.DataArray), "Should return a pandas DataFrame"

# Verify the result contains only the target simulation
simulations = np.array(["WRF_CESM2_r11i1p1f1_historical+ssp245"])
assert np.array_equal(
    filtered.simulation.values, simulations
), "Bias adjusted models are not equal"