In [None]:
import glob
import os
import time

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import contextily as cx
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib.backends.backend_pdf import PdfPages
from pyproj import Transformer
from shapely.geometry import Point

from climakitae.core.data_interface import (
    get_data_options, 
    get_subsetting_options, 
    get_data
)
# import climakitae as ck


# Visible marker that the import cell ran to completion.
print('done')

In [None]:
def climo_calc(ds, variable, warming_levels, percentiles):
    """Build a monthly climatology, take percentiles across simulations, and save them.

    Steps:
      1. Derive a calendar ``month`` coordinate from ``time_delta`` and average
         over ``time_delta`` within each month.
      2. For ``"Precipitation (total)"`` only, divide each monthly value by the
         number of days in that month.
      3. Compute each requested percentile of the climatology across the
         ``simulation`` dimension.
      4. Write one NetCDF file per (warming level, percentile) pair into the
         current working directory, named
         ``{variable}_GWL{level}_{percentile}th.nc``.

    Parameters
    ----------
    ds : xarray object
        Must expose ``time_delta``, ``simulation`` and ``warming_level``.
        NOTE(review): presumably the DataArray returned by ``get_data`` with
        ``time_delta`` counted in months — confirm against the caller.
    variable : str
        Variable name; used in log messages, in the precipitation special
        case, and as the prefix of the output file names.
    warming_levels : iterable
        Values selected along ``warming_level``, one set of files each.
    percentiles : iterable of numbers
        Percentiles in the 0-100 range; every entry is computed and saved.

    Returns
    -------
    None
        Results are written to disk and progress is printed.
    """
    start_time = time.time()

    # Month assigned to time_delta == 0 (January); other offsets wrap modulo 12.
    base_month = 1

    months = (base_month + ds['time_delta'].values) % 12
    months[months == 0] = 12  # Keep months in the range [1, 12]
    ds = ds.assign_coords(month=('time_delta', months))

    # Group by the new month coordinate and calculate the mean
    print(f"Calculating monthly climatology for {variable}...")
    climatology_start_time = time.time()
    climatology = ds.groupby('month').mean(dim='time_delta', skipna=True)

    if variable == "Precipitation (total)":
        # Non-leap-year month lengths, January..December.
        days_in_month = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])

        # Attach the climatology's own month labels so the division lines up
        # by month value rather than by array position.
        month_labels = climatology['month'].values
        days = xr.DataArray(
            days_in_month[month_labels.astype(int) - 1],
            dims=["month"],
            coords={"month": month_labels},
        )

        # Convert monthly total precipitation to daily average within each month
        climatology = climatology / days
        climatology.attrs['extended_description'] = 'average daily precip'

    climatology.load()
    print(climatology)

    print(f"Monthly climatology for {variable} calculated in {time.time() - climatology_start_time:.2f} seconds.")

    # Percentiles of the climatology across simulations, keyed by percentile.
    print(f"Calculating percentiles for climo {variable}...")
    percentiles_start_time = time.time()
    percentile_values = {
        p: climatology.quantile(p / 100.0, dim='simulation', skipna=True)
        for p in percentiles
    }
    print(f"Percentiles for {variable} calculated in {time.time() - percentiles_start_time:.2f} seconds.")

    print(f"Saving NetCDF files for {variable}...")
    netcdf_saving_start_time = time.time()
    for level in warming_levels:
        for p in percentiles:
            percentile_values[p].sel(warming_level=level).to_netcdf(
                f'{variable}_GWL{level}_{p}th.nc'
            )
    print(f"NetCDF files for {variable} saved in {time.time() - netcdf_saving_start_time:.2f} seconds.")

    print(f"Finished processing {variable} in {time.time() - start_time:.2f} seconds.")
    

def process_variable(variable_unit):
    """Retrieve one variable and pass it to ``climo_calc``.

    ``variable_unit`` is a 5-item sequence:
    ``(variable, unit, downscaling method, timescale, warming levels)``.
    Data is requested at 3 km resolution for the "Southern California Edison"
    cached area using the "Warming Level" approach with a window of 15, then
    summarised at the 10th, 50th and 90th percentiles.
    """
    variable, unit, downscale, timescale, GWL = variable_unit
    print(f"Processing variable: {variable}")

    request = dict(
        variable=variable,
        units=unit,
        downscaling_method=downscale,
        resolution="3 km",
        timescale=timescale,
        cached_area="Southern California Edison",
        approach="Warming Level",
        warming_level_window=15,
        warming_level=GWL,
    )

    fetch_began = time.time()
    data = get_data(**request)
    fetch_seconds = time.time() - fetch_began
    print(f"Data retrieved for {variable} in {fetch_seconds:.2f} seconds.")

    climo_calc(data, variable, GWL, [10, 50, 90])

def get_multiple_data(variables_units):
    """Run ``process_variable`` once per entry of ``variables_units``, in order."""
    for request_spec in variables_units:
        process_variable(request_spec)

# (variable name, output units) for every variable to process.
_variable_unit_pairs = (
    ("Precipitation (total)", "inches"),
    ("Maximum air temperature at 2m", "degF"),
    ("Minimum air temperature at 2m", "degF"),
)

# Every variable shares the same downscaling method, timescale and warming
# levels; each tuple gets its own fresh warming-level list.
variables_units = [
    (name, unit, "Statistical", "monthly", [1.0, 1.5, 2.0])
    for name, unit in _variable_unit_pairs
]

get_multiple_data(variables_units)

In [None]:
# Collect every NetCDF file sitting in the current working directory.
_dir = os.getcwd()
print(_dir)
# glob returns matches in arbitrary order, so sort to keep downstream results
# reproducible between runs. glob.escape keeps special characters in the
# directory path (e.g. '[' or '*') from being treated as wildcards.
_loc_files = sorted(glob.glob(os.path.join(glob.escape(_dir), '*.nc')))
print(_loc_files)

In [None]:
# Rules for collapsing the month dimension, tried in order. Each rule is
# (substring looked for in the stored variable name,
#  name of the Dataset reduction applied over 'month',
#  prefix of the resulting column name).
# NOTE(review): the first rule matches xarray's placeholder name for unnamed
# arrays; presumably that is how the precipitation files are stored — confirm.
_MONTH_REDUCTIONS = (
    ('xarray_dataarray_variable', 'mean', 'Avg. Daily Precip(in)'),
    ('Minimum air temperature', 'min', 'Min air temp(DegF)'),
    ('Maximum air temperature', 'max', 'Max air temp(DegF)'),
)


def _reduce_over_months(file_path):
    """Open one NetCDF file and collapse its ``month`` dimension.

    The first data variable's name decides which rule of ``_MONTH_REDUCTIONS``
    applies. That variable is renamed to
    ``{prefix}GWL{warming_level}_{percent}%`` and the scalar ``quantile`` and
    ``warming_level`` coordinates are dropped.

    Returns
    -------
    (str, xarray.Dataset) or (None, None)
        The matched rule key and the reduced, in-memory dataset, or
        ``(None, None)`` when no rule matches the file.
    """
    # The context manager closes the file handle even if a step below fails.
    with xr.open_dataset(file_path) as ds:
        variable_name = list(ds.data_vars.keys())[0]
        for name_key, reducer, label in _MONTH_REDUCTIONS:
            if name_key not in variable_name:
                continue
            reduced = getattr(ds, reducer)(dim='month', skipna=True)
            warming_level = ds['warming_level'].values
            # Round away float noise (0.1 * 100 == 10.000000000000002) so the
            # column name reads "10.0%".
            percent = round(float(ds['quantile'].values) * 100, 6)
            column_name = f"{label}GWL{warming_level}_{percent}%"
            reduced = reduced.rename({variable_name: column_name})
            reduced = reduced.drop_vars(['quantile', 'warming_level'])
            # Make sure the values are in memory before the file is closed.
            return name_key, reduced.load()
    return None, None


# One list of reduced datasets per rule.
_reduced_by_key = {name_key: [] for name_key, _, _ in _MONTH_REDUCTIONS}
for file_path in _loc_files:
    name_key, reduced = _reduce_over_months(file_path)
    if name_key is not None:
        _reduced_by_key[name_key].append(reduced)

# Merge every warming level / percentile into one dataset per variable, then
# flatten to a table and drop rows that contain missing values.
precipitation_data = xr.merge(_reduced_by_key['xarray_dataarray_variable'])
precipitation_data_df = precipitation_data.to_dataframe().reset_index().dropna()

min_temp_data = xr.merge(_reduced_by_key['Minimum air temperature'])
min_temp_data_df = min_temp_data.to_dataframe().reset_index().dropna()

max_temp_data = xr.merge(_reduced_by_key['Maximum air temperature'])
max_temp_data_df = max_temp_data.to_dataframe().reset_index().dropna()



In [None]:
# Read the CSV file with subset of locations
locations_df = pd.read_csv('~/cae-notebooks/2026_CRE_Asset_Locs_modelLocs.csv')

# Columns of locations_df holding the grid-cell coordinates of each asset.
_LOCATION_KEYS = ['model lat', 'model lon']


def _subset_to_locations(data_df, label):
    """Inner-join ``data_df`` onto ``locations_df`` by grid-cell coordinates.

    Rows are matched where ``data_df['lat'/'lon']`` equal
    ``locations_df['model lat'/'model lon']`` exactly. Because an inner join
    drops unmatched locations without notice, a warning is printed when some
    distinct locations have no row in the result.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Table with ``lat`` and ``lon`` columns.
    label : str
        Name used in the warning message.

    Returns
    -------
    pandas.DataFrame
        The merged table.
    """
    subset = data_df.merge(
        locations_df, left_on=['lat', 'lon'], right_on=_LOCATION_KEYS
    )
    wanted = locations_df[_LOCATION_KEYS].dropna().drop_duplicates()
    found = subset[_LOCATION_KEYS].drop_duplicates()
    n_missing = len(wanted) - len(found)
    if n_missing > 0:
        print(
            f"WARNING: {n_missing} of {len(wanted)} distinct locations have no "
            f"matching {label} row (exact lat/lon match required)."
        )
    return subset


# Subset the data based on 'model lat' and 'model lon'
subset_precipitation_df = _subset_to_locations(precipitation_data_df, 'precipitation')
subset_min_temp_df = _subset_to_locations(min_temp_data_df, 'minimum temperature')
subset_max_temp_df = _subset_to_locations(max_temp_data_df, 'maximum temperature')

subset_precipitation_df.to_csv('2026CAVA_CREAssets_Precip.csv')
subset_min_temp_df.to_csv('2026CAVA_CREAssets_MinTemp.csv')
subset_max_temp_df.to_csv('2026CAVA_CREAssets_MaxTemp.csv')