In [1]:
import xarray as xr
import os
import pandas as pd
from pathlib import Path

# from src.utils import compute_A

In [2]:
import numpy as np

def compute_A(ds, var, lat, lon, spatial_window_size, reduce='max'):
    # for longitudes take the value modulo 360
    if lon < 0:
        lon = lon % 360
    # get the index of the lat and lon
    lat_idx = np.abs(ds.lat.values - lat).argmin()
    lon_idx = np.abs(ds.lon.values - lon).argmin()

    # compute on observable A being the average temperature in the window over the last the temporal_window_size days
    A = ds.isel(lat=slice(lat_idx-spatial_window_size, lat_idx+spatial_window_size), 
                           lon=slice(lon_idx-spatial_window_size, lon_idx+spatial_window_size))
    if reduce == 'mean':
        A = A.resample(time='D').mean()
    elif reduce == 'max':
        A = A.resample(time='D').max()
    elif reduce == 'min':
        A = A.resample(time='D').min()
    elif reduce == 'None':
        pass
    else:
        raise ValueError("reduce must be either 'mean', 'max', 'min', or 'None'")
    A = A[var].mean(dim=['lat', 'lon'])# check window_size *4 or not #.rolling(time=temporal_window_size)
    return A

In [3]:
# Combine all files in path 
# select var 
var = 'tas'
sim_number = 1
path = f"/glade/derecho/scratch/awikner/PLASIM/data/2000_year_sims_new/sim{sim_number}/{var}/"
# select lat, lon and window size
spatial_window_size = 2
# temporal_window_size = 7
reduce = 'max'
# Select lat and lon of Chicago
lat = float(34)
lon = float(109) % 360 # for longitudes take the value modulo 360

# Chicago - lat = 41, lon = -88
# China - lat=34.00, lon=109.00
# India - lat=27, lon=72
# NW US - lat=45, lon=-120
# Saudi - lat=25, lon=47
# Europe - lat=48, lon=2


files = [f for f in os.listdir(path) if f.endswith('gaussian.nc')]
# files = files[:]
# Remove spin-off data: take file only if year >10
files = [f for f in files if int(f.split('_')[0])>2090]
# # order the files by year
files1 = sorted(files, key=lambda x: int(x.split('_')[0]))
files2 = sorted(files, key=lambda x: int(x.split('_')[0]))

# disable the printing of the warning
# # combine all files using compute_A as a preprocessing function 
A = xr.open_mfdataset([path+file for file in files1], preprocess=lambda ds: compute_A(ds, var, lat, lon, spatial_window_size, reduce), combine='nested',
                       concat_dim='time', parallel=False, decode_times=True, use_cftime=True)

A_df = A.to_dataframe()
# save the dataframe

# Make folder to save the data
SAVE_DIR = f'data/ground_truth/sim{sim_number}/'
try:
    os.mkdir(SAVE_DIR)
except OSError as error:
    print(error) 

# save the dataframe
file_name = f'{var}_lat.{int(lat)}_lon.{int(lon)}_spatial.{spatial_window_size}_reduce.{reduce}.csv'
if os.path.exists(SAVE_DIR + file_name):
    os.remove(SAVE_DIR + file_name)
A_df.to_csv(file_name)

[Errno 17] File exists: 'data/ground_truth/sim1/'
