In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd

import glob
from mpl_toolkits.axes_grid1 import make_axes_locatable
import time

import preseason.tools as sf
import preseason.onset_demise as od
import preseason.plotting as pp


In [None]:
import psutil



# Total memory of the machine as reported by psutil.
total_memory = psutil.virtual_memory().total
# NOTE(review): memory_limit is not passed to anything in the cells visible here
# (the LocalCluster in the next cell is created without it) — confirm whether it should be.
memory_limit = int(total_memory * 0.15)  # Use 15% of total RAM, adjust as needed

In [None]:
### Allows us to use dask to speed up some calculations ###
from dask.distributed import Client, LocalCluster
# Start a local cluster with 4 workers and connect a client to it.
# NOTE(review): the memory_limit computed in the previous cell is not passed to
# LocalCluster — confirm whether a limit was intended.
cluster = LocalCluster(n_workers=4)
client = Client(cluster)

In [None]:
# Show the client as the cell output to confirm the cluster connection.
client

In [None]:
# NOTE(review): moisture_data_files is only assigned in a later cell (the q-loading
# cell), so this raises NameError on a fresh Restart & Run All.
moisture_data_files[1:500]

In [4]:
### Bounding box used to cut the Peru region out of the global data.
# Core box is 0..-20 in latitude and 277..293 in longitude; each side is padded
# by the radii below (all values in degrees).
lat_radius = 40  # padding added north and south of the core box
lon_radius = 50  # padding added west and east of the core box
# The latitude slice runs from the larger bound to the smaller one; presumably
# the latitude coordinate is stored in descending order — TODO confirm.
PERU_center = dict(
    lat=slice(lat_radius, -(20 + lat_radius)),
    lon=slice(277 - lon_radius, 293 + lon_radius),
)


In [None]:
# Number of matched moisture files.
# NOTE(review): moisture_data_files is only assigned in a later cell, so this
# depends on out-of-order execution.
len(moisture_data_files)

In [None]:
import xarray as xr
import os

def check_lon_monotonic(file_path):
    """Report whether the longitude coordinate of a netCDF file is non-monotonic.

    Despite the name, the return value flags the *problem* case, which is what
    the scanning loop that calls this function relies on.

    Parameters
    ----------
    file_path : str
        Path of the netCDF file, opened with ``xr.open_dataset``.

    Returns
    -------
    bool
        ``True`` if a ``lon`` or ``longitude`` dimension exists and its values
        are neither entirely non-decreasing nor entirely non-increasing.
        ``False`` in every other case: monotonic longitude, no longitude
        dimension (a warning is printed), or the file could not be processed
        (the error is printed, not raised).
    """
    try:
        # The context manager closes the file on every exit path. A bare
        # ``finally: ds.close()`` would raise UnboundLocalError whenever
        # ``open_dataset`` itself fails, because ``ds`` is never assigned then.
        with xr.open_dataset(file_path) as ds:
            lon_dim = next(
                (name for name in ('lon', 'longitude') if name in ds.dims),
                None,
            )
            if lon_dim is None:
                print(f"Warning: No longitude dimension found in {file_path}")
                return False

            # Consecutive differences: monotonic means they never change sign.
            # Ties are allowed, matching a pairwise <= / >= comparison.
            steps = np.diff(ds[lon_dim].values)
            is_monotonic = bool((steps >= 0).all() or (steps <= 0).all())

        if not is_monotonic:
            print(f"Non-monotonic longitude found in {file_path}")
            return True

    except Exception as e:
        # Best-effort scan: report the unreadable file and let the caller go on.
        print(f"Error processing {file_path}: {e}")

    return False

# Scan every ``.nc`` file directly inside the moisture data directory and keep
# the paths that check_lon_monotonic flags.
# NOTE(review): moisture_data_dir is only assigned in a later cell (the q-loading
# cell), so this cell depends on out-of-order execution.
netcdf_dir = moisture_data_dir

# Lazily build the candidate paths in os.listdir order.
nc_paths = (
    os.path.join(netcdf_dir, entry)
    for entry in os.listdir(netcdf_dir)
    if entry.endswith('.nc')
)
non_monotonic_files = [path for path in nc_paths if check_lon_monotonic(path)]

# Report the flagged files, one per line, followed by the count.
print("\nFiles with non-monotonic longitude:")
for flagged in non_monotonic_files:
    print(flagged)

print(f"\nTotal files with non-monotonic longitude: {len(non_monotonic_files)}")

In [None]:
# Display the first 850 file paths.
# NOTE(review): moisture_data_files is only assigned in a later cell; this cell
# relies on that cell having been run first.
moisture_data_files[0:850]

In [None]:
# Open files 1..849 of the list as one dataset and display it; the result is not stored.
# NOTE(review): moisture_data_files is only assigned in a later cell.
xr.open_mfdataset(moisture_data_files[1:850],parallel=True, chunks={'latitude': 25, 'longitude': 25, 'time': -1}, )

In [None]:
# Open the single file at index 700 and display it; the result is not stored.
# NOTE(review): moisture_data_files is only assigned in a later cell.
xr.open_dataset(moisture_data_files[700],chunks={'latitude': 25, 'longitude': 25, 'time': -1})

In [None]:
# Load the moisture variable ``q`` at level=500 over the Peru box.
moisture_data_dir = '/data/deluge/reanalysis/REANALYSIS/ERA5/3D/4xdaily/q/'
# glob returns matches in arbitrary order; sort so that the [1:850] slice below
# selects the same files on every run and on every machine.
moisture_data_files = sorted(glob.glob(moisture_data_dir+'q.[12]*'))

# NOTE(review): the slice starts at index 1, so the first file is left out —
# confirm that this is intentional.
ds_q = xr.open_mfdataset(
    moisture_data_files[1:850],
    parallel=True,
    chunks={'latitude': 25, 'longitude': 25, 'time': -1},
)

# Select lazily first, then load only the selected subset into memory.
q_data = ds_q['q'].sel(level=500, latitude=PERU_center['lat'], longitude=PERU_center['lon'])
q_data = q_data.compute()

In [None]:
# Reduce q_data to daily means along time and load the result.
# NOTE(review): this overwrites q_data, so the values loaded by the previous cell
# are no longer available afterwards.
q_data = q_data.resample(time='1d').mean(dim='time').compute()

In [None]:
# Write q_data to disk.
# NOTE(review): the reload cell further down opens '~/data/q.4x.nc', not this
# 'q.1x.nc' — confirm which file is intended.
q_data.to_netcdf('~/data/q.1x.nc')

In [None]:
# Load ``uwnd`` at level=500 over the Peru box as daily means.
uwind_data_dir = '/data/deluge/reanalysis/REANALYSIS/ERA5/3D/4xdaily/uwnd/'
uwnd_files = glob.glob(uwind_data_dir + 'uwnd.[12]*')

# NOTE(review): decode_cf=False is passed here but not in the q-loading cell;
# presumably the time coordinate then stays undecoded, which would affect the
# resample(time=...) step below — confirm.
ds_uwnd = xr.open_mfdataset(
    uwnd_files,
    parallel=True,
    chunks={'latitude': 25, 'longitude': 25, 'time': -1},
    decode_cf=False,
)

# Select, reduce to daily means, then load — one pipeline with a single final load.
uwnd_data = (
    ds_uwnd['uwnd']
    .sel(level=500, latitude=PERU_center['lat'], longitude=PERU_center['lon'])
    .resample(time='1d')
    .mean(dim='time')
    .compute()
)



In [None]:
# Display the full list of matched uwnd file paths.
uwnd_files

In [None]:
# Write uwnd_data to disk.
# NOTE(review): uwnd_data was resampled with time='1d' in the loading cell, yet
# the file name says '4x' — confirm the naming.
uwnd_data.to_netcdf('~/data/uwnd_4x.nc')

In [None]:
# Load ``vwnd`` at level=500 over the Peru box (no temporal resampling here).
vwind_data_dir = '/data/deluge/reanalysis/REANALYSIS/ERA5/3D/4xdaily/vwnd/'
vwnd_files = glob.glob(vwind_data_dir + 'vwnd.[12]*')

# NOTE(review): decode_cf=False is passed here but not in the q-loading cell —
# confirm that the undecoded values are what the later cells expect.
ds_vwnd = xr.open_mfdataset(
    vwnd_files,
    parallel=True,
    chunks={'latitude': 25, 'longitude': 25, 'time': -1},
    decode_cf=False,
)

# Select lazily, then load only the selected subset into memory.
vwnd_data = (
    ds_vwnd['vwnd']
    .sel(level=500, latitude=PERU_center['lat'], longitude=PERU_center['lon'])
    .compute()
)

In [None]:
# Write the loaded vwnd subset to disk.
vwnd_data.to_netcdf('~/data/vwnd_4x.nc')

In [None]:
# Reopen the cached subsets as datasets, with the whole time axis in one chunk.
# These rebind q_data / uwnd_data / vwnd_data from the arrays of the earlier
# cells to Dataset objects.
# NOTE(review): '~/data/q.4x.nc' is not written by any cell visible here (the q
# cell above writes 'q.1x.nc') — confirm where this file comes from.
q_data = xr.open_dataset('~/data/q.4x.nc', chunks={'time': -1})#.resample(time='1d').mean().compute()
uwnd_data = xr.open_dataset('~/data/uwnd_4x.nc', chunks={'time': -1})#.resample(time='1d').mean().compute()
vwnd_data = xr.open_dataset('~/data/vwnd_4x.nc', chunks={'time': -1})#.resample(time='1d').mean().compute()



In [None]:
# Restrict both wind datasets to the timestamps present in q_data.
# NOTE(review): no `method=` is given, so presumably every q timestamp must exist
# exactly in the wind data — confirm the three files share a time axis.
uwnd_data = uwnd_data.sel(time=q_data['time'])
vwnd_data = vwnd_data.sel(time=q_data['time'])

In [None]:
# Display the time-aligned uwnd dataset.
uwnd_data

In [None]:
# Daily mean of q, loaded into memory.
# NOTE(review): `test` is not referenced again in the cells visible here.
test = q_data['q'].resample(time='1d').mean(dim='time').compute()

In [None]:
# Extract variables
# Pull the data variable out of each of the three reopened datasets.
q = q_data['q']
u = uwnd_data['uwnd']
v = vwnd_data['vwnd']

# Calculate moisture fluxes
# Elementwise products of q with each wind component.
qu = q * u
qv = q * v



In [None]:
# Evaluate qv and display it; the computed result is not assigned to a name.
qv.compute()

In [None]:
# Horizontal divergence of the moisture flux (qu, qv) on a sphere:
#     div = 1 / (a * cos(lat)) * [ d(qu)/d(lon) + d(qv * cos(lat))/d(lat) ]
# with lon/lat in radians and a the Earth's radius.
# ``differentiate`` works in the units of the coordinate values, so
# differentiating the degree-valued coordinates directly yields per-degree
# derivatives and ignores the convergence of the meridians.
# Assumes latitude/longitude hold degrees (as the PERU_center selection implies)
# — TODO confirm. The result is then in flux units per metre.
EARTH_RADIUS_M = 6.371e6
DEG_PER_RAD = 180.0 / np.pi

cos_lat = np.cos(np.deg2rad(qu['latitude']))
# Converts a per-degree derivative into a per-metre one at each latitude.
metric = DEG_PER_RAD / (EARTH_RADIUS_M * cos_lat)

# Calculate gradients
dqu_dx = qu.differentiate('longitude') * metric
dqv_dy = (qv * cos_lat).differentiate('latitude') * metric

# Calculate moisture flux divergence
moisture_flux_divergence = dqu_dx + dqv_dy

# Interpret results
# Negative divergence is kept as convergence, positive as divergence; the
# complementary points are set to 0 in each field.
moisture_convergence = xr.where(moisture_flux_divergence < 0, moisture_flux_divergence, 0)
moisture_divergence = xr.where(moisture_flux_divergence > 0, moisture_flux_divergence, 0)

In [None]:
# Map of the early-demise composite on a symmetric +/-50 colour scale.
# NOTE(review): composites_early_demise is not defined in any cell visible in
# this section.
pp.plot_spatial_data(
    composites_early_demise,
    vmin=-50,
    vmax=50,
    cmap='RdBu_r',
    var='Geopotential Anomaly',
    title='Early Demise Composite',
)

In [None]:
# Map of the late-demise composite on a symmetric +/-40 colour scale.
# NOTE(review): composites_late_demise is not defined in any cell visible in
# this section.
pp.plot_spatial_data(
    composites_late_demise,
    vmin=-40,
    vmax=40,
    cmap='RdBu_r',
    var='Geopotential Anomaly',
    title='Late Demise Composite',
)

In [None]:


def create_composites(ds, dates, period):
    """Average the window leading up to each date, then average over the dates.

    For every entry of ``dates`` the window runs from ``date - period`` to
    ``date - 1 day``; both ends are handed to a label-based ``slice`` on
    ``time``. Each window is reduced to its time mean, and the per-date means
    are then averaged with equal weight.

    Parameters
    ----------
    ds : xarray object
        Data with a ``time`` dimension; only ``.sel`` and ``.mean`` are used.
    dates : iterable
        Reference dates supporting subtraction of a ``pd.Timedelta``
        (e.g. ``pd.Timestamp`` values).
    period : int, float, str or timedelta-like
        Window length. A plain number is a count of weeks. Anything else is
        passed to ``pd.Timedelta`` unchanged, e.g. ``'14 days'``.

    Returns
    -------
    xarray object
        The mean over all per-date window means, without a ``time`` dimension.

    Raises
    ------
    ValueError
        If ``dates`` is empty.
    """
    import numbers

    # Keyword construction avoids unit strings such as 'w' / 'd'.
    if isinstance(period, numbers.Real):
        lookback = pd.Timedelta(weeks=period)
    else:
        lookback = pd.Timedelta(period)
    one_day = pd.Timedelta(days=1)

    per_date_means = [
        ds.sel(time=slice(date - lookback, date - one_day)).mean(dim='time')
        for date in dates
    ]
    if not per_date_means:
        raise ValueError("create_composites needs at least one date")

    # Stack the per-date means along a new 'time' axis and average it away.
    return xr.concat(per_date_means, dim='time').mean(dim='time')

# Create composites for each time period
#composites_1week = create_composites(ds, dates, period='1w')
#composites_2weeks = create_composites(ds, dates, period='2w')
#composites_3weeks = create_composites(ds, dates, period='3w')
#composites_1month = create_composites(ds, dates, period='1M')

In [None]:
# Display late_dates.
# NOTE(review): late_dates is not defined in any cell visible in this section.
late_dates