In [1]:
import pandas as pd
import matplotlib.colors as mcolors
import matplotlib.ticker as ticker
import cartopy.crs as ccrs
from scipy.interpolate import griddata
from matplotlib.patches import Rectangle
import cartopy.feature as cfeature
import glob
from datetime import date
import os
import re
from datetime import datetime, timedelta
from haversine import haversine, Unit
import metpy
import metpy.constants as mpconst
import metpy.units as units
from scipy.interpolate import interp1d
from scipy.optimize import bisect
import numpy as np
import pickle
import numpy as np
import math
import pint
import xarray as xr
import netCDF4 as nc
from matplotlib.colors import TwoSlopeNorm
import re
import matplotlib.pyplot as plt
from numpy import load
from numpy import asarray
from numpy import save
import pytz
from scipy import stats
from sklearn.linear_model import LinearRegression
from collections import Counter
import pymannkendall as mk
from pydoc import help
import warnings
import seaborn as sns
%matplotlib inline

# Optional display settings (all currently commented out): show every row /
# column of a DataFrame and print full NumPy arrays.
# pd.set_option('display.max_rows', None)

# pd.set_option('display.max_columns', None)

# np.set_printoptions(threshold=np.inf)
# NOTE(review): with no category/module filter this suppresses every warning
# for the rest of the session, including deprecation and runtime warnings
# raised by the cells below.
warnings.filterwarnings("ignore")

In [27]:
# Restore the pickled `soundings` object from the working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('soundings_fig2&3.pdkl', 'rb') as pickle_file:
    soundings = pickle.load(pickle_file)

In [28]:
# (rows, columns) of the soundings flagged both as APE and as wet-coupled.
wet_ape_mask = soundings['APE'].eq(True) & soundings['wet_coupling'].eq(True)
soundings.loc[wet_ape_mask].shape

(78, 27)

In [4]:
# Directory that is listed below for ".nc" files (ERA5 3-hourly specific
# humidity, judging by the path components).
directory_path = "../../data/rong4/Data/ERA5/3hourly/quvw_US/specific_humidity"

def extract_year_and_month(filename):
    """Pull a (year, month) pair out of a dot-separated file name.

    The name is split on '.', and the first piece that consists of exactly
    six digits is interpreted as YYYYMM.

    Parameters
    ----------
    filename : str
        File name to inspect.

    Returns
    -------
    tuple
        ``(year, month)`` as ints, or ``(None, None)`` when no piece of the
        name is a six-digit number.
    """
    stamp = next(
        (
            token
            for token in filename.split('.')
            if token.isdigit() and len(token) == 6
        ),
        None,
    )

    if stamp is None:
        return None, None

    return int(stamp[:4]), int(stamp[4:])

# Collect the ".nc" files in `directory_path` whose YYYYMM stamp falls in
# May-September of 2001-2018.
files = []

for file_name in os.listdir(directory_path):

    if not file_name.endswith(".nc"):
        continue

    # Parse the stamp once per file instead of once per condition.
    year, month = extract_year_and_month(file_name)

    # extract_year_and_month returns (None, None) when the name carries no
    # six-digit stamp; comparing None with an int would raise TypeError, so
    # such files are skipped instead.
    if year is None or month is None:
        continue

    if 2001 <= year <= 2018 and 5 <= month <= 9:
        files.append(os.path.join(directory_path, file_name))

# Open every selected file, in sorted path order, as an xarray Dataset.
dfs = []

for path in sorted(files):

    # Keep the path and the opened dataset under different names.
    ds = xr.open_dataset(path)

    dfs.append(ds)

In [5]:
# Bounds handed to the `level` slice below.
level_min = 600
level_max = 800

# Hours compared against the hour of the datasets' `time` coordinate.
# NOTE(review): the names suggest 12 and 15 local standard time expressed as
# UTC hours (a 6 h offset is subtracted later when `qdf` is built) - confirm.
_12lst = 18
_15lst = 21

# One (timestamp, 2-D layer-mean q field) tuple per selected time step.
all_q_data = []

for dataset in dfs:

    # Keep only the levels inside the requested range ...
    layer = dataset.sel(level=slice(level_min, level_max))

    # ... and only the time steps whose hour equals `_12lst`.
    layer_at_hour = layer.sel(time=layer['time'].dt.hour == _12lst)

    # Each q slab is (level, lat, lon) - (6, 101, 241) according to the
    # original note; averaging over axis 0 yields the per-grid-point mean of
    # q across the selected levels.
    all_q_data.extend(
        (stamp, np.mean(q_slab, axis=0))
        for stamp, q_slab in zip(layer_at_hour['time'].data, layer_at_hour['q'].data)
    )


In [6]:
# `qdf` holds one row per selected time step, indexed by local date:
#   ltq - 2-D field of q averaged over the level_min-level_max layer, x1000
#         (g/kg, assuming the source values are in kg/kg)
#   lst - the original timestamp shifted back by 6 hours
qdf = (
    pd.DataFrame(all_q_data, columns=['utc', 'ltq'])
    .assign(
        ltq=lambda frame: frame['ltq'] * 1000,
        lst=lambda frame: pd.to_datetime(frame['utc'] - timedelta(hours=6)),
        date=lambda frame: frame['lst'].dt.date,
    )
    .drop(columns=['utc'])
    .set_index('date')
)

In [7]:
# Coordinate vectors are taken from the first dataset only
# (presumably every file shares the same grid - verify).
lats = dfs[0]['latitude'].data
lons = dfs[0]['longitude'].data

# 2-D grids: latitude varies along rows, longitude along columns.
lon_grid, lat_grid = np.meshgrid(lons, lats)

# Row-major 1-D versions of both grids, one entry per grid point.
lat_flat = lat_grid.reshape(-1)
lon_flat = lon_grid.reshape(-1)

def reshape_df(df):
    """Expand per-time gridded 'ltq' arrays into one row per grid point.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per time step. Column 'ltq' holds one 2-D array per row; its
        row-major element order must match the module-level ``lat_flat`` /
        ``lon_flat`` vectors (assumed to be (latitude, longitude) - TODO
        confirm against the data). All other columns are repeated for every
        grid point.

    Returns
    -------
    pandas.DataFrame
        ``len(df) * len(lat_flat)`` rows with a fresh RangeIndex, the columns
        of ``df`` in their original order ('ltq' now a float scalar per row)
        followed by 'latitude' and 'longitude'.

    Raises
    ------
    ValueError
        If the number of expanded values does not equal
        ``len(df) * len(lat_flat)`` or the two coordinate vectors differ in
        length.
    """
    n_times = len(df)

    # First explode: each 2-D array becomes one row per array row (1-D arrays).
    by_grid_row = df.explode('ltq').reset_index(drop=True)

    # Make sure every remaining element is flat before the second explode.
    by_grid_row['ltq'] = by_grid_row['ltq'].apply(lambda row: row.ravel())

    # Second explode: one scalar per (time, grid point).
    expanded_df = by_grid_row.explode('ltq').reset_index(drop=True)

    if len(lat_flat) != len(lon_flat) or len(expanded_df) != n_times * len(lat_flat):
        raise ValueError(
            "ltq arrays do not match the lat/lon grid: got "
            f"{len(expanded_df)} values for {n_times} time steps and "
            f"{len(lat_flat)} x {len(lon_flat)} flattened lat/lon points"
        )

    # explode() always returns object dtype; cast back to float so later
    # arithmetic and aggregations run on a numeric column instead of on
    # Python objects.
    expanded_df['ltq'] = expanded_df['ltq'].astype(float)

    # Grid coordinates repeat once per time step.
    expanded_df['latitude'] = np.tile(lat_flat, n_times)

    expanded_df['longitude'] = np.tile(lon_flat, n_times)

    return expanded_df

# Expand qdf (one 2-D 'ltq' array per row) into one row per grid point,
# with 'latitude' and 'longitude' columns appended.
qdf_reshaped = reshape_df(qdf)

In [8]:
# Calendar date of each local timestamp.
qdf_reshaped['day'] = qdf_reshaped['lst'].dt.date

# 'MM-DD' key used to group the same calendar day across years. Formatting
# through the vectorized .dt accessor gives the same strings as formatting
# each date object individually, without a Python-level call per row.
qdf_reshaped['month_day'] = qdf_reshaped['lst'].dt.strftime('%m-%d')

In [9]:
# Climatology: mean, standard deviation and sample count of 'ltq' for every
# (calendar day, latitude, longitude) combination, indexed by 'month_day'.
clim_q = (
    qdf_reshaped
    .groupby(['month_day', 'latitude', 'longitude'])
    .agg({'ltq': ['mean', 'std', 'count']})
    .reset_index()
    .set_index('month_day')
)

# Put the per-grid-point frame on the same 'month_day' index.
qdf_reshaped = qdf_reshaped.set_index('month_day')

In [10]:
# Standardized anomaly of 'ltq' relative to the calendar-day climatology.

# Collapse the two-level column labels of clim_q: ('ltq', 'mean') -> 'ltq_mean',
# while labels with an empty second level keep just their first level.
clim_q.columns = [
    col[0] if not col[1] else '_'.join(col).strip()
    for col in clim_q.columns.values
]

# Attach the climatological statistics to every individual value.
anom_q = qdf_reshaped.merge(
    clim_q,
    how='inner',
    on=['month_day', 'latitude', 'longitude'],
)

# (value - climatological mean) / climatological standard deviation
deviation = anom_q['ltq'] - anom_q['ltq_mean']
anom_q['q_stnd_anom'] = deviation / anom_q['ltq_std']

anom_q = anom_q.set_index('day')

In [29]:
# Keep only the rows whose index value appears in both anom_q and soundings
# (index-on-index inner join).
q_anom_sound = anom_q.merge(
    soundings,
    how='inner',
    left_index=True,
    right_index=True,
)

In [30]:
# Rows flagged as APE, split by coupling regime; the index is labelled 'day'.
ape_mask = q_anom_sound['APE'].eq(True)

ltq_dryapes = (
    q_anom_sound[q_anom_sound['dry_coupling'].eq(True) & ape_mask]
    .rename_axis('day')
)

ltq_wetapes = (
    q_anom_sound[q_anom_sound['wet_coupling'].eq(True) & ape_mask]
    .rename_axis('day')
)

In [31]:
# Reshape to wide form for plotting: one row per day, one column per
# (latitude, longitude) pair holding the standardized anomaly.
# The 'day' column is dropped first so that 'day' refers to the index only.
ltq_dryapes = (
    ltq_dryapes
    .drop(columns=['day'])
    .pivot_table(index='day', columns=['latitude', 'longitude'], values=['q_stnd_anom'])
    .reset_index()
)

# Flatten the multi-level column labels by concatenating their string forms.
ltq_dryapes.columns = [
    ''.join(str(level) for level in col)
    for col in ltq_dryapes.columns
]

ltq_dryapes = ltq_dryapes.set_index('day')

In [32]:
# Reshape to wide form for plotting: one row per day, one column per
# (latitude, longitude) pair holding the standardized anomaly.
# The 'day' column is dropped first so that 'day' refers to the index only.
ltq_wetapes = (
    ltq_wetapes
    .drop(columns=['day'])
    .pivot_table(index='day', columns=['latitude', 'longitude'], values=['q_stnd_anom'])
    .reset_index()
)

# Flatten the multi-level column labels by concatenating their string forms.
ltq_wetapes.columns = [
    ''.join(str(level) for level in col)
    for col in ltq_wetapes.columns
]

ltq_wetapes = ltq_wetapes.set_index('day')

In [33]:
# Number of rows (days) in the dry and wet wide-form tables.
len(ltq_dryapes), len(ltq_wetapes)

(91, 78)

In [34]:
# Write both wide-form tables to pickle files in the working directory.
for out_path, frame in (
    ('ltq_wetapes.pdkl', ltq_wetapes),
    ('ltq_dryapes.pdkl', ltq_dryapes),
):
    with open(out_path, 'wb') as f:
        pickle.dump(frame, f)