In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# ERA5 data load and processing

In [None]:
# Open one 10 m wind file per year (2014-2023).
# Despite the `ds_u10m_*` names, each dataset is read below for BOTH the
# `u10` and the `v10` variable, so these are not u-only datasets.
# NOTE(review): the datasets are never closed explicitly in this notebook.
ds_u10m_2014 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2014.nc')
ds_u10m_2015 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2015.nc')
ds_u10m_2016 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2016.nc')
ds_u10m_2017 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2017.nc')
ds_u10m_2018 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2018.nc')
ds_u10m_2019 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2019.nc')
ds_u10m_2020 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2020.nc')
ds_u10m_2021 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2021.nc')
ds_u10m_2022 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2022.nc')
ds_u10m_2023 = xr.open_dataset('/home/gopika/Bela/GISE/uv_10m/uv_10m_2023.nc')

In [None]:
# Zonal component: the `u10` variable of each yearly dataset.
ds_zonal_2014 = ds_u10m_2014['u10']
ds_zonal_2015 = ds_u10m_2015['u10']
ds_zonal_2016 = ds_u10m_2016['u10']
ds_zonal_2017 = ds_u10m_2017['u10']
ds_zonal_2018 = ds_u10m_2018['u10']
ds_zonal_2019 = ds_u10m_2019['u10']
ds_zonal_2020 = ds_u10m_2020['u10']
ds_zonal_2021 = ds_u10m_2021['u10']
ds_zonal_2022 = ds_u10m_2022['u10']
ds_zonal_2023 = ds_u10m_2023['u10']

# Meridional component: the `v10` variable of the same datasets.
ds_merid_2014 = ds_u10m_2014['v10']
ds_merid_2015 = ds_u10m_2015['v10']
ds_merid_2016 = ds_u10m_2016['v10']
ds_merid_2017 = ds_u10m_2017['v10']
ds_merid_2018 = ds_u10m_2018['v10']
ds_merid_2019 = ds_u10m_2019['v10']
ds_merid_2020 = ds_u10m_2020['v10']
ds_merid_2021 = ds_u10m_2021['v10']
ds_merid_2022 = ds_u10m_2022['v10']
ds_merid_2023 = ds_u10m_2023['v10']

In [None]:
# All twenty component arrays: entries 0-9 are the zonal series for 2014-2023,
# entries 10-19 the meridional series for the same years.
datasets = [
    ds_zonal_2014, ds_zonal_2015, ds_zonal_2016, ds_zonal_2017, ds_zonal_2018,
    ds_zonal_2019, ds_zonal_2020, ds_zonal_2021, ds_zonal_2022, ds_zonal_2023,
    ds_merid_2014, ds_merid_2015, ds_merid_2016, ds_merid_2017, ds_merid_2018,
    ds_merid_2019, ds_merid_2020, ds_merid_2021, ds_merid_2022, ds_merid_2023,
]

# Pick the grid point nearest to (26.91, 70.90) from every array.
# NOTE(review): "jai" presumably refers to the site at that location - confirm.
datasets_jai_u10 = [
    field.sel(latitude=26.91, longitude=70.90, method='nearest')
    for field in datasets[:10]
]
datasets_jai_v10 = [
    field.sel(latitude=26.91, longitude=70.90, method='nearest')
    for field in datasets[10:20]
]


In [None]:
# Shift the `valid_time` coordinate of every point series by +05:30
# (presumably UTC -> IST; confirm against the data's time zone).
# NOTE(review): the coordinate is overwritten in place, so re-running this
# cell shifts the timestamps a second time.
ist_offset = pd.Timedelta(hours=5, minutes=30)
for point_series in datasets_jai_u10 + datasets_jai_v10:
    point_series['valid_time'] = point_series['valid_time'] + ist_offset

In [None]:
# Visual check: show the second meridional point series (index 1, i.e. 2015).
datasets_jai_v10[1]

In [None]:
# Wind speed per year: magnitude of the (u, v) vector at the selected point.
datasets_jai_ws10 = [
    (u_comp**2 + v_comp**2) ** 0.5
    for u_comp, v_comp in zip(datasets_jai_u10, datasets_jai_v10)
]

In [None]:
# Visual check: show the second wind-speed series (index 1, i.e. 2015).
datasets_jai_ws10[1]

In [None]:
# Start from an empty frame; the following cells add one wind-speed column per
# year, then the ramp ("roc_ws_*") columns and an "hours" column.
df_era5 = pd.DataFrame()

In [None]:
# Analysis years, 2014 through 2023 inclusive (same order as the yearly series).
years = [*range(2014, 2024)]

In [None]:
# Years differ in length, so pad every series with trailing NaN up to the
# longest one before storing it as a column.
max_len = max(len(yearly_ws.values) for yearly_ws in datasets_jai_ws10)

for year, yearly_ws in zip(years, datasets_jai_ws10):
    vals = yearly_ws.values
    n_missing = max_len - len(vals)
    df_era5[f"{year}_ws"] = np.pad(vals, (0, n_missing), constant_values=np.nan)

In [None]:
# Visual check of the per-year wind-speed columns.
df_era5

In [None]:
# Ramp durations, expressed as a number of rows to look ahead (1 through 8).
jumps = list(range(1, 9))

In [None]:
# Ramp = wind speed `jump` rows ahead minus the current wind speed.
# The last `jump` rows of each column have no look-ahead value and become NaN.
for year in years:
    yearly_ws = df_era5[f'{year}_ws']
    for jump in jumps:
        df_era5[f'roc_ws_{year}_{jump}'] = yearly_ws.shift(-jump) - yearly_ws

In [None]:
# Visual check after adding the ramp ("roc_ws_*") columns.
df_era5

In [None]:
# Hour-of-day label for every row: half-hour offsets 0.5 ... 23.5, rotated so
# that row 0 is labelled 5.5 and the cycle repeats every 24 rows.
# NOTE(review): the 5.5 start presumably mirrors the +05:30 time shift - confirm.
sequence = np.arange(24) + 0.5
shifted = np.concatenate((sequence[5:], sequence[:5]))
n_rows = len(df_era5)
df_era5["hours"] = np.tile(shifted, -(-n_rows // 24))[:n_rows]

In [None]:
# Visual check after adding the "hours" column.
df_era5

# Obs data

In [None]:
# Load the observation workbook (sheet "Sheet1"). Later cells use its numeric
# columns, in particular 'WS' and 'roundedhr'.
df_obs = pd.read_excel("/home/gopika/Bela/GISE/JSM_SH_WS_2017_infused.xlsx", sheet_name="Sheet1")

In [None]:
# Average the numeric columns over consecutive blocks of four rows.
# NOTE(review): assumes four rows per hour (15-minute records) with no gaps - confirm.
numeric_obs = df_obs.select_dtypes(include='number')
hour_block_id = df_obs.reset_index().index // 4
hourly_means_df_obs = numeric_obs.groupby(hour_block_id).mean()

In [None]:
# Observed ramp = 'WS' `jump` rows ahead minus the current 'WS'.
obs_ws = hourly_means_df_obs['WS']
for jump in jumps:
    hourly_means_df_obs[f'roc_ws_jump_{jump}'] = obs_ws.shift(-jump) - obs_ws

In [None]:
# Visual check of the block-averaged observations and their ramp columns.
hourly_means_df_obs

# Plotting

In [None]:
# Month names, in calendar order; also used in plot titles and file names.
months = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

# Days per month; the leap-year table differs only in February.
days_nonleap = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
days_leap = days_nonleap[:1] + [29] + days_nonleap[2:]


In [None]:
def freedman_diaconis_bins(data):
    """Return the histogram bin count suggested by the Freedman-Diaconis rule.

    The rule uses a bin width of ``2 * IQR / n**(1/3)``; the bin count is the
    data range divided by that width, rounded up.

    Parameters
    ----------
    data : array-like
        One-dimensional numeric sample. NaN entries are ignored.

    Returns
    -------
    int
        Suggested number of bins. Falls back to 10 when the rule cannot be
        applied: no non-NaN values, or an interquartile range of zero.
    """
    data = np.asarray(data)
    # Drop NaNs first: a single NaN would turn the percentiles into NaN and
    # make int(np.ceil(...)) raise.
    data = data[~np.isnan(data)]
    if len(data) == 0:
        return 10  # fallback: nothing to bin
    q25, q75 = np.percentile(data, [25, 75])
    iqr = q75 - q25
    bin_width = 2 * iqr / (len(data) ** (1/3))
    if bin_width == 0:
        return 10  # fallback: zero IQR (at least half the sample shares one value)
    bins = int(np.ceil((data.max() - data.min()) / bin_width))
    return bins

### all years ERA5 for different jumps

In [None]:
def _fd_hist_probs(values, edges=None):
    """Return ``(bin_centres, probabilities)`` of a normalised histogram of `values`.

    When `edges` is None, the edges span ``[values.min(), values.max()]`` with
    the Freedman-Diaconis bin count, so each series is binned on its own grid.
    Probabilities are counts divided by the total count (they sum to 1).
    """
    if edges is None:
        n_bins = freedman_diaconis_bins(values)
        # n_bins bins need n_bins + 1 edges; np.linspace returns points, not intervals.
        edges = np.linspace(values.min(), values.max(), n_bins + 1)
    counts, edges = np.histogram(values, bins=edges)
    return (edges[:-1] + edges[1:]) / 2, counts / counts.sum()


def _hours_outside_one_sigma(frame, ramp_column, hour_column, sample):
    """List `hour_column` for rows of `frame` whose ramp lies outside mean ± 1σ of `sample`."""
    lower = sample.mean() - sample.std()
    upper = sample.mean() + sample.std()
    ramps = frame[ramp_column]
    # NaN ramps compare False on both sides and are therefore never selected.
    return frame[hour_column][(ramps < lower) | (ramps > upper)].tolist()


highlight_year = 2017                # ERA5 year drawn in black next to the 2017 observations
hour_edges = np.arange(0, 25, 2)     # edges 0, 2, ..., 24 -> twelve 2-hour bins
cmap = cm.rainbow
norm = mcolors.Normalize(vmin=min(years), vmax=max(years))

for jump in jumps:
    for m in range(len(months)):
        # ------------------------------------------------------------------
        # Figure 1: distribution of ramp magnitudes, one line per ERA5 year
        # ------------------------------------------------------------------
        fig_ramps = plt.figure(figsize=(10,6))
        high_ramps_hour_lists = []   # per-year start hours of ramps beyond ±1σ
        era5_curves = {}             # year -> (bin centres, probabilities)
        for year in years:
            days = days_leap if year in (2016, 2020) else days_nonleap
            strow = sum(days[:m])*24
            endrow = sum(days[:m+1])*24
            ramp_column = f'roc_ws_{year}_{jump}'
            era5_slice = df_era5.iloc[strow:endrow]  # rows of this month for this year
            era5_data = era5_slice[ramp_column].dropna()

            bin_centers, era5_probs = _fd_hist_probs(era5_data)
            era5_curves[year] = (bin_centers, era5_probs)
            plt.plot(bin_centers, era5_probs, label=f'{year}', color=cmap(norm(year)), linewidth=2, marker='o', alpha=0.5)

            high_ramps_hour_lists.append(
                _hours_outside_one_sigma(era5_slice, ramp_column, 'hours', era5_data)
            )

        # Re-draw the highlighted ERA5 year in black. The curve comes from the
        # per-year results instead of whatever `days` the loop left behind.
        bin_centers, era5_probs = era5_curves[highlight_year]
        plt.plot(bin_centers, era5_probs, color='black', linewidth=2, linestyle='--', marker='o', ms=8, alpha=0.6)

        # Observations (2017, a non-leap year)
        strow = sum(days_nonleap[:m])*24
        endrow = sum(days_nonleap[:m+1])*24
        obs_slice = hourly_means_df_obs.iloc[strow:endrow]
        obs_data = obs_slice[f'roc_ws_jump_{jump}'].dropna()
        bin_centers, obs_probs = _fd_hist_probs(obs_data)
        plt.plot(bin_centers, obs_probs, label='Obs, 2017', color='black', linewidth=2, marker='d', markerfacecolor='none',ms=8,markeredgewidth=2)

        # Plot settings
        # NOTE(review): the y-axis shows normalised histogram counts, not a kernel density estimate.
        plt.xlabel(f'Wind speed ramp (m/s) over {jump}h', fontsize=15)
        plt.ylabel('Kernel Density', fontsize=15)
        plt.title(f'{months[m]}', fontsize=15)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.xlim(-10,10)
        plt.ylim(-0.01, 0.175)
        #plt.legend(fontsize=12)
        plt.grid(True)
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/ramps_{months[m]}_jump_{jump}_pdf.png", dpi=600, bbox_inches = 'tight')
        # Render now and release the figure; otherwise 192 figures stay open at once.
        plt.show()
        plt.close(fig_ramps)

        # ------------------------------------------------------------------
        # Figure 2: hour of day at which the ±1σ ramps start
        # ------------------------------------------------------------------
        high_ramps = _hours_outside_one_sigma(obs_slice, f'roc_ws_jump_{jump}', 'roundedhr', obs_data)

        fig_hours = plt.figure(figsize=(10,6))
        for year, year_hours in zip(years, high_ramps_hour_lists):
            bin_centers, era5_probs = _fd_hist_probs(np.array(year_hours), hour_edges)
            plt.plot(bin_centers, era5_probs, label=f'{year}', color=cmap(norm(year)), linewidth=1, marker='o', alpha=0.5)

        p = years.index(highlight_year)
        bin_centers, era5_probs = _fd_hist_probs(np.array(high_ramps_hour_lists[p]), hour_edges)
        plt.plot(bin_centers, era5_probs, label=f'{years[p]}', color='black', linewidth=2, linestyle='--', marker='o', ms=8, alpha=0.6)

        bin_centers, obs_probs = _fd_hist_probs(np.array(high_ramps), hour_edges)
        plt.plot(bin_centers, obs_probs, label='Obs, 2017', color='black', linewidth=2, marker='d', markerfacecolor='none',ms=10,markeredgewidth=2)

        plt.xlabel('Start time (hour of day)', fontsize=15)
        plt.ylabel('Kernel Density', fontsize=15)
        plt.title(f'High Wind Ramps (±1σ) over {jump}h, {months[m]}', fontsize=15)
        plt.xticks(range(0, 25, 2), fontsize=15)  # Show 0–24 ticks
        plt.yticks(fontsize=15)
        plt.ylim(0,0.25)
        #plt.legend(fontsize=12)
        plt.grid(True)
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/hours_{months[m]}_jump_{jump}_pdf.png", dpi=600, bbox_inches = 'tight')
        plt.show()
        plt.close(fig_hours)

        


## common binning

In [None]:
def freedman_diaconis_binwidth(data):
    """Return the Freedman-Diaconis histogram bin width, ``2 * IQR / n**(1/3)``.

    Parameters
    ----------
    data : array-like
        One-dimensional numeric sample. NaN entries are ignored.

    Returns
    -------
    float
        Suggested bin width in the units of `data`.

        * If the interquartile range is zero but the data still has a spread,
          the width that splits the data range into 10 bins is returned. This
          matches the 10-bin fallback of ``freedman_diaconis_bins``; the
          previous code returned the bare number 10 (a bin count) as a width.
        * If there is no spread at all, or no non-NaN value, the legacy
          constant 10 is returned so callers that divide by the width keep
          working.
    """
    data = np.asarray(data)
    data = data[~np.isnan(data)]
    if len(data) == 0:
        return 10  # nothing to measure; keep the legacy constant
    q25, q75 = np.percentile(data, [25, 75])  # NaNs were removed above
    iqr = q75 - q25
    bin_width = 2 * iqr / (len(data) ** (1/3))
    if bin_width == 0:
        data_range = data.max() - data.min()
        if data_range > 0:
            return data_range / 10  # width equivalent to the 10-bin fallback
        return 10  # all values identical; keep the legacy constant
    return bin_width

In [None]:
def _common_bin_probs(values, edges):
    """Return ``(bin_centres, probabilities)`` of `values` histogrammed on the given `edges`."""
    counts, edges = np.histogram(values, bins=edges)
    return (edges[:-1] + edges[1:]) / 2, counts / counts.sum()


def _flag_sigma_ramp_hours(frame, ramp_column, hour_column, sample):
    """List `hour_column` for rows of `frame` whose ramp lies outside mean ± 1σ of `sample`."""
    lower = sample.mean() - sample.std()
    upper = sample.mean() + sample.std()
    ramps = frame[ramp_column]
    # NaN ramps compare False on both sides and are therefore never selected.
    return frame[hour_column][(ramps < lower) | (ramps > upper)].tolist()


highlight_year = 2017                # ERA5 year drawn in black next to the 2017 observations
hour_edges = np.arange(0, 25, 2)     # edges 0, 2, ..., 24 -> twelve 2-hour bins
cmap = cm.rainbow
norm = mcolors.Normalize(vmin=min(years), vmax=max(years))

for jump in jumps:
    for m in range(len(months)):
        # ---- Pass 1: collect each year's month slice and its bin width ----
        era5_slices = []
        era5_data_years = []
        bin_widths = []
        for year in years:
            days = days_leap if year in (2016, 2020) else days_nonleap
            strow = sum(days[:m])*24
            endrow = sum(days[:m+1])*24
            era5_slice = df_era5.iloc[strow:endrow]
            era5_data = era5_slice[f'roc_ws_{year}_{jump}'].dropna()
            era5_slices.append(era5_slice)
            era5_data_years.append(era5_data)
            bin_widths.append(freedman_diaconis_binwidth(era5_data))

        # Observations (2017, a non-leap year) take part in the common binning too.
        strow = sum(days_nonleap[:m])*24
        endrow = sum(days_nonleap[:m+1])*24
        obs_slice = hourly_means_df_obs.iloc[strow:endrow]
        obs_data = obs_slice[f'roc_ws_jump_{jump}'].dropna()
        bin_widths.append(freedman_diaconis_binwidth(obs_data))

        # Common grid for the month: the narrowest width over the pooled range.
        month_bin_width = min(bin_widths)
        month_data_years = era5_data_years + [obs_data]
        pooled_min = min(np.nanmin(d) for d in month_data_years)
        pooled_max = max(np.nanmax(d) for d in month_data_years)
        n_bins = max(int(np.ceil((pooled_max - pooled_min) / month_bin_width)), 1)
        # n_bins bins need n_bins + 1 edges; np.linspace returns points, not intervals.
        bins = np.linspace(pooled_min, pooled_max, n_bins + 1)

        # ------------------------------------------------------------------
        # Figure 1: distribution of ramp magnitudes on the common bins
        # ------------------------------------------------------------------
        fig_ramps = plt.figure(figsize=(10,6))
        high_ramps_hour_lists = []
        for year, era5_slice, era5_data in zip(years, era5_slices, era5_data_years):
            bin_centers, era5_probs = _common_bin_probs(era5_data, bins)
            plt.plot(bin_centers, era5_probs, label=f'{year}', color=cmap(norm(year)), linewidth=2, marker='o', alpha=0.5)

            # Select the high ramps from THIS year's column and rows. The old
            # code reused the last `year` and the observation row range for
            # every year.
            high_ramps_hour_lists.append(
                _flag_sigma_ramp_hours(era5_slice, f'roc_ws_{year}_{jump}', 'hours', era5_data)
            )

        p = years.index(highlight_year)
        bin_centers, era5_probs = _common_bin_probs(era5_data_years[p], bins)
        plt.plot(bin_centers, era5_probs, color='black', linewidth=2, linestyle='--', marker='o', ms=8, alpha=0.6)

        bin_centers, obs_probs = _common_bin_probs(obs_data, bins)
        plt.plot(bin_centers, obs_probs, label='Obs, 2017', color='black', linewidth=2, marker='d', markerfacecolor='none',ms=8,markeredgewidth=2)

        # Plot settings
        # NOTE(review): the y-axis shows normalised histogram counts, not a kernel density estimate.
        plt.xlabel(f'Wind speed ramp (m/s) over {jump}h', fontsize=15)
        plt.ylabel('Kernel Density', fontsize=15)
        plt.title(f'{months[m]}', fontsize=15)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.xlim(-10,10)
        plt.ylim(-0.01, 0.175)
        #plt.legend(fontsize=12)
        plt.grid(True)
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/common_bins/ramps_{months[m]}_jump_{jump}_pdf.png", dpi=600, bbox_inches = 'tight')
        # Render now and release the figure; otherwise 192 figures stay open at once.
        plt.show()
        plt.close(fig_ramps)

        # ------------------------------------------------------------------
        # Figure 2: hour of day at which the ±1σ ramps start
        # ------------------------------------------------------------------
        high_ramps = _flag_sigma_ramp_hours(obs_slice, f'roc_ws_jump_{jump}', 'roundedhr', obs_data)

        fig_hours = plt.figure(figsize=(10,6))
        for year, year_hours in zip(years, high_ramps_hour_lists):
            bin_centers, era5_probs = _common_bin_probs(np.array(year_hours), hour_edges)
            plt.plot(bin_centers, era5_probs, label=f'{year}', color=cmap(norm(year)), linewidth=1, marker='o', alpha=0.5)

        bin_centers, era5_probs = _common_bin_probs(np.array(high_ramps_hour_lists[p]), hour_edges)
        plt.plot(bin_centers, era5_probs, label=f'{years[p]}', color='black', linewidth=2, linestyle='--', marker='o', ms=8, alpha=0.6)

        bin_centers, obs_probs = _common_bin_probs(np.array(high_ramps), hour_edges)
        plt.plot(bin_centers, obs_probs, label='Obs, 2017', color='black', linewidth=2, marker='d', markerfacecolor='none',ms=10,markeredgewidth=2)

        plt.xlabel('Start time (hour of day)', fontsize=15)
        plt.ylabel('Kernel Density', fontsize=15)
        plt.title(f'High Wind Ramps (±1σ) over {jump}h, {months[m]}', fontsize=15)
        plt.xticks(range(0, 25, 2), fontsize=15)  # Show 0–24 ticks
        plt.yticks(fontsize=15)
        plt.ylim(0,0.25)
        #plt.legend(fontsize=12)
        plt.grid(True)
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/common_bins/hours_{months[m]}_jump_{jump}_pdf.png", dpi=600, bbox_inches = 'tight')
        plt.show()
        plt.close(fig_hours)
        #print(months[m]) #%%%%%% r e g u l a r c h e c k

        


### combining many different ramp durations

### trial

In [None]:
# Layout trial on synthetic data: a heatmap of one array with contour lines of
# a second array drawn on top. No project data is used here.
np.random.seed(0)
data1 = np.random.rand(8,20) * 100   # stands in for frequencies (colours)
data2 = np.random.rand(8,20)         # stands in for probabilities (contours)

fig, ax = plt.subplots(figsize=(12,6))

# Colour layer and its colour bar
im = ax.imshow(data1, aspect='auto', cmap='viridis', origin='lower')
fig.colorbar(im, ax=ax, label="Frequency")

# Contour layer on the column/row index grid, with inline value labels
X, Y = np.meshgrid(np.arange(data2.shape[1]), np.arange(data2.shape[0]))
contours = ax.contour(X, Y, data2, colors='white', linewidths=1.2)
ax.clabel(contours, inline=True, fontsize=8, fmt="%.2f")

ax.set_xlabel("Intensity bin index")
ax.set_ylabel("Ramp duration index")
ax.set_title("Heatmap (frequencies) with Contours (probabilities)")

plt.show()


In [None]:
# For every month of 2017, derive two bin grids per source from the
# Freedman-Diaconis widths of the eight ramp durations: a finer grid ("more",
# from the narrowest width) and a coarser grid ("less", from the widest width).
# NOTE(review): the point counts divide 20 by the width, but the grids span
# only -5..5 (a range of 10) - confirm this is intended.
year = 2017
month_min_bw_era5, month_max_bw_era5 = [], []
month_min_bw_obs, month_max_bw_obs = [], []
bins_era5_more, bins_era5_less = [], []
bins_obs_more, bins_obs_less = [], []

for m in range(len(months)):
    # Row range of month m in a non-leap year (independent of the ramp duration).
    strow = sum(days_nonleap[:m])*24
    endrow = sum(days_nonleap[:m+1])*24

    era5_data_jumps = [
        df_era5[f'roc_ws_{year}_{jump}'][strow:endrow].dropna() for jump in jumps
    ]
    obs_data_jumps = [
        hourly_means_df_obs[f'roc_ws_jump_{jump}'][strow:endrow].dropna() for jump in jumps
    ]
    bin_widths_era5 = [freedman_diaconis_binwidth(sample) for sample in era5_data_jumps]
    bin_widths_obs = [freedman_diaconis_binwidth(sample) for sample in obs_data_jumps]

    print(f"era5 {months[m]} {bin_widths_era5}")
    print(f"obs {months[m]} {bin_widths_obs}")

    narrowest_era5, widest_era5 = min(bin_widths_era5), max(bin_widths_era5)
    month_min_bw_era5.append(narrowest_era5)
    month_max_bw_era5.append(widest_era5)
    nbins_era5_more = int(20/narrowest_era5)
    nbins_era5_less = int(20/widest_era5)
    bins_era5_more.append(np.linspace(-5, 5, nbins_era5_more))
    bins_era5_less.append(np.linspace(-5, 5, nbins_era5_less))

    narrowest_obs, widest_obs = min(bin_widths_obs), max(bin_widths_obs)
    month_min_bw_obs.append(narrowest_obs)
    month_max_bw_obs.append(widest_obs)
    nbins_obs_more = int(20/narrowest_obs)
    nbins_obs_less = int(20/widest_obs)
    bins_obs_more.append(np.linspace(-5, 5, nbins_obs_more))
    bins_obs_less.append(np.linspace(-5, 5, nbins_obs_less))

print(f"bin_era5_more {bins_era5_more}")
print(f"bin_era5_less {bins_era5_less}")
print(f"bin_obs_more {bins_obs_more}")
print(f"bin_obs_less {bins_obs_less}")

# Grid sizes for February (index 1)
for february_grid in (bins_era5_more[1], bins_era5_less[1], bins_obs_more[1], bins_obs_less[1]):
    print(len(february_grid))

In [None]:
# Monthly heatmaps on the FINER bin grids: ramp intensity (x) against ramp
# duration (y), for ERA5 2017 and for the observations.
# Colours are raw counts; black contours are the per-duration probabilities.
year = 2017
for m in range(len(months)):
    # Row range of month m in a non-leap year (independent of the ramp duration).
    strow = sum(days_nonleap[:m])*24
    endrow = sum(days_nonleap[:m+1])*24

    month_jumps_countlist_era5 = []
    month_jumps_problist_era5 = []
    month_jumps_countlist_obs = []
    month_jumps_problist_obs = []
    for jump in jumps:
        era5_data = df_era5[f'roc_ws_{year}_{jump}'][strow:endrow].dropna()
        era5_counts, bin_edges = np.histogram(era5_data, bins=bins_era5_more[m])
        era5_probs = era5_counts / era5_counts.sum()
        month_jumps_countlist_era5.append(era5_counts)
        month_jumps_problist_era5.append(era5_probs)

        obs_data = hourly_means_df_obs[f'roc_ws_jump_{jump}'][strow:endrow].dropna()
        obs_counts, bin_edges = np.histogram(obs_data, bins=bins_obs_more[m])
        obs_probs = obs_counts / obs_counts.sum()
        month_jumps_countlist_obs.append(obs_counts)
        month_jumps_problist_obs.append(obs_probs)

    # One figure per source; both are drawn by the same code.
    for source_title, file_tag, count_rows, prob_rows, edges in (
        ("ERA5", "era5", month_jumps_countlist_era5, month_jumps_problist_era5, bins_era5_more[m]),
        ("Observations", "obs", month_jumps_countlist_obs, month_jumps_problist_obs, bins_obs_more[m]),
    ):
        data1 = np.array(count_rows)
        data2 = np.array(prob_rows)
        bin_centres = (edges[:-1] + edges[1:]) / 2

        fig = plt.figure(figsize=(18,3))
        # The image must span the bin EDGES. Spanning only the bin centres
        # squeezes the columns and offsets them from the contour grid.
        im = plt.imshow(data1, aspect='auto', cmap='cubehelix_r', origin='lower',
                    extent=[edges[0], edges[-1], 0.5, 8.5])
        plt.colorbar(im, label="Frequency", pad=0.01)
        for y in np.arange(1.5, 8, 1):
            plt.hlines(y, xmin=edges[0], xmax=edges[-1], colors='gray', linestyles='-', linewidth=0.9, alpha=0.7)
        for x in np.arange(-4, 5, 1):
            plt.vlines(x, ymin=0.5, ymax=8.5, colors='gray', linestyles='-', linewidth=0.9, alpha=0.7)
        X, Y = np.meshgrid(bin_centres, np.arange(1,9))
        contours = plt.contour(X, Y, data2, colors='black', linewidths=1.2)
        plt.clabel(contours, inline=True, fontsize=8, fmt="%.3f")
        plt.xlabel("Wind ramp intensity (m/s)")
        plt.ylabel("Ramp duration (hours)")
        plt.title(f"{months[m]} - {source_title}")
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/heatmap_contours/finer_ramps_{months[m]}_{file_tag}.png", dpi=300)
        # Render now and release the figure so the 24 figures do not pile up.
        plt.show()
        plt.close(fig)

In [None]:
# Visual check: observation probabilities left over from the last month of the loop above.
month_jumps_problist_obs

In [None]:
# Monthly heatmaps on the COARSER bin grids: ramp intensity (x) against ramp
# duration (y), for ERA5 2017 and for the observations.
# Colours are raw counts; black contours are the per-duration probabilities.
year = 2017
for m in range(len(months)):
    # Row range of month m in a non-leap year (independent of the ramp duration).
    strow = sum(days_nonleap[:m])*24
    endrow = sum(days_nonleap[:m+1])*24

    month_jumps_countlist_era5 = []
    month_jumps_problist_era5 = []
    month_jumps_countlist_obs = []
    month_jumps_problist_obs = []
    for jump in jumps:
        era5_data = df_era5[f'roc_ws_{year}_{jump}'][strow:endrow].dropna()
        era5_counts, bin_edges = np.histogram(era5_data, bins=bins_era5_less[m])
        era5_probs = era5_counts / era5_counts.sum()
        month_jumps_countlist_era5.append(era5_counts)
        month_jumps_problist_era5.append(era5_probs)

        obs_data = hourly_means_df_obs[f'roc_ws_jump_{jump}'][strow:endrow].dropna()
        obs_counts, bin_edges = np.histogram(obs_data, bins=bins_obs_less[m])
        obs_probs = obs_counts / obs_counts.sum()
        month_jumps_countlist_obs.append(obs_counts)
        month_jumps_problist_obs.append(obs_probs)

    # One figure per source; both are drawn by the same code.
    for source_title, file_tag, count_rows, prob_rows, edges in (
        ("ERA5", "era5", month_jumps_countlist_era5, month_jumps_problist_era5, bins_era5_less[m]),
        ("Observations", "obs", month_jumps_countlist_obs, month_jumps_problist_obs, bins_obs_less[m]),
    ):
        data1 = np.array(count_rows)
        data2 = np.array(prob_rows)
        bin_centres = (edges[:-1] + edges[1:]) / 2

        fig = plt.figure(figsize=(18,3))
        # The image must span the bin EDGES. Spanning only the bin centres
        # squeezes the columns and offsets them from the contour grid.
        im = plt.imshow(data1, aspect='auto', cmap='terrain_r', origin='lower',
                    extent=[edges[0], edges[-1], 0.5, 8.5])
        plt.colorbar(im, label="Frequency", pad=0.01)
        for y in np.arange(1.5, 8, 1):
            plt.hlines(y, xmin=edges[0], xmax=edges[-1], colors='gray', linestyles='-', linewidth=0.9, alpha=0.7)
        for x in np.arange(-4, 5, 1):
            plt.vlines(x, ymin=0.5, ymax=8.5, colors='gray', linestyles='-', linewidth=0.9, alpha=0.7)
        X, Y = np.meshgrid(bin_centres, np.arange(1,9))
        contours = plt.contour(X, Y, data2, colors='black', linewidths=1.2)
        plt.clabel(contours, inline=True, fontsize=8, fmt="%.3f")
        plt.xlabel("Wind ramp intensity (m/s)")
        plt.ylabel("Ramp duration (hours)")
        plt.title(f"{months[m]} - {source_title}")
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/heatmap_contours/coarser_ramps_{months[m]}_{file_tag}.png", dpi=300)
        # Render now and release the figure so the 24 figures do not pile up.
        plt.show()
        plt.close(fig)

### when are these ramps most probable to occur?

In [None]:
# Hour-of-day histogram grid shared by the cell below.
hour_bins = np.arange(0, 25, 2)  # edges 0, 2, ..., 24 -> twelve 2-hour bins (not 24)
bin_centres = (hour_bins[:-1] + hour_bins[1:]) / 2  # centres 1, 3, ..., 23
bin_centres

In [None]:
# Monthly heatmaps of WHEN the large ramps start: hour of day (x) against ramp
# duration (y), for ERA5 2017 and for the observations. A ramp counts as large
# when it lies outside mean ± 1σ of that month's ramps for the same duration.
year = 2017  # defined here; the cell used to rely on a value left by an earlier cell
hour_centres = (hour_bins[:-1] + hour_bins[1:]) / 2  # computed locally, independent of cell order

for m in range(len(months)):
    # Row range of month m in a non-leap year (independent of the ramp duration).
    strow = sum(days_nonleap[:m])*24
    endrow = sum(days_nonleap[:m+1])*24
    era5_slice = df_era5.iloc[strow:endrow]
    obs_slice = hourly_means_df_obs.iloc[strow:endrow]

    era5_highramp_hrcounts = []
    era5_highramp_hrprobs = []
    obs_highramp_hrcounts = []
    obs_highramp_hrprobs = []
    for jump in jumps:
        for frame, ramp_column, hour_column, count_rows, prob_rows in (
            (era5_slice, f'roc_ws_{year}_{jump}', 'hours', era5_highramp_hrcounts, era5_highramp_hrprobs),
            (obs_slice, f'roc_ws_jump_{jump}', 'roundedhr', obs_highramp_hrcounts, obs_highramp_hrprobs),
        ):
            ramps = frame[ramp_column]
            sample = ramps.dropna()
            # ±1σ flanks around the monthly mean
            lower = sample.mean() - sample.std()
            upper = sample.mean() + sample.std()
            # NaN ramps compare False on both sides and are never selected.
            high_ramps = frame[hour_column][(ramps < lower) | (ramps > upper)].tolist()
            counts, bin_edges = np.histogram(np.array(high_ramps), bins=hour_bins)
            count_rows.append(counts)
            prob_rows.append(counts / counts.sum())

    # One figure per source; both are drawn by the same code.
    for source_title, file_tag, count_rows, prob_rows in (
        ("ERA5", "era5", era5_highramp_hrcounts, era5_highramp_hrprobs),
        ("Observations", "obs", obs_highramp_hrcounts, obs_highramp_hrprobs),
    ):
        data1 = np.array(count_rows)
        data2 = np.array(prob_rows)

        fig = plt.figure(figsize=(6,3))
        # The image must span the bin EDGES (0-24 h). Spanning only the bin
        # centres offsets the columns from the contour grid.
        im = plt.imshow(data1, aspect='auto', cmap='terrain_r', origin='lower',
                    extent=[hour_bins[0], hour_bins[-1], 0.5, 8.5])
        plt.colorbar(im, label="Frequency", pad=0.03)
        for y in np.arange(1.5, 8, 1):
            plt.hlines(y, xmin=hour_bins[0], xmax=hour_bins[-1], colors='gray', linestyles='-', linewidth=0.9, alpha=0.7)
        # The vertical grid lines were disabled: their `for x in bin_centres:`
        # loop had been commented out. The orphaned `plt.vlines(x, ...)` call
        # is removed as well; it drew at a stale `x` left by an earlier cell.
        X, Y = np.meshgrid(hour_centres, np.arange(1,9))
        contours = plt.contour(X, Y, data2, colors='black', linewidths=1.2)
        plt.clabel(contours, inline=True, fontsize=8, fmt="%.3f")
        plt.xlabel("Hour of day")
        plt.ylabel("Ramp duration (hours)")
        plt.title(f"{months[m]} - {source_title}")
        plt.savefig(f"/home/gopika/Bela/GISE/dpi_paper_plots/wind_ramps/heatmap_contours/hours_{months[m]}_{file_tag}.png", dpi=300)
        # Render now and release the figure so the 24 figures do not pile up.
        plt.show()
        plt.close(fig)
