# Import

In [2]:
import xarray as xr
import h5py
import numpy as np
import pandas as pd
import pylab as plt
from datetime import datetime , timedelta
from tqdm import tqdm

In [3]:
from matplotlib import colors
import cartopy
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.ticker as mticker
import matplotlib.colors
import matplotlib.pyplot as plt

# Func

In [4]:
def get_storm_information(storm_mask_3d, wind_speed ,soil_m, evi_season ,dates, hrs):
    """
    The function takes a 3d (spatiotemporal): storm mask, soil moisture, wind, and EVI datasets. 
    It calculates storm properties: source area, defined by first x hrs, storm coverage, and distance traveled
    the maximum wind speed, minimum soil moisture, and minimum EVI over the storm area.

    Parameters
    ----------
    storm_mask_3d : numpy.ndarray (time, lat , lon)
        A 3D (spatiotemporal) boolean mask with the same shape as the `soil_m`, `wind_speed`, and `evi_data` arrays, where
        True values represent the presence of the storm.

    wind_speed : numpy.ndarray (time, lat , lon)
        A 3D (spatiotemporal) array representing wind speed data.

    soil_m : numpy.ndarray (time, lat , lon)
        A 3D (spatiotemporal) array representing soil moisture data.

    evi_season : numpy.ndarray (time, lat , lon)
        A 3D (spatiotemporal) array representing Enhanced Vegetation Index (EVI) data.
    
    dates : numpy.ndarray
        A numpy array of datetime64 dates corresponding to each time step in the dataset.

    hrs : int
        An integer representing the number of hours to define a source area.


    Returns
    -------
    storm_source_2d : numpy.ndarray
        A 2D array representing the storm source area, as a slice of the original `storm_mask_3d` array. With shape (lat , lon)

    storm_coverage : numpy.ndarray
        A 2D array representing the storm coverage, indicating areas affected by the storm, as a slice of the original `storm_mask_3d` array. With shape (lat , lon).

    distance_traveled_km : float
        The distance traveled by the storm in kilometers. Defined as the distance from the centeriod of the source to the farthest point from it in the storm.
        
    duration : int
        The duration of the storm in hours.    
        
    start : numpy.datetime64
        The start date and time of the storm.

    end : numpy.datetime64
        The end date and time of the storm.

    wind_speed_storm_2d : numpy.ndarray
        A 2D array representing the maximum wind speed during the first x hours, as a slice of the `wind_speed` array. With shape (lat , lon).

    soil_m_storm_2d : numpy.ndarray
        A 2D array representing the minimum soil moisture during the first x hours, as a slice of the `soil_m` array. With shape (lat , lon).

    evi_storm_2d : numpy.ndarray
        A 2D array representing the minimum Enhanced Vegetation Index (EVI) during the first x hours, as a slice of the `evi_season` array. With shape (lat , lon).

    Notes
    ------
    The function takes the first `hrs` and applies to `storm_mask_3d` along the 0-th (time) axis to extract source area.
    It then uses the source area to extracts the `soil_m`, `wind_speed`, and `evi_season` arrays.

    """
    

    
    times = np.unique(np.nonzero(storm_mask_3d)[0])
    first_inds = times[:hrs]
    storm_source_2d = np.logical_or.reduce(storm_mask_3d[first_inds] , axis=0).astype(float)
    
    # storm properties
    start= dates[times[0]]
    end = dates[times[-1]]
    time_difference = end - start
    duration =  time_difference.astype(int)//60
    #duration = len(np.unique(np.nonzero(storm_mask_3d)[0]))
    storm_coverage = np.logical_or.reduce(storm , axis=0).astype(int)
    distance_traveled_km = calc_distance_center_to_edge( storm_source_2d ,  storm_coverage )
    
    # wind
    wind_speed_storm_3d = wind_speed.copy()
    wind_speed_storm_3d[~storm_mask_3d]=np.nan
    wind_speed_storm_3d=wind_speed_storm_3d[first_inds]
    wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
    
    # soil m
    soil_m_storm_3d = soil_m.copy()
    soil_m_storm_3d[~storm_mask_3d]=np.nan
    soil_m_storm_3d=soil_m_storm_3d[first_inds]
    soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
    
    #evi
    evi_storm_3d = evi_season.copy()
    evi_storm_3d[~storm_mask_3d]=np.nan
    evi_storm_3d= evi_storm_3d[first_inds]
    evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d

    return storm_source_2d , storm_coverage , distance_traveled_km , duration , start, end , wind_speed_storm_2d , soil_m_storm_2d , evi_storm_2d


In [5]:
def calc_distance_center_to_edge(source , full_area):
    """
    Calculates the distance from the center of a given area to the edge of the area, as well as the coordinates of the center.

    Args:
    source (ndarray): A 2D array containing a binary mask of the area to calculate the distance for.
    full_area (ndarray): A 2D array containing a binary mask of the full area that the source area is a part of.

    Returns:
    float: The maximum distance in km from the center of the source area to the edge of the full_area.
    """
    if np.sum(source)==0:
        return (np.nan, np.nan)
    y = int(np.median(np.nonzero(source)[0]))
    x = int(np.median(np.nonzero(source)[1]))
    # Find the coordinates of all 1s in the matrix
    coords = np.argwhere(full_area == 1)
    # Calculate the Euclidean distance between the specified 1 and the farthest 1
    distances = np.linalg.norm(coords - np.array([y, x]), axis=1)
    max_distance = np.max(distances)

    return max_distance*25

In [6]:
def fix_dates(dates, year, season):
    '''
    Adds the specified year to the dates.

    Parameters
    ----------
    dates : list
        A list of dates in string format.

    year : int
        The year to be added to the dates.

    season : str
        The season for which the dates are being fixed.

    Returns
    -------
    dates_ : numpy.ndarray
        An array of numpy.datetime64 objects with the fixed dates.

    Notes
    -----
    This function assumes that the input dates are in the format 'MM-DD'.
    If the season is 'DJF' (December-January-February), the function handles the year transition correctly.
    The output dates are in numpy.datetime64 format.
    '''
    dates = [d.decode() for d in dates]
    new_dates = []
    # Adds year to data
    if season == "DJF":
        for date in dates:
            if date.startswith('12'):
                new_date = date.replace('12-', str(int(year)-1)+'-12-')
                new_dates.append(new_date)
            elif date.startswith('01'):
                new_date = date.replace('01-', str(year)+'-01-')
                new_dates.append(new_date)
            else:
                new_date = date.replace('02-', str(year)+'-02-')
                new_dates.append(new_date)
    else:
        for date in dates:
            new_date = str(year) +'-'+ date
            new_dates.append(new_date)

            
    dates_ = np.array([np.datetime64(d) for d in new_dates])
    return dates_

In [7]:
def get_start_end(year, season):
    '''
    Returns the start and end dates for the specified year and season.

    Parameters
    ----------
    year : int
        The year for which the start and end dates are required.

    season : str
        The season for which the start and end dates are required.

    Returns
    -------
    start : pandas.Timestamp
        The start date of the specified season.

    end : pandas.Timestamp
        The end date of the specified season.

    Notes
    -----
    The start and end dates are returned as pandas.Timestamp objects.
    '''
    season_to_date_range = {
        "DJF": [pd.to_datetime(str(year-1)+ "-12"), pd.to_datetime(str(year)+ "-03")],  # December of previous year through February
        "MAM": [pd.to_datetime(str(year)+ "-03"), pd.to_datetime(str(year)+ "-06")],  # March through May
        "JJA": [pd.to_datetime(str(year)+ "-06"), pd.to_datetime(str(year)+ "-09")],  # June through August
        "SON": [pd.to_datetime(str(year)+ "-09"), pd.to_datetime(str(year)+ "-12")]  # September through November
    }
    # Get the corresponding date range for the input season
    start , end = season_to_date_range[season]
    return start , end

# Apply

In [8]:
smap=xr.open_dataset('D:/SMAP_Data_Processing/smap_8_day_resampled.nc')

In [9]:
evi_data=xr.open_dataset('D:/modis/modis_evi_2017_2022.nc')

In [10]:
evi_data

In [11]:
lons = evi_data.longitude.values
lats = evi_data.latitude.values

In [12]:
print("Current time:", datetime.now().strftime("%H:%M:%S"))
# List of years to consider
years = [2018,2019,2020,2021,2022]

# Loop over the years
for year in tqdm(years):
    print(year)
    # Loop over the seasons
    for season in ["DJF","MAM","JJA","SON"]:
        print(season)
        # Open the h5 file containing dust data for the current year and season, and read the dates
        hf = h5py.File("dust_" + str(year)+"_" + season + ".h5")
        dates= hf["dates"][:]
        hf.close()

        # Open the h5 file containing dust labels for the current year and season, and read the labels
        hf2= h5py.File("dust_"+ str(year)+ "_" + season+ "_Labels.h5")
        labels = hf2["labels"][:]
        hf2.close()

        # Adjust dates to match the year and season
        fixed_dates = fix_dates(dates , year , season)

        # Open the NetCDF file containing wind data, select data matching the adjusted dates, and transpose the array
        winds = xr.open_dataset('D:/ERA_wind/ERA_5_'+str(year)+'_'+season+'_resampled.nc')
        wind_speed = winds.sel(time=fixed_dates)
        wind_speed = wind_speed.wind_speed.values
        wind_speed = np.transpose(wind_speed, (2, 0, 1))

        # Determine start and end dates for the season, select and resample soil moisture data, and transpose the array
        start , end = get_start_end(year,season)
        sm_season = smap.sel(time=slice(start, end)).resample(time='1H').pad()
        sm_season = sm_season.sel(time = fixed_dates).soil_moisture.values
        sm_season =np.transpose(sm_season, (2, 0, 1))
        
        #
        evi_season = evi_data.sel(time=slice(start- timedelta(days=20), end +  timedelta(days=20))).resample(time='1H').nearest()
        evi_season = evi_season.sel(time = fixed_dates).EVI.values
        evi_season[evi_season<0]=np.nan
        
        #Check Shapes
        print( "Shapes",  labels.shape , sm_season.shape , wind_speed.shape)
        
        # Initialization of matrices to hold sum and count of wind speed and soil moisture for the current year and season
        storm_ids = []
        
        sources=[]
        coverages=[]
        distances = []
        durations = []
        starts=[]
        ends=[]
        
        ws_arrays = []
        sm_arrays = []
        evi_arrays =[]
        # Loop over the unique dust storm labels
        for i in np.unique(labels)[:-1]:
            # Create a mask for the current dust storm
            storm = labels==i

            # Calculate wind speed and soil moisture for the current dust storm
            storm_source_2d , storm_coverage , distance_traveled_km , storm_duration , start, end , wind_speed_storm_2d , soil_m_storm_2d , evi_storm_2d = get_storm_information(storm, wind_speed ,sm_season, evi_season ,fixed_dates, 3)
            # Update the sum and count matrices for wind speed
            storm_ids.append(i)
            sources.append(storm_source_2d)
            coverages.append(storm_coverage)
            distances.append(distance_traveled_km)
            durations.append(storm_duration)
            starts.append(start)
            ends.append(end)
            sm_arrays.append(soil_m_storm_2d)
            ws_arrays.append(wind_speed_storm_2d)
            evi_arrays.append(evi_storm_2d)
            
        # Loop finished
        
        
        ds = xr.Dataset(
        data_vars={
            "source": (("id", "lat", "lon"),  np.stack(sources) ),
            "coverage": (("id", "lat", "lon"), np.stack(coverages)),
            "wind_speed": (("id", "lat", "lon"),  np.stack(ws_arrays)),
            "soil_moisture": (("id", "lat", "lon"),  np.stack(sm_arrays)),
            "evi": (("id", "lat", "lon"),  np.stack(evi_arrays)),
            "distance": (("id"),  np.array(distances)),
            "duration": (("id"),  np.array(durations)),
            "start_time": (("id"),  np.array(starts)),
            "end_time": (("id"),  np.array(ends)),
            

        },
        coords={
            "storm_id": np.array(storm_ids).astype(int),
            "latitude": (("lat", "lon"), lats),
            "longitude": (("lat", "lon"), lons),
            },
        )
        
        ds.to_netcdf("storm_properties_" + season + "_"+ str(year) +".nc")

        # # Add season_year
        # season_year_list.append(season+"_"+str(year) )
        # print("Current time:", datetime.now().strftime("%H:%M:%S"))
        # print("\n \n")

Current time: 22:11:02


  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

2018
DJF
Shapes (2150, 148, 357) (2150, 148, 357) (2150, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


MAM
Shapes (2205, 148, 357) (2205, 148, 357) (2205, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


JJA
Shapes (2196, 148, 357) (2196, 148, 357) (2196, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


SON
Shapes (2158, 148, 357) (2158, 148, 357) (2158, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d
 20%|███████████████▌                                                              | 1/5 [2:13:18<8:53:14, 7998.60s/it]

2019
DJF
Shapes (2151, 148, 357) (2151, 148, 357) (2151, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


MAM
Shapes (2205, 148, 357) (2205, 148, 357) (2205, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


JJA
Shapes (2124, 148, 357) (2124, 148, 357) (2124, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


SON
Shapes (2181, 148, 357) (2181, 148, 357) (2181, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d
 40%|███████████████████████████████▏                                              | 2/5 [4:16:38<6:22:19, 7646.52s/it]

2020
DJF
Shapes (2173, 148, 357) (2173, 148, 357) (2173, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


MAM
Shapes (2204, 148, 357) (2204, 148, 357) (2204, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


JJA
Shapes (2196, 148, 357) (2196, 148, 357) (2196, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


SON
Shapes (2170, 148, 357) (2170, 148, 357) (2170, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d
 60%|██████████████████████████████████████████████▊                               | 3/5 [6:26:04<4:16:41, 7700.84s/it]

2021
DJF
Shapes (2154, 148, 357) (2154, 148, 357) (2154, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


MAM
Shapes (2189, 148, 357) (2189, 148, 357) (2189, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


JJA
Shapes (2204, 148, 357) (2204, 148, 357) (2204, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


SON
Shapes (2168, 148, 357) (2168, 148, 357) (2168, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d
 80%|██████████████████████████████████████████████████████████████▍               | 4/5 [8:32:54<2:07:45, 7665.18s/it]

2022
DJF
Shapes (2140, 148, 357) (2140, 148, 357) (2140, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


MAM
Shapes (2205, 148, 357) (2205, 148, 357) (2205, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


JJA
Shapes (2195, 148, 357) (2195, 148, 357) (2195, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d


SON
Shapes (2179, 148, 357) (2179, 148, 357) (2179, 148, 357)


  wind_speed_storm_2d = np.nanmax(wind_speed_storm_3d, axis=0) * storm_source_2d
  soil_m_storm_2d = np.nanmin(soil_m_storm_3d, axis=0) * storm_source_2d
  evi_storm_2d = np.nanmin(evi_storm_3d , axis=0) * storm_source_2d
100%|███████████████████████████████████████████████████████████████████████████████| 5/5 [10:50:30<00:00, 7806.06s/it]


# Plotting

In [None]:
ds['wind_speed_average'] = ds['wind_speed_sum'] / ds['wind_speed_count']
ds['soil_moisture_average'] = ds['soil_moisture_sum'] / ds['soil_moisture_count']

In [None]:
grouped = ds.groupby("time.month")
ds_mean = grouped.mean()

In [None]:
ds_sum = grouped.sum()

In [None]:
ds_mean

In [None]:
def cartopy_ax_2(crs, ax):
    """
    Returns a Cartopy axis with features such as land, ocean, coastline, and borders added to it.
    The axis is also configured with gridlines and formatted tick labels.
    Args:
    crs (cartopy.crs.CRS): The coordinate system of the axis.
    ax (matplotlib.axes.Axes): The axis to be modified.
    
    Returns:
    matplotlib.axes.Axes: The modified axis.
    """
    #ax = plt.axes(projection=crs)
    #ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    ax.add_feature(cartopy.feature.COASTLINE,linewidth=0.5)
    ax.add_feature(cartopy.feature.BORDERS, linestyle=':',linewidth=0.3)
    return ax

In [None]:
crs = ccrs.PlateCarree()

In [None]:
bounds = (-11.625, 77.375, 5.625, 42.375)

In [None]:
plt.figure(figsize=(22, 15))
plt.suptitle("Hours of Dust Source" , y=0.9)
ax1 = plt.subplot(3, 2, 1, projection=crs)


#ax1 = cartopy_ax_2(crs, ax1)
ax1.add_feature(cartopy.feature.OCEAN)
ax1.add_feature(cartopy.feature.COASTLINE,linewidth=0.5)
ax1.add_feature(cartopy.feature.BORDERS, linestyle=':',linewidth=0.3)
ax1.imshow( ds_mean.sel(month=1).dust.values , transform=crs, extent=bounds)
ax1.set_title("DJF" , size= 14)
# gl = ax1.gridlines(draw_labels=True, linewidth=1, color="black", alpha=0, linestyle="--")
# gl.top_labels = gl.right_labels= False
# gl.bottom_labels = False 
# gl.yformatter = LATITUDE_FORMATTER
# gl.ylocator = mticker.FixedLocator([10, 20, 30, 40])


# ax2 = plt.subplot(3, 2, 2, projection=crs)
# ax2 = cartopy_ax_2(crs,ax2)
# ax2.set_extent(bounds, crs)
# ax2.imshow(ds_mean.sel(month=4).dust.values , transform=crs, extent=bounds, cmap=c_map, vmax=max_val )
# ax2.set_title("MAM", size= 14)


# ax3 = plt.subplot(3, 2, 3, projection=crs)
# ax3 = cartopy_ax_2(crs,ax3)
# ax3.set_extent(bounds, crs)
# ax3.set_title("JJA", size= 14)
# m = ax3.imshow(ds_mean.sel(month=7).dust.values , transform=crs, extent=bounds, cmap=c_map ,vmax=max_val )
# gl = ax3.gridlines(draw_labels=True, linewidth=1, color="black", alpha=0, linestyle="--")
# gl.top_labels = gl.right_labels = False
# gl.xformatter = LONGITUDE_FORMATTER
# gl.yformatter = LATITUDE_FORMATTER
# gl.xlocator = mticker.FixedLocator([0, 20,40,60]) #])40, 60, 80, -140, -120])
# gl.ylocator = mticker.FixedLocator([10, 20, 30, 40])
# #plt.colorbar(shrink = 0.67, extend="max")
# #
# ax4 = plt.subplot(3, 2, 4, projection=crs)
# ax4 = cartopy_ax_2(crs,ax4)
# ax4.set_extent(bounds, crs)
# ax4.set_title("SON", size= 14)
# ax4.imshow(ds_mean.sel(month=10).dust.values , transform=crs, extent=bounds, cmap=c_map ,vmax=max_val )
# gl = ax4.gridlines(draw_labels=True, linewidth=1, color="black", alpha=0, linestyle="--")
# gl.top_labels = gl.right_labels = False
# gl.xformatter = LONGITUDE_FORMATTER
# gl.left_labels = False 
# gl.xlocator = mticker.FixedLocator([0, 20,40,60]) #])40, 60, 80, -140, -120])



# cax = plt.subplot(3, 2, 5)#, projection=crs)
# cbar = plt.colorbar(m , shrink = 0.8, extend="max", cax=cax, orientation='horizontal')
# cbar.set_label('sum of source active hours', size= 14)
# # Set the position of the colorbar Axes object
# # Set the position of the colorbar Axes object


# plt.subplots_adjust(hspace=0.01, wspace=0.01)
# #plt.tight_layout()
# cax.set_position([0.235, 0.33, 0.54, 0.05])
# cax.set_aspect(2)

In [None]:
years = [2018,2019,2020,2021,2022]
ws_sum_list =[]
ws_count_list = []
sm_sum_list = []
sm_count_list = []
for year in years:
    for season in ["DJF","MAM","JJA","SON"]:
        # READ DUST
        hf = h5py.File("dust_" + str(year)+"_" + season + ".h5")
        dates= hf["dates"][:]
        hf.close()
        #
        hf2= h5py.File("dust_"+ str(year)+ "_" + season+ "_Labels.h5")
        labels = hf2["labels"][:]
        hf2.close()
        fixed_dates = fix_dates(dates , year , season)
        # Read winds
        winds = xr.open_dataset('D:/ERA_wind/ERA_5_'+year+'_'+season+'_resampled.nc')
        wind_speed = winds.sel(time=fixed_dates)
        wind_speed = wind_speed.wind_speed.values
        wind_speed = np.transpose(wind_speed, (2, 0, 1))
        #Read Soil_Moisture
        start , end = get_start_end(year,season)
        sm_season = smap.sel(time=slice(start, end)).resample(time='1H').pad()
        sm_season = sm_season.sel(time = fixed_dates).soil_moisture.values
        sm_season =np.transpose(sm_season, (2, 0, 1))
        # Calc data 
        wind_sum = np.zeros((148,357))
        wind_count = np.zeros((148,357))
        sm_sum = np.zeros((148,357))
        sm_count = np.zeros((148,357))
        
        for i in np.unique(labels)[:-1]:
            storm = labels==i
            ws,sm  = get_wind_sm_for_storm(storm, sm_season, wind_speed)
            #
            wind_sum = wind_sum + ws
            wind_count= wind_count + ((ws>0).astype(int))
            #
            if np.nansum(sm) >0:
                sms_sums = sms_sums + sm
                sms_counts= sms_counts + ((sm>0).astype(int))
        ws_sum_list.append(wind_sum)
        ws_count_list.append(wind_count)
        sm_sum_list.append(sm_sum)
        sm_count_list.append(sm_count)

# Test run

In [None]:
year = 2018
season = "DJF"

In [None]:
%%time
hf = h5py.File("dust_" + str(year)+"_" + season + ".h5")
dates= hf["dates"][:]
hf.close()
#
hf2= h5py.File("dust_"+ str(year)+ "_" + season+ "_Labels.h5")
denoised_labels = hf2["labels"][:]
hf2.close()

In [None]:
fixed_dates = fix_dates(dates,year , season)

In [None]:
fixed_dates

In [None]:
smap=xr.open_dataset('D:/SMAP_Data_Processing/smap_8_day_resampled.nc')

In [None]:
winds =xr.open_dataset('D:/ERA_wind/ERA_5_'+str(year)+'_'+season+'_resampled.nc')

In [None]:
evi=xr.open_dataset('D:/modis/modis_evi_2017_2022.nc')

In [None]:
smap

In [None]:
evi

In [None]:
start , end = get_start_end(year,season)


In [None]:
end

In [None]:
evi_season = evi.sel(time=slice(start- timedelta(days=20), end +  timedelta(days=20))).resample(time='1H').nearest()

In [None]:
evi_season

In [None]:
%%time
evi_season = evi_season.sel(time = fixed_dates).EVI.values

In [None]:
plt.imshow(evi_season[0])
plt.colorbar()

In [None]:
sm_season = smap.sel(time=slice(start, end)).resample(time='1H').pad()

In [None]:
%%time
sm_season = sm_season.sel(time = fixed_dates).soil_moisture.values

In [None]:
sm_season =np.transpose(sm_season, (2, 0, 1))

In [None]:
#np.unique(denoised_labels)

In [None]:
wind_speed = winds.sel(time=fixed_dates)

In [None]:
%%time
wind_speed = wind_speed.wind_speed.values

In [None]:
wind_speed = np.transpose(wind_speed, (2, 0, 1))

In [None]:
wind_speed.shape

In [None]:
denoised_labels.shape

In [None]:
#storm_mask_3d= (denoised_labels==27)

In [None]:
storm_ids = []

sources=[]
coverages=[]
distances = []
durations = []
starts=[]
ends=[]

ws_arrays = []
sm_arrays = []
evi_arrays =[]
# Loop over the unique dust storm labels
for i in np.unique(denoised_labels)[:-1]:
    # Create a mask for the current dust storm
    storm = denoised_labels==i

    # Calculate wind speed and soil moisture for the current dust storm
    storm_source_2d , storm_coverage , distance_traveled_km , storm_duration , start, end , wind_speed_storm_2d , soil_m_storm_2d , evi_storm_2d = get_storm_information(storm, wind_speed ,sm_season, evi_season ,fixed_dates, 3)
    # Update the sum and count matrices for wind speed
    storm_ids.append(i)
    sources.append(storm_source_2d)
    coverages.append(storm_coverage)
    distances.append(distance_traveled_km)
    durations.append(storm_duration)
    starts.append(start)
    ends.append(end)
    sm_arrays.append(soil_m_storm_2d)
    ws_arrays.append(wind_speed_storm_2d)
    evi_arrays.append(evi_storm_2d)

# Loop finished


ds = xr.Dataset(
data_vars={
    "source": (("id", "lat", "lon"),  np.stack(sources) ),
    "coverage": (("id", "lat", "lon"), np.stack(coverages)),
    "wind_speed": (("id", "lat", "lon"),  np.stack(ws_arrays)),
    "soil_moisture": (("id", "lat", "lon"),  np.stack(sm_arrays)),
    "evi": (("id", "lat", "lon"),  np.stack(evi_arrays)),
    "distance": (("id"),  np.array(distances)),
    "duration": (("id"),  np.array(durations)),
    "start_time": (("id"),  np.array(starts)),
    "end_time": (("id"),  np.array(ends)),


},
coords={
    "storm_id": np.array(storm_ids),
    "latitude": (("lat", "lon"), lats),
    "longitude": (("lat", "lon"), lons),
    },
)

ds.to_netcdf("storm_properties_" + season + "_"+ str(year) +".nc")

In [None]:
ds = xr.Dataset(
data_vars={
    "source": (("id", "lat", "lon"),  np.stack(sources) ),
    "coverage": (("id", "lat", "lon"), np.stack(coverages)),
    "wind_speed": (("id", "lat", "lon"),  np.stack(ws_arrays)),
    "soil_moisture": (("id", "lat", "lon"),  np.stack(sm_arrays)),
    "evi": (("id", "lat", "lon"),  np.stack(evi_arrays)),
    "distance": (("id"),  np.array(distances)),
    "duration": (("id"),  np.array(durations)),
    "start_time": (("id"),  np.array(starts)),
    "end_time": (("id"),  np.array(ends)),


},
coords={
    "storm_id": np.array(storm_ids).astype(int),
    "latitude": (("lat", "lon"), lats),
    "longitude": (("lat", "lon"), lons),
    },
)

ds.to_netcdf("storm_properties_" + season + "_"+ str(year) +".nc")

In [None]:
 np.array(starts)

In [None]:
ds

In [None]:
plt.imshow(sources[0])

In [None]:
plt.imshow(ws_arrays[0])
plt.colorbar()

In [None]:
%%time

wss_sums = np.zeros((148,357))
wss_counts = np.zeros((148,357))
sms_sums = np.zeros((148,357))
sms_counts = np.zeros((148,357))

sms = []
for i in np.unique(denoised_labels[:-1]):
    storm = denoised_labels==i
    ws,sm  = get_wind_sm_for_storm(storm, sm_season, wind_speed)
    #
    print(np.nansum(sm))
    plt.imshow(sm)
    plt.show()
    #

    plt.imshow(sms_sums)
    plt.show()
    #
    wss_sums = wss_sums + ws
    wss_counts= wss_counts + ((ws>0).astype(int))
    #
    if np.nansum(sm) >0:
        sms_sums = sms_sums + sm
        sms_counts= sms_counts + ((sm>0).astype(int))

In [None]:
np.unique(denoised_labels[:-1])

In [None]:
#for i in np.unique(denoised_labels[:-1]):
storm = denoised_labels==6
# first_inds = np.unique(np.nonzero(storm_mask_3d)[0])[:hrs]
# storm_mask_2d = np.logical_or.reduce(storm_mask_3d[first_inds] , axis=0).astype(float)

In [None]:
np.sum(storm)

In [None]:
start= fixed_dates[np.unique(np.nonzero(storm)[0])[0]]
end =  fixed_dates[800]

In [None]:
start , end

In [None]:
time_difference

In [None]:
fixed_dates[start]

In [None]:
duration = len(np.unique(np.nonzero(storm)[0]))

In [None]:
storm_coverage = np.logical_or.reduce(storm , axis=0)

In [None]:
storm_coverage.astype(int)

In [None]:
plt.imshow(storm_coverage.astype(int))

In [None]:
plt.imshow(np.sum(storm,axis=0))

In [None]:
end