# Import

In [1]:
from datetime import datetime, timezone

import numpy as np
import pandas as pd
import xarray as xr
from tqdm import tqdm

# Functions

In [2]:
def calc_distance_center_to_edge(source , full_area):
    """
    Calculates the largest distance from the center of the source area to any cell of the full area.

    The center is the (row, column) position given by the integer-truncated medians of the
    row indices and of the column indices of the non-zero cells in ``source``.

    Parameters:
    -----------
    source (ndarray): 
        A 2D array containing a binary mask of the area whose center is used as the origin.
    full_area (ndarray): 
        A 2D array containing a binary mask of the full area that the source area is a part of.
        Only cells equal to 1 are taken into account.

    Returns:
    -------
    float: 
        The maximum distance in km from the center of the source area to the edge of the full_area.
        ``np.nan`` is returned when ``source`` has no non-zero cell or ``full_area`` has no cell equal to 1.

    Notes:
    ------
        Multiplied by 25 to convert from 0.25 degrees to Kilometers

    """
    rows, cols = np.nonzero(source)
    if rows.size == 0:
        # Always return a scalar: a tuple here would make the caller's list of
        # distances ragged and break np.array(distances).
        return np.nan
    y = int(np.median(rows))
    x = int(np.median(cols))
    # Find the coordinates of all 1s in the matrix
    coords = np.argwhere(np.asarray(full_area) == 1)
    if coords.shape[0] == 0:
        # np.max would raise on an empty array; there is no edge to measure to.
        return np.nan
    # Calculate the Euclidean distance (in grid cells) between the center and every 1
    distances = np.linalg.norm(coords - np.array([y, x]), axis=1)
    max_distance = np.max(distances)

    return max_distance*25

In [3]:
def get_storm_information(storm_mask_3d, wind_speed , smap , evi  , dates , hrs):
    """
    Derive per-storm properties from a 3D storm mask and the matching wind, soil-moisture and EVI datasets.

    Parameters
    ----------
    storm_mask_3d : numpy.ndarray
        3D boolean array whose first axis is time; True marks storm presence.
    wind_speed : xarray.Dataset
        Dataset with a ``time`` coordinate and a ``wind_speed`` variable.
    smap : xarray.Dataset
        Dataset with a ``time`` coordinate and a ``soil_moisture`` variable.
    evi : xarray.Dataset
        Dataset with a ``time`` coordinate and an ``evi`` variable.
    dates : numpy.ndarray
        Array of datetime64 values, one per step of the first axis of ``storm_mask_3d``.
    hrs : int
        Number of leading active time steps of the storm that are merged into the source area.

    Returns
    -------
    tuple
        Nine items, in this order:

        storm_source_2d : numpy.ndarray
            2D boolean mask, union of the storm mask over its first ``hrs`` active time steps.
        storm_coverage : numpy.ndarray
            2D integer (0/1) mask, union of the storm mask over all time steps.
        distance_traveled_km : float
            Result of ``calc_distance_center_to_edge(storm_source_2d, storm_coverage)``.
        duration : int
            Whole hours between the first and the last active time step.
        start, end : numpy.datetime64
            Dates of the first and the last active time step.
        wind_speed_storm_2d : numpy.ndarray
            NaN-ignoring maximum of ``wind_speed`` over the window [start, start + 3 h], NaN outside the source area.
        soil_m : numpy.ndarray
            ``soil_moisture`` selected at the start day, NaN outside the source area.
        evi_day : numpy.ndarray
            ``evi`` selected at the start day, NaN outside the source area.

    Notes
    -----
    NOTE(review): the wind window length is fixed at 3 hours and does not follow ``hrs`` - confirm this is intended.
    """
    # Indices along the first axis where the mask contains at least one storm pixel
    active_steps = np.unique(np.nonzero(storm_mask_3d)[0])

    # Timing of the storm
    start = dates[active_steps[0]]
    end = dates[active_steps[-1]]
    duration = (end.astype('datetime64[h]') - start.astype('datetime64[h]')).astype(int)

    # Spatial footprints: early-stage source area and overall coverage
    storm_source_2d = np.any(storm_mask_3d[active_steps[:hrs]], axis=0)
    storm_coverage = np.any(storm_mask_3d, axis=0).astype(int)
    distance_traveled_km = calc_distance_center_to_edge(storm_source_2d, storm_coverage)

    outside_source = ~storm_source_2d
    start_day = start.astype('datetime64[D]')

    # Wind: peak value over the window following the storm start, kept only inside the source
    wind_window = wind_speed.sel(time=slice(start, start + np.timedelta64(3, 'h')))
    peak_wind = np.nanmax(wind_window.wind_speed.values.copy(), axis=0)
    wind_speed_storm_2d = storm_source_2d * peak_wind
    wind_speed_storm_2d[outside_source] = np.nan

    # Soil moisture on the day the storm starts, kept only inside the source
    soil_m = smap.sel(time=start_day).soil_moisture.values.copy()
    soil_m[outside_source] = np.nan

    # EVI on the day the storm starts, kept only inside the source
    evi_day = evi.sel(time=start_day).evi.values.copy()
    evi_day[outside_source] = np.nan

    return (
        storm_source_2d,
        storm_coverage,
        distance_traveled_km,
        duration,
        start,
        end,
        wind_speed_storm_2d,
        soil_m,
        evi_day,
    )


# Apply

In [4]:
# Open the SMAP NetCDF file (path is relative to the working directory).
# Its `soil_moisture` variable is read later by get_storm_information.
smap=xr.open_dataset('DustSCAN_SMAP_2015_2022.nc')

In [5]:
# Build a gap-free daily time axis spanning the SMAP record.
full_time_range  = pd.date_range(start= smap.time.values[0], end=smap.time.values[-1], freq='D')
# Reindex FIRST so that days absent from the file become explicit all-NaN steps,
# THEN forward-fill up to 7 consecutive missing values along time. Filling before
# reindexing would leave the newly inserted days as NaN.
smap_filled = smap.reindex(time=full_time_range).ffill(dim='time', limit=7)

In [6]:
# Open the MODIS EVI NetCDF file (path is relative to the working directory).
# Its `evi` variable is read later by get_storm_information.
evi =xr.open_dataset('DustSCAN_MODIS_EVI_2017_2022.nc')

In [7]:
# Put EVI on a daily time axis so it can be selected by the storm's start day;
# each daily step takes the value of the nearest original time step.
evi_filled=evi.resample(time='1D').nearest() # Resamples the evi data array to a daily frequency, filling with the nearest values.

In [8]:
# Longitude / latitude arrays taken from the EVI file; they are attached to the
# output dataset as the `longitude` / `latitude` coordinates on dims ("lat", "lon").
# NOTE(review): that usage requires both arrays to be 2D - confirm against the file.
lons = evi.longitude.values
lats = evi.latitude.values

In [12]:
print("Current time:", datetime.now().strftime("%H:%M:%S"))
# List of years to consider
years = [2018, 2019, 2020, 2021, 2022 ]
ds_list=[]

ti = 3 # the number of hours used to define the source area of the plume
# Loop over the years
for year in tqdm(years):
    # Initialization of lists to hold the per-plume values of this year
    storm_ids = []
    sources=[]
    coverages=[]
    distances = []
    durations = []
    contributions =[]
    starts=[]
    ends=[]
    ws_arrays = []
    sm_arrays = []
    evi_arrays =[]

    # Open both yearly files in a context manager so their file handles are
    # released as soon as the year has been processed.
    with xr.open_dataset("DustSCAN_"+str(year)+".nc") as dust_scan, \
            xr.open_dataset('DustSCAN_ERA5_'+str(year)+'.nc') as era_5_file:
        labels = dust_scan["plume_id"].values
        dates= dust_scan["time"].values

        # Wind speed magnitude from the u_10 / v_10 components, restricted to the dust data dates
        era_5_wind = era_5_file.assign(
            wind_speed=np.sqrt(era_5_file['u_10']**2 + era_5_file['v_10']**2)
        ).sel(time= dates)

        # Loop over the unique dust storm labels, skipping the first which is 0 (no dust)
        for i in np.unique(labels)[1:]:
            # Create a mask for the current dust storm
            storm = labels==i
            # Calculate wind speed, soil moisture, and evi for the source area of the current dust storm.
            (storm_source_2d , storm_coverage , distance_traveled_km , storm_duration ,
             start, end , wind_speed_storm_2d , soil_m_storm_2d , evi_storm_2d) = get_storm_information(
                storm, era_5_wind , smap_filled, evi_filled ,dates, ti)

            storm_ids.append(i)
            sources.append(storm_source_2d)
            coverages.append(storm_coverage)
            distances.append(distance_traveled_km)
            durations.append(storm_duration)
            starts.append(start)
            ends.append(end)
            sm_arrays.append(soil_m_storm_2d)
            ws_arrays.append(wind_speed_storm_2d)
            evi_arrays.append(evi_storm_2d)
            # Total number of (time, lat, lon) pixels labelled with this plume
            contributions.append(np.sum(storm))

    if not storm_ids:
        # np.stack raises on an empty list; a year without plumes contributes nothing.
        continue

    # Everything below is plain numpy data, so the source files may already be closed.
    ds_year = xr.Dataset(
        data_vars={
            "source": (("plume_id", "lat", "lon"),  np.stack(sources) ),
            "coverage": (("plume_id", "lat", "lon"), np.stack(coverages)),
            "wind_speed": (("plume_id", "lat", "lon"),  np.stack(ws_arrays)),
            "soil_moisture": (("plume_id", "lat", "lon"),  np.stack(sm_arrays)),
            "evi": (("plume_id", "lat", "lon"),  np.stack(evi_arrays)),
            "distance": (("plume_id",),  np.array(distances)),
            "duration": (("plume_id",),  np.array(durations)),
            "contribution": (("plume_id",),  np.array(contributions)),
            "start_time": (("plume_id",),  np.array(starts)),
            "end_time": (("plume_id",),  np.array(ends)),
        },
        coords={
            "plume_id": np.array(storm_ids).astype(int),
            "latitude": (("lat", "lon"), lats),
            "longitude": (("lat", "lon"), lons),
        },
    )
    ds_list.append(ds_year)

# Combine the yearly datasets along the plume dimension
ds = xr.concat(ds_list , dim="plume_id")

Current time: 18:31:07


  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Adding long names and descriptions
ds["source"].attrs["long_name"] = "Source Area"
ds["source"].attrs["description"] = "Source area of the plume. Defined as the area covered within the first " + str(ti) + " hours of the plume."

ds["coverage"].attrs["long_name"] = "Plume Coverage"
ds["coverage"].attrs["description"] = "The area covered by the plume. Reported as a binary map of all the pixels the plume reached."

ds["wind_speed"].attrs["long_name"] = "Source Wind Speed"
ds["wind_speed"].attrs["description"] = "Wind speed in the source area."

ds["soil_moisture"].attrs["long_name"] = "Soil Moisture"
ds["soil_moisture"].attrs["description"] = "Soil moisture in the source area."

ds["evi"].attrs["long_name"] = "Enhanced Vegetation Index"
ds["evi"].attrs["description"] = "Enhanced Vegetation Index in the source area."

ds["distance"].attrs["long_name"] = "Distance from Source"
ds["distance"].attrs["description"] = "Distance traveled by the plume. Calculated as the distance from the center to farthest point reached by the plume."

ds["duration"].attrs["long_name"] = "Plume Duration"
ds["duration"].attrs["description"] = "Duration of the plume in hours."

ds["contribution"].attrs["long_name"] = "Contribution"
ds["contribution"].attrs["description"] = "Sum of pixels identified during the lifetime of the plume."

ds["start_time"].attrs["long_name"] = "Start Time"
ds["start_time"].attrs["description"] = "Start time of the plume in UTC."

ds["end_time"].attrs["long_name"] = "End Time"
ds["end_time"].attrs["description"] = "End time of the plume in UTC."

ds["plume_id"].attrs["long_name"] = "Plume ID"
ds["plume_id"].attrs["description"] = "Unique identifier for each plume."


ds["latitude"].attrs = {
        'long_name': "Latitude",
        'units': "degrees_north",
        'description': "Latitude values",
    }

ds["longitude"].attrs = {
        'long_name': "Longitude",
        'units': "degrees_east",
        'description': "Longitude values",}


ds.attrs = {
'Conventions': 'CF-1.10',
'title' :'DustSCAN Plume Properties',
'description' :'SEVIRI derived dust plumes properties, integrated with SMAP soil moisture, MODIS Enhanced Vegetation Index, and ERA-5 wind speed.' ,
'source' :'DustSCAN: A Five Year (2018-2022) Hourly Dataset of Dust Plumes From SEVIRI.',
'version' : '1.0',
'reference' : 'doi: x',
'Authors' : 'Faisal AlNasser and Dara Entekahbi',
'history' : str(datetime.utcnow()) + ' Python',
}

In [None]:
# Write the combined plume-property dataset to a NetCDF file in the working directory.
ds.to_netcdf("DustSCAN_plume_properties.nc")