In [90]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import xarray as xr
from IPython.display import display

In [91]:
# read the wildfire records from disk into the working DataFrame `df`
wildfires_csv = "wildfires.csv"
df = pd.read_csv(wildfires_csv)
print("loaded df")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


loaded df


In [92]:
# Dataset settings: one entry per gridded product to sample.
#   name - prefix of the yearly file ('mnet/<name>_<year>.nc') and of the
#          output columns ('<name>_<op>')
#   col  - variable to read from the opened dataset
#   ops  - aggregations handed to execute_op for every wildfire row
mnet_datasets = [
    dict(
        name='pdsi',
        col='palmer_drought_severity_index',
        ops=['min', 'max', 'avg', 'count'],
    ),
]

# Constant values.
# Reference date from which the integer day offsets used for selection are counted.
jan1st1900 = datetime(year=1900, month=1, day=1)
# The progress display is refreshed once every `progress_count` processed rows.
progress_count = 100

In [93]:
# main function for executing the different operations for each series
def execute_op(op, entries_df):
    """Reduce ``entries_df`` with the aggregation named by ``op``.

    Recognised names are 'min', 'max', 'avg' (the mean) and 'count'
    (``len`` of the input, so every entry is counted, missing or not).
    Any other name falls through and yields ``np.nan``.
    """
    reducers = (
        ('min', lambda values: values.min()),
        ('max', lambda values: values.max()),
        ('avg', lambda values: values.mean()),
        ('count', len),
    )
    # scan in the same order, and with the same equality test, as an if/elif chain
    for op_name, reduce_fn in reducers:
        if op == op_name:
            return reduce_fn(entries_df)
    return np.nan

In [94]:
# process the data & output progress
# Augment every wildfire row with aggregates sampled from each yearly dataset.
# NaN years are dropped up front: they cannot be formatted into a filename and
# would never match a row in the equality filter below anyway.
years = df['FIRE_YEAR'].dropna().unique()
for y in years:
    # go through each dataset per year
    for ds_metadata in mnet_datasets:
        mnet_ds_filename = 'mnet/%s_%d.nc' % (ds_metadata['name'], y)
        df_y = df[df['FIRE_YEAR'] == y]
        total_count = df_y.shape[0]

        # The context manager closes the file once this year is finished, and
        # also when the cell is interrupted part-way through.
        # NOTE(review): decode_cf=False leaves the 'day' coordinate (and any
        # fill-value / scale encoding of the data variable) as raw stored
        # numbers; the integer day offsets below rely on that, presumably as
        # days since 1900-01-01 - confirm against the file metadata.
        with xr.open_dataset(mnet_ds_filename, decode_cf=False) as mnet_ds:
            time_start = datetime.now().timestamp()
            processed_count, invalid_count = 0, 0

            print("opened up %s" % mnet_ds_filename)
            dh = display('opened up %s' % mnet_ds_filename, display_id=True)

            # loop thru rows
            for index, row in df_y.iterrows():
                # every field comes from the current row (the year used to be
                # read from an undefined `query` object)
                f_year, disc_doy, cont_doy, latitude, longitude = (
                    row['FIRE_YEAR'],
                    row['DISCOVERY_DOY'],
                    row['CONT_DOY'],
                    row['LATITUDE'],
                    row['LONGITUDE'],
                )
                if np.isnan(f_year) or np.isnan(disc_doy) or np.isnan(latitude) or np.isnan(longitude):
                    invalid_count += 1
                    print("\tskipping invalid index %d" % (index), f_year, disc_doy, cont_doy, latitude, longitude)
                    continue

                # date from year + day of year; datetime() needs a real int year
                # NOTE(review): if the DOY columns are 1-based (Jan 1 == 1),
                # adding them unchanged lands one day late - confirm and
                # subtract 1 if so.
                year_start = datetime(int(f_year), 1, 1)
                # a missing containment day falls back to the discovery day
                end_doy = disc_doy if np.isnan(cont_doy) else cont_doy
                d_start = year_start + timedelta(days=float(disc_doy))
                d_end = year_start + timedelta(days=float(end_doy))
                delta_start = (d_start - jan1st1900).days
                delta_end = (d_end - jan1st1900).days
                lat = np.float64(latitude)
                lon = np.float64(longitude)

                # select relevant entries in mnet_dataset
                # NOTE(review): this picks only the grid cells nearest to the
                # two endpoint days, not every day in between, so 'count' is
                # always 2 - confirm whether the whole start..end range was
                # intended.
                mnet_entries = mnet_ds.sel(day=[delta_start, delta_end], lon=lon, lat=lat, method="nearest")
                mnet_df = mnet_entries.to_dataframe()
                mnet_series = mnet_df[ds_metadata['col']]

                # compute each operation and store it on the main frame as
                # a '<dataset name>_<op>' column
                for op in ds_metadata['ops']:
                    col_key = "%s_%s" % (ds_metadata['name'], op)
                    df.at[index, col_key] = execute_op(op, mnet_series)
                processed_count += 1

                if processed_count % progress_count == 0:
                    handled_count = processed_count + invalid_count
                    # a true percentage (0-100) rather than a 0-1 fraction
                    percentage = 100.0 * handled_count / total_count
                    dh.update("[%d] '%s' dataset progress: %d / %d [%.2f%%] (%d invalids)" % (datetime.now().timestamp(), mnet_ds_filename, handled_count, total_count, percentage, invalid_count))

        time_end = datetime.now().timestamp()
        time_delta = time_end - time_start
        print("finished with %s, processed = %d, invalid = %d : took %d seconds" % (mnet_ds_filename, processed_count, invalid_count, time_delta))

# write the augmented frame to a timestamped file, then report where it went
output_f = "wildfire_augmented_%d.csv" % datetime.now().timestamp()
df.to_csv(output_f)
print("done processing entries! output is %s" % output_f)

opened up mnet/pdsi_2005.nc


"[1606554369] 'mnet/pdsi_2005.nc' dataset progress: 600 / 88604 [0.01] (0 invalids)"

KeyboardInterrupt: 