Create a disdrometer dataset with a "days since precip" variable

In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import datetime as dt
import pytz

from sublimpy import variables
from sublimpy import utils
from sublimpy import tidy

import matplotlib.pyplot as plt

import altair as alt
# Switch Altair to its 'json' data transformer for all charts built in this notebook.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

## Load disdrometer data

In [None]:
import act

In [None]:
# Download call kept for reference only (presumably the files were already
# fetched into the directory globbed below). Re-enabling it requires
# `import os` and values for `start_date` / `end_date`; none of these are
# defined in the visible notebook.
# files = act.discovery.download_data(
#     os.getenv("ARM_USERNAME"),
#     os.getenv("ARM_TOKEN"),
#     'gucldM1.b1',
#     start_date,
#     end_date,
#     output='/data2/elilouis/sublimationofsnow/gucldM1.b1'
# )

import glob
# Collect every .cdf file in the local gucldM1.b1 directory.
files = glob.glob('/data2/elilouis/sublimationofsnow/gucldM1.b1/*.cdf')

In [None]:
# Open all globbed files as one multi-file dataset and keep only `precip_rate`.
disdro_ds = xr.open_mfdataset(files)['precip_rate']

In [None]:
disdro_ds

In [None]:
# Keep 2022-11-30 through 2023-04-10 and average the rate into 5-minute bins.
precip_rate_ds = (
    disdro_ds
    .sel(time=slice("2022-11-30", "2023-04-10"))
    .resample(time='5Min')
    .mean()
)
precip_rate_ds

In [None]:
# Analysis window applied after the time-zone conversion. `start_date` and
# `end_date` were used below but never assigned in this notebook (they only
# appear in the commented-out download call), so the slice raised a NameError.
# They are set to the same dates used for the selection of `precip_rate_ds`.
# NOTE(review): confirm these are the intended bounds.
start_date = "2022-11-30"
end_date = "2023-04-10"

# Flatten the DataArray to a DataFrame with `time` as an ordinary column.
disdro_df = precip_rate_ds.to_dataframe().reset_index()
# Convert the timestamps with the sublimpy helper (source: UTC, target: US/Mountain).
disdro_df = utils.modify_df_timezone(disdro_df, pytz.UTC, pytz.timezone('US/Mountain'))
# Index by time and trim to the analysis window (label-based, inclusive slice).
disdro_df = disdro_df.set_index('time').loc[slice(start_date, end_date)]
# offset so the timestamp is in the middle, similar to SoS data
disdro_df.index = disdro_df.index + dt.timedelta(minutes=2, seconds=30)

In [None]:
disdro_df

In [None]:
# Save the 5-minute precipitation-rate frame as Parquet (path is relative to the working directory).
disdro_df.to_parquet("precip_rate.parquet")

# Calculate "days since precip"

In [None]:
# `disdro_df` is indexed by `time` once the time-zone cell has run, so calling
# set_index("time") on it raises a KeyError. Only promote the column to the
# index when `time` is still a regular column.
if "time" in disdro_df.columns:
    precip_rate_by_time = disdro_df.set_index("time")[['precip_rate']]
else:
    precip_rate_by_time = disdro_df[['precip_rate']]

# Aggregate into 1440-minute (24 h) bins; `time` becomes a column again.
daily_precip_rate = precip_rate_by_time.resample("1440Min")
disdro_daily_max_precip_date_df = daily_precip_rate.max().reset_index()
# This frame was previously built with .max() too (copy-paste of the block
# above); it now holds the daily mean, as its name says.
disdro_daily_mean_precip_date_df = daily_precip_rate.mean().reset_index()

# Each day whose max rate is not 0 starts a new group (cumsum of the boolean);
# `s` is the first date of the group each row belongs to, i.e. the most recent
# day with a non-zero max rate.
# NOTE(review): a day with no data has a NaN max, and NaN != 0 is True, so such
# a day also resets the counter -- confirm this is intended.
s = disdro_daily_max_precip_date_df.groupby(
    disdro_daily_max_precip_date_df['precip_rate'].ne(0).cumsum()
)['time'].transform('first')
# Whole days elapsed since that most recent non-zero day (0 on the day itself).
disdro_daily_max_precip_date_df['days_since_precip'] = (
    disdro_daily_max_precip_date_df['time'] - s
).dt.days

In [None]:
# Take the max rate per timestamp (collapsing any duplicated timestamps), then
# average those values within each 1440-minute (24 h) bin.
daily_mean_precip_df = (
    disdro_df.groupby("time")['precip_rate'].max()
    .resample("1440Min").mean()
)

# Mean rate * 24 / 10: presumably mm/hr -> cm of SWE accumulated per day
# (units of `precip_rate` are not shown here -- TODO confirm).
daily_mean_precip_df = pd.DataFrame(daily_mean_precip_df * 24 / 10)

In [None]:
# Attach the daily totals by position: `.values` drops the index, so this relies
# on both daily frames having the same rows in the same order.
disdro_daily_max_precip_date_df['daily_precip (cm)'] =  daily_mean_precip_df['precip_rate'].values

## Cache the daily disdrometer summary (days since precip and daily precip)

In [None]:
# Write the daily table (time, max precip_rate, days_since_precip, daily_precip (cm)) to CSV
# in the working directory.
disdro_daily_max_precip_date_df.to_csv("disdro_daily_max_precip_date_df.csv")