# Overview
This notebook examines SOS Kettle Ponds surface data for a number of case study dates. It provides contextual and surface information for the doppler lidar data analyzed in other notebooks.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

from sublimpy import turbulence, tidy, utils
import matplotlib.pyplot as plt
import metpy.constants
import datetime as dt
import pytz

# Open data

## Open SOS Measurement Dataset

In [2]:
ls ../ | grep parquet

In [3]:
start_date = '20221130'
end_date = '20230619'
# tidy_data_fn = f'../paper1/tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet'
tidy_data_fn = f'../paper1/tidy_df_{start_date}_{end_date}_noplanar_fit.parquet'
# open files
tidy_df = pd.read_parquet(tidy_data_fn)
# convert time column to datetime
tidy_df['time'] = pd.to_datetime(tidy_df['time'])
# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df = tidy_df.set_index('time').sort_index().loc[start_date:end_date].reset_index()

tidy_df = utils.modify_df_timezone(tidy_df, pytz.UTC, 'US/Mountain')

# Get just a little SAIL sebs data

In [4]:
import act
USERNAME = os.getenv("ARM_USERNAME")
TOKEN = os.getenv("ARM_TOKEN")
files = act.discovery.download_arm_data(
    USERNAME, 
    TOKEN, 
    "gucsebsS3.b1", 
    "20230609", 
    "20230610", 
    output="/Users/elischwat/Development/data/sublimationofsnow/gucsebsS3.b1/"
)

[DOWNLOADING] gucsebsS3.b1.20230609.000000.cdf
[DOWNLOADING] gucsebsS3.b1.20230610.000000.cdf

If you use these data to prepare a publication, please cite:

Sullivan, R., Keeler, E., Pal, S., & Kyrouac, J. Surface Energy Balance System
(SEBS). Atmospheric Radiation Measurement (ARM) User Facility.
https://doi.org/10.5439/1984921



In [5]:
ds = act.io.read_arm_netcdf(files)
sail_df = ds[[
    'down_short_hemisp',
    'up_short_hemisp',
    'down_long',
    'up_long'
]].to_dataframe().reset_index()
sail_df = utils.modify_df_timezone(sail_df, 'UTC', 'US/Mountain')
sail_df = sail_df[sail_df.time.dt.day == 9]

In [7]:
case_study_src = tidy_df[tidy_df.time.dt.date.isin([
    # dt.date(2023, 3, 3),
    dt.date(2023, 4, 16),
    # dt.date(2023, 5, 2),
    dt.date(2023, 6, 9),
    # dt.date(2023, 6, 11)
])]
case_study_src['date'] = case_study_src['time'].dt.date.astype('str')
case_study_src['time_no_date'] = case_study_src['time'].apply(lambda dt: dt.replace(
    year = 2023,
    month = 1,
    day = 1
))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_study_src['date'] = case_study_src['time'].dt.date.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_study_src['time_no_date'] = case_study_src['time'].apply(lambda dt: dt.replace(


# Plot time series

In [8]:
import pysolar.solar

# Gothic, CO
latitude_deg = 38.965703
longitude_deg = -106.993591

# Seattle
# latitude_deg = 47.619781
# longitude_deg = -122.298188

In [9]:
ideal_rad_df = pd.concat([
    pd.DataFrame({
        'time': list(case_study_src.query("date == '2023-03-03'").time.unique()),
        'rad': [
            pysolar.solar.radiation.get_radiation_direct(
                date.tz_localize('US/Mountain'), 
                pysolar.solar.get_altitude(latitude_deg, longitude_deg, date.tz_localize('US/Mountain'))
            )
            for date in list(case_study_src.query("date == '2023-03-03'").time.unique())
        ]
    }),
    pd.DataFrame({
        'time': list(case_study_src.query("date == '2023-05-02'").time.unique()),
        'rad': [
            pysolar.solar.radiation.get_radiation_direct(
                date.tz_localize('US/Mountain'), 
                pysolar.solar.get_altitude(latitude_deg, longitude_deg, date.tz_localize('US/Mountain'))
            )
            for date in list(case_study_src.query("date == '2023-05-02'").time.unique())
        ]
    }),
    pd.DataFrame({
        'time': list(case_study_src.query("date == '2023-06-09'").time.unique()),
        'rad': [
            pysolar.solar.radiation.get_radiation_direct(
                date.tz_localize('US/Mountain'), 
                pysolar.solar.get_altitude(latitude_deg, longitude_deg, date.tz_localize('US/Mountain'))
            )
            for date in list(case_study_src.query("date == '2023-06-09'").time.unique())
        ]
    })
])
ideal_rad_df['date'] = ideal_rad_df['time'].dt.date.astype('str')
ideal_rad_df['time_no_date'] = ideal_rad_df['time'].apply(lambda dt: dt.replace(
    year = 2023,
    month = 1,
    day = 1
))
alt.Chart(
    ideal_rad_df
).mark_line(strokeDash=[4,2]).encode(
    alt.X("time_no_date:T"),
    alt.Y("rad:Q"),
    alt.Color("date:N")
).properties(width = 500) + alt.Chart(
    case_study_src[
        case_study_src.variable == 'Rsw_in_9m_d'
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Color("date:N")
).properties(width = 500)

  ideal_rad_df = pd.concat([


For the 6/9 case study, substitute in SAIL DEBS data because the SOS-KPS radiometers are missing lots of data on that date

In [10]:
case_study_src_w_sail_subs = case_study_src.copy(deep=True)

case_study_src_w_sail_subs.loc[
    (case_study_src_w_sail_subs.variable == 'Rsw_out_9m_d') & 
    (case_study_src_w_sail_subs.date == '2023-06-09'),
    'value'
] = sail_df['up_short_hemisp'].values
case_study_src_w_sail_subs.loc[
    (case_study_src_w_sail_subs.variable == 'Rsw_in_9m_d') & 
    (case_study_src_w_sail_subs.date == '2023-06-09'),
    'value'
] = sail_df['down_short_hemisp'].values

case_study_src_w_sail_subs.loc[
    (case_study_src_w_sail_subs.variable == 'Rlw_out_9m_d') & 
    (case_study_src_w_sail_subs.date == '2023-06-09'),
    'value'
] = sail_df['up_long'].values
case_study_src_w_sail_subs.loc[
    (case_study_src_w_sail_subs.variable == 'Rlw_in_9m_d') & 
    (case_study_src_w_sail_subs.date == '2023-06-09'),
    'value'
] = sail_df['down_long'].values

net_rad = case_study_src_w_sail_subs[(case_study_src_w_sail_subs.date == '2023-06-09')].query("variable == 'Rlw_in_9m_d'")['value'].values +\
case_study_src_w_sail_subs[(case_study_src_w_sail_subs.date == '2023-06-09')].query("variable == 'Rsw_in_9m_d'")['value'].values -\
case_study_src_w_sail_subs[(case_study_src_w_sail_subs.date == '2023-06-09')].query("variable == 'Rlw_out_9m_d'")['value'].values -\
case_study_src_w_sail_subs[(case_study_src_w_sail_subs.date == '2023-06-09')].query("variable == 'Rsw_out_9m_d'")['value'].values
case_study_src_w_sail_subs.loc[
    (case_study_src_w_sail_subs.variable == 'Rnet_9m_d') & 
    (case_study_src_w_sail_subs.date == '2023-06-09'),
    'value'
] = net_rad

In [11]:

sw_plot = alt.Chart(
    case_study_src_w_sail_subs[
        case_study_src_w_sail_subs.variable.isin(['Rsw_out_9m_d', 'Rsw_in_9m_d'])
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Color("date:N"),
    alt.StrokeDash('variable:N')
).properties(width = 300, height = 150)

lw_plot = alt.Chart(
    case_study_src_w_sail_subs[
        case_study_src_w_sail_subs.variable.isin(['Rlw_out_9m_d', 'Rlw_in_9m_d'])
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Color("date:N"),
    alt.StrokeDash('variable:N')
).properties(width = 300, height = 150)

net_plot = alt.Chart(
    case_study_src_w_sail_subs[
        case_study_src_w_sail_subs.variable == 'Rnet_9m_d'
    ]
).mark_line().encode(
    alt.X("time_no_date:T").title("time"),
    alt.Y("value:Q").title("Net radiation (w/m^2)"),
    alt.Color("date:N"),
    # alt.StrokeDash('variable:N')
).properties(width = 300, height = 150)

(sw_plot | lw_plot | net_plot).resolve_scale(strokeDash='independent', 
                                            #  color='independent'
                                             ).configure_legend(orient='top')

In [37]:
(
    alt.Chart(
        case_study_src_w_sail_subs[
            case_study_src_w_sail_subs.variable == 'Rnet_9m_d'
        ]
    ).mark_line().encode(
        alt.X("time_no_date:T").axis(format='%H').title("time of day"),
        alt.Y("value:Q").title("Net radiation (w/m^2)"),
        alt.Color("date:N"),
        # alt.StrokeDash('variable:N')
    ).properties(width = 175, height = 175) |\
    alt.Chart(
        case_study_src[
            case_study_src.variable == 'dir_10m_c'
        ]
    ).mark_circle(size=50).encode(
        alt.X("time_no_date:T").axis(format='%H').title("time of day"),
        alt.Y("value:Q").title("Wind direction (˚)"),
        alt.Color("date:N")
    ).properties(width = 175, height = 175) |\
    alt.Chart(
        case_study_src[case_study_src.variable == 'temp_gradient_3m_c']
    ).mark_line().encode(
        alt.X("time_no_date:T").axis(format='%H').title("time of day"),
        alt.Y("value:Q").title("Temperature gradient (˚C/m)"),
        alt.Color("date:N")
    ).properties(width = 175, height = 175)
).configure_legend(orient='top', columns=1)

In [13]:
alt.Chart(
    case_study_src[
        case_study_src.variable.isin(['Ri_3m_c', 'temp_gradient_3m_c']) 
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Row("measurement:N"),
    alt.Color("date:N")
).properties(width = 500)

# Plot vertical profiles

In [14]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['potential temperature', 'surface potential temperature'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [15]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [16]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['wind speed'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [17]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [18]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['wind direction'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [19]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [20]:
profiles_src = case_study_src[
    case_study_src.variable.isin([
        'w_tc__2m_c',
        'w_tc__3m_c',
        'w_tc__5m_c',
        'w_tc__10m_c',
        'w_tc__15m_c',
        'w_tc__20m_c',
    ])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [21]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y').scale(),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [22]:
profiles_src = case_study_src[
    case_study_src.variable.isin([
        'w_h2o__2m_c',
        'w_h2o__3m_c',
        'w_h2o__5m_c',
        'w_h2o__10m_c',
        'w_h2o__15m_c',
        'w_h2o__20m_c',
    ])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [23]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y').scale(domain=[-0.1, 0.1]),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

# Plot sos vertical wind profiles + doppler vertical wind profiles

In [24]:
dl_df = pd.read_parquet("/Users/elischwat/Development/data/sublimationofsnow/sail_processed/gucdlrhiM1.b1/").set_index([
    'z_binned',	'x_offset',	'scan_time'
])['streamwise_velocity'].reset_index()

dl_df = dl_df[dl_df.scan_time >= "2023-02-09 0000"]

In [25]:
dl_windspeed_hourly = dl_df.query("x_offset == +1000").set_index('scan_time').groupby(
    [pd.Grouper(freq='60Min'), 'z_binned']
).mean()[['mean']].rename(columns={'mean': 'dl wind speed'})
dl_windspeed_hourly.index = dl_windspeed_hourly.index.rename({'z_binned': 'height', 'scan_time': 'time'})

In [26]:
kps_windspeed_hourly = tidy_df[tidy_df.variable.isin([
    'spd_1m_c',
    'spd_2m_c',
    'spd_3m_c',
    'spd_5m_c',
    'spd_10m_c',
    'spd_15m_c',
    'spd_20m_c', 
])][['time', 'height', 'value']].set_index('time').groupby(
    [pd.Grouper(freq='60Min'), 'height']
).mean()
kps_windspeed_hourly = kps_windspeed_hourly.rename(columns={'value': 'kps wind speed'})

In [27]:
combo_df = pd.concat([kps_windspeed_hourly, dl_windspeed_hourly])
combo_df = combo_df.reset_index().set_index('time').sort_index()

In [28]:
alt.Chart(
    combo_df['20230303': '20230303'].query("height < 600").reset_index()
).transform_fold(
    ['kps wind speed', 'dl wind speed']
).mark_circle().encode(
    alt.X("value:Q").sort('-y').scale(domain = [-6,6], clamp=True),
    alt.Y("height:Q").scale(type='log'),
    alt.Color("key:N"),
    alt.Facet("time:T", columns=6).header(format='%H%M').title("time of day")
).properties(width = 100, height = 100)