In [1]:
# netcdf/numpy/xray/stats
import xarray as xr

# plotting packages
import seaborn as sns
from matplotlib.pyplot import subplots
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap


from sublimpy import tidy

from metpy.units import units
import metpy
import pint_xarray
import altair as alt
alt.data_transformers.enable('json')
import pytz

from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
import datetime as dt

# Inputs

In [2]:
start_date = '20221130'
end_date = '20230509'

In [3]:
try:
    tidy_df_30Min = pd.read_parquet(f"../sos/tidy_df_30Min_{start_date}_{end_date}_noplanar_fit.parquet")
except FileNotFoundError:
    print("No file such file exists for these dates.")
tidy_df_30Min['time'] = pd.to_datetime(tidy_df_30Min['time'])

# Define case study dataframes

In [4]:
# Observation derived case studies
case_study_intervals_ls = [
    pd.Interval(pd.Timestamp(2023,1,31), pd.Timestamp(2023,2,4)),  # synoptically weak, clear skies and calm
    pd.Interval(pd.Timestamp(2023,2,19), pd.Timestamp(2023,2,23)), # synoptically active, blowing snow conditions
    pd.Interval(pd.Timestamp(2023,3,16), pd.Timestamp(2023,3,20)), # snyoptically weak, clear skies and calm
    # pd.Interval(pd.Timestamp(2023,3,20), pd.Timestamp(2023,3,24)) # synoptically active, some blowing snow conditions
]
days_from_ls = [30, 49, 74, 78]

# Methodologically derived case studies
# case_study_intervals_ls = [
#     pd.Interval(pd.Timestamp(2022,12,21), pd.Timestamp(2022,12,24)),
#     pd.Interval(pd.Timestamp(2023,2,19), pd.Timestamp(2023,2,22)),
#     pd.Interval(pd.Timestamp(2023,3,5), pd.Timestamp(2023,3,8)),
#     pd.Interval(pd.Timestamp(2023,4,2), pd.Timestamp(2023,4,5))
# ]
# days_from_ls = [-11, 49, 63, 91]

case_study_df_ls = []

for interval, days_from in zip(case_study_intervals_ls, days_from_ls):
    df = tidy_df_30Min[tidy_df_30Min['time'] < interval.right]
    df = df[df['time'] >= interval.left]
    df['time'] = df['time'] - dt.timedelta(days=days_from)
    case_study_df_ls.append(df)

## Calculate cumulative sublimation for each case study (Separately)

In [5]:
for i in range(0, len(case_study_df_ls)):
    for variable in [
        'w_h2o__3m_uw',
        'w_h2o__3m_ue',
        'w_h2o__3m_c',
        'w_h2o__3m_d'
    ]:

        case_study_df_ls[i] = tidy.tidy_df_add_variable(
            case_study_df_ls[i],
            np.cumsum(case_study_df_ls[i].query(f"variable == '{variable}'")['value']*60*30).values*1000/(1e6),
            "Cumulative sublimation (mm)",
            "Cumulative sublimation (mm)",
            int(variable.split('_')[-2].split('m')[0]),
            variable.split('_')[-1]
        )

In [6]:
for i in range(0, len(case_study_df_ls)):
    case_study_df_ls[i] = case_study_df_ls[i].assign(case_study = str(case_study_intervals_ls[i]))

## Combine case study datasets into one dataframe (for plotting)

In [7]:
case_study_compare_src = pd.concat(case_study_df_ls)

# Plot time series

In [19]:
base_nolabels_with_facet_header = alt.Chart(case_study_compare_src).encode(
    alt.X('time:T').axis(labels=False).title(None),
    alt.Column('case_study:O').header(labelFontSize=18).title(None)
).properties(width=400, height=100)

base_nolabels = alt.Chart(case_study_compare_src).encode(
    alt.X('time:T').axis(labels=False).title(None),
    alt.Column('case_study:O').header(labels=False).title(None)
).properties(width=400, height=100)

base = alt.Chart(case_study_compare_src).encode(
    alt.X('time:T').axis(format="%H00").title('time (hour)'),
    alt.Column('case_study:O').header(labels=False).title(None)
).properties(width=400, height=100)

(
    # CUMULATIVE SUBLIMATION PLOT
    base_nolabels_with_facet_header.transform_filter(
        alt.FieldOneOfPredicate('measurement', ['Cumulative sublimation (mm)'])
    ).transform_filter(
        alt.datum.height == 3
    ).transform_filter(
        alt.datum.tower == 'c'
    ).mark_line().encode(
        alt.Y("value:Q", title=['Cumulative', 'sublimation (mm)'], scale=alt.Scale(zero=False, nice=False))
    ) &
    # LATENT HEAT FLUX PLOT
    base_nolabels.transform_filter(
        alt.datum.variable == "w_h2o__3m_c"
    ).transform_window(
        rolling_mean = 'mean(value)',
        frame= [-6, 6],
        groupby = ["height"]
    ).mark_line().encode(
        alt.Y("rolling_mean:Q", title = ['Latent heat flux', '(g/m^2/s)'])
    ) & 
    # Richardson number plot
    base_nolabels.transform_filter(
        alt.FieldOneOfPredicate('variable', ['Ri_3m_c'])
    ).transform_window(
        rolling_avg = 'median(value)',
        frame=[-2,2],
        groupby = ["case_study"]
    ).mark_line().encode(
        alt.Y("rolling_avg:Q", title='Ri'),
    ) &
    # Surface and Air Temp Plot
    base_nolabels.transform_filter(
        alt.FieldOneOfPredicate('variable', ['T_3m_c', 'Tsurf_rad_d'])
    ).mark_line().encode(
        alt.Y("value:Q", title="Temperature (˚C)"),
        alt.StrokeDash("measurement:N")
    ) & 
    # blowing snow flux
    base_nolabels.transform_filter(
        alt.datum.measurement == 'snow flux'
    ).mark_line().encode(
        alt.Y("value:Q", title=['Blowing Snow', 'Flux (g/m^2)']),
        alt.Color("height:N")
    ) &
    # SW Incoming
    base.transform_filter(
        alt.datum.measurement == 'shortwave radiation incoming'
    ).mark_line().encode(
        alt.Y("value:Q", title=['SW Incoming', '(W/m^2)'])
    ) &
    # 
    base.transform_filter(
        alt.datum.measurement == 'RH'
    ).transform_filter(
        alt.FieldOneOfPredicate('height', [1,2,3,5,10,20])
    ).mark_line().encode(
        alt.Y("value:Q", title=['Relative Humidity (%)']),
        alt.Color("height:N")
    )
).resolve_scale(strokeDash='independent', color='independent').configure_axis(
    labelFontSize=14
)

In [9]:
alt.Chart(
    case_study_compare_src
).transform_filter(
    alt.FieldOneOfPredicate(
        "variable",
        ["w_h2o__3m_c", "u_h2o__3m_c", "v_h2o__3m_c"]
    )
).transform_window(
    rolling_mean = 'mean(value)',
    frame= [-6, 6],
    groupby = ["height"]
).mark_line().encode(
    alt.X('time:T').axis(format="%H00").title('time (hour)'),
    alt.Y("rolling_mean:Q", title = ['Latent heat flux', '(g/m^2/s)']),
    alt.Color("variable:N"),
    alt.Column('case_study:O').header(labels=False).title(None)
).properties(width=400, height=100)

# Plot Profiles

## With basic measurements

In [21]:
profiles_src = case_study_compare_src.copy()

In [29]:
profiles_src = profiles_src[profiles_src['time'].dt.minute == 0]
profiles_src = profiles_src[profiles_src['time'].dt.hour%4 == 0]
profiles_src['day'] = profiles_src['time'].dt.day
profiles_src['hour'] = profiles_src['time'].dt.hour

# profiles_src = profiles_src.query("hour <= 18")

t_profiles_src = profiles_src[profiles_src['variable'].isin([
    'Tsurf_c',
    'T_2m_c',
    'T_4m_c',
    'T_5m_c',
    'T_6m_c',
    'T_7m_c',
    'T_8m_c',
    'T_9m_c'
    'T_10m_c',
    'T_11m_c',
    'T_12m_c',
    'T_13m_c',
    'T_14m_c',
    'T_15m_c',
    'T_16m_c',
    'T_17m_c',
    'T_18m_c',
    'T_19m_c',
    'T_20m_c',
])]

spd_profiles_src = profiles_src[profiles_src['measurement'] == 'wind speed'].query("tower == 'c'")
q_profiles_src = profiles_src[profiles_src['measurement'] == 'mixing ratio'].query("tower == 'c'")

In [30]:
def profiles_chart(t_profiles_src, spd_profiles_src, q_profiles_src, title):
    return (
        alt.Chart(t_profiles_src).mark_line().encode(
            alt.X('value:Q').sort('-y').title("Temperature (˚C)").scale(domain=[-30, 5]),
            alt.Y("height:Q"),
            alt.Color("hour:N"),
            alt.Column('day')
        ).properties(height = 100, width = 150)
        &
        alt.Chart(spd_profiles_src).mark_line().encode(
            alt.X('value:Q').sort('-y').title('Wind speed (m/s)').scale(domain=[0,10]),
            alt.Y("height:Q"),
            alt.Color("hour:N"),
            alt.Column('day').header(None)
        ).properties(height = 100, width = 150)
        &
        alt.Chart(q_profiles_src).mark_line().encode(
            alt.X('value:Q').sort('-y').title('Mixing ratio (g/g)').scale(domain=[0, 0.006]),
            alt.Y("height:Q"),
            alt.Color("hour:N"),
            alt.Column('day').header(None)
        ).properties(height = 100, width = 150)
    ).properties(title=title)

In [31]:
(
    profiles_chart(
        t_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[0]}'"), 
        spd_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[0]}'"), 
        q_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[0]}'"),
        "Case Study 1"
    ) | profiles_chart(
        t_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[1]}'"), 
        spd_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[1]}'"), 
        q_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[1]}'"),
        "Case Study 2"
    ) | profiles_chart(
        t_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[2]}'"), 
        spd_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[2]}'"), 
        q_profiles_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[2]}'"),
        "Case Study 3"
    )
).configure_axis(grid=False).configure_title(anchor='middle')

## With near-snow-surface temperatures

In [94]:
profiles_src = case_study_compare_src.query(f"case_study == '{case_study_compare_src.case_study.unique()[0]}'").copy()

In [95]:
alt.Chart(
    profiles_src.query("measurement == 'snow temperature'")
).mark_line().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Color("height:Q"),
    alt.Facet("tower:O", columns=1),
    tooltip='height'
).properties(width = 800)


On tower d, use measurements from 1.1m and above. Snow is approximately 1m deep. Subtract 1m from all temperature measurements except surface

In [96]:
profiles_src = profiles_src[profiles_src['time'].dt.minute == 0]
profiles_src = profiles_src[profiles_src['time'].dt.hour%4 == 0]
profiles_src['day'] = profiles_src['time'].dt.day
profiles_src['hour'] = profiles_src['time'].dt.hour

t_profiles_src = profiles_src[profiles_src['variable'].isin([
    'Tsurf_d',
    'Tsnow_1_1m_d',
    'Tsnow_1_2m_d',
    'Tsnow_1_3m_d',
    'Tsnow_1_4m_d',
    'T_2m_c',
    'T_4m_c',
    'T_5m_c',
    'T_6m_c',
    'T_7m_c',
    'T_8m_c',
    'T_9m_c'
    'T_10m_c',
    'T_11m_c',
    'T_12m_c',
    'T_13m_c',
    'T_14m_c',
    'T_15m_c',
    'T_16m_c',
    'T_17m_c',
    'T_18m_c',
    'T_19m_c',
    'T_20m_c',
])]

t_profiles_src['height'] = t_profiles_src['height'].apply(lambda x: 0.005 if x == 0 else x - 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  t_profiles_src['height'] = t_profiles_src['height'].apply(lambda x: 0.005 if x == 0 else x - 1)


In [97]:
alt.Chart(t_profiles_src).mark_circle().encode(
    alt.X('value:Q').sort('-y').title("Temperature (˚C)"),
    alt.Y("height:Q").scale(type='log'),
    alt.Color("hour:N")
).properties(height = 150, width = 200).facet(column = 'day')