# Overview
This notebook examines SOS Kettle Ponds surface data for a number of case study dates. It provides contextual and surface information for the doppler lidar data analyzed in other notebooks.

In [1]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

from sublimpy import turbulence, tidy, utils
import matplotlib.pyplot as plt
import metpy.constants
import datetime as dt
import pytz

# Open data

## Open SOS Measurement Dataset

In [4]:
ls ../ | grep parquet

In [2]:
start_date = '20221130'
end_date = '20230619'
# tidy_data_fn = f'../paper1/tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet'
tidy_data_fn = f'../paper1/tidy_df_{start_date}_{end_date}_noplanar_fit.parquet'
# open files
tidy_df = pd.read_parquet(tidy_data_fn)
# convert time column to datetime
tidy_df['time'] = pd.to_datetime(tidy_df['time'])
# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df = tidy_df.set_index('time').sort_index().loc[start_date:end_date].reset_index()

tidy_df = utils.modify_df_timezone(tidy_df, pytz.UTC, 'US/Mountain')

FileNotFoundError: [Errno 2] No such file or directory: '../paper1/tidy_df_20221130_20230619_noplanar_fit.parquet'

In [None]:
case_study_src = tidy_df[tidy_df.time.dt.date.isin([
    dt.date(2023, 3, 3),
    dt.date(2023, 4, 16),
    dt.date(2023, 5, 2),
    dt.date(2023, 6, 11)
])]
case_study_src['date'] = case_study_src['time'].dt.date.astype('str')
case_study_src['time_no_date'] = case_study_src['time'].apply(lambda dt: dt.replace(
    year = 2023,
    month = 1,
    day = 1
))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_study_src['date'] = case_study_src['time'].dt.date.astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  case_study_src['time_no_date'] = case_study_src['time'].apply(lambda dt: dt.replace(


In [None]:
alt.Chart(
    case_study_src[
        case_study_src.variable == 'dir_10m_c'
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Color("date:N")
).properties(width = 500)

In [5]:
import pysolar.solar

# Gothic, CO
latitude_deg = 38.965703
longitude_deg = -106.993591

# Seattle
# latitude_deg = 47.619781
# longitude_deg = -122.298188

In [7]:
ideal_rad_df = pd.concat([
    pd.DataFrame({
        'time': list(case_study_src.query("date == '2023-04-16'").time.unique()),
        'rad': [
            pysolar.solar.radiation.get_radiation_direct(
                date.tz_localize('US/Mountain'), 
                pysolar.solar.get_altitude(latitude_deg, longitude_deg, date.tz_localize('US/Mountain'))
            )
            for date in list(case_study_src.query("date == '2023-04-16'").time.unique())
        ]
    }),
    pd.DataFrame({
        'time': list(case_study_src.query("date == '2023-06-11'").time.unique()),
        'rad': [
            pysolar.solar.radiation.get_radiation_direct(
                date.tz_localize('US/Mountain'), 
                pysolar.solar.get_altitude(latitude_deg, longitude_deg, date.tz_localize('US/Mountain'))
            )
            for date in list(case_study_src.query("date == '2023-06-11'").time.unique())
        ]
    })
])
ideal_rad_df['date'] = ideal_rad_df['time'].dt.date.astype('str')
ideal_rad_df['time_no_date'] = ideal_rad_df['time'].apply(lambda dt: dt.replace(
    year = 2023,
    month = 1,
    day = 1
))
alt.Chart(
    ideal_rad_df
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("rad:Q"),
    alt.Color("date:N")
).properties(width = 500)

In [8]:
alt.Chart(
    case_study_src[
        case_study_src.variable == 'Rsw_in_9m_d'
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Color("date:N")
).properties(width = 500)

In [9]:
alt.Chart(
    case_study_src[
        case_study_src.variable.isin(['Ri_3m_c', 'temp_gradient_3m_c']) 
    ]
).mark_line().encode(
    alt.X("time_no_date:T"),
    alt.Y("value:Q"),
    alt.Row("measurement:N"),
    alt.Color("date:N")
).properties(width = 500)

In [28]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['potential temperature', 'surface potential temperature'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [27]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [29]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['wind speed'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [30]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)

In [31]:
profiles_src = case_study_src[
    case_study_src.measurement.isin(['wind direction'])
].query("tower == 'c'")
profiles_src = profiles_src.set_index('time').groupby([
    pd.Grouper(freq='120Min'),
    'date',
    'variable',
    'height'
])[['value']].mean()
profiles_src = profiles_src.reset_index()
profiles_src.loc[:, 'time_of_day_str'] = profiles_src['time'].dt.strftime('%H%M')
profiles_src['time_category'] = pd.cut(
    profiles_src['time'].dt.hour,
    [-0.5, 6.5, 14.5, 23.5],
    labels = ['morning (0000-0600)', 'day (0700-1500)', 'night (1600-2300)']
)

In [32]:
alt.Chart(
    profiles_src
).mark_line().encode(
    alt.X("value:Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("time_of_day_str:O").scale(scheme='rainbow'),
    alt.Column('time_category:O').sort(['morning', 'day', 'night']).title(None),
    alt.Row('date:O'),
    tooltip="time_of_day_str:O"
).properties(
    width = 150, height = 150
)