In [1]:
import numpy as np
import argparse
import pandas as pd
import glob
import datetime
import matplotlib.pyplot as plt
import act
import os
import sys
from tempfile import TemporaryDirectory
from matplotlib import patheffects as pe
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [2]:
USERNAME = os.getenv("ARM_USERNAME")
TOKEN = os.getenv("ARM_TOKEN")
SAIL_DATA_STREAM = 'gucdlrhiM1.b1'
SNR_THRESHOLD = 0.008
DATE_FORMAT = "%Y-%m-%d"
MAX_RANGE = 2000
date = '2023-02-27'
startdate = date
# also get data from the next day - UTC conversion
enddate = (
    datetime.datetime.strptime(date, DATE_FORMAT) + datetime.timedelta(hours=2)
).strftime(DATE_FORMAT)

# Download data

In [3]:
with TemporaryDirectory() as temp_dir:
    act.discovery.download_arm_data(USERNAME, TOKEN, SAIL_DATA_STREAM, startdate, enddate, output=temp_dir)
    print("Data download complete")
    dl_rhi_files = glob.glob(''.join([temp_dir, '/', SAIL_DATA_STREAM,'*cdf']))
    print(len(dl_rhi_files))
    print("Opening files")
    dl_rhi = act.io.arm.read_arm_netcdf(dl_rhi_files)
    print("Converting to dataframe")
    src_rhi = dl_rhi.to_dataframe().reset_index()

[DOWNLOADING] gucdlrhiM1.b1.20230227.001514.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.023335.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.024834.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.030024.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.031834.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.033006.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.034834.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.041833.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.043335.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.050023.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.050341.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.051513.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.053335.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.090024.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.090341.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.091513.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.091833.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.100024.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.100341.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.104834.cdf
[DOWNLOADING] gucdlrhiM1.b1.20230227.110

# Preprocess data

In [4]:
# Convert time zone
src_rhi['time'] = src_rhi['time'].dt.tz_localize('UTC').dt.tz_convert('US/Mountain')
src_rhi['time'] = pd.to_datetime(src_rhi['time'].dt.tz_localize(None))
# Shrink dataset by removing extra data
src_rhi = src_rhi.query(f"range < {MAX_RANGE}")
# Shrink dataset by isolating data to target day in local time
src_rhi = src_rhi[src_rhi['time'].dt.day == datetime.datetime.strptime(startdate, DATE_FORMAT).day]
# Filter with SNR
src_rhi['SNR'] = src_rhi['intensity'] - 1
src_rhi.loc[src_rhi.eval(f'SNR < {SNR_THRESHOLD}'), 'radial_velocity'] = np.nan
# Remove extraneous scan data
src_rhi = src_rhi.query("elevation != -0.01")
# Add useful columns
src_rhi['date'] = src_rhi['time'].dt.date
src_rhi['hour'] = src_rhi['time'].dt.hour
src_rhi['minute'] = src_rhi['time'].dt.minute
src_rhi['second'] = src_rhi['time'].dt.second
src_rhi['time_beginning_of_hour'] = src_rhi['time'].apply(lambda dt: dt.replace(minute=0, second=0, microsecond=0))
# RHI: convert polar coordinates to rectangular coords with the radar at (0,0)
src_rhi['x'] = src_rhi['range']*np.cos(np.deg2rad(src_rhi['elevation']))
src_rhi['z'] = src_rhi['range']*np.sin(np.deg2rad(src_rhi['elevation']))

# Separate cross vally and along valley scans

In [5]:
scan_azimuth_valley_wise = 149
scan_azimuth_valley_cross = 270

In [6]:
# Split dataset into valley-wise and cross-valley RHI scans
valley_rhi_df = src_rhi[np.abs(src_rhi['azimuth'] - scan_azimuth_valley_wise) < 1]
xvalley_rhi_df = src_rhi[np.abs(src_rhi['azimuth'] - scan_azimuth_valley_cross) < 1]

# Label the 4 cross-valley scans that happen each hour
# We do this by defining the "hourly seconds" (second for a data point where 0 seconds is 
# at the beginning of the hour)
# and saying that all data from after 913 (after 15:13 mm:ss) and before 1113 (before 18:13 
# mm:ss) is the first scan, and so on - this may be imperfect
xvalley_rhi_df['hourly_seconds'] = xvalley_rhi_df.apply(lambda row: row['minute']*60 + row['second'], axis=1)
xvalley_rhi_df['hourly_scan_n'] = pd.cut(
    xvalley_rhi_df['hourly_seconds'],
    [913, 1113, 2713, 2913, 3599],
    labels=['15.00','18.00','45.00','48.00']
)

# Label the 4 valley-wise scans that happen each hour
# similarly to above
valley_rhi_df['hourly_seconds'] = valley_rhi_df.apply(lambda row: row['minute']*60 + row['second'], axis=1)
valley_rhi_df['hourly_scan_n'] = pd.cut(
    valley_rhi_df['hourly_seconds'],
    [22, 214, 1804, 2014, 3599],
    labels=['00.00','03.00','30.00','33.00']
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  xvalley_rhi_df['hourly_seconds'] = xvalley_rhi_df.apply(lambda row: row['minute']*60 + row['second'], axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  xvalley_rhi_df['hourly_scan_n'] = pd.cut(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valley_rhi_df['hourly_seconds'] = valley_rhi_df.apply

# Extract profile at given upvalley/downvalley distances

In [7]:
import datetime as dt
import altair as alt
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [17]:
src = valley_rhi_df[np.abs(valley_rhi_df['x'] + 1000) < 100]
src['scan_time'] = src.apply(lambda row : row['time_beginning_of_hour']+ dt.timedelta(minutes = float(row['hourly_scan_n'])), axis=1)
src = src[['scan_time', 'radial_velocity', 'z', 'x']]
src['z_binned'] = pd.cut(src.z, bins=np.linspace(0,2000,41)).apply(lambda bin: (bin.left + bin.right)/2)
src = src.groupby(["scan_time", 'z_binned']).median().reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  src['scan_time'] = src.apply(lambda row : row['time_beginning_of_hour']+ dt.timedelta(minutes = float(row['hourly_scan_n'])), axis=1)
  src = src.groupby(["scan_time", 'z_binned']).median().reset_index()


In [22]:
# rule = alt.Chart().transform_calculate(rule="0").mark_rule().encode(x='rule:Q')
alt.Chart(
    src[
        (src.scan_time.dt.hour >= 0)
        &
        (src.scan_time.dt.hour < 13)
    ].query("z <= 500").query("z >= 30")
).mark_line().encode(
    alt.X("radial_velocity:Q").sort('-y').scale(domain=[-12,12], clamp=True),
    alt.Y("z_binned:Q"),
    alt.Facet("scan_time:T", columns=10).header(format='%H:%M')
).properties(width = 100, height = 100)