In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 100)
import glob
import act
import matplotlib.pyplot as plt
import xarray as xr
import scipy.signal
import datetime as dt
import scipy
import altair as alt
import swifter

# User Inputs

In [2]:
# This data was put here and cleaned using the `doppler_lidar_mass_download.py` script
# input_directory = '/data2/elilouis/sublimationofsnow/gucdlfptM1.b1/winter/downsampled'
# input_directory = '/data2/elilouis/sublimationofsnow/gucdlfptM1.b1/spring/downsampled'
input_directory = '/data2/elilouis/sublimationofsnow/gucdlfptM1.b1/alldatatodate/downsampled/'

In [None]:
dl_fpt_files = glob.glob(os.path.join(input_directory, '*/*.cdf'), recursive=True)

In [None]:
dl_fpt = act.io.armfiles.read_netcdf(dl_fpt_files)

In [None]:
dl_fpt

In [None]:
dl_fpt_plotting = dl_fpt.resample(time='240Min').mean()


display = act.plotting.TimeSeriesDisplay(
    {'Gothic Doppler LiDAR': dl_fpt_plotting},
    subplot_shape=(1,),
    figsize=(10, 5),
)

display.plot(
    'radial_velocity',
    dsname='Gothic Doppler LiDAR',
    cmap='gist_ncar',
    vmin=-3,
    vmax=3,
    subplot_index=(0,)
)


In [None]:
# dl_fpt_plotting = dl_fpt.sel(time=slice('2022-01-01', '2022-01-31'))
dl_fpt_plotting = dl_fpt.sel(time=slice('2022-05-01', '2022-05-31'))
dl_fpt_plotting = dl_fpt_plotting.resample(time='60Min').mean()

display = act.plotting.TimeSeriesDisplay(
    {'Gothic Doppler LiDAR': dl_fpt_plotting},
    subplot_shape=(1,),
    figsize=(10, 5),
)

display.plot(
    'radial_velocity',
    dsname='Gothic Doppler LiDAR',
    cmap='gist_ncar',
    vmin=-3,
    vmax=3,
    subplot_index=(0,)
)


In [None]:
# dl_fpt_plotting = dl_fpt.sel(time=slice('2022-01-04', '2022-01-04'))
dl_fpt_plotting = dl_fpt.sel(time=slice('2022-05-05', '2022-05-05'))
dl_fpt_plotting = dl_fpt_plotting.resample(time='1Min').mean()

display = act.plotting.TimeSeriesDisplay(
    {'Gothic Doppler LiDAR': dl_fpt_plotting},
    subplot_shape=(1,),
    figsize=(10, 5),
)

display.plot(
    'radial_velocity',
    dsname='Gothic Doppler LiDAR',
    cmap='gist_ncar',
    vmin=-3,
    vmax=3,
    subplot_index=(0,)
)


# Create DF

In [None]:
src_fpt = dl_fpt.to_dataframe().reset_index()

# Save the DF to save time on future runs

In [None]:
src_fpt

In [None]:
src_fpt.to_pickle('vertical_staring.pkl')

In [3]:
src_fpt = pd.read_pickle('vertical_staring.pkl')

In [5]:
len(src_fpt['time'].dt.date.unique())

500

Make power spectral density functions of the wind speed measured by the vertical stares for:

1. All of the SAIL data to date
2. All data from last winter
3. All data from last spring

Let's see what the power spectral density function looks like by time period ranging from **twice the frequency of the vertical stares that the DL collects** out to a **period of one month**  

There should be a few peaks at daily and sub-daily intervals, Hopefully things at sub-hourly periods are not impacted. Hopefully there aren't too many gaps, because FFTs don't work with gaps.  I trust your judgement for something simple to fill in gaps.  You may consider doing this (https://stackoverflow.com/questions/23083649/fourier-transformation-with-missing-values).


Notes:
* ignore anything above 3km
* break time series into pieces 

# Bin the range/elevation values

In [18]:
src_fpt['range_group'] = pd.cut(
    src_fpt['range'],
    [0, 400, 800, 1200, 1600, 2000, 2400, 2800, 3200, 3600, 4000],
    labels=[200, 600, 1000, 1400, 1800, 2200, 2600, 3000, 3400, 3800]
)

In [19]:
src_fpt_range_grouped = src_fpt[['time', 'radial_velocity', 'range_group']].groupby(['time', 'range_group']).mean().reset_index()

# Create "modeled" dataset representing the proposed loss of data (for other scans)

In [20]:
src_fpt_range_grouped['minutes'] = src_fpt_range_grouped['time'].dt.minute

src_fpt_range_grouped_limited_data = src_fpt_range_grouped[~src_fpt_range_grouped['minutes'].isin([
    11,12,13,14,15,
    26,27,28,29,30,
    41,42,43,44,45,
    56,57,58,59,0
])]

In [21]:
src_fpt_range_grouped_limited_data

Unnamed: 0,time,range_group,radial_velocity,minutes
0,2021-09-01 00:01:00,200,0.270485,1
1,2021-09-01 00:01:00,600,1.203943,1
2,2021-09-01 00:01:00,1000,0.637992,1
3,2021-09-01 00:01:00,1400,-0.517454,1
4,2021-09-01 00:01:00,1800,-0.808086,1
...,...,...,...,...
6563995,2023-01-15 19:55:00,2200,1.210055,55
6563996,2023-01-15 19:55:00,2600,0.530030,55
6563997,2023-01-15 19:55:00,3000,1.860398,55
6563998,2023-01-15 19:55:00,3400,1.398165,55


# Calculate vertical profiles of velocity statistics

In [22]:
variance_all_data = pd.DataFrame(src_fpt_range_grouped.groupby('range_group')['radial_velocity'].apply(np.var)).reset_index().rename(columns={'radial_velocity': 'value'})
variance_all_data['statistic'] = 'variance'
variance_all_data['mode'] = 'more data'
skew_all_data = pd.DataFrame(src_fpt_range_grouped.groupby('range_group')['radial_velocity'].apply(scipy.stats.skew)).reset_index().rename(columns={'radial_velocity': 'value'})
skew_all_data['statistic'] = 'skew'
skew_all_data['mode'] = 'more data'
kurt_all_data = pd.DataFrame(src_fpt_range_grouped.groupby('range_group')['radial_velocity'].apply(scipy.stats.kurtosis)).reset_index().rename(columns={'radial_velocity': 'value'})
kurt_all_data['statistic'] = 'kurtosis'
kurt_all_data['mode'] = 'more data'

variance_less_data = pd.DataFrame(src_fpt_range_grouped_limited_data.groupby('range_group')['radial_velocity'].apply(np.var)).reset_index().rename(columns={'radial_velocity': 'value'})
variance_less_data['statistic'] = 'variance'
variance_less_data['mode'] = 'less data'
skew_less_data = pd.DataFrame(src_fpt_range_grouped_limited_data.groupby('range_group')['radial_velocity'].apply(scipy.stats.skew)).reset_index().rename(columns={'radial_velocity': 'value'})
skew_less_data['statistic'] = 'skew'
skew_less_data['mode'] = 'less data'
kurt_less_data = pd.DataFrame(src_fpt_range_grouped_limited_data.groupby('range_group')['radial_velocity'].apply(scipy.stats.kurtosis)).reset_index().rename(columns={'radial_velocity': 'value'})
kurt_less_data['statistic'] = 'kurtosis'
kurt_less_data['mode'] = 'less data'

statistics = pd.concat([variance_all_data, skew_all_data, kurt_all_data, variance_less_data, skew_less_data, kurt_less_data])

In [24]:
alt.Chart(statistics).mark_line().encode(
    alt.X('value:Q', sort='-y'),
    alt.Y('range_group:Q', title='Range (m)'),
    alt.Color('mode:N')
).properties(width=100, height=150).facet(
    column=alt.Column('statistic:N', sort=['variance', 'skew', 'kurtosis'])
).resolve_scale(x='independent').display(renderer='svg')

# Calculate power spectral density functions for each range bin

## Demean radial velocity, add epoch information

In [25]:
src_fpt_range_grouped['radial_velocity group mean'] = src_fpt_range_grouped.groupby('range_group')['radial_velocity'].transform('mean')
src_fpt_range_grouped['radial_velocity_demeaned'] = src_fpt_range_grouped['radial_velocity'] - src_fpt_range_grouped['radial_velocity group mean']
src_fpt_range_grouped['epoch'] = (src_fpt_range_grouped['time'] - dt.datetime(1970,1,1)).dt.total_seconds()

src_fpt_range_grouped_limited_data['radial_velocity group mean'] = src_fpt_range_grouped_limited_data.groupby('range_group')['radial_velocity'].transform('mean')
src_fpt_range_grouped_limited_data['radial_velocity_demeaned'] = src_fpt_range_grouped_limited_data['radial_velocity'] - src_fpt_range_grouped_limited_data['radial_velocity group mean']
src_fpt_range_grouped_limited_data['epoch'] = (src_fpt_range_grouped_limited_data['time'] - dt.datetime(1970,1,1)).dt.total_seconds()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  src_fpt_range_grouped_limited_data['radial_velocity group mean'] = src_fpt_range_grouped_limited_data.groupby('range_group')['radial_velocity'].transform('mean')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  src_fpt_range_grouped_limited_data['radial_velocity_demeaned'] = src_fpt_range_grouped_limited_data['radial_velocity'] - src_fpt_range_grouped_limited_data['radial_velocity group mean']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

## Calculate power spectrum for each range bin

In [26]:
total_measurement_time_seconds = (src_fpt_range_grouped['time'].max() - src_fpt_range_grouped['time'].min()).total_seconds()
half_sampling_rate_seconds = 120

power_spectrum_df = pd.DataFrame()
for range_group in src_fpt_range_grouped['range_group'].unique():
    src = src_fpt_range_grouped[src_fpt_range_grouped['range_group'] == range_group]
    src_less_data = src_fpt_range_grouped_limited_data[src_fpt_range_grouped_limited_data['range_group'] == range_group]
    freqs = np.linspace(1/total_measurement_time_seconds,1/half_sampling_rate_seconds, 500)
    pgram = scipy.signal.lombscargle(
        src[['epoch', 'radial_velocity_demeaned']].dropna()['epoch'],
        src[['epoch', 'radial_velocity_demeaned']].dropna()['radial_velocity_demeaned'],
        freqs
    )
    pgram_less_data = scipy.signal.lombscargle(
        src_less_data[['epoch', 'radial_velocity_demeaned']].dropna()['epoch'],
        src_less_data[['epoch', 'radial_velocity_demeaned']].dropna()['radial_velocity_demeaned'],
        freqs
    )
    src_df = pd.DataFrame({'freqs': freqs, 'pgram': pgram})
    src_df['mode'] = 'more data'
    src_df['range_group'] = range_group
    src_less_data_df = pd.DataFrame({'freqs': freqs, 'pgram': pgram_less_data})
    src_less_data_df['mode'] = 'less data'
    src_less_data_df['range_group'] = range_group
    power_spectrum_df = pd.concat([power_spectrum_df, src_df, src_less_data_df])

In [27]:
power_spectrum_df

Unnamed: 0,freqs,pgram,mode,range_group
0,2.306365e-08,2.966580,more data,200
1,1.672308e-05,85.603435,more data,200
2,3.342310e-05,77.697146,more data,200
3,5.012313e-05,11.078699,more data,200
4,6.682315e-05,5.315586,more data,200
...,...,...,...,...
495,8.266533e-03,0.241617,less data,3800
496,8.283233e-03,0.629783,less data,3800
497,8.299933e-03,1.019469,less data,3800
498,8.316633e-03,4.437347,less data,3800


In [28]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [29]:
alt.Chart(power_spectrum_df).mark_line(opacity=0.5).encode(
    alt.X('freqs:Q', scale=alt.Scale(type='log'), title='Frequency (hz)'),
    alt.Y('pgram:Q', scale=alt.Scale(type='log'), title='Power spectral density', axis=alt.Axis(grid=False)),
    alt.Color('mode'),
    alt.Facet('range_group:O', columns=3, title='Range (m)')
).properties(height=150, width=500)