# Time Series Analysis

### Loading Libraries

In [43]:
# ZipFiles & IO
import io
import zipfile

#URL
import urllib.request

# Time-Zone
import pytz

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd
import polars as pl
import polars.selectors as cs

# Data Visualization
import hvplot
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt

# XGBoost
import xgboost as xgb

# Scikit-Learn
from sklearn import decomposition

# Date & Time
from datetime import datetime
from datetime import timedelta

ERROR! Session/line number was not unique in database. History logging moved to new session 12


In [44]:
# Select matplotlib as the rendering backend for hvplot in this notebook.
hvplot.extension('matplotlib')

### Loading Dataset

#### URL Function

In [45]:
def download_and_modify_url(url, local_filename, header_index=34, drop_indices=(35,)):
    """Download ``url`` to ``local_filename`` and trim the saved file in place.

    The resource is fetched with ``urllib.request.urlretrieve`` and the saved
    file is then rewritten so that only the wanted lines remain:

    * every line whose 0-based index is below ``header_index`` is dropped;
    * every line whose 0-based index is listed in ``drop_indices`` is dropped;
    * all other lines are written back unchanged and in order.

    With the defaults this keeps line 34, drops line 35 and keeps everything
    after it, i.e. the same result as the previously hard-coded
    ``i < 34 or i == 35`` test.
    NOTE(review): presumably line 34 is the column header and line 35 a
    format-spec row of the USGS export -- confirm against the raw file.

    Parameters
    ----------
    url : str
        Location to download; anything ``urlretrieve`` accepts.
    local_filename : str or path-like
        Where the file is saved and then overwritten with the trimmed text.
    header_index : int, default 34
        0-based index of the first line to keep.
    drop_indices : iterable of int, default (35,)
        0-based indices of additional lines to discard.

    Returns
    -------
    None
        The function works purely through its side effect on
        ``local_filename``.
    """
    urllib.request.urlretrieve(url, local_filename)

    with open(local_filename, 'r') as file:
        lines = file.readlines()

    # A set gives O(1) membership checks and accepts any iterable of indices.
    dropped = set(drop_indices)

    with open(local_filename, 'w') as file:
        file.writelines(
            line
            for index, line in enumerate(lines)
            if index >= header_index and index not in dropped
        )

In [46]:
# Remote location of the raw `dirtydevil.txt` file in the mattharrison/datasets GitHub repository.
url = 'https://github.com/mattharrison/datasets/raw/master/data/dirtydevil.txt'

In [47]:
# Destination of the downloaded (and then trimmed) file.
# NOTE(review): absolute, machine-specific path -- the notebook cannot run on another machine without editing it.
local_filename = '/Users/isisromero/desktop/polars/datasets/devilclean.txt'

In [48]:
# Fetch the file and rewrite it in place (side effects: a download and a file write).
download_and_modify_url(url, local_filename)

In [49]:
# Report where the trimmed file was written.
print(f"Saved & Modified File within {local_filename}")

Saved & Modified File within /Users/isisromero/desktop/polars/datasets/devilclean.txt


#### Tweaking Function

In [50]:
def tweak_river(df_):
    """Pick and type the columns of interest from the raw river frame.

    The result has six columns, in this order:

    * ``agency_cd``, ``site_no``, ``tz_cd`` -- passed through unchanged;
    * ``datetime`` -- the string column parsed with ``str.to_datetime()``
      (no explicit format, so polars infers it);
    * ``cfs`` -- the ``144166_00060`` column under a new name;
    * ``gage_height`` -- the ``144167_00065`` column cast to ``Float64``.

    Parameters
    ----------
    df_ : polars.DataFrame
        Raw frame containing the columns named above.

    Returns
    -------
    polars.DataFrame
    """
    passthrough = ('agency_cd', 'site_no', 'tz_cd')
    timestamp = pl.col('datetime').str.to_datetime()
    flow = pl.col('144166_00060')
    height = pl.col('144167_00065').cast(pl.Float64)

    return df_.select(*passthrough, timestamp, cfs=flow, gage_height=height)

In [51]:
# Load the trimmed, tab-separated file. Reuse `local_filename` (set above to the
# same path) so the location is defined in exactly one place.
raw = pl.read_csv(local_filename, separator='\t')

In [52]:
# Apply the cleaning function to the raw frame.
dd = tweak_river(raw)

In [53]:
# Preview the cleaned frame (polars prints the first and last rows plus the schema).
print(dd)

shape: (539_305, 6)
┌───────────┬─────────┬───────┬─────────────────────┬──────┬─────────────┐
│ agency_cd ┆ site_no ┆ tz_cd ┆ datetime            ┆ cfs  ┆ gage_height │
│ ---       ┆ ---     ┆ ---   ┆ ---                 ┆ ---  ┆ ---         │
│ str       ┆ i64     ┆ str   ┆ datetime[μs]        ┆ f64  ┆ f64         │
╞═══════════╪═════════╪═══════╪═════════════════════╪══════╪═════════════╡
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:00:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:15:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:30:00 ┆ 71.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 01:45:00 ┆ 70.0 ┆ null        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2001-05-07 02:00:00 ┆ 70.0 ┆ null        │
│ …         ┆ …       ┆ …     ┆ …                   ┆ …    ┆ …           │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2020-09-28 08:30:00 ┆ 9.53 ┆ 6.16        │
│ USGS      ┆ 9333500 ┆ MDT   ┆ 2020-09-28 08:45:00 ┆ 9.2  ┆ 6.15        │
│ USG

### Date Conversion

In [54]:
# Let polars infer temporal columns while reading: with `try_parse_dates=True`
# the `datetime` column arrives as datetime[μs] instead of str.
# The path comes from `local_filename` rather than a repeated literal.
print(pl.read_csv(local_filename, separator='\t', try_parse_dates=True))

shape: (539_305, 8)
┌───────────┬─────────┬─────────────┬───────┬─────────────┬─────────────┬─────────────┬────────────┐
│ agency_cd ┆ site_no ┆ datetime    ┆ tz_cd ┆ 144166_0006 ┆ 144166_0006 ┆ 144167_0006 ┆ 144167_000 │
│ ---       ┆ ---     ┆ ---         ┆ ---   ┆ 0           ┆ 0_cd        ┆ 5           ┆ 65_cd      │
│ str       ┆ i64     ┆ datetime[μs ┆ str   ┆ ---         ┆ ---         ┆ ---         ┆ ---        │
│           ┆         ┆ ]           ┆       ┆ f64         ┆ str         ┆ str         ┆ str        │
╞═══════════╪═════════╪═════════════╪═══════╪═════════════╪═════════════╪═════════════╪════════════╡
│ USGS      ┆ 9333500 ┆ 2001-05-07  ┆ MDT   ┆ 71.0        ┆ A:[91]      ┆ null        ┆ null       │
│           ┆         ┆ 01:00:00    ┆       ┆             ┆             ┆             ┆            │
│ USGS      ┆ 9333500 ┆ 2001-05-07  ┆ MDT   ┆ 71.0        ┆ A:[91]      ┆ null        ┆ null       │
│           ┆         ┆ 01:15:00    ┆       ┆             ┆            

In [55]:
# strptime-style pattern matching the raw `datetime` strings (e.g. "2001-05-07 01:00").
# NOTE(review): the name `format` shadows Python's built-in `format()` for the rest of the session.
format = '%Y-%m-%d %H:%M'

In [56]:
# Compare the string-to-temporal conversions side by side:
# `to_datetime` and `strptime(pl.Datetime, ...)` both yield datetime[μs]; `to_date` yields a date.
print(raw
      .select(original=pl.col('datetime'),
              to_datetime=pl.col('datetime').str.to_datetime(format),
              to_date=pl.col('datetime').str.to_date(format),
              strptime=pl.col('datetime').str.strptime(pl.Datetime, format),
              # A plain cast from the string column fails (author's note), so it stays disabled:
              #cast=pl.col('datetime').cast(pl.Datetime)
             )
     )

shape: (539_305, 4)
┌──────────────────┬─────────────────────┬────────────┬─────────────────────┐
│ original         ┆ to_datetime         ┆ to_date    ┆ strptime            │
│ ---              ┆ ---                 ┆ ---        ┆ ---                 │
│ str              ┆ datetime[μs]        ┆ date       ┆ datetime[μs]        │
╞══════════════════╪═════════════════════╪════════════╪═════════════════════╡
│ 2001-05-07 01:00 ┆ 2001-05-07 01:00:00 ┆ 2001-05-07 ┆ 2001-05-07 01:00:00 │
│ 2001-05-07 01:15 ┆ 2001-05-07 01:15:00 ┆ 2001-05-07 ┆ 2001-05-07 01:15:00 │
│ 2001-05-07 01:30 ┆ 2001-05-07 01:30:00 ┆ 2001-05-07 ┆ 2001-05-07 01:30:00 │
│ 2001-05-07 01:45 ┆ 2001-05-07 01:45:00 ┆ 2001-05-07 ┆ 2001-05-07 01:45:00 │
│ 2001-05-07 02:00 ┆ 2001-05-07 02:00:00 ┆ 2001-05-07 ┆ 2001-05-07 02:00:00 │
│ …                ┆ …                   ┆ …          ┆ …                   │
│ 2020-09-28 08:30 ┆ 2020-09-28 08:30:00 ┆ 2020-09-28 ┆ 2020-09-28 08:30:00 │
│ 2020-09-28 08:45 ┆ 2020-09-28 08:45:00 ┆ 2

### Combining Columns to Create Dates

In [57]:
# Parse the timestamp, then derive `month` ('%m', zero-padded) and `year` ('%Y')
# with strftime -- both come back as strings, as the schema below shows.
print(raw
      .select(to_datetime=pl.col('datetime').str.to_datetime(format))
      .with_columns(month=pl.col('to_datetime').dt.strftime('%m'),
                    year=pl.col('to_datetime').dt.strftime('%Y'))
     )

shape: (539_305, 3)
┌─────────────────────┬───────┬──────┐
│ to_datetime         ┆ month ┆ year │
│ ---                 ┆ ---   ┆ ---  │
│ datetime[μs]        ┆ str   ┆ str  │
╞═════════════════════╪═══════╪══════╡
│ 2001-05-07 01:00:00 ┆ 05    ┆ 2001 │
│ 2001-05-07 01:15:00 ┆ 05    ┆ 2001 │
│ 2001-05-07 01:30:00 ┆ 05    ┆ 2001 │
│ 2001-05-07 01:45:00 ┆ 05    ┆ 2001 │
│ 2001-05-07 02:00:00 ┆ 05    ┆ 2001 │
│ …                   ┆ …     ┆ …    │
│ 2020-09-28 08:30:00 ┆ 09    ┆ 2020 │
│ 2020-09-28 08:45:00 ┆ 09    ┆ 2020 │
│ 2020-09-28 09:00:00 ┆ 09    ┆ 2020 │
│ 2020-09-28 09:15:00 ┆ 09    ┆ 2020 │
│ 2020-09-28 09:30:00 ┆ 09    ┆ 2020 │
└─────────────────────┴───────┴──────┘


In [58]:
# Rebuild a first-of-month date from the derived parts with pl.date(year, month, day=1).
# NOTE(review): `year` and `month` are strings here (strftime output); `.dt.year()` /
# `.dt.month()` would hand pl.date integers instead of relying on an implicit conversion.
print(raw
      .select(to_datetime=pl.col('datetime').str.to_datetime(format))
      .with_columns(month=pl.col('to_datetime').dt.strftime('%m'),year=pl.col('to_datetime').dt.strftime('%Y'))
      .select(pl.date(pl.col('year'), pl.col('month'), 1))
     )

shape: (539_305, 1)
┌────────────┐
│ date       │
│ ---        │
│ date       │
╞════════════╡
│ 2001-05-01 │
│ 2001-05-01 │
│ 2001-05-01 │
│ 2001-05-01 │
│ 2001-05-01 │
│ …          │
│ 2020-09-01 │
│ 2020-09-01 │
│ 2020-09-01 │
│ 2020-09-01 │
│ 2020-09-01 │
└────────────┘


### Changing Time-Zones

In [59]:
# Inspect the time zone attached to the parsed `datetime` column.
# It prints None: the values carry no time zone yet.
print(tweak_river(raw)
      ['datetime']
      .dtype.time_zone
     )

None


In [60]:
# Peek at the first five time-zone names known to pytz.
pytz.all_timezones[:5]

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara']

In [61]:
# Same parsing pattern as in the Date Conversion section, assigned again here.
# NOTE(review): redundant re-assignment, and `format` shadows the built-in `format()`.
format = '%Y-%m-%d %H:%M'

In [64]:
# Compare several ways of attaching a time zone to the raw timestamps,
# shown next to the original string.
print(
    raw.select(
        original=pl.col('datetime'),
        # Parsed with no time zone at all.
        naive=pl.col('datetime').str.to_datetime(format),
        # Same wall-clock values, merely labelled as UTC.
        utc=(
            pl.col('datetime')
            .str.to_datetime(format)
            .dt.replace_time_zone('UTC')
        ),
        # Labelled as UTC, then converted to Denver time.
        Denver=(
            pl.col('datetime')
            .str.to_datetime(format)
            .dt.replace_time_zone('UTC')
            .dt.convert_time_zone('America/Denver')
        ),
        # Append the UTC offset implied by `tz_cd`, parse it with %z, view in Denver time.
        Denver2=(
            (pl.col('datetime')
             + ' '
             + pl.col('tz_cd').str.replace('MST', '-0700').str.replace('MDT', '-0600'))
            .str.to_datetime('%Y-%m-%d %H:%M %z')
            .dt.convert_time_zone('America/Denver')
        ),
        # Parse directly as Denver local time; ambiguous times take the earliest instant.
        Denver3=pl.col('datetime').str.to_datetime(
            format, time_zone='America/Denver', ambiguous='earliest'
        ),
    )
)

shape: (539_305, 6)
┌────────────┬─────────────────┬────────────────┬────────────────┬────────────────┬────────────────┐
│ original   ┆ naive           ┆ utc            ┆ Denver         ┆ Denver2        ┆ Denver3        │
│ ---        ┆ ---             ┆ ---            ┆ ---            ┆ ---            ┆ ---            │
│ str        ┆ datetime[μs]    ┆ datetime[μs,   ┆ datetime[μs,   ┆ datetime[μs,   ┆ datetime[μs,   │
│            ┆                 ┆ UTC]           ┆ America/Denver ┆ America/Denver ┆ America/Denver │
│            ┆                 ┆                ┆ ]              ┆ ]              ┆ ]              │
╞════════════╪═════════════════╪════════════════╪════════════════╪════════════════╪════════════════╡
│ 2001-05-07 ┆ 2001-05-07      ┆ 2001-05-07     ┆ 2001-05-06     ┆ 2001-05-07     ┆ 2001-05-07     │
│ 01:00      ┆ 01:00:00        ┆ 01:00:00 UTC   ┆ 19:00:00 MDT   ┆ 01:00:00 MDT   ┆ 01:00:00 MDT   │
│ 2001-05-07 ┆ 2001-05-07      ┆ 2001-05-07     ┆ 2001-05-06     ┆ 2001

In [65]:
# Same parsing pattern as in the Date Conversion section, assigned once more.
# NOTE(review): redundant re-assignment, and `format` shadows the built-in `format()`.
format = '%Y-%m-%d %H:%M'

In [66]:
# Keep only the rows where the two Denver conversions disagree:
#   Denver2 -- offset taken from `tz_cd` (MST/MDT), parsed with %z;
#   Denver3 -- parsed as Denver local time with ambiguous='latest'.
# The 56 mismatches below all fall in the repeated 01:00-01:45 hour on autumn
# clock-change dates, where Denver2 says MDT and Denver3 says MST.
print(raw
      .select(
          Denver2=(pl.col('datetime') + ' ' + (pl.col('tz_cd')
                                               .str.replace('MST', '-0700').str.replace('MDT', '-0600')))
          .str.to_datetime('%Y-%m-%d %H:%M %z')
          .dt.convert_time_zone('America/Denver'),            
          Denver3=(pl.col('datetime').str.to_datetime(format, time_zone='America/Denver', ambiguous='latest'))
      )
      .filter(pl.col('Denver3') != pl.col('Denver2'))
     )

shape: (56, 2)
┌──────────────────────────────┬──────────────────────────────┐
│ Denver2                      ┆ Denver3                      │
│ ---                          ┆ ---                          │
│ datetime[μs, America/Denver] ┆ datetime[μs, America/Denver] │
╞══════════════════════════════╪══════════════════════════════╡
│ 2003-10-26 01:00:00 MDT      ┆ 2003-10-26 01:00:00 MST      │
│ 2003-10-26 01:15:00 MDT      ┆ 2003-10-26 01:15:00 MST      │
│ 2003-10-26 01:30:00 MDT      ┆ 2003-10-26 01:30:00 MST      │
│ 2003-10-26 01:45:00 MDT      ┆ 2003-10-26 01:45:00 MST      │
│ 2007-11-04 01:00:00 MDT      ┆ 2007-11-04 01:00:00 MST      │
│ …                            ┆ …                            │
│ 2018-11-04 01:45:00 MDT      ┆ 2018-11-04 01:45:00 MST      │
│ 2019-11-03 01:00:00 MDT      ┆ 2019-11-03 01:00:00 MST      │
│ 2019-11-03 01:15:00 MDT      ┆ 2019-11-03 01:15:00 MST      │
│ 2019-11-03 01:30:00 MDT      ┆ 2019-11-03 01:30:00 MST      │
│ 2019-11-03 01:45:00 MDT

#### Adding Time-Zone within The `tweak` Function

In [67]:
def tweak_river(df_, cfs_col='144166_00060', gage_height_col='144167_00065'):
    """Clean the raw river frame and return a time-zone-aware ``datetime``.

    This redefinition replaces the earlier one-argument ``tweak_river``. The
    column arguments default to the column names that version hard-coded, so
    existing ``tweak_river(raw)`` calls keep working after this cell has run.

    The result has five columns, in this order:

    * ``agency_cd``, ``site_no`` -- passed through unchanged;
    * ``cfs`` -- the ``cfs_col`` column under a new name;
    * ``gage_height`` -- the ``gage_height_col`` column cast to ``Float64``;
    * ``datetime`` -- the raw string joined with the UTC offset derived from
      ``tz_cd`` ('MST' -> '-0700', 'MDT' -> '-0600'), parsed with
      ``'%Y-%m-%d %H:%M %z'`` and converted to ``America/Denver``.

    NOTE(review): only the MST and MDT codes are translated; any other
    ``tz_cd`` value would presumably fail to match ``%z`` -- confirm that the
    data contains no other codes.

    Parameters
    ----------
    df_ : polars.DataFrame
        Raw frame with ``agency_cd``, ``site_no``, ``datetime``, ``tz_cd``
        and the two measurement columns.
    cfs_col : str, default '144166_00060'
        Name of the column to expose as ``cfs``.
    gage_height_col : str, default '144167_00065'
        Name of the column to expose as ``gage_height``.

    Returns
    -------
    polars.DataFrame
    """
    # Turn the zone abbreviation into a numeric offset that %z can parse.
    utc_offset = (pl.col('tz_cd')
                  .str.replace('MST', '-0700')
                  .str.replace('MDT', '-0600'))

    return (df_
        .select(
            'agency_cd', 'site_no',
            cfs=pl.col(cfs_col),
            gage_height=pl.col(gage_height_col).cast(pl.Float64),
            datetime=(pl.col('datetime') + ' ' + utc_offset)
                .str.to_datetime('%Y-%m-%d %H:%M %z')
                .dt.convert_time_zone('America/Denver')
            )
        )

In [68]:
# Rebuild `dd` with the redefined function so `datetime` is America/Denver-aware.
dd = tweak_river(raw, cfs_col='144166_00060', gage_height_col='144167_00065')

### Time Aggregation

In [69]:
# Yearly mean of every Float64 column (`cfs`, `gage_height`).
# Without `maintain_order`, group order is arbitrary -- note the shuffled years below.
print(dd
      .with_columns(year=pl.col('datetime').dt.year())
      .group_by('year')
      .agg(pl.col(pl.Float64).mean())
     )

shape: (20, 3)
┌──────┬────────────┬─────────────┐
│ year ┆ cfs        ┆ gage_height │
│ ---  ┆ ---        ┆ ---         │
│ i32  ┆ f64        ┆ f64         │
╞══════╪════════════╪═════════════╡
│ 2017 ┆ 82.364265  ┆ 5.581915    │
│ 2002 ┆ 116.211979 ┆ NaN         │
│ 2001 ┆ 74.304452  ┆ NaN         │
│ 2007 ┆ 95.310375  ┆ 3.641389    │
│ 2019 ┆ 92.886454  ┆ 6.670158    │
│ …    ┆ …          ┆ …           │
│ 2013 ┆ 152.412803 ┆ 4.239074    │
│ 2003 ┆ 86.957083  ┆ NaN         │
│ 2009 ┆ 53.669322  ┆ 3.154503    │
│ 2012 ┆ 93.731029  ┆ 3.506091    │
│ 2014 ┆ 71.785238  ┆ 5.517218    │
└──────┴────────────┴─────────────┘


In [70]:
# Same aggregation, with `maintain_order=True` so groups appear in first-seen
# (here chronological) order.
print(dd
      .with_columns(year=pl.col('datetime').dt.year())
      .group_by('year', maintain_order=True)
      .agg(pl.col(pl.Float64).mean())
     )

shape: (20, 3)
┌──────┬────────────┬─────────────┐
│ year ┆ cfs        ┆ gage_height │
│ ---  ┆ ---        ┆ ---         │
│ i32  ┆ f64        ┆ f64         │
╞══════╪════════════╪═════════════╡
│ 2001 ┆ 74.304452  ┆ NaN         │
│ 2002 ┆ 116.211979 ┆ NaN         │
│ 2003 ┆ 86.957083  ┆ NaN         │
│ 2004 ┆ 113.173465 ┆ NaN         │
│ 2005 ┆ 177.604843 ┆ NaN         │
│ …    ┆ …          ┆ …           │
│ 2016 ┆ 98.565988  ┆ 5.542945    │
│ 2017 ┆ 82.364265  ┆ 5.581915    │
│ 2018 ┆ 71.074642  ┆ 6.423744    │
│ 2019 ┆ 92.886454  ┆ 6.670158    │
│ 2020 ┆ 58.426221  ┆ 6.470503    │
└──────┴────────────┴─────────────┘


In [71]:
# Running naïvely: `dd` has not been sorted (or flagged as sorted) on `datetime`,
# so `group_by_dynamic` refuses to run. The failure is the point of this cell, so
# it is caught and printed instead of halting "Restart & Run All".
try:
    print(dd
          .group_by_dynamic(index_column='datetime', every='1y')
          .agg(pl.col(pl.Float64).mean())
         )
except pl.exceptions.InvalidOperationError as err:
    print(f'InvalidOperationError: {err}')

InvalidOperationError: argument in operation 'group_by_dynamic' is not sorted, please sort the 'expr/series/column' first

In [72]:
# Sort on the index column first; `group_by_dynamic` then buckets rows into
# calendar-year windows (`every='1y'`), each labelled by its start.
print(dd
      .sort('datetime')  
      .group_by_dynamic(index_column='datetime', every='1y')
      .agg(pl.col(pl.Float64).mean())
     )

shape: (20, 3)
┌──────────────────────────────┬────────────┬─────────────┐
│ datetime                     ┆ cfs        ┆ gage_height │
│ ---                          ┆ ---        ┆ ---         │
│ datetime[μs, America/Denver] ┆ f64        ┆ f64         │
╞══════════════════════════════╪════════════╪═════════════╡
│ 2001-01-01 00:00:00 MST      ┆ 74.304452  ┆ null        │
│ 2002-01-01 00:00:00 MST      ┆ 116.211979 ┆ null        │
│ 2003-01-01 00:00:00 MST      ┆ 86.957083  ┆ null        │
│ 2004-01-01 00:00:00 MST      ┆ 113.173465 ┆ null        │
│ 2005-01-01 00:00:00 MST      ┆ 177.604843 ┆ null        │
│ …                            ┆ …          ┆ …           │
│ 2016-01-01 00:00:00 MST      ┆ 98.565988  ┆ 5.542945    │
│ 2017-01-01 00:00:00 MST      ┆ 82.364265  ┆ 5.581915    │
│ 2018-01-01 00:00:00 MST      ┆ 71.074642  ┆ 6.423744    │
│ 2019-01-01 00:00:00 MST      ┆ 92.886454  ┆ 6.670158    │
│ 2020-01-01 00:00:00 MST      ┆ 58.426221  ┆ 6.470503    │
└────────────────────────

In [73]:
# Alternative to sorting: declare the column as sorted with `set_sorted`.
# NOTE(review): presumably this only sets a flag and does not reorder rows, so it
# is safe only if the file is already chronological -- confirm. The result below
# matches the `.sort('datetime')` version.
print(dd
      .set_sorted('datetime')
      .group_by_dynamic(index_column='datetime', every='1y')
      .agg(pl.col(pl.Float64).mean())
     )

shape: (20, 3)
┌──────────────────────────────┬────────────┬─────────────┐
│ datetime                     ┆ cfs        ┆ gage_height │
│ ---                          ┆ ---        ┆ ---         │
│ datetime[μs, America/Denver] ┆ f64        ┆ f64         │
╞══════════════════════════════╪════════════╪═════════════╡
│ 2001-01-01 00:00:00 MST      ┆ 74.304452  ┆ null        │
│ 2002-01-01 00:00:00 MST      ┆ 116.211979 ┆ null        │
│ 2003-01-01 00:00:00 MST      ┆ 86.957083  ┆ null        │
│ 2004-01-01 00:00:00 MST      ┆ 113.173465 ┆ null        │
│ 2005-01-01 00:00:00 MST      ┆ 177.604843 ┆ null        │
│ …                            ┆ …          ┆ …           │
│ 2016-01-01 00:00:00 MST      ┆ 98.565988  ┆ 5.542945    │
│ 2017-01-01 00:00:00 MST      ┆ 82.364265  ┆ 5.581915    │
│ 2018-01-01 00:00:00 MST      ┆ 71.074642  ┆ 6.423744    │
│ 2019-01-01 00:00:00 MST      ┆ 92.886454  ┆ 6.670158    │
│ 2020-01-01 00:00:00 MST      ┆ 58.426221  ┆ 6.470503    │
└────────────────────────

### Time Intervals

In [75]:
# Two-month windows (`every='2mo'`): mean of each Float64 column per window.
print(dd
      .set_sorted('datetime')  
      .group_by_dynamic(index_column='datetime', every='2mo')
      .agg(pl.col(pl.Float64).mean())
     )

shape: (117, 3)
┌──────────────────────────────┬────────────┬─────────────┐
│ datetime                     ┆ cfs        ┆ gage_height │
│ ---                          ┆ ---        ┆ ---         │
│ datetime[μs, America/Denver] ┆ f64        ┆ f64         │
╞══════════════════════════════╪════════════╪═════════════╡
│ 2001-05-01 00:00:00 MDT      ┆ 41.999693  ┆ null        │
│ 2001-07-01 00:00:00 MDT      ┆ 165.864435 ┆ null        │
│ 2001-09-01 00:00:00 MDT      ┆ 43.783245  ┆ null        │
│ 2001-11-01 00:00:00 MST      ┆ 108.079392 ┆ null        │
│ 2002-01-01 00:00:00 MST      ┆ 132.790323 ┆ null        │
│ …                            ┆ …          ┆ …           │
│ 2020-01-01 00:00:00 MST      ┆ 125.290451 ┆ 6.925117    │
│ 2020-03-01 00:00:00 MST      ┆ 99.053008  ┆ 6.960658    │
│ 2020-05-01 00:00:00 MDT      ┆ 16.793101  ┆ 6.381607    │
│ 2020-07-01 00:00:00 MDT      ┆ 14.582522  ┆ 5.913772    │
│ 2020-09-01 00:00:00 MDT      ┆ 4.900842   ┆ 5.839076    │
└───────────────────────

In [76]:
# Duration strings can be combined: a window every 3 hours, 4 minutes and 5 seconds.
print(dd
      .set_sorted('datetime')
      .group_by_dynamic(index_column='datetime', every='3h4m5s')
      .agg(pl.col(pl.Float64).mean())
     )

shape: (44_796, 3)
┌──────────────────────────────┬───────────┬─────────────┐
│ datetime                     ┆ cfs       ┆ gage_height │
│ ---                          ┆ ---       ┆ ---         │
│ datetime[μs, America/Denver] ┆ f64       ┆ f64         │
╞══════════════════════════════╪═══════════╪═════════════╡
│ 2001-05-06 23:03:20 MDT      ┆ 70.6      ┆ null        │
│ 2001-05-07 02:07:25 MDT      ┆ 67.916667 ┆ null        │
│ 2001-05-07 05:11:30 MDT      ┆ 64.461538 ┆ null        │
│ 2001-05-07 08:15:35 MDT      ┆ 65.333333 ┆ null        │
│ 2001-05-07 11:19:40 MDT      ┆ 73.333333 ┆ null        │
│ …                            ┆ …         ┆ …           │
│ 2020-09-27 20:17:10 MDT      ┆ 9.835833  ┆ 6.169167    │
│ 2020-09-27 23:21:15 MDT      ┆ 9.6675    ┆ 6.164167    │
│ 2020-09-28 02:25:20 MDT      ┆ 9.6675    ┆ 6.164167    │
│ 2020-09-28 05:29:25 MDT      ┆ 9.505385  ┆ 6.159231    │
│ 2020-09-28 08:33:30 MDT      ┆ 9.2       ┆ 6.15        │
└──────────────────────────────┴─────

##### Previous snippet: the `check_sorted` argument has been removed
```python
print(dd
      .set_sorted('datetime')  
      .group_by_dynamic(index_column='datetime', every='7d', period='5d', start_by='monday', check_sorted=False)
      .agg(pl.col(pl.Float64).mean(),cfs_range=(pl.col('cfs').max() - pl.col('cfs').min()))
     )
```

In [79]:
# A window starts every 7 days on a Monday (`start_by='monday'`) and spans 5 days
# (`period='5d'`), i.e. Monday through Friday.
# Per window: mean of each Float64 column plus the max-minus-min spread of `cfs`.
print(dd
      .set_sorted('datetime')  
      .group_by_dynamic(index_column='datetime', every='7d', period='5d', start_by='monday')
      .agg(
          pl.col(pl.Float64).mean(),
          cfs_range=(pl.col('cfs').max() - pl.col('cfs').min())
      )
)

shape: (877, 4)
┌──────────────────────────────┬───────────┬─────────────┬───────────┐
│ datetime                     ┆ cfs       ┆ gage_height ┆ cfs_range │
│ ---                          ┆ ---       ┆ ---         ┆ ---       │
│ datetime[μs, America/Denver] ┆ f64       ┆ f64         ┆ f64       │
╞══════════════════════════════╪═══════════╪═════════════╪═══════════╡
│ 2001-05-07 00:00:00 MDT      ┆ 62.186975 ┆ null        ┆ 29.0      │
│ 2001-05-14 00:00:00 MDT      ┆ 50.529167 ┆ null        ┆ 20.0      │
│ 2001-05-21 00:00:00 MDT      ┆ 43.372917 ┆ null        ┆ 17.0      │
│ 2001-05-28 00:00:00 MDT      ┆ 36.9125   ┆ null        ┆ 27.0      │
│ 2001-06-04 00:00:00 MDT      ┆ 20.023438 ┆ null        ┆ 14.0      │
│ …                            ┆ …         ┆ …           ┆ …         │
│ 2020-08-31 00:00:00 MDT      ┆ 0.030521  ┆ 5.480208    ┆ 0.07      │
│ 2020-09-07 00:00:00 MDT      ┆ 0.006221  ┆ 5.48375     ┆ 0.02      │
│ 2020-09-14 00:00:00 MDT      ┆ 5.806146  ┆ 6.028563    ┆ 2.

In [81]:
# Weekend flow: a window starts every 7 days on a Saturday and spans 2 days.
# Per window: mean of each Float64 column plus the max-minus-min spread of `cfs`.
print(dd
      .set_sorted('datetime')
      .group_by_dynamic(index_column='datetime', every='7d', period='2d', start_by='saturday')
      .agg(pl.col(pl.Float64).mean(), cfs_range=(pl.col('cfs').max() - pl.col('cfs').min()))
     )

shape: (837, 4)
┌──────────────────────────────┬───────────┬─────────────┬───────────┐
│ datetime                     ┆ cfs       ┆ gage_height ┆ cfs_range │
│ ---                          ┆ ---       ┆ ---         ┆ ---       │
│ datetime[μs, America/Denver] ┆ f64       ┆ f64         ┆ f64       │
╞══════════════════════════════╪═══════════╪═════════════╪═══════════╡
│ 2001-05-12 00:00:00 MDT      ┆ 52.479167 ┆ null        ┆ 15.0      │
│ 2001-05-19 00:00:00 MDT      ┆ 49.177083 ┆ null        ┆ 18.0      │
│ 2001-05-26 00:00:00 MDT      ┆ 42.140625 ┆ null        ┆ 12.0      │
│ 2001-06-02 00:00:00 MDT      ┆ 25.578125 ┆ null        ┆ 9.0       │
│ 2001-06-30 00:00:00 MDT      ┆ 18.75     ┆ null        ┆ 6.0       │
│ …                            ┆ …         ┆ …           ┆ …         │
│ 2020-08-29 00:00:00 MDT      ┆ 0.05099   ┆ 5.473958    ┆ 0.06      │
│ 2020-09-05 00:00:00 MDT      ┆ 0.011912  ┆ 5.476875    ┆ 0.03      │
│ 2020-09-12 00:00:00 MDT      ┆ 5.930538  ┆ 6.003281    ┆ 3.

In [82]:
# Each reading as a percentage of its year's average flow:
#   1. tag every row with its calendar year;
#   2. broadcast the per-year mean of `cfs` onto every row with `.over('year')`;
#   3. divide, scale to percent and round to two decimals.
print(dd
      .with_columns(year=pl.col('datetime').dt.year())
      .with_columns(year_mean_cfs=pl.col('cfs').mean().over('year'))
      .with_columns(pct_of_avg=(pl.col('cfs') / pl.col('year_mean_cfs')).mul(100).round(2))
     )

shape: (539_305, 8)
┌───────────┬─────────┬──────┬─────────────┬───────────────────┬──────┬───────────────┬────────────┐
│ agency_cd ┆ site_no ┆ cfs  ┆ gage_height ┆ datetime          ┆ year ┆ year_mean_cfs ┆ pct_of_avg │
│ ---       ┆ ---     ┆ ---  ┆ ---         ┆ ---               ┆ ---  ┆ ---           ┆ ---        │
│ str       ┆ i64     ┆ f64  ┆ f64         ┆ datetime[μs,      ┆ i32  ┆ f64           ┆ f64        │
│           ┆         ┆      ┆             ┆ America/Denver]   ┆      ┆               ┆            │
╞═══════════╪═════════╪══════╪═════════════╪═══════════════════╪══════╪═══════════════╪════════════╡
│ USGS      ┆ 9333500 ┆ 71.0 ┆ null        ┆ 2001-05-07        ┆ 2001 ┆ 74.304452     ┆ 95.55      │
│           ┆         ┆      ┆             ┆ 01:00:00 MDT      ┆      ┆               ┆            │
│ USGS      ┆ 9333500 ┆ 71.0 ┆ null        ┆ 2001-05-07        ┆ 2001 ┆ 74.304452     ┆ 95.55      │
│           ┆         ┆      ┆             ┆ 01:15:00 MDT      ┆      ┆

In [84]:
# Broadcast two window means onto every row for both `cfs` and `gage_height`:
#   *_mean_year    -- mean within the row's calendar year;
#   *_mean_quarter -- mean within the row's quarter number (1-4). The partition is
#                     `quarter` alone, so the same quarter of every year is pooled.
print(dd
      .with_columns(year=pl.col('datetime').dt.year(), quarter=pl.col('datetime').dt.quarter())
      .with_columns(pl.col(['cfs', 'gage_height']).mean().over('year').name.suffix('_mean_year'),
                    pl.col(['cfs', 'gage_height']).mean().over('quarter').name.suffix('_mean_quarter'))
     )

shape: (539_305, 11)
┌───────────┬─────────┬──────┬─────────────┬───┬────────────┬────────────┬────────────┬────────────┐
│ agency_cd ┆ site_no ┆ cfs  ┆ gage_height ┆ … ┆ cfs_mean_y ┆ gage_heigh ┆ cfs_mean_q ┆ gage_heigh │
│ ---       ┆ ---     ┆ ---  ┆ ---         ┆   ┆ ear        ┆ t_mean_yea ┆ uarter     ┆ t_mean_qua │
│ str       ┆ i64     ┆ f64  ┆ f64         ┆   ┆ ---        ┆ r          ┆ ---        ┆ rter       │
│           ┆         ┆      ┆             ┆   ┆ f64        ┆ ---        ┆ f64        ┆ ---        │
│           ┆         ┆      ┆             ┆   ┆            ┆ f64        ┆            ┆ f64        │
╞═══════════╪═════════╪══════╪═════════════╪═══╪════════════╪════════════╪════════════╪════════════╡
│ USGS      ┆ 9333500 ┆ 71.0 ┆ null        ┆ … ┆ 74.304452  ┆ null       ┆ 73.396286  ┆ 4.654955   │
│ USGS      ┆ 9333500 ┆ 71.0 ┆ null        ┆ … ┆ 74.304452  ┆ null       ┆ 73.396286  ┆ 4.654955   │
│ USGS      ┆ 9333500 ┆ 71.0 ┆ null        ┆ … ┆ 74.304452  ┆ null    

### Multiple Groupings with Time Series