# Timeseries

The format specifiers accepted by `strptime` are those of Rust's `chrono` crate; the full list is here: https://docs.rs/chrono/latest/chrono/format/strftime/index.html

In [2]:
import polars as pl
from polars import col
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [9]:
# Load the Apple stock history straight from GitHub. Date parsing is switched
# off so that `Date` arrives as a plain string column.
df = pl.read_csv(
    'https://github.com/kyleconroy/apple-stock/blob/master/apple_stock_data.csv?raw=true',
    parse_dates=False,
)

In [10]:
# Display the loaded frame; `Date` is still a string column at this point.
df

Date,Open,High,Low,Close,Volume,Adj Close
str,f64,f64,f64,f64,i64,f64
"""2012-03-30""",608.77,610.56,597.94,599.55,26050900,599.55
"""2012-03-29""",612.78,616.56,607.23,609.86,21668300,609.86
"""2012-03-28""",618.38,621.45,610.31,617.62,23385200,617.62
"""2012-03-27""",606.18,616.28,606.06,614.48,21628200,614.48
"""2012-03-26""",599.79,607.15,595.26,606.98,21259900,606.98
"""2012-03-23""",600.49,601.8,594.4,596.05,15359900,596.05
"""2012-03-22""",597.78,604.5,595.53,599.34,22281100,599.34
"""2012-03-21""",602.74,609.65,601.41,602.5,22958200,602.5
"""2012-03-20""",599.51,606.9,591.48,605.96,29166500,605.96
"""2012-03-19""",598.37,601.77,589.05,601.1,32187000,601.1


## Formatting dates from strings

In [13]:
# Parse the string `Date` column into a proper `pl.Date` using an explicit format.
# The dtype guard keeps this cell idempotent: `df` is rebound to its own
# transformed version, so without the guard a second run would call
# `.str.strptime` on a column that is no longer a string.
if df['Date'].dtype == pl.Utf8:
    df = df.with_column(col('Date').str.strptime(pl.Date, fmt='%Y-%m-%d'))
df

Date,Open,High,Low,Close,Volume,Adj Close
date,f64,f64,f64,f64,i64,f64
2012-03-30,608.77,610.56,597.94,599.55,26050900,599.55
2012-03-29,612.78,616.56,607.23,609.86,21668300,609.86
2012-03-28,618.38,621.45,610.31,617.62,23385200,617.62
2012-03-27,606.18,616.28,606.06,614.48,21628200,614.48
2012-03-26,599.79,607.15,595.26,606.98,21259900,606.98
2012-03-23,600.49,601.8,594.4,596.05,15359900,596.05
2012-03-22,597.78,604.5,595.53,599.34,22281100,599.34
2012-03-21,602.74,609.65,601.41,602.5,22958200,602.5
2012-03-20,599.51,606.9,591.48,605.96,29166500,605.96
2012-03-19,598.37,601.77,589.05,601.1,32187000,601.1


## Selecting dates

In [17]:
# Keep the rows whose `Date` lies between the two bounds, then order them
# chronologically.
(
    df
    .filter(col("Date").is_between(datetime(1991, 1, 1), datetime(1992, 1, 1)))
    .sort('Date')
)

Date,Open,High,Low,Close,Volume,Adj Close
date,f64,f64,f64,f64,i64,f64
1991-01-02,42.75,44.0,42.0,43.5,5543600,10.29
1991-01-03,43.5,44.25,43.0,43.0,5365600,10.17
1991-01-04,43.0,44.25,43.0,43.25,5062400,10.23
1991-01-07,43.0,45.25,43.0,43.25,11111200,10.23
1991-01-08,43.75,43.88,42.5,43.25,7816400,10.23
1991-01-09,44.25,46.0,43.75,45.25,16692400,10.7
1991-01-10,45.75,47.25,45.75,47.13,15562400,11.15
1991-01-11,47.0,47.25,46.0,47.0,11003200,11.11
1991-01-14,46.0,46.75,46.0,46.25,7535600,10.94
1991-01-15,46.5,46.75,46.0,46.75,6870000,11.06


In [39]:
# An equality comparison against a single point in time picks out the row(s)
# recorded for that day.
df.filter(col("Date") == datetime(1991, 1, 2))

Date,Open,High,Low,Close,Volume,Adj Close
date,f64,f64,f64,f64,i64,f64
1991-01-02,42.75,44.0,42.0,43.5,5543600,10.29


## The `.dt` namespace

In [37]:
# Derive calendar components from `Date` through the `.dt` namespace.
# NOTE: in the output recorded for this cell, `weekday` is 0 for the Monday
# 2012-03-26 and 4 for the Friday 2012-03-30; confirm the numbering convention
# for the Polars version you are running.
df.select(
    [
        col('Date'),
        col('Date').dt.day().alias('day'),
        col('Date').dt.weekday().alias('weekday'),
    ]
)

Date,day,weekday
date,u32,u32
2012-03-30,30,4
2012-03-29,29,3
2012-03-28,28,2
2012-03-27,27,1
2012-03-26,26,0
2012-03-23,23,4
2012-03-22,22,3
2012-03-21,21,2
2012-03-20,20,1
2012-03-19,19,0


## Fixed and rolling groupby

`groupby_dynamic` requires the dataframe to be sorted by the date column. Its windows are defined by three parameters:
- `every`: interval between the starts of consecutive windows
- `period`: length of each window
- `offset`: offset applied to the start of the windows

For example:

- every: 1 day -> "1d"
- period: 1 day -> "1d"

```
this creates adjacent windows of the same size
|--|
   |--|
      |--|
```

- every: 1 day -> "1d"
- period: 2 days -> "2d"

```
these windows have an overlap of 1 day
|----|
   |----|
      |----|
```

- every: 2 days -> "2d"
- period: 1 day -> "1d"

```
this leaves gaps between the windows;
data points that fall in these gaps will not be a member of any group
|--|
       |--|
              |--|
```


In [44]:
# Mean closing price per one-year window.
(
    df
    .sort('Date')  # groupby_dynamic needs the frame sorted on the date column
    .groupby_dynamic("Date", every='1y')
    .agg(col('Close').mean().alias('close_1y_avg'))
)

Date,close_1y_avg
date,f64
1984-01-01,25.578625
1985-01-01,20.193676
1986-01-01,32.461028
1987-01-01,53.889684
1988-01-01,41.540079
1989-01-01,41.659762
1990-01-01,37.562688
1991-01-01,52.495534
1992-01-01,54.803386
1993-01-01,41.026719


In [48]:
# Mean closing price over one-year windows that start every two years, so
# every other year falls into a gap between windows.
(
    df
    .sort('Date')  # groupby_dynamic needs the frame sorted on the date column
    .groupby_dynamic("Date", every='2y', period='1y')
    .agg(col('Close').mean().alias('close_1y_avg_per_2y'))
)

Date,close_1y_avg_per_2y
date,f64
1984-01-01,25.578625
1986-01-01,32.461028
1988-01-01,41.540079
1990-01-01,37.562688
1992-01-01,54.803386
1994-01-01,34.081349
1996-01-01,24.917559
1998-01-01,30.565119
2000-01-01,71.748929
2002-01-01,19.139444
