## Dates in the Index

#### Loading Libraries

In [1]:
# Numerical Computing
import numpy as np
# Data Manipulation
import pandas as pd
# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Remote CSV with the Alta NOAA observations (1980-2019, per the file name).
url = 'https://raw.githubusercontent.com/mattharrison/datasets/master/data/alta-noaa-1980-2019.csv'
alta_df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Parse the DATE column into datetime values so it can serve as a time index.
dates = pd.to_datetime(alta_df['DATE'])

In [4]:
# A Series passed to `rename` is treated as a mapping from the old labels
# (the default integer row labels) to the matching parsed dates.
snow = alta_df['SNOW'].rename(index=dates)

In [5]:
# Show the date-indexed Series (pandas truncates the display to head and tail).
snow

1980-01-01    2.0
1980-01-02    3.0
1980-01-03    1.0
1980-01-04    0.0
1980-01-05    0.0
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

#### Finding Missing Data

In [6]:
# Quick check: is at least one observation missing?
snow.isna().any()

True

In [7]:
# Boolean mask selects only the dates whose value is missing.
snow.loc[snow.isna()]

1985-07-30   NaN
1985-09-12   NaN
1985-09-19   NaN
1986-02-07   NaN
1986-06-26   NaN
              ..
2017-04-26   NaN
2017-09-20   NaN
2017-10-02   NaN
2017-12-23   NaN
2018-12-03   NaN
Name: SNOW, Length: 365, dtype: float64

In [9]:
# Partial-string slicing: '1985-09' starts at the first day of the month,
# and the end label '1985-09-20' is included in the result.
snow.loc['1985-09': '1985-09-20']

1985-09-01    0.0
1985-09-02    0.0
1985-09-03    0.0
1985-09-04    0.0
1985-09-05    0.0
1985-09-06    0.0
1985-09-07    0.0
1985-09-08    0.0
1985-09-09    0.0
1985-09-10    0.0
1985-09-11    0.0
1985-09-12    NaN
1985-09-13    0.0
1985-09-14    0.0
1985-09-15    0.0
1985-09-16    0.0
1985-09-17    0.0
1985-09-18    0.0
1985-09-19    NaN
1985-09-20    0.0
Name: SNOW, dtype: float64

#### Filling Missing Data

In [10]:
# Replace each missing value in the window with a constant 0.
snow.loc['1985-09':'1985-09-20'].fillna(value=0)

1985-09-01    0.0
1985-09-02    0.0
1985-09-03    0.0
1985-09-04    0.0
1985-09-05    0.0
1985-09-06    0.0
1985-09-07    0.0
1985-09-08    0.0
1985-09-09    0.0
1985-09-10    0.0
1985-09-11    0.0
1985-09-12    0.0
1985-09-13    0.0
1985-09-14    0.0
1985-09-15    0.0
1985-09-16    0.0
1985-09-17    0.0
1985-09-18    0.0
1985-09-19    0.0
1985-09-20    0.0
Name: SNOW, dtype: float64

In [11]:
# A window spanning New Year that contains two gaps (Jan 1 and Jan 4).
snow.loc['1987-12-30': '1988-01-10']

1987-12-30    6.0
1987-12-31    5.0
1988-01-01    NaN
1988-01-02    0.0
1988-01-03    0.0
1988-01-04    NaN
1988-01-05    2.0
1988-01-06    6.0
1988-01-07    4.0
1988-01-08    9.0
1988-01-09    5.0
1988-01-10    2.0
Name: SNOW, dtype: float64

In [13]:
# Forward fill: each gap takes the most recent earlier observation.
snow.loc['1987-12-30':'1988-01-10'].ffill()

1987-12-30    6.0
1987-12-31    5.0
1988-01-01    5.0
1988-01-02    0.0
1988-01-03    0.0
1988-01-04    0.0
1988-01-05    2.0
1988-01-06    6.0
1988-01-07    4.0
1988-01-08    9.0
1988-01-09    5.0
1988-01-10    2.0
Name: SNOW, dtype: float64

In [14]:
# Backward fill: each gap takes the next later observation.
snow.loc['1987-12-30':'1988-01-10'].bfill()

1987-12-30    6.0
1987-12-31    5.0
1988-01-01    0.0
1988-01-02    0.0
1988-01-03    0.0
1988-01-04    2.0
1988-01-05    2.0
1988-01-06    6.0
1988-01-07    4.0
1988-01-08    9.0
1988-01-09    5.0
1988-01-10    2.0
Name: SNOW, dtype: float64

#### Interpolation

In [17]:
# Interpolate across the gaps (linear by default): the NaN between
# 5.0 and 0.0 becomes 2.5, the one between 0.0 and 2.0 becomes 1.0.
(snow
 .loc['1987-12-30':'1988-01-10']
 .interpolate()
)

1987-12-30    6.0
1987-12-31    5.0
1988-01-01    2.5
1988-01-02    0.0
1988-01-03    0.0
1988-01-04    1.0
1988-01-05    2.0
1988-01-06    6.0
1988-01-07    4.0
1988-01-08    9.0
1988-01-09    5.0
1988-01-10    2.0
Name: SNOW, dtype: float64

In [19]:
# Boolean array flagging rows that fall in calendar quarter 1 or 4.
winter = snow.index.quarter.isin([1, 4])

In [20]:
# Missing winter days take the interpolated value; missing days outside
# winter become 0. Observed values are kept as they are.
(snow
 .where(~winter | snow.notna(), snow.interpolate())
 .where(winter | snow.notna(), 0)
)

1980-01-01    2.0
1980-01-02    3.0
1980-01-03    1.0
1980-01-04    0.0
1980-01-05    0.0
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

In [21]:
# Spot-check the fill rules on one non-winter gap (expected 0) and one
# winter gap (expected to be interpolated).
(snow
 .where(~winter | snow.notna(), snow.interpolate())
 .where(winter | snow.notna(), 0)
 .loc[['1985-09-19', '1988-01-01']]
)

1985-09-19    0.0
1988-01-01    2.5
Name: SNOW, dtype: float64

#### Dropping Missing Values

In [22]:
# Discard the rows with missing values; their dates vanish from the index.
snow.loc['1987-12-30':'1988-01-10'].dropna()

1987-12-30    6.0
1987-12-31    5.0
1988-01-02    0.0
1988-01-03    0.0
1988-01-05    2.0
1988-01-06    6.0
1988-01-07    4.0
1988-01-08    9.0
1988-01-09    5.0
1988-01-10    2.0
Name: SNOW, dtype: float64

#### Shifting Data

In [23]:
# Move values one row later; the first row has no predecessor and becomes NaN.
snow.shift(periods=1)

1980-01-01    NaN
1980-01-02    2.0
1980-01-03    3.0
1980-01-04    1.0
1980-01-05    0.0
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

In [24]:
# Move values one row earlier; the last row has no successor and becomes NaN.
snow.shift(periods=-1)

1980-01-01    3.0
1980-01-02    1.0
1980-01-03    0.0
1980-01-04    0.0
1980-01-05    1.0
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    NaN
Name: SNOW, Length: 14160, dtype: float64

#### Rolling Average

In [25]:
# Hand-rolled 5-row mean: the current value plus the four previous rows,
# divided by 5. The first four rows are NaN because their shifted
# neighbours do not exist.
(snow + snow.shift(1) + snow.shift(2) + snow.shift(3) + snow.shift(4)) / 5

1980-01-01    NaN
1980-01-02    NaN
1980-01-03    NaN
1980-01-04    NaN
1980-01-05    1.2
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

In [26]:
# Same 5-row moving average, using the built-in rolling window.
snow.rolling(window=5).mean()

1980-01-01    NaN
1980-01-02    NaN
1980-01-03    NaN
1980-01-04    NaN
1980-01-05    1.2
             ... 
2019-09-03    0.0
2019-09-04    0.0
2019-09-05    0.0
2019-09-06    0.0
2019-09-07    0.0
Name: SNOW, Length: 14160, dtype: float64

#### Resampling

In [27]:
# Largest daily value per calendar month, labelled by month end.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME';
# switch once the environment's pandas version is confirmed.
snow.resample(rule='M').max()

1980-01-31    20.0
1980-02-29    25.0
1980-03-31    16.0
1980-04-30    10.0
1980-05-31     9.0
              ... 
2019-05-31     5.1
2019-06-30     0.0
2019-07-31     0.0
2019-08-31     0.0
2019-09-30     0.0
Freq: M, Name: SNOW, Length: 477, dtype: float64

In [28]:
# Largest daily value per two-month bin.
snow.resample(rule='2M').max()

1980-01-31    20.0
1980-03-31    25.0
1980-05-31    10.0
1980-07-31     1.0
1980-09-30     0.0
              ... 
2019-01-31    19.0
2019-03-31    20.7
2019-05-31    18.0
2019-07-31     0.0
2019-09-30     0.0
Freq: 2M, Name: SNOW, Length: 239, dtype: float64

In [29]:
# Largest daily value per year, where each year ends on the last day of May.
# NOTE(review): pandas 2.2+ spells this alias 'YE-MAY'; confirm the target version.
snow.resample(rule='A-MAY').max()

1980-05-31    25.0
1981-05-31    26.0
1982-05-31    34.0
1983-05-31    38.0
1984-05-31    25.0
1985-05-31    22.0
1986-05-31    34.0
1987-05-31    16.0
1988-05-31    23.0
1989-05-31    30.0
1990-05-31    32.0
1991-05-31    28.0
1992-05-31    22.0
1993-05-31    30.0
1994-05-31    36.0
1995-05-31    25.0
1996-05-31    34.0
1997-05-31    22.0
1998-05-31    29.0
1999-05-31    26.0
2000-05-31    23.0
2001-05-31    19.0
2002-05-31    28.0
2003-05-31    14.0
2004-05-31    24.0
2005-05-31    31.0
2006-05-31    27.0
2007-05-31    15.0
2008-05-31    21.0
2009-05-31    23.0
2010-05-31    32.0
2011-05-31    22.0
2012-05-31    18.0
2013-05-31    19.0
2014-05-31    11.0
2015-05-31    25.0
2016-05-31    15.0
2017-05-31    26.0
2018-05-31    21.8
2019-05-31    20.7
2020-05-31     0.0
Freq: A-MAY, Name: SNOW, dtype: float64

#### Gathering Aggregate Values (But Keeping Index)

In [31]:
# Each day's value as a percentage of its quarter's total. `transform`
# returns the quarterly sum on the original daily index, so the division
# lines up row by row. NaN results are shown as 0.
(snow / snow.resample('Q').transform('sum') * 100).fillna(0)

1980-01-01    0.527009
1980-01-02    0.790514
1980-01-03    0.263505
1980-01-04    0.000000
1980-01-05    0.000000
                ...   
2019-09-03    0.000000
2019-09-04    0.000000
2019-09-05    0.000000
2019-09-06    0.000000
2019-09-07    0.000000
Name: SNOW, Length: 14160, dtype: float64

In [32]:
# Slice covering October 2016 through May 2017.
season2017 = snow.loc['2016-10':'2017-05']

In [33]:
# Each month's total as a percentage of the whole slice's total.
season2017.resample('M').sum() / season2017.sum() * 100

2016-10-31     2.153969
2016-11-30     9.772637
2016-12-31    15.715995
2017-01-31    25.468688
2017-02-28    21.041085
2017-03-31     9.274033
2017-04-30    14.738732
2017-05-31     1.834862
Freq: M, Name: SNOW, dtype: float64

#### Groupby Operations

In [39]:
def season(idx):
    """Label dates with the October-to-September season they belong to.

    January through September keep their calendar year; October through
    December are assigned to the following year.

    Parameters
    ----------
    idx : DatetimeIndex or Timestamp
        Anything exposing ``.year`` and ``.month``.

    Returns
    -------
    Index of season labels for a ``DatetimeIndex``; a plain int for a
    single ``Timestamp``. The scalar path is what makes the function usable
    as a ``groupby`` key, since ``Series.groupby(func)`` calls ``func`` once
    per index label.
    """
    year = idx.year
    month = idx.month
    if hasattr(year, 'where'):
        # Vectorized input: keep `year` wherever the month is before October.
        return year.where((month < 10), year+1)
    # Scalar input: a plain int has no `.where`, so branch explicitly.
    return year if month < 10 else year + 1

In [41]:
# Total per season. `season` is applied to the whole index at once, so
# groupby receives one precomputed label per row.
(snow
  .groupby(season(snow.index))
  .sum()
)

In [42]:
# Yearly totals where each year ends on the last day of September.
snow.resample(rule='A-SEP').sum()

1980-09-30    457.5
1981-09-30    503.0
1982-09-30    842.5
1983-09-30    807.5
1984-09-30    816.0
1985-09-30    536.0
1986-09-30    740.8
1987-09-30    243.1
1988-09-30    314.5
1989-09-30    429.5
1990-09-30    331.5
1991-09-30    504.7
1992-09-30    340.8
1993-09-30    683.5
1994-09-30    321.0
1995-09-30    645.0
1996-09-30    525.5
1997-09-30    563.6
1998-09-30    579.6
1999-09-30    435.7
2000-09-30    453.0
2001-09-30    468.0
2002-09-30    457.8
2003-09-30    365.4
2004-09-30    514.0
2005-09-30    472.0
2006-09-30    594.6
2007-09-30    319.7
2008-09-30    606.0
2009-09-30    476.8
2010-09-30    391.0
2011-09-30    533.8
2012-09-30    293.5
2013-09-30    362.8
2014-09-30    358.7
2015-09-30    284.3
2016-09-30    354.6
2017-09-30    524.0
2018-09-30    308.8
2019-09-30    504.5
Freq: A-SEP, Name: SNOW, dtype: float64

#### Cumulative Operations

In [43]:
# Running total from October 2016 through September 2017.
snow.loc['2016-10':'2017-09'].cumsum()

2016-10-01      0.0
2016-10-02      0.0
2016-10-03      4.9
2016-10-04      4.9
2016-10-05      5.5
              ...  
2017-09-26    524.0
2017-09-27    524.0
2017-09-28    524.0
2017-09-29    524.0
2017-09-30    524.0
Name: SNOW, Length: 364, dtype: float64

In [44]:
# Running total computed separately within each September-ending year,
# returned on the original daily index.
snow.resample(rule='A-SEP').transform('cumsum')

1980-01-01      2.0
1980-01-02      5.0
1980-01-03      6.0
1980-01-04      6.0
1980-01-05      6.0
              ...  
2019-09-03    504.5
2019-09-04    504.5
2019-09-05    504.5
2019-09-06    504.5
2019-09-07    504.5
Name: SNOW, Length: 14160, dtype: float64