# Time Series Summary
- Form of structured data (order of observation matters)
    - Fixed frequency - recorded with fixed unit between observations
    - Irregular frequency - without fixed unit of time or offset

In [6]:
# import modules
import numpy as np
import pandas as pd
from datetime import datetime as dt

### Date and Time Data Types and Tools

In [33]:
# Basic datetime object
now = dt.now() # datetime object [year, month, day, hour, minute, second, microsecond]

now.year        # return year
now.month       # return month
now.day         # return day
now.hour        # return hour
now.minute      # return minute
now.second      # return second
now.microsecond # return microsecond

delta = now - dt(2012,3,5) # timedelta = difference between two datetimes
delta.days                 # whole days between the two datetimes
delta.total_seconds()      # TOTAL elapsed seconds between the two datetimes (float)
delta.seconds              # leftover seconds inside the last partial day (0..86399), NOT the total


50777

In [43]:
# timedelta represents the temporal difference between two datetime objects;
# adding one to a datetime yields a new datetime shifted by that amount
from datetime import timedelta

now + timedelta( days=12 ) # datetime 12 days after `now` (`now` was set by dt.now() in the previous cell)


datetime.datetime(2020, 10, 3, 14, 6, 17, 117863)

In [75]:
# Converting between string and datetime
# NOTE: the datetime class is imported as `dt` at the top of the notebook,
# so the bare name `datetime` is not defined on a fresh kernel.
stamp = dt(2001,1,3)

str(stamp)                    # convert to string '2001-01-03 00:00:00'
stamp.strftime('%Y-%m-%d')    # convert to specific format '2001-01-03'

stamp.strftime('%Y')  # year (4-digits)
stamp.strftime('%y')  # year (2-digits)
stamp.strftime('%m')  # month
stamp.strftime('%d')  # day
stamp.strftime('%H')  # hour (24hr cycle)
stamp.strftime('%I')  # hour (12hr cycle)
stamp.strftime('%M')  # minute
stamp.strftime('%S')  # seconds
stamp.strftime('%w')  # weekday [Sun=0, Sat=6]
stamp.strftime('%U')  # week number of year [days before first sunday = week0]
stamp.strftime('%W')  # week number of year [days before first monday = week0]

# The two shortcuts below are handled by the platform's C strftime;
# they may be unavailable on some platforms -- TODO confirm on the target OS
stamp.strftime('%F')  # shortcut '%Y-%m-%d'     ['2001-01-03']
stamp.strftime('%D')  # shortcut '%m/%d/%y'     ['01/03/01']


# If format unknown -> use parse
from dateutil.parser import parse
parse('2011-01-03')               # return datetime object
parse('6/12/2009', dayfirst=True) # day appears before month -> 2009-12-06

# Convert list of strings to datetime
# All strings share one layout: pandas >= 2.0 infers a single format from the
# first element, so mixing layouts in one list is not reliable across versions.
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)              # DatetimeIndex
pd.to_datetime(datestrs + [None])     # NaT = null dt value

pd.date_range('1/1/2000', periods=10)               # create series of consecutive dates
pd.date_range('1/1/2001', periods=10, freq='W-WED') # create series of n Wednesdays

DatetimeIndex(['2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
               '2001-01-31', '2001-02-07', '2001-02-14', '2001-02-21',
               '2001-02-28', '2001-03-07'],
              dtype='datetime64[ns]', freq='W-WED')

### Time Series Basics

In [66]:
# Basic TS is a Series indexed by timestamps

# `dt` is the datetime class (imported at the top as `from datetime import datetime as dt`)
dates = [dt(2011, 1, 2), dt(2011, 1, 5), dt(2011, 1, 7),
         dt(2011, 1, 8), dt(2011, 1, 10), dt(2011, 1, 12)]
data = np.random.rand(6)  # one random value per date

ts = pd.Series(data, index=dates)  # time series data (built from `data`, not a second random draw)
type(ts.index)          # index = DatetimeIndex object
type(ts.index[0])       # scalar index el = Timestamp object


pandas._libs.tslibs.timestamps.Timestamp

In [74]:
# Indexing, Selection, Subsetting
# 1000 consecutive daily observations starting 2000-01-01 (last day = 2002-09-26)
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))

ts.index[2]       # return the index label at position 2
ts[ ts.index[2] ] # return the value stored under that label

ts['2001']        # return all days in 2001
ts['2001-05']     # return all days in May, 2001

# truncate() DROPS the data outside the given bound and keeps the rest.
# NOTE: the 2011 bound used here lies past the end of this series (2002-09-26).
ts.truncate(after='1/9/2011') # drop everything AFTER the date -> keeps days up to it (here: the whole series)
ts.truncate(before='1/9/2011') # drop everything BEFORE the date -> keeps days from it onward (here: empty)


2000-01-01   -1.004192
2000-01-02    1.385041
2000-01-03   -0.693117
2000-01-04   -1.285536
2000-01-05    1.062582
                ...   
2002-09-22   -1.187653
2002-09-23    1.406015
2002-09-24    0.102003
2002-09-25    0.327340
2002-09-26    0.527048
Freq: D, Length: 1000, dtype: float64

In [83]:
# Dealing with duplicate indices

# '1/2/2001' is listed twice on purpose to create a non-unique index
dates = pd.DatetimeIndex(['1/1/2001', '1/2/2001', '1/2/2001','1/5/2001'])
dup_ts = pd.Series(np.arange(4), index=dates)

dup_ts.index.is_unique   # False -> indices are not unique
dup_ts['1/2/2001']       # two entries for 1/2/2001

grouped = dup_ts.groupby(level=0) # aggregate data having non-unique timestamps (group on the index itself)
grouped.mean()                    # mean value per date
grouped.count()                   # n obs per date


2001-01-01    1
2001-01-02    2
2001-01-05    1
dtype: int64

### Date Ranges, Frequencies, and Shifting
- It is often desirable to work relative to a fixed frequency, even if that means introducing missing values (NaN/NaT) into the series

In [91]:
# Date Ranges

pd.date_range('2012-04-01','2012-06-01')      # DatetimeIndex of all days between dates
pd.date_range(start='2012-04-01', periods=20) # n dates (freq=Day)

# Last business day of each month between the dates.
# An offset object is used instead of the string alias: the alias 'BM' was
# renamed to 'BME' in pandas 2.2, whereas pd.offsets.BMonthEnd() works on
# both older and newer pandas versions.
pd.date_range('2001-01-01', '2001-12-01', freq=pd.offsets.BMonthEnd())



DatetimeIndex(['2001-01-31', '2001-02-28', '2001-03-30', '2001-04-30',
               '2001-05-31', '2001-06-29', '2001-07-31', '2001-08-31',
               '2001-09-28', '2001-10-31', '2001-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [107]:
# Frequencies and Date Offsets
# A frequency string = optional integer multiple + base alias; parts can be combined
start = '2001-1-1'
end = '2001-9-1'

pd.date_range(start, end, freq='4h')       # one timestamp every 4 hours
pd.date_range(start, end, freq='1h30min')  # one timestamp every 1 hour 30 minutes
pd.date_range(start, end, freq='WOM-3FRI') # week-of-month: dates of every 3rd Friday of the month


DatetimeIndex(['2001-01-19', '2001-02-16', '2001-03-16', '2001-04-20',
               '2001-05-18', '2001-06-15', '2001-07-20', '2001-08-17'],
              dtype='datetime64[ns]', freq='WOM-3FRI')

In [117]:
# Shifting - moving data backward and forward through time
# Month-end index built from an offset object: the string alias 'M' was renamed
# to 'ME' in pandas 2.2, while pd.offsets.MonthEnd() works on old and new versions.
ts = pd.Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq=pd.offsets.MonthEnd()))

ts.shift(2)              # shifts VALUES 2 rows forward  (first 2 values = NaN, index=same)
ts.shift(-1)             # shifts VALUES 1 row backward  (last value = NaN, index=same)

ts.shift(2, freq=pd.offsets.MonthEnd())  # shifts INDEX 2 month-ends ahead (values unchanged)
ts.shift(3, freq='D')    # shifts INDEX 3 days ahead
ts.shift(1, freq='90min')  # shifts INDEX 90min ahead ('90min' replaces the deprecated alias '90T')

2000-02-03    0.007270
2000-03-03    1.340966
2000-04-03    1.737451
2000-05-03   -0.542965
dtype: float64