# Chapter 11: Time Series
## Preliminaries

In [None]:
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import Hour, Minute
import pandas as pd
import numpy as np

## 11.1 Date and Time Data Types and Tools

In [None]:
# Capture the current local date and time as a naive datetime.
now = datetime.now()
now

In [None]:
# Individual calendar fields are exposed as attributes.
now.year, now.month, now.day

In [None]:
# Subtracting one datetime from another yields a timedelta.
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
delta

In [None]:
# Whole-day component of the difference.
delta.days

In [None]:
# Leftover seconds within the final partial day (not the total duration).
delta.seconds

In [None]:
# Adding a timedelta moves a datetime forward.
start = datetime(2011, 1, 7)
start + timedelta(days=12)

In [None]:
# A timedelta can be scaled by an integer before it is subtracted.
start - 2 * timedelta(days=12)


### Converting Between String and Datetime

In [None]:
# str() renders a datetime as 'YYYY-MM-DD HH:MM:SS'.
stamp = datetime(2011, 1, 3)
str(stamp)

In [None]:
# Format with an explicit strftime pattern.
stamp.strftime('%Y-%m-%d')

In [None]:
# Parse a string back into a datetime using the same pattern.
value = '2011-01-03'
datetime.strptime(value, '%Y-%m-%d')

In [None]:
# Parse several month/day/year strings with one format.
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

In [None]:
# pd.to_datetime parses a list of date strings into a DatetimeIndex.
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)

In [None]:
# A None entry is converted to NaT, pandas' missing-timestamp marker.
idx = pd.to_datetime(datestrs + [None])
idx

In [None]:
# Third element: the value produced from the None entry.
idx[2]

In [None]:
# Boolean mask flagging the missing entries.
pd.isna(idx)

## 11.2 Time Series Basics

In [None]:
# Build a Series of six random values indexed by datetime objects.
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
        datetime(2011, 1, 7), datetime(2011, 1, 8),
        datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.standard_normal(6), index=dates)
ts

In [None]:
# The datetime objects were placed in a DatetimeIndex.
ts.index

In [None]:
# Arithmetic aligns on the index; labels missing from ts[::2] produce NaN.
ts + ts[::2]

In [None]:
# dtype of the index values.
ts.index.dtype

In [None]:
# Scalars taken from a DatetimeIndex are pandas Timestamp objects.
stamp = ts.index[0]
stamp

### Indexing, Selection, and Subsetting

In [None]:
# Select a value using a label taken from the index.
stamp = ts.index[3]
ts[stamp]

In [None]:
# A string that can be interpreted as a date also works as a label.
ts['2011-01-10']

In [None]:
# Longer series: 1000 consecutive daily observations from 2000-01-01.
longer_ts = pd.Series(np.random.standard_normal(1000),
                     index=pd.date_range('2000-01-01', periods=1000))

longer_ts

In [None]:
# Partial string indexing: a year string selects every entry in that year.
longer_ts["2001"]

In [None]:
# A year-month string selects every entry in that month.
longer_ts['2001-05']

In [None]:
# Slice from a datetime label to the end of the series.
ts[datetime(2011, 1, 7):]

In [None]:
# Label-based slices include both endpoints.
ts[datetime(2011, 1, 7):datetime(2011, 1, 10)]

In [None]:
# Show the full series again for comparison with the slices below.
ts

In [None]:
# Slice bounds do not have to be present in the index.
ts['2011-01-06':'2011-01-11']

In [None]:
# truncate(after=...) drops the entries that fall after the given date.
ts.truncate(after='2011-01-09')

In [None]:
# 100 weekly dates, each falling on a Wednesday.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
dates

In [None]:
# DataFrame of random values with one column per state, indexed by `dates`.
long_df = pd.DataFrame(
    np.random.standard_normal((100, 4)),
    index=dates,
    columns=['Colorado', 'Texas',
             'New York', 'Ohio'])
long_df

In [None]:
# Partial string selection on the rows via .loc.
long_df.loc['2001-05']

### Time Series with Duplicate Indices

In [None]:
# Index in which 2000-01-02 appears three times.
dates = pd.DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-02",
                         "2000-01-02", "2000-01-03"])

dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

In [None]:
# Reports whether every index label occurs exactly once.
dup_ts.index.is_unique

In [None]:
# Selecting a duplicated label returns all matching entries.
dup_ts['2000-01-02']

In [None]:
# Group on the index itself (level 0) to aggregate duplicate timestamps.
grouped = dup_ts.groupby(level=0)
grouped.mean()

In [None]:
# Number of observations recorded for each timestamp.
grouped.count()

## 11.3 Date Ranges, Frequencies, and Shifting

In [None]:
# Display the series that is about to be resampled.
ts

In [None]:
# resample('D') returns a Resampler object for daily bins; nothing is
# aggregated until a method is called on it.
resampler = ts.resample('D')
resampler

### Generating Date Ranges

In [None]:
# Daily dates between two endpoints (daily is the default frequency).
index = pd.date_range('2012-04-01', '2012-06-01')
index

In [None]:
# 20 daily dates counting forward from the start date.
pd.date_range(start='2012-04-01', periods=20)

In [None]:
# 20 daily dates ending on the given date.
pd.date_range(end='2012-06-01', periods=20)

In [None]:
# Only the month-end dates ('ME') that fall inside the interval.
pd.date_range('2000-01-01', '2000-12-01', freq='ME')

In [None]:
# The time of day of the start timestamp is carried through by default.
pd.date_range('2012-05-02 12:56:31', periods=5)

In [None]:
# normalize=True resets the generated timestamps to midnight.
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)

### Frequencies and Date Offsets

In [None]:
# An offset object representing one hour.
hour = Hour()
hour

In [None]:
# Passing an integer gives a multiple of the base offset.
four_hours = Hour(4)
four_hours

In [None]:
# The string alias '4h' expresses the same frequency as Hour(4).
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

In [None]:
# Offsets can be combined by addition.
Hour(2) + Minute(30)

In [None]:
# Compound frequency string: one hour and thirty minutes.
pd.date_range('2000-01-01', periods=10, freq='1h30min')

#### Week of month dates

In [None]:
# 'WOM-3FRI' is a week-of-month frequency: the third Friday of each month.
monthly_dates = pd.date_range("2012-01-01", "2012-09-01", freq="WOM-3FRI")
list(monthly_dates)

### Shifting and Lagging Data

In [None]:
# Four month-end observations starting in January 2012.
ts = pd.Series(np.random.standard_normal(4),
                index=pd.date_range('2012-01-01', periods=4, freq='ME'))
ts
               

In [None]:
# A negative shift moves values earlier; the index is unchanged and the
# vacated trailing slots become NaN.
ts.shift(-2)

In [None]:
# A positive shift moves values later; the first slot becomes NaN.
ts.shift(1)

In [None]:
# With freq given, the timestamps are advanced instead of the data, so no
# values are discarded.
ts.shift(2, freq='ME')

In [None]:
# Move every timestamp three days later.
ts.shift(3, freq='D')

In [None]:
# Move every timestamp ninety minutes later.
ts.shift(1, freq='90min')

#### Shift dates with offsets

In [None]:
# Additional offset classes used in the cells below.
from pandas.tseries.offsets import Day, MonthEnd

In [None]:
# Offsets can be added to plain datetime objects.
now = datetime(2011, 11, 17)
now + 3 * Day()

In [None]:
# An anchored offset rolls the date forward to the next month end.
now + MonthEnd()

In [None]:
# MonthEnd(2) lands on the month end after that one.
now + MonthEnd(2)

In [None]:
# 20 observations spaced four days apart.
ts = pd.Series(np.random.standard_normal(20),
                index=pd.date_range('2000-01-15', periods=20, freq='4D'))
ts

In [None]:
# Group by the month end each timestamp rolls forward to, then average.
ts.groupby(MonthEnd().rollforward).mean()

In [None]:
# Monthly means via resample, for comparison with the groupby above.
ts.resample('ME').mean()

## 11.4 Time Zone Handling

In [None]:
# Third-party time zone database used in this section.
import pytz

In [None]:
# Last five names in pytz's list of common time zones.
pytz.common_timezones[-5:]

In [None]:
# Look up a time zone object by name.
tz = pytz.timezone('America/New_York')
tz

### Time Zone Localization and Conversion

In [None]:
# Daily series built without a tz argument, so its index is time zone-naive.
dates = pd.date_range("2012-03-09", periods=6, freq="D")
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

In [None]:
# date_range can also produce a time zone-aware index directly.
pd.date_range("2012-03-09 09:30", periods=10, freq="D", tz='UTC')

In [None]:
# Index of the naive series, for comparison with the UTC range above.
ts.index

In [None]:
# tz_localize attaches a zone to naive timestamps.
ts_utc = ts.tz_localize('UTC')
ts_utc.index

In [None]:
# tz_convert translates already-localized timestamps into another zone.
ts_utc.tz_convert('America/New_York').index

In [None]:
# Localize the original naive series as New York time instead.
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.index

In [None]:
# Convert the New York-localized series to UTC.
ts_eastern.tz_convert('UTC').index

In [None]:
# Convert the New York-localized series to Berlin time.
ts_eastern.tz_convert('Europe/Berlin').index

In [None]:
# tz_localize is also available on the index itself.
ts.index.tz_localize('Asia/Shanghai')

### Operations with Time Zone-Aware Timestamp Objects

In [None]:
# A naive Timestamp (no tz given).
stamp = pd.Timestamp('2011-03-12 04:00')
stamp

In [None]:
# Individual Timestamps can be localized just like a series.
stamp_utc = stamp.tz_localize('UTC')
stamp_utc

In [None]:
# Convert the UTC timestamp to New York time.
stamp_utc.tz_convert('America/New_York')

In [None]:
# The zone can also be supplied when the Timestamp is created.
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow

In [None]:
# Underlying integer epoch value of the timestamp.
stamp_utc.value

In [None]:
# Converting the zone is expected to leave that integer unchanged, since
# both objects refer to the same instant.
stamp_utc.tz_convert('America/New_York').value

In [None]:
# 2012-03-11 is the US spring-forward date; 01:30 is thirty minutes before
# the transition.
stamp = pd.Timestamp('2012-03-11 01:30', tz='America/New_York')
stamp

In [None]:
# Adding an hour crosses the DST transition.
stamp + Hour()

In [None]:
# 2012-11-04 is the US fall-back date; 00:30 is ninety minutes before the
# transition.
stamp = pd.Timestamp('2012-11-04 00:30', tz='America/New_York')
stamp

In [None]:
# Adding two hours crosses the DST transition.
stamp + 2 * Hour()

### Operations Between Different Time Zones

In [None]:
# Ten business-day timestamps at 09:30.
dates = pd.date_range("2012-03-07 09:30", periods=10, freq="B")
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts.index

In [None]:
# Localize two overlapping slices into different zones and add them; the
# index of the combined result is expected to be in UTC.
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts[2:].tz_localize('Europe/Moscow')
result = ts1 + ts2
result.index

## 11.5 Periods and Period Arithmetic

In [None]:
# A Period for the year 2011, with annual frequency ending in December.
p = pd.Period(2011, freq='Y-DEC')
p

In [None]:
# Integer arithmetic shifts a period by whole units of its frequency.
p + 5

In [None]:
# Subtracting an integer shifts the period backwards.
p - 2

In [None]:
# Difference between two periods that share a frequency.
pd.Period(2014, freq='Y-DEC') - p

In [None]:
# Monthly periods covering January through June 2000.
periods = pd.period_range('2000-01-01', '2000-06-30', freq='M')
periods

In [None]:
# A PeriodIndex can serve as the index of a Series.
pd.Series(np.random.standard_normal(len(periods)), index=periods)

In [None]:
# Quarter strings parsed into a quarterly PeriodIndex (year ending in December).
values = ["2001Q3", "2002Q2", "2003Q1"]
index = pd.PeriodIndex(values, freq='Q-DEC')
index

### Period Frequency Conversion

In [None]:
# Annual period for 2011, year ending in December.
p = pd.Period(2011, freq='Y-DEC')
p

In [None]:
# Convert to monthly frequency, taking the first month of the year.
p.asfreq('M', how='start')

In [None]:
# Convert to monthly frequency, taking the last month of the year.
p.asfreq('M', how='end')

In [None]:
# Without `how`, asfreq uses its default, which is the end of the span.
p.asfreq('M')

In [None]:
# Annual period for a fiscal year ending in June.
p = pd.Period(2011, freq='Y-JUN')
p

In [None]:
# First month of that fiscal year.
p.asfreq('M', how='start')

In [None]:
# Last month of that fiscal year; `how` is passed positionally here.
p.asfreq('M', 'end')

In [None]:
# A monthly period for August 2011.
p = pd.Period('Aug-2011', freq='M')
p

In [None]:
# Going from a higher to a lower frequency yields the annual period that
# contains this month.
p.asfreq('Y-JUN')

In [None]:
# Annual periods 2006 through 2009 used as a Series index.
periods = pd.period_range('2006', '2009', freq='Y-DEC')
ts = pd.Series(np.random.standard_normal(len(periods)), index=periods)
ts

In [None]:
# Relabel each annual period with the first month of its year.
ts.asfreq('M', how='start')

In [None]:
# Relabel each annual period with the last business day of its year.
# NOTE(review): recent pandas releases appear to warn about business-day
# ('B') periods — confirm against the installed version.
ts.asfreq(freq='B', how='end')

### Quarterly Period Frequencies

In [None]:
# Fourth quarter of a fiscal year that ends in January.
p = pd.Period('2012Q4', freq='Q-JAN')
p   

In [None]:
# First day of that quarter.
p.asfreq(freq='D', how='start')

In [None]:
# Last day of that quarter.
p.asfreq(freq='D', how='end')

In [None]:
# 4 PM on the second-to-last business day of the quarter: take the last
# business day, step back one, move to the first minute of that day, then
# add 16 hours expressed in minutes.
p4pm = (p.asfreq("B", how="end") - 1).asfreq("min", how="start") + 16 * 60
p4pm


In [None]:
# Convert the minute-resolution period to a Timestamp.
p4pm.to_timestamp()

In [None]:
# Quarterly periods (fiscal year ending in January) from 2011Q3 to 2012Q4.
periods = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
periods

In [None]:
# Series of consecutive integers indexed by those quarters.
ts = pd.Series(np.arange(len(periods)), index=periods)
print(ts)

In [None]:
# Same construction applied to the whole index at hourly resolution
# (+16 hours), then the index is replaced with the resulting timestamps.
new_periods = (periods.asfreq("B", "end") - 1).asfreq("h", "start") + 16
ts.index = new_periods.to_timestamp()
print(ts)

### Converting Timestamps to Periods (and Back)

In [None]:
# Three month-end timestamps with random values.
dates = pd.date_range('2000-01-01', periods=3, freq='ME')
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
print(ts)

In [None]:
# Convert the timestamp index to periods; no frequency is passed, so it is
# taken from the index.
pts = ts.to_period()
print(pts)

In [None]:
# Six daily timestamps spanning the January/February boundary.
dates = pd.date_range('2000-01-29', periods=6)
ts2 = pd.Series(np.random.standard_normal(len(dates)), index=dates)
print(ts2)

In [None]:
# With an explicit monthly frequency several days map to the same month,
# so the resulting index contains repeated periods.
ts2.to_period('M')

In [None]:
# Round trip: periods back to timestamps, placed at the end of each period.
pts = ts2.to_period('M')
print('This is a period series: \n')
print(pts)
print('This is a timestamp series:  \n')
print(pts.to_timestamp(how='end'))

### Creating a PeriodIndex from Arrays

In [None]:
# Load the macro data set.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only runs where that exact location exists — consider a path relative to
# the notebook.
data = pd.read_csv(r"F:\books\pydata-book-3rd-edition\pydata-book-3rd-edition\examples\macrodata.csv")
data.head(5)

In [None]:
# Year column of the data set.
data.year

In [None]:
# Quarter column of the data set.
data.quarter

In [None]:
# Combine the year and quarter columns into a quarterly PeriodIndex.
index = pd.PeriodIndex.from_fields(year=data.year, quarter=data.quarter, freq='Q-DEC')
index

In [None]:
# Use it as the row index, then look at the first few `infl` values.
data.index = index
data.infl.head(5)

## 11.6 Resampling and Frequency Conversion

In [None]:
# 100 consecutive daily dates starting 2000-01-01.
dates = pd.date_range('2000-01-01', periods=100)
dates

In [None]:
# Random values indexed by those dates.
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

In [None]:
# Downsample to month-end bins and average the values within each bin.
ts.resample('ME').mean()

In [20]:
# Monthly means labelled by period rather than by month-end timestamp.
# The `kind='period'` argument of resample is deprecated (pandas 2.2), so
# aggregate on the timestamps first and then convert the month-end labels
# to monthly periods.
ts.resample('ME').mean().to_period('M')

# Example output from an earlier run (the values differ on every run
# because the data is random):
#
# 2000-01   -0.310795
# 2000-02   -0.059159
# 2000-03    0.120590
# 2000-04   -0.005917
# Freq: M, dtype: float64