### Time Series Analysis

In [1]:
%matplotlib inline

from IPython.core.display import HTML
from IPython.display import Image
# Read the two local stylesheets and inject them into the notebook as a single
# <style> element. Each file is opened in a `with` block so its handle is closed
# as soon as it has been read, rather than whenever the object is collected.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)
# Must stay the last expression of the cell so the notebook renders it.
HTML('<style>{}</style>'.format(css))

In [2]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import pandas_datareader.data as web
from datetime import datetime, timedelta
from dateutil.parser import parse

# Global plot styling: "ticks" theme with seaborn's colour shorthands enabled,
# then the "notebook" plotting context.
sns.set(color_codes=True, style="ticks")
sns.set_context(context="notebook")

In [3]:
# HTTP cache: responses are stored locally in a SQLite-backed cache named
# 'cache' and reused until they are one day old.
import requests_cache
expire_after = timedelta(days=1)
session = requests_cache.CachedSession(
    cache_name='cache',
    backend='sqlite',
    expire_after=expire_after,
)

In [4]:
# Fetch AAPL data from the 'yahoo' source for the two years ending now,
# going through the cached HTTP session.
end = datetime.now()
try:
    # Same calendar day two years earlier, at midnight.
    start = datetime(end.year - 2, end.month, end.day)
except ValueError:
    # Only reachable when `end` falls on Feb 29: the year two back is never a
    # leap year, so that date does not exist. Fall back to Feb 28.
    start = datetime(end.year - 2, 2, 28)
aapl = web.DataReader("AAPL", 'yahoo', start, end, session=session)

In [5]:
# Preview the first rows of the downloaded frame.
aapl.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-07-11,95.360001,95.889999,94.860001,95.220001,34018000,91.700244
2014-07-14,95.860001,96.889999,95.650002,96.449997,42810000,92.884773
2014-07-15,96.800003,96.849998,95.029999,95.32,45477900,91.796546
2014-07-16,96.970001,97.099998,94.739998,94.779999,53396300,91.276506
2014-07-17,95.029999,95.279999,92.57,93.089996,57298000,89.648973


In [6]:
# Parse a date string into a datetime without giving an explicit format.
dt1 = parse('2010-1-23')
# Bare expression so the notebook displays the parsed value.
dt1

datetime.datetime(2010, 1, 23, 0, 0)

In [21]:
# Format the parsed date as an ISO 'YYYY-MM-DD' string.
# The explicit directives replace '%F', which is a C-library extension that is
# not available on every platform. The earlier '%m-%d-%Y' assignment was
# overwritten before it was ever used, so it is dropped.
st1 = dt1.strftime('%Y-%m-%d')
st1

'2010-01-23'

In [8]:
# Convert the formatted date string into a pandas Timestamp.
pd.to_datetime(st1)

Timestamp('2010-01-23 00:00:00')

### Indexing, Selection, Subsetting

In [9]:
# 1000 consecutive daily observations starting 2000-01-01. The values are
# unseeded random draws, so the numbers differ on every run.
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(ts.head())
print('-------')
print(ts.index)

2000-01-01    1.324722
2000-01-02   -0.206807
2000-01-03   -0.240085
2000-01-04   -1.104607
2000-01-05    0.522831
Freq: D, dtype: float64
-------
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-10',
               ...
               '2002-09-17', '2002-09-18', '2002-09-19', '2002-09-20',
               '2002-09-21', '2002-09-22', '2002-09-23', '2002-09-24',
               '2002-09-25', '2002-09-26'],
              dtype='datetime64[ns]', length=1000, freq='D')


In [34]:
# Several ways of selecting a date range from the series. Each result is wrapped
# in print(...) so the cell runs on Python 2 and Python 3 alike; the bare
# `print x` statement is a syntax error on Python 3.
# Partial date string: January 2001.
print(ts['2001-1'].head())
# Slice between two partial date strings: February through March 2001.
print(ts['2001-2':'2001-3'].head())
# Slice between two datetime objects.
print(ts[datetime(2001, 3, 7):datetime(2001, 3, 12)].head())
# Drop everything before 2001-05-01.
print(ts.truncate(before='2001-5-1').head())
# Drop everything after 2001-01-01.
print(ts.truncate(after='2001-1-1').head())
# Open-ended slice starting at 2000-06-01.
print(ts[datetime(2000, 6, 1):].head())

2001-01-01   -0.236617
2001-01-02    0.145243
2001-01-03   -0.066866
2001-01-04    0.902928
2001-01-05    0.718842
Freq: D, dtype: float64
2001-02-01   -0.708197
2001-02-02   -1.316464
2001-02-03   -2.380152
2001-02-04   -0.248703
2001-02-05    0.028782
Freq: D, dtype: float64
2001-03-07   -0.288189
2001-03-08    0.199245
2001-03-09    1.700050
2001-03-10    1.087340
2001-03-11   -0.803317
Freq: D, dtype: float64
2001-05-01   -0.007989
2001-05-02    1.059096
2001-05-03   -0.586033
2001-05-04   -1.586948
2001-05-05   -1.034211
Freq: D, dtype: float64
2000-01-01    1.324722
2000-01-02   -0.206807
2000-01-03   -0.240085
2000-01-04   -1.104607
2000-01-05    0.522831
Freq: D, dtype: float64
2000-06-01   -1.097253
2000-06-02    0.725093
2000-06-03   -1.023762
2000-06-04    0.442504
2000-06-05   -0.125133
Freq: D, dtype: float64


In [35]:
# 100 weekly timestamps anchored on Wednesdays, beginning with the first
# Wednesday on or after 2000-01-01.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')
# One random column per state, indexed by those weekly dates.
df2 = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
df2.head()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.286472,1.237734,0.019677,0.518226
2000-01-12,0.033036,-1.615967,-0.27967,-2.678463
2000-01-19,-1.409606,0.710044,0.74296,-0.076607
2000-01-26,0.866514,-0.30247,1.234499,0.985711
2000-02-02,-1.614416,0.287276,0.956992,1.453917


In [12]:
# Take the second label of the index and use it to look up a single value.
stamp = ts.index[1]
print(stamp)
# Explicit label-based lookup; displayed as the cell's result.
ts.loc[stamp]

2000-01-02 00:00:00


-0.20680686264124787