# Work with Time
## Lecture demo


In [None]:
import numpy as np
import pandas as pd
import scipy as sp
import statsmodels.api as sm
import statsmodels.graphics as smg
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime
from dateutil.parser import parse
from pandas import Series
sns.set(style='white', color_codes=True, font_scale=1.3)


# make the Pandas tables a little more readable
from IPython.display import HTML  # public import path for the HTML display class

# Read both stylesheets inside `with` blocks so the file handles are closed
# right away instead of being left open until garbage collection.
css_parts = []
for css_path in ('style-table.css', 'style-notebook.css'):
    with open(css_path) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell, so the notebook renders the <style> tag.
HTML('<style>{}</style>'.format(css))

In [None]:
# Load the stocks file:
#   parse_dates=True turns the index strings into datetimes,
#   index_col=0 makes the first column (the date) the index.
df_stock = pd.read_csv('stocks.csv', index_col=0, parse_dates=True)
# keep the MSFT column around as its own Series
msft = df_stock['MSFT']

## The Basics (Time in Python)

In [None]:
# get the current time


In [None]:
# the current time (at the time that I called datetime.now())
 

In [None]:
# get current year, but also now.month

In [None]:
# create a datetime in the past 9/27/2015

In [None]:
# compare the times
 

In [None]:
# a timedelta stores days, seconds, microseconds  (days + seconds + microseconds)
  

In [None]:
# just converting to string
str(now)

## convert from strings to datetime objects

In [None]:
# read the string and give you back a datetime object
 

In [None]:
# MM/DD/YY
 

In [None]:
# for European style  DD/MM/YY
  

In [None]:
# weird formats
parse("4th of July, 2015")

In [None]:
# controlling pretty printing
date.strftime('%A')

# http://strftime.org/
# d, c, A

In [16]:
## WORKSHEET EXERCISE
dt = datetime(2011, 6, 2, 20, 0)

# get it to print "2nd of June, 2011 at 8 pm"


# Numpy Dates

In [None]:
# NumPy's native date type is datetime64; a plain 'YYYY-MM-DD' string
# produces a day-resolution value
date = np.array('2015-07-04', dtype=np.datetime64)
date

In [None]:
# vectorized date arithmetic: adding 0..11 to `date` gives 12 consecutive dates
date + np.arange(12)

In [None]:
# the second argument selects the time unit: 'ns' = nanosecond resolution
np.datetime64('2015-07-04 12:59:59.50', 'ns')

# generating a series of time

In [None]:
dates = [datetime(2011,1,1),datetime(2011,1,2),datetime(2011,1,3),datetime(2011,1,4),datetime(2011,1,5)]

In [None]:
# dates now holds 5 datetime objects
dates

In [None]:
# create 5 random numbers --> np.random.randn(5)
# make the index the dates



In [None]:
# give me the first value
  

In [None]:
# get a range  from 2nd to 5th
 

In [None]:
# give me the data for 1/3/2011
 

In [None]:
# will also work
 

In [None]:
# range
ts['1/1/2011':'1/3/2011']

In [None]:
# show me the time series

## Math with Dates

In [None]:
# pd.to_datetime accepts heterogeneous input: here one datetime object and
# four date strings, each written in a different format
dates = pd.to_datetime([datetime(2015, 7, 3), '4th of July, 2015',
                       '2015-Jul-6', '07-07-2015', '20150708'])
dates

In [None]:
# tell python we're working with days


In [None]:
# find the diff to the first day


# Making Ranges

In [None]:
# make a range between 7/3/2015 and 7/10/2015


In [None]:
# make a range starting at 7/3/2015
# for 8 days


In [None]:
# make a range starting at 7/3/2015
# for 8 days but at hour granularity


# resampling data (dates)

In [None]:
# make the index the dates, create random data
ts = Series(np.random.randn(5),index=dates)  
ts

In [None]:
# resample by H --> H is hours


In [None]:
# resample by 6H --> H is hours and 6 is multiplier


In [None]:
# fill in missing data by pushing data forward


In [None]:
# missing data in series, practical example


In [None]:
# NOTE(review): `data` is not defined in the visible cells -- presumably the
# blank "practical example" cell above is meant to create it; confirm.
# two stacked axes that share the x-axis
fig, ax = plt.subplots(2, sharex=True)

# top: the series converted to daily frequency with no fill method
data.asfreq('D').plot(ax=ax[0], marker='o')

# bottom: the same conversion, once back-filled and once forward-filled
data.asfreq('D', method='bfill').plot(ax=ax[1], style='-o')
data.asfreq('D', method='ffill').plot(ax=ax[1], style='--o')
ax[1].legend(["back-fill", "forward-fill"]);

# Shift in time

In [None]:
# date_range as a helper function
indx = pd.date_range('4/1/2012','6/1/2012')  # make me a date range, over 2 months
indx  # returns by day

In [None]:
# look at the bottom of object, see freq='...'  (tells you pattern)

In [None]:
# make a new time series using date_range
# pd.offsets.MonthEnd() is the month-end frequency. The offset object is used
# instead of the string alias 'M', which pandas 2.2 deprecated for date_range
# (its replacement alias 'ME' is not understood by older releases).
ts = Series(abs(np.random.randn(4)),
            index=pd.date_range('1/1/2000', periods=4,
                                freq=pd.offsets.MonthEnd()))
# np.random.randn(4)  --> 4 random numbers
# abs(...)  made them all >= 0  (absolute value)
# index --> key for series
# date range --> start at 1/1/2000, give me 4 month-end dates

In [None]:
ts

In [None]:
# shifted value (shift by 1)
 

In [None]:
# shift backwards
  

In [None]:
# calculate ratio


In [None]:
# WORKSHEET EXERCISE
# calculate percent change 

In [None]:
# shift time
ts

In [None]:
# three stacked axes sharing the y-axis: original, data-shifted, index-shifted
fig, ax = plt.subplots(3, sharey=True)

# apply a frequency to the data
# (daily index; gaps are padded with the previous value)
msft = msft.asfreq('D', method='pad')

msft.plot(ax=ax[0])
# shift(400) moves the values 400 rows along an unchanged index
msft.shift(400).plot(ax=ax[1])
# Shifting with freq= moves the index by 400 days and leaves the values alone.
# This replaces Series.tshift(400), which was removed in pandas 2.0; the
# legend label below still carries the historical name.
msft.shift(400, freq='D').plot(ax=ax[2])

# legends and annotations
local_max = pd.to_datetime('2013-11-05')
offset = pd.Timedelta(400, 'D')

ax[0].legend(['input'], loc=2)
ax[0].get_xticklabels()[2].set(weight='heavy', color='red')
ax[0].axvline(local_max, alpha=0.3, color='red')

# in both shifted plots the marker line sits 400 days after local_max
ax[1].legend(['shift(400)'], loc=2)
ax[1].get_xticklabels()[2].set(weight='heavy', color='red')
ax[1].axvline(local_max + offset, alpha=0.3, color='red')

ax[2].legend(['tshift(400)'], loc=2)
ax[2].get_xticklabels()[1].set(weight='heavy', color='red')
ax[2].axvline(local_max + offset, alpha=0.3, color='red');

In [None]:
# make a larger time series:
# 100 daily periods starting at 1/1/2000, one random value per period
rng = pd.period_range(start='1/1/2000', periods=100, freq='D')
ts = Series(np.random.randn(rng.size), index=rng)
ts.plot()

In [None]:
# create a new time series with the mean as the monthly value
 

In [None]:
# find max value in that month
 

# working with data frames

In [None]:
df_stock.head(10)

In [None]:
# B is telling pandas that we are working with business days
df_stock = df_stock.to_period('B')  

In [None]:
df_stock[['AAPL','MSFT']]   # select the Apple and Microsoft columns


In [None]:
# resample, replacing days by months, and use mean (average monthly value)


In [None]:
# Resampling two ways - resample() is an aggregation
# asfreq() is a selection
print('On 12/30/2011, msft was at:', msft['2011-12-30'])


In [None]:
# business year end frequency (select)


In [None]:
# business year end frequency (sample)


In [None]:
# Plot the input series together with its two business-year-end versions:
# resample() aggregates (here: the mean), asfreq() selects existing values.
# The offset object replaces the string alias 'BA', which pandas 2.2
# deprecated (its successor 'BYE' is not understood by older releases).
year_end = pd.offsets.BYearEnd()
msft.plot(alpha=0.5, style='-')
msft.resample(year_end).mean().plot(style=':')
msft.asfreq(year_end).plot(style='--');
plt.legend(['input', 'resample', 'asfreq'],
           loc='upper left');

In [None]:
# give me the first 10 days of apple


In [None]:
# give you the amount changed over time


In [None]:
# notice: skips the non-business days
  

In [None]:
# give me a resample (not restricted to business days)
 

In [None]:
# fill in the missing data


In [None]:
# rolling data (mean over time)


In [None]:
# NOTE(review): `rolling` is not defined in the visible cells -- presumably the
# blank "rolling data" cell above creates it from msft; confirm.
# put the raw series and the rolling mean/std side by side, one column each
data = pd.DataFrame({'input': msft,
                     'one-year rolling_mean': rolling.mean(),
                     'one-year rolling_std': rolling.std()})
# positional row slice: rows 500 up to (not including) 900
data[500:900]

In [None]:
# one line style per column, in column order
ax = data.plot(style=['-', '--', ':'])
# make the first plotted line mostly transparent
ax.lines[0].set_alpha(0.3)