In [1]:
# Bismillah

In [2]:
#pip install pandas_datareader

In [3]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib inline
# Display floats with 7 decimal places. The fully qualified key is used because the bare
# 'precision' pattern is ambiguous in newer pandas (it also matches 'styler.format.precision')
# and raises an OptionError there.
pd.set_option('display.precision', 7)
from pandas_datareader import data

In [4]:
type(2014-8-1)
# This looks like a date to us, but Python evaluates it as integer arithmetic (2014 minus 8 minus 1), so the result
# is just an int. The operations permitted on dates cannot be performed on this int.
# A date cannot be created by passing this expression to the datetime function either: the year, month and day must
# be given as separate arguments, as shown in the next cell.

int

In [5]:
dt.datetime(2014, 8, 1).month
# A datetime object is created, so date operations (here: reading the month) can be performed on it.
# Year, month, day, hours, minutes, seconds and microseconds must be passed as separate, comma-separated arguments.

8

In [6]:
type(dt.datetime(2014, 8, 1, 9, 30, 45, 100000))

datetime.datetime

In [7]:
dates = [dt.datetime(2014, 8, 1), dt.datetime(2014, 8, 2)]
dates
# a list of datetime objects created.

[datetime.datetime(2014, 8, 1, 0, 0), datetime.datetime(2014, 8, 2, 0, 0)]

In [8]:
type(dates)

list

In [9]:
type(dates[1])

datetime.datetime

In [10]:
dti = pd.DatetimeIndex(dates)
dti
# datetime index created from a list of datetime objects. Each item in DatetimeIndex is an object of Timestamp class.

DatetimeIndex(['2014-08-01', '2014-08-02'], dtype='datetime64[ns]', freq=None)

In [11]:
type(dti)

pandas.core.indexes.datetimes.DatetimeIndex

In [12]:
type(dti[0])

pandas._libs.tslibs.timestamps.Timestamp

In [13]:
ts = pd.Series(np.random.randn(2))
ts
# the index is zero-based integer index.

0    1.2448113
1   -1.3911497
dtype: float64

In [14]:
type(ts)

pandas.core.series.Series

In [15]:
ts1 = pd.Series(np.random.randn(2), dates)
ts1
# the index is datetime index rather than zero-based integer index.
# a series will automatically construct a DatetimeIndex as its index when passing a list of datetime objects (dates)
# as the index parameter.

2014-08-01   -1.0236142
2014-08-02   -0.0644776
dtype: float64

In [16]:
type(ts1)

pandas.core.series.Series

In [17]:
ts2 = pd.Series(np.random.randn(2), dti)
ts2
# here we have directly passed the DatetimeIndex as the index parameter.

2014-08-01    0.8520248
2014-08-02   -1.4999467
dtype: float64

In [18]:
type(ts2)

pandas.core.series.Series

In [19]:
ts2.index
# the index of the series is DatetimeIndex as can be confirmed from the following type command.

DatetimeIndex(['2014-08-01', '2014-08-02'], dtype='datetime64[ns]', freq=None)

In [20]:
type(ts2.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [21]:
# Important: The series object has taken the datetime objects (contained in the variable, dates) and constructed a 
# DatetimeIndex from the date values contained in the datetime objects. Each value of the DatetimeIndex is the 
# Timestamp object and each element (Timestamp) of the DatetimeIndex can be used to access the corresponding value in
# the Series object. This is demonstrated in the following:

In [22]:
ts2

2014-08-01    0.8520248
2014-08-02   -1.4999467
dtype: float64

In [23]:
ts2['2014-08-01']
# The first row is retrieved using [] with a date string, without slicing, loc or iloc. This works because a Series
# has a single column of values, so the key passed to [] can only refer to the index (the rows).

0.8520248300091848

In [24]:
ts2['2014-08-01':'2014-08-02']
# the end date is inclusive.

2014-08-01    0.8520248
2014-08-02   -1.4999467
dtype: float64

In [25]:
ts2[dt.datetime(2014, 8, 1)]
# Instead of a date string with '-' separators, a datetime object (comma-separated arguments, no quotes) can be used
# as the key to produce the same output.

0.8520248300091848

In [26]:
# The next example passes a list of date strings as the index. Plain strings are not converted automatically; wrap the list in pd.to_datetime() to obtain a DatetimeIndex:

In [27]:
dates = ['2014-08-01', '2014-08-02']
dates

['2014-08-01', '2014-08-02']

In [28]:
type(dates)
# list of strings.

list

In [29]:
type(dates[0])
# the first element (date) in the list is of type string and the same is the second element.

str

In [30]:
ts3 = pd.Series(np.random.randn(2), dates)
ts3
# Here the dates have been passed as plain strings in the index argument. pandas does NOT convert them automatically:
# as the type check in the next cell shows, the index is a generic Index of strings, not a DatetimeIndex.
# To get a DatetimeIndex, convert the strings first, e.g. pd.Series(np.random.randn(2), pd.to_datetime(dates)).

2014-08-01    0.0734030
2014-08-02    0.4643571
dtype: float64

In [31]:
type(ts3.index)

pandas.core.indexes.base.Index

In [32]:
# convert a list of mixed type items/elements into DatetimeIndex using pandas' function, to_datetime().

In [33]:
dti2 = pd.to_datetime(['Aug1 2014', '2014-08-02', '2014.8.3', None])
dti2
# note that all are strings in the list except the keyword, None.
# all the strings have been converted into DatetimeIndex (pandas recognized dates as shown below:)
# None has been converted into NaT (not a time-value), which means that the source data could not be converted into
# datetime.

DatetimeIndex(['2014-08-01', '2014-08-02', '2014-08-03', 'NaT'], dtype='datetime64[ns]', freq=None)

In [34]:
type(dti2)

pandas.core.indexes.datetimes.DatetimeIndex

In [35]:
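#dti3 = pd.to_datetime(['Aug 1, 2014', '2014-08-02', '2014.8.3', 'foo'], coerce = True)
# gives error: the coerce keyword used in the book is not accepted by this pandas version. The equivalent option is
# errors = 'coerce', which turns values that cannot be parsed (such as 'foo') into NaT instead of raising:
#dti3 = pd.to_datetime(['Aug 1, 2014', '2014-08-02', '2014.8.3', 'foo'], errors = 'coerce')
# Without it, pandas cannot parse 'foo' since it does not look like a date. Previously, None was accepted because
# pandas treats it as a missing value, so it produced NaT without raising an error.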

In [36]:
# Changing the date order: by default (dayfirst = False), an ambiguous date such as '8, 1, 2014' is read month first, i.e. as 1st of August.

In [37]:
dti = pd.to_datetime(['2014, 8, 1'])
dti
# 8 is taken as a month and 1 as a date.

DatetimeIndex(['2014-08-01'], dtype='datetime64[ns]', freq=None)

In [38]:
dti = pd.to_datetime(['8, 1, 2014'])
dti
# same result produced, 8 is the month and 1 is the date.

DatetimeIndex(['2014-08-01'], dtype='datetime64[ns]', freq=None)

In [39]:
dti = pd.to_datetime(['8, 1, 2014'], dayfirst = True)
dti
# if we mean to say that the date is 8th of January, then we have to tell pandas that the first argument is the day or
# date by typing 'dayfirst = True' since by default pandas will treat the first value as the month and the second as
# a day or date.
# now the date is 8th of January as we desired.

DatetimeIndex(['2014-01-08'], dtype='datetime64[ns]', freq=None)

In [40]:
dti[0]

Timestamp('2014-01-08 00:00:00')

In [41]:
# Using the date_range function to create a DatetimeIndex (a sequence of Timestamps):

In [42]:
dates = pd.date_range('8, 1, 2014', periods = 10)
dates

DatetimeIndex(['2014-08-01', '2014-08-02', '2014-08-03', '2014-08-04',
               '2014-08-05', '2014-08-06', '2014-08-07', '2014-08-08',
               '2014-08-09', '2014-08-10'],
              dtype='datetime64[ns]', freq='D')

In [43]:
s1 = pd.Series(np.random.randn(10), dates)
s1

2014-08-01    1.3243786
2014-08-02    0.8686872
2014-08-03    1.7705583
2014-08-04    0.8948812
2014-08-05    0.2853944
2014-08-06   -0.0523608
2014-08-07    0.5462764
2014-08-08    2.2903047
2014-08-09   -0.4275418
2014-08-10   -0.8488491
Freq: D, dtype: float64

In [44]:
s1.index

DatetimeIndex(['2014-08-01', '2014-08-02', '2014-08-03', '2014-08-04',
               '2014-08-05', '2014-08-06', '2014-08-07', '2014-08-08',
               '2014-08-09', '2014-08-10'],
              dtype='datetime64[ns]', freq='D')

In [45]:
type(s1.index)

pandas.core.indexes.datetimes.DatetimeIndex

# S&P 500 Example:

In [46]:
msft = data.DataReader('MSFT', 'yahoo', '2012-1-1', '2013-12-30')
msft
# Load MSFT price data from the 'yahoo' source of pandas_datareader for the range 2012-1-1 to 2013-12-30.
# The result is a DataFrame indexed by Date with the columns High, Low, Open, Close, Volume and Adj Close.
# NOTE(review): presumably this performs a live network request, so the cell depends on the remote source still
# being available -- confirm before relying on Restart & Run All.

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-01-03,26.9599991,26.3899994,26.5499992,26.7700005,64731500.0,21.6231327
2012-01-04,27.4699993,26.7800007,26.8199997,27.3999996,80516100.0,22.1320114
2012-01-05,27.7299995,27.2900009,27.3799992,27.6800003,56081400.0,22.3581753
2012-01-06,28.1900005,27.5300007,27.5300007,28.1100006,99455500.0,22.7055073
2012-01-09,28.1000004,27.7199993,28.0499992,27.7399998,59706800.0,22.4066372
...,...,...,...,...,...,...
2013-12-23,36.8899994,36.5499992,36.8100014,36.6199989,25128700.0,31.3362370
2013-12-24,37.1699982,36.6399994,36.7200012,37.0800018,14243000.0,31.7298622
2013-12-26,37.4900017,37.1699982,37.2000008,37.4399986,17612800.0,32.0379295
2013-12-27,37.6199989,37.1699982,37.5800018,37.2900009,14563000.0,31.9095669


In [47]:
msft[['High', 'Low']]
# Passing a list of column labels to [] selects just those columns. The result is still a DataFrame and keeps the
# same DatetimeIndex (Date) for its rows, so the selected columns can be accessed for any period.

Unnamed: 0_level_0,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-01-03,26.9599991,26.3899994
2012-01-04,27.4699993,26.7800007
2012-01-05,27.7299995,27.2900009
2012-01-06,28.1900005,27.5300007
2012-01-09,28.1000004,27.7199993
...,...,...
2013-12-23,36.8899994,36.5499992
2013-12-24,37.1699982,36.6399994
2013-12-26,37.4900017,37.1699982
2013-12-27,37.6199989,37.1699982


In [48]:
msftAC = msft['Adj Close']
msftAC.head(5)

Date
2012-01-03    21.6231327
2012-01-04    22.1320114
2012-01-05    22.3581753
2012-01-06    22.7055073
2012-01-09    22.4066372
Name: Adj Close, dtype: float64

In [49]:
msftAC['2012-01-03']
# Note that msftAC is a Series; therefore a row label (here a date from its DatetimeIndex) can be used with []
# directly, without slicing, loc or iloc.

21.623132705688477

In [50]:
msftAC['2012-01-01':'2012-01-05']
# DatetimeIndex can easily allow access to any chunk of dataframe for any desirable period.

Date
2012-01-03    21.6231327
2012-01-04    22.1320114
2012-01-05    22.3581753
Name: Adj Close, dtype: float64

In [51]:
msft.loc['2012-02']
# Partial-date selection: pandas recognises '2012-02' as a month and returns every row of the DatetimeIndex that falls
# inside it. .loc is used so the key is unambiguously looked up along the rows; the plain msft['2012-02'] form (which
# could equally mean a column label) emits a deprecation warning and is no longer supported by newer pandas versions.

  msft['2012-02']


Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-02-01,30.0499992,29.7600002,29.7900009,29.8899994,67409900.0,24.14328
2012-02-02,30.1700001,29.7099991,29.8999996,29.9500008,52223300.0,24.1917305
2012-02-03,30.3999996,30.0900002,30.1399994,30.2399998,41838500.0,24.4259834
2012-02-06,30.2199993,29.9699993,30.0400009,30.2000008,28039700.0,24.393671
2012-02-07,30.4899998,30.0499992,30.1499996,30.3500004,39242400.0,24.5148392
2012-02-08,30.6700001,30.2199993,30.2600002,30.6599998,49659100.0,24.7652359
2012-02-09,30.7999992,30.4799995,30.6800003,30.7700005,50481600.0,24.8540821
2012-02-10,30.7999992,30.3600006,30.6399994,30.5,44605300.0,24.6359978
2012-02-13,30.7700005,30.4300003,30.6299992,30.5799999,33319800.0,24.7006149
2012-02-14,30.4599991,29.8500004,30.3299999,30.25,59644000.0,24.594923


In [52]:
msft.loc['2012-02':'2012-02-09']
# Row slice on the DatetimeIndex: the partial start date '2012-02' resolves to the beginning of that month, and the
# end date of the slice is included in the result.

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-02-01,30.0499992,29.7600002,29.7900009,29.8899994,67409900.0,24.14328
2012-02-02,30.1700001,29.7099991,29.8999996,29.9500008,52223300.0,24.1917305
2012-02-03,30.3999996,30.0900002,30.1399994,30.2399998,41838500.0,24.4259834
2012-02-06,30.2199993,29.9699993,30.0400009,30.2000008,28039700.0,24.393671
2012-02-07,30.4899998,30.0499992,30.1499996,30.3500004,39242400.0,24.5148392
2012-02-08,30.6700001,30.2199993,30.2600002,30.6599998,49659100.0,24.7652359
2012-02-09,30.7999992,30.4799995,30.6800003,30.7700005,50481600.0,24.8540821


# Creating time-series with specific frequencies:

In [53]:
dates = pd.date_range('2014-08-01', '2014-10-29', freq = 'min')
dates
# Create a DatetimeIndex with one-minute frequency between the two dates (both end points included).
# 'min' is the spelled-out alias for minutes; the single-letter alias 'T' means the same thing but is deprecated in
# newer pandas versions. The default frequency is 'D' for daily; hourly, weekly, monthly etc. are also possible.

DatetimeIndex(['2014-08-01 00:00:00', '2014-08-01 00:01:00',
               '2014-08-01 00:02:00', '2014-08-01 00:03:00',
               '2014-08-01 00:04:00', '2014-08-01 00:05:00',
               '2014-08-01 00:06:00', '2014-08-01 00:07:00',
               '2014-08-01 00:08:00', '2014-08-01 00:09:00',
               ...
               '2014-10-28 23:51:00', '2014-10-28 23:52:00',
               '2014-10-28 23:53:00', '2014-10-28 23:54:00',
               '2014-10-28 23:55:00', '2014-10-28 23:56:00',
               '2014-10-28 23:57:00', '2014-10-28 23:58:00',
               '2014-10-28 23:59:00', '2014-10-29 00:00:00'],
              dtype='datetime64[ns]', length=128161, freq='T')

In [54]:
np_array = np.arange(0, 90 * 60 * 24)
np_array
# Integers 0 .. 129599, i.e. 90 * 60 * 24 = 129600 values (one value per minute for 90 days).
# NOTE(review): the minute-frequency `dates` index created in the previous cell has length 128161, so this array
# cannot be paired with it as-is in a Series; np.arange(len(dates)) would match -- confirm the intended use.

array([     0,      1,      2, ..., 129597, 129598, 129599])

### Representing intervals of time using periods:
#### `pd.Period(...)` creates a Period object: a span of time whose start and end are determined by the given date and the specified frequency.

In [56]:
aug2014 = pd.Period('2014-08')
aug2014
# Period('2014-08', 'M') represents the whole month of August 2014 as a single span (from the 1st to the 31st), not
# individual dates. The monthly frequency 'M' is inferred from the partial date string.

Period('2014-08', 'M')

In [None]:
aug2014.start_time, aug2014.end_time
# pandas assumes the start date to be 1st of Aug, when only month (partial date) is mentioned. Then using the one 
# month frequency, it figures the end date to be 31st of Aug.

In [None]:
sep2014 = aug2014 + 1
sep2014
# Operator overloading: the plus sign here carries a different meaning and operation than its usual numeric behaviour.
# In this case, adding 1 to aug2014 creates the next month (sep2014), since the existing period (aug2014) is
# based on a monthly frequency. If it were based on a quarterly frequency, then +1 would create the next quarter.

In [None]:
sep2014.start_time, sep2014.end_time
# note that the pandas itself has figured out that September has 30 days unlike Aug that ended with the date 31.

In [None]:
# Period Objects: can be combined to form a PeriodIndex as shown below:

In [None]:
mp2013 = pd.period_range('2013-01-01', '2013-12-31', freq = 'M')
mp2013
# PeriodIndex has been created as shown below and confirmed in the type command.
# In PeriodIndex, the index labels are period objects whereas in DatetimeIndex the index labels are TimeStamps.

In [None]:
type(mp2013)

In [None]:
type(mp2013[0])
# PeriodIndex contains period objects.

In [None]:
for p in mp2013:
    print(f"{p} {p.freq} {p.start_time} {p.end_time}")
# Each Period in 'mp2013' (starting with '2013-01') is bound to p in turn and printed on its own line. The f-string
# substitutes, in order, the period itself, its frequency, and its start and end timestamps, separated by spaces.
# The elements of a PeriodIndex are Period objects, which is why the .start_time and .end_time properties are
# available on p.

In [None]:
s = np.random.randn(12)
s

In [None]:
pandas_series = pd.Series(s, mp2013)
pandas_series
# This series has index labels that are Period objects, so the index is a PeriodIndex rather than a DatetimeIndex.
# This type of indexing is especially helpful when we are interested in values per period, e.g. average prices for
# each month, rather than prices at specific dates.