# Timestamp

In [1]:
import pandas as pd 
import numpy as np


In [2]:
# For example, let's create a timestamp from the string '9/1/2019 10:05AM'. As the output below shows,
# the date part is read month-first, giving September 1st, 2019 at 10:05.
# Timestamp is interchangeable with Python's datetime in most cases.
pd.Timestamp('9/1/2019 10:05AM')

Timestamp('2019-09-01 10:05:00')

In [5]:
# A Timestamp can also be assembled from its individual components.
pd.Timestamp(year=2019, month=9, day=1, hour=10, minute=5, second=30)

Timestamp('2019-09-01 10:05:30')

In [6]:
# Timestamp also offers some useful methods, such as isoweekday(), which returns the day of the week
# as a number: 1 represents Monday and 7 represents Sunday.
pd.Timestamp(year=2019, month=12, day=20).isoweekday()

5

In [10]:
# The same weekday lookup for August 12th, 1989.
pd.Timestamp(year=1989, month=8, day=12).isoweekday()

6

In [11]:
# You can extract the specific year, month, day, hour, minute or second from a timestamp as an attribute.
pd.Timestamp(year=2019, month=12, day=20, hour=5, minute=2, second=23).second

23

# Period

In [12]:
# Suppose we weren't interested in a specific point in time and instead wanted a span of time. This is where
# the Period class comes into play. Period represents a single time span, such as a specific day or month.

# Here we are creating a period that is January 2016. The string carries no day, and the output below
# shows that the resulting Period has monthly ('M') frequency.
pd.Period('1/2016')

Period('2016-01', 'M')

In [13]:
# M for months, D for days, Y for years.
# Manipulating periods is easy: adding an integer moves the period forward by that many units of its
# own frequency. For example, to get the period 3 months after January 2016 we add 3.

pd.Period('1/2016')+3

Period('2016-04', 'M')

In [14]:
# And if we want the day that falls 65 days after December 18th, 1999, we simply add 65
# (this Period has daily frequency, as the 'D' in the output shows).

pd.Period('12/18/1999')+65

Period('2000-02-21', 'D')

### DatetimeIndex and PeriodIndex

In [15]:
# A collection of Timestamps used as an index becomes a DatetimeIndex. As a quick example, build the
# series t1 indexed by September 1st, 2nd and 3rd of 2016, so that each Timestamp label carries one of
# the values a, b and c.

t1 = pd.Series(
    data=['a', 'b', 'c'],
    index=[pd.Timestamp(day) for day in ('2016-09-01', '2016-09-02', '2016-09-03')],
)
t1

2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [16]:
# Looking at the type of our series index, we see that pandas turned the list of Timestamps
# into a DatetimeIndex.
type(t1.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [17]:
# In the same way, a list of Periods used as an index gives a period-based index.
t2 = pd.Series(
    data=['d', 'e', 'f'],
    index=[pd.Period(month) for month in ('2016-09', '2016-10', '2016-11')],
)
t2

2016-09    d
2016-10    e
2016-11    f
Freq: M, dtype: object

In [18]:
# The index built from Period objects is a PeriodIndex.
type(t2.index)

pandas.core.indexes.period.PeriodIndex

# Convert to datetime

In [20]:
# Now, let's look into how to convert to datetime. Suppose the dates arrive as plain strings and we
# want to use them as the index of a new DataFrame.

# Four dates, each deliberately written in a different format
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']

# A 4x2 frame of random integers in [10, 100), labelled by those strings
ts3 = pd.DataFrame(
    data=np.random.randint(low=10, high=100, size=(4, 2)),
    index=d1,
    columns=['a', 'b'],
)
ts3

Unnamed: 0,a,b
2 June 2013,79,51
"Aug 29, 2014",17,79
2015-06-26,38,87
7/12/16,82,46


In [21]:
# Convert the string labels into a DatetimeIndex. The four labels are written in four different
# formats, so each one is parsed on its own: since pandas 2.0 a single pd.to_datetime(ts3.index) call
# infers one format from the first label and raises ValueError for labels that do not match it.
# Parsing label by label works on both older and newer pandas, and re-running the cell is harmless
# because labels that are already Timestamps pass through unchanged.
ts3.index = pd.DatetimeIndex([pd.to_datetime(label) for label in ts3.index])
ts3

Unnamed: 0,a,b
2013-06-02,79,51
2014-08-29,17,79
2015-06-26,38,87
2016-07-12,82,46


In [22]:
# to_datetime() also has options to change the date parse order. For example, we
# can pass in the argument dayfirst=True to parse the date in the European (day-first) order,
# so '4.7.12' is read as the 4th of July 2012.

pd.to_datetime('4.7.12', dayfirst=True)

Timestamp('2012-07-04 00:00:00')

# Timedelta

In [24]:
# Timedeltas are differences in times. This is not the same as a period, but conceptually similar. For
# instance, if we take the difference between July 7th, 2000 and December 18th, 1999, we get a Timedelta
# of 202 days.
pd.Timestamp('7/7/2000')-pd.Timestamp('12/18/1999')

Timedelta('202 days 00:00:00')

In [26]:
# We can also shift a Timestamp directly by a Timedelta, without going through a Period.
# The duration is spelled with keyword arguments rather than the string '202D 3H': the upper-case 'H'
# unit alias is deprecated in pandas 2.2, while the keyword form means the same on every version.

pd.Timestamp('7/7/2000') - pd.Timedelta(days=202, hours=3)

Timestamp('1999-12-17 21:00:00')

## Offset

In [27]:
# Offset is similar to timedelta, but it follows specific calendar duration rules. Offset allows flexibility
# in terms of types of time intervals. Besides hour, day, week, month, etc. it also has business day, end of
# month, semi-month begin, etc.

# Let's create a timestamp and see what day of the week it is. Unlike isoweekday(), weekday() counts
# from 0 (Monday) to 6 (Sunday), so the 6 below means September 4th, 2016 is a Sunday.
pd.Timestamp('9/4/2016').weekday()

6

In [28]:
# Now we can add a Week offset to the timestamp to move it one week ahead
pd.Timestamp('9/4/2016') + pd.offsets.Week()

Timestamp('2016-09-11 00:00:00')

In [29]:
# Now let's try the month-end offset: adding MonthEnd moves the timestamp to the last day of September
pd.Timestamp('9/4/2016') + pd.offsets.MonthEnd()

Timestamp('2016-09-30 00:00:00')

### Working with dates in data frames

In [36]:
# Next, let's look at a few tricks for working with dates in a DataFrame. Suppose we want nine
# measurements, taken bi-weekly, every Saturday, starting on December 18th, 1999. Using date_range, we
# can create this DatetimeIndex. In date_range we have to specify either the start or the end date; a
# date passed without a keyword is taken as the start date. Then we specify the number of periods and a
# frequency. Here the frequency is "2W-SAT", which means every second week, on Saturday.

dates = pd.date_range(start='12/18/1999', periods=9, freq='2W-SAT')
dates

DatetimeIndex(['1999-12-18', '2000-01-01', '2000-01-15', '2000-01-29',
               '2000-02-12', '2000-02-26', '2000-03-11', '2000-03-25',
               '2000-04-08'],
              dtype='datetime64[ns]', freq='2W-SAT')

In [37]:
# There are many other frequencies that you can specify. For example, 'B' gives business days only.
# October 1st, 2016 falls on a weekend, so the range below begins on Monday, October 3rd and skips
# the following weekend as well.
pd.date_range('10-01-2016', periods=9, freq='B')

DatetimeIndex(['2016-10-03', '2016-10-04', '2016-10-05', '2016-10-06',
               '2016-10-07', '2016-10-10', '2016-10-11', '2016-10-12',
               '2016-10-13'],
              dtype='datetime64[ns]', freq='B')

In [38]:
# Or you can do quarterly, with the quarter starts anchored on June (i.e. the 1st of March, June,
# September and December). As the output shows, the first quarter start on or after April 1st, 2016
# is June 1st, 2016.
pd.date_range('04-01-2016', periods=12, freq='QS-JUN')

DatetimeIndex(['2016-06-01', '2016-09-01', '2016-12-01', '2017-03-01',
               '2017-06-01', '2017-09-01', '2017-12-01', '2018-03-01',
               '2018-06-01', '2018-09-01', '2018-12-01', '2019-03-01'],
              dtype='datetime64[ns]', freq='QS-JUN')

In [39]:
# Back to a bi-weekly-on-Sunday schedule starting in October 2016: build a DataFrame indexed by those
# dates and filled with some random data, and see what we can do with it.
# 'Count 1' is a random walk around 100 (cumulative sum of steps in [-5, 10)); 'Count 2' is independent
# noise around 120.

dates = pd.date_range(start='10-01-2016', periods=9, freq='2W-SUN')
df = pd.DataFrame(
    data={
        'Count 1': 100 + np.random.randint(low=-5, high=10, size=9).cumsum(),
        'Count 2': 120 + np.random.randint(low=-5, high=10, size=9),
    },
    index=dates,
)
df

Unnamed: 0,Count 1,Count 2
2016-10-02,100,125
2016-10-16,97,127
2016-10-30,102,128
2016-11-13,102,120
2016-11-27,105,125
2016-12-11,103,126
2016-12-25,98,119
2017-01-08,95,116
2017-01-22,103,124


In [42]:
# First, we can check what day of the week each date is. weekday counts from 0 (Monday) to 6 (Sunday),
# so the output shows that all the dates in our index fall on a Sunday, which matches the frequency
# that we set.
df.index.weekday

Int64Index([6, 6, 6, 6, 6, 6, 6, 6, 6], dtype='int64')

In [44]:
# diff() gives the change between each row and the previous one; the first row has no predecessor,
# so it comes out empty (NaN).
df.diff()

Unnamed: 0,Count 1,Count 2
2016-10-02,,
2016-10-16,-3.0,2.0
2016-10-30,5.0,1.0
2016-11-13,0.0,-8.0
2016-11-27,3.0,5.0
2016-12-11,-2.0,1.0
2016-12-25,-5.0,-7.0
2017-01-08,-3.0,-3.0
2017-01-22,8.0,8.0


In [45]:
# Suppose we want to know what the mean count is for each month in our DataFrame. We can do this using
# resample. Converting from a higher frequency to a lower frequency (here, bi-weekly to monthly) is
# called downsampling (we'll talk about this in a moment).
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME' — confirm the target pandas version.
df.resample('M').mean()

Unnamed: 0,Count 1,Count 2
2016-10-31,99.666667,126.666667
2016-11-30,103.5,122.5
2016-12-31,100.5,122.5
2017-01-31,99.0,120.0


In [46]:
# Now let's talk about datetime indexing and slicing, which is a wonderful feature of the pandas DataFrame.
# For instance, we can use partial string indexing to find the rows from a particular year.
# Row selection goes through .loc: df['2017'] was deprecated in pandas 1.2 and raises KeyError since
# pandas 2.0, because plain [] with a single string looks up a column.
df.loc['2017']

Unnamed: 0,Count 1,Count 2
2017-01-08,95,116
2017-01-22,103,124


In [48]:
# Partial string indexing also works at finer resolution: here we select the rows from a particular
# month, October 2016. As with whole years, the selection goes through .loc, because df['2016-10']
# raises KeyError since pandas 2.0 (plain [] with a single string looks up a column).
df.loc['2016-10']

Unnamed: 0,Count 1,Count 2
2016-10-02,100,125
2016-10-16,97,127
2016-10-30,102,128


In [49]:
# Or we can even slice on a range of dates. For example, here we only want the rows from December 2016
# onwards; an open-ended slice keeps everything up to the last row.
df.loc['2016-12':]

Unnamed: 0,Count 1,Count 2
2016-12-11,103,126
2016-12-25,98,119
2017-01-08,95,116
2017-01-22,103,124


In [50]:
# Select every row from 2016. .loc is required here: df['2016'] raises KeyError since pandas 2.0,
# because plain [] with a single string looks up a column rather than rows.
df.loc['2016']

Unnamed: 0,Count 1,Count 2
2016-10-02,100,125
2016-10-16,97,127
2016-10-30,102,128
2016-11-13,102,120
2016-11-27,105,125
2016-12-11,103,126
2016-12-25,98,119
