In [1]:
#import dependencies
import numpy as np
import pandas as pd

## 1. Generating TimeStamps with pd.date_range()

In [2]:
#generating timestamps with different frequencies
#the times accepted and generated by pandas are human readable
#
#a date range is not just an arbitrary collection of timestamps:
#consecutive timestamps are separated by a fixed frequency.

#dates recurring on a monthly basis
#despite giving the 12th as the start date, every generated date is a month end,
#because that is the anchor of the month-end frequency.
#normalize=True resets the time of day (17:10 here) to midnight, which is why
#the printed index shows dates only; the elements are still full timestamps.
#pd.offsets.MonthEnd() is used instead of the 'M' string alias: the alias was
#deprecated for date_range in pandas 2.2 (renamed 'ME'), while the offset object
#is accepted by old and new pandas releases alike.
rng_monthly = pd.date_range(start='2018 Apr 12 17:10', periods=10,
                            freq=pd.offsets.MonthEnd(), normalize=True)
print(rng_monthly)

#dates recurring on a daily basis (the 14:00 time of day is kept)
rng_daily = pd.date_range(start='2018 Apr 5 14:00', periods=5, freq='D')
print(rng_daily)

#dates recurring on a weekday basis ('Business Day Frequency')
#by default, saturday and sunday are skipped
rng_weekdays = pd.date_range(start='2018 Apr 6 13:00', periods=5, freq='B')
print(rng_weekdays)

DatetimeIndex(['2018-04-30', '2018-05-31', '2018-06-30', '2018-07-31',
               '2018-08-31', '2018-09-30', '2018-10-31', '2018-11-30',
               '2018-12-31', '2019-01-31'],
              dtype='datetime64[ns]', freq='M')
DatetimeIndex(['2018-04-05 14:00:00', '2018-04-06 14:00:00',
               '2018-04-07 14:00:00', '2018-04-08 14:00:00',
               '2018-04-09 14:00:00'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2018-04-06 13:00:00', '2018-04-09 13:00:00',
               '2018-04-10 13:00:00', '2018-04-11 13:00:00',
               '2018-04-12 13:00:00'],
              dtype='datetime64[ns]', freq='B')


## 2. Generating a Timestamp with pd.Timestamp()

In [7]:
#a Timestamp is a specific point in time, not just a calendar day,
#so a date-only string is displayed with a time of 00:00:00 (midnight)
t = pd.Timestamp('23 Jun 1984')
t

Timestamp('1984-06-23 00:00:00')

In [54]:
#the recorded output shows the fractional seconds cut to 6 decimal places (microseconds)
#NOTE(review): whether the remaining digits are kept (e.g. readable via t.nanosecond)
#or dropped during parsing depends on the pandas version - verify before relying on them
t = pd.Timestamp('23 Jun 1984 23:45:12.45939988')
t

Timestamp('1984-06-23 23:45:12.459399')

## 3. What date format does pandas use by default (American/Rest of the World)?

In [None]:
"""
The variable t1 is 23rd July 1999, written in European (day-first) format.
When given an unambiguous date string like t1 (23 cannot be a month), pandas detects that
you are using the European format, and therefore takes the month from the value in the middle (July in this case).

However, when given an ambiguous date string like t2 (both 6 and 7 could be the month), pandas defaults to
the American format of mm/dd/yyyy and takes the month from the first value (June in this case).
"""

In [3]:
#t1: 23 cannot be a month, so the string is read day-first (23 July 1999)
t1 = pd.Timestamp('23/7/1999')
#t2: both 6 and 7 could be a month; it is read month-first (June 7, 1999)
t2 = pd.Timestamp('6/7/1999')
t1, t2

(Timestamp('1999-07-23 00:00:00'), Timestamp('1999-06-07 00:00:00'))

In [4]:
"""
We can see how pandas defaults to the American date format
by seeing which timestamp matches the verbose timestamp t3
"""
#european (day-first) spelling of July 1, 1982
t1 = pd.Timestamp('1/7/1982')
#american (month-first) spelling of July 1, 1982
t2 = pd.Timestamp('7/1/1982')
#the intended timestamp, spelled out unambiguously in t3
t3 = pd.Timestamp('July 1 1982')
t1, t2, t3
#t2 (the month-first string) is the one that equals t3, so an ambiguous string is parsed month-first
#the yyyy-mm-dd order seen in the output is only how a Timestamp is displayed (ISO 8601 order);
#it says nothing about how the input string was interpreted

(Timestamp('1982-01-07 00:00:00'),
 Timestamp('1982-07-01 00:00:00'),
 Timestamp('1982-07-01 00:00:00'))

## 4. A couple of the many useful methods in Timestamp

In [5]:
t = pd.Timestamp('June 10 1987 23:25:16')
#quarter tells which quarter of the year a timestamp falls into (June is in quarter 2)
t.quarter

2

In [6]:
#pandas is quite flexible when it comes to accepting human readable times:
#here a 12-hour clock time ("8 pm") is understood as 20:00
t = pd.Timestamp('Dec 31 1968 8 pm')
t

Timestamp('1968-12-31 20:00:00')

In [12]:
#you can also find what day of the week a given Timestamp is! (among other things)
t = pd.Timestamp('31/03/1999 2:35 pm')
#dayofweek returns a value from 0 to 6 (Monday to Sunday), so the lookup table
#needs all seven names; without "Sunday" any Sunday timestamp raised an IndexError
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
#(Timestamp.day_name() returns the same name without a hand-written table)
days[t.dayofweek]

'Wednesday'

### 4.1 Adding intervals to timestamps

In [13]:
#In reality time intervals are generally more useful, in comparison to a specific timestamp.
#Because, in the real world, there are situations where you agree to a time interval, rather than a super specific timestamp.

#To get there, we first need a concept of an interval or delta of time.

In [14]:
#a time delta of one day plus one millisecond, built from a human readable string
pd.Timedelta('1 day 1ms')

Timedelta('1 days 00:00:00.001000')

In [15]:
#adding an hour and a half to a timestamp: Timestamp + Timedelta gives a new Timestamp
pd.Timestamp('12 Jun 1898 8 pm') + pd.Timedelta('1.5 hours') #1.5 hr works equally well

Timestamp('1898-06-12 21:30:00')

In [16]:
#adding 50 minutes to each of the periodically occurring timestamps in rng_monthly
#pandas lets us add a timedelta to a whole date range at once,
#not just to a single timestamp; the result is a new index (rng_monthly is not modified)
rng_monthly + pd.Timedelta('50 min')

DatetimeIndex(['2018-04-30 00:50:00', '2018-05-31 00:50:00',
               '2018-06-30 00:50:00', '2018-07-31 00:50:00',
               '2018-08-31 00:50:00', '2018-09-30 00:50:00',
               '2018-10-31 00:50:00', '2018-11-30 00:50:00',
               '2018-12-31 00:50:00', '2019-01-31 00:50:00'],
              dtype='datetime64[ns]', freq='M')

In [17]:
#a variant of the previous command: subtracting a timedelta
#in this example, it is the same as shifting the whole date range back by one day
rng_daily - pd.Timedelta('1 day')

DatetimeIndex(['2018-04-04 14:00:00', '2018-04-05 14:00:00',
               '2018-04-06 14:00:00', '2018-04-07 14:00:00',
               '2018-04-08 14:00:00'],
              dtype='datetime64[ns]', freq='D')

## 5. Time Spans

    We now get to time intervals/spans, which is what we've been after since section 4.
    In pandas these are denoted by Period.

In [22]:
#creating a span of one month, namely the month of july 2016
p = pd.Period('7/2016')

#we can create a timestamp and check whether it falls within a given interval/span/period
ts = pd.Timestamp('21/7/2016')
#the comparison is inclusive on both ends: a timestamp exactly equal to
#p.start_time or p.end_time still belongs to the period
ts_in_period = p.start_time <= ts <= p.end_time
#prints True since ts falls within the month of July in 2016
print(ts_in_period)
print(p.start_time)
print(p.end_time)

True
2016-07-01 00:00:00
2016-07-31 23:59:59.999999999


In [27]:
#an hourly period range: notice that the 12:15 start gets rounded to a "nice"
#readable time, namely the hour that contains it.
#offset objects are passed instead of the 'H' and '60T' string aliases, because
#those spellings were deprecated in pandas 2.2 (renamed 'h' and 'min'), while
#the offset objects are accepted by old and new pandas releases alike.
hourly_rng = pd.period_range('21 Nov 1989 12:15', freq=pd.offsets.Hour(), periods=10)
print(hourly_rng)

#if you want the period_range to be generated at the exact specified minute,
#use a 60-minute frequency: the periods then have minute resolution
minute_rng = pd.period_range('21 Nov 1989 12:15', freq=pd.offsets.Minute(60), periods=10)
print(minute_rng)

#The dtype differentiates a date_range from a period_range.
#Conceptually, a date_range is a collection of timestamps,
#while a period_range is a collection of time intervals.
#Both have a frequency as part of their definition.

#you can also have custom frequencies, if you want updates every 2h20min, there you go!
period_rng = pd.period_range('31 Oct 2006', freq='2h20min', periods=10)
print(period_rng)
#every element is a Period, i.e. an interval with its own start and end
for index, time_period in enumerate(period_rng[:2]):
    print("Time period {} starts at {} and ends at {}".format(index, time_period.start_time, time_period.end_time))

PeriodIndex(['1989-11-21 12:00', '1989-11-21 13:00', '1989-11-21 14:00',
             '1989-11-21 15:00', '1989-11-21 16:00', '1989-11-21 17:00',
             '1989-11-21 18:00', '1989-11-21 19:00', '1989-11-21 20:00',
             '1989-11-21 21:00'],
            dtype='period[H]', freq='H')
PeriodIndex(['1989-11-21 12:15', '1989-11-21 13:15', '1989-11-21 14:15',
             '1989-11-21 15:15', '1989-11-21 16:15', '1989-11-21 17:15',
             '1989-11-21 18:15', '1989-11-21 19:15', '1989-11-21 20:15',
             '1989-11-21 21:15'],
            dtype='period[60T]', freq='60T')
PeriodIndex(['2006-10-31 00:00', '2006-10-31 02:20', '2006-10-31 04:40',
             '2006-10-31 07:00', '2006-10-31 09:20', '2006-10-31 11:40',
             '2006-10-31 14:00', '2006-10-31 16:20', '2006-10-31 18:40',
             '2006-10-31 21:00'],
            dtype='period[140T]', freq='140T')
Time period 0 starts at 2006-10-31 00:00:00 and ends at 2006-10-31 02:19:59.999999999
Time period 1 starts a

## 6. Lets make a Time Series (finally)!

In [29]:
#a time series is data indexed by time (here: 60-minute periods) as opposed to an index of numbers
numRange = 40
#pd.offsets.Minute(60) replaces the '60T' string alias, which was deprecated in
#pandas 2.2 (renamed '60min'); the offset object is accepted by old and new releases alike
timeseries = pd.Series(
    range(numRange),
    index=pd.period_range('2016-07-01 8:15pm', freq=pd.offsets.Minute(60), periods=numRange),
)
timeseries.head()

2016-07-01 20:15    0
2016-07-01 21:15    1
2016-07-01 22:15    2
2016-07-01 23:15    3
2016-07-02 00:15    4
Freq: 60T, dtype: int64

In [32]:
#finding the values that were recorded between 8pm on Jul 1st, 2016 and 5am on Jul 2nd, 2016
timeseries['2016-07-01 8:00pm':'2016-07-02 5:00am']
#if we had given 5am and not 5:00am, the observation recorded at 5:15 would have been included
#(ie, the observation recorded during the hour of 5am)

#you now have all the pandas functionality you usually have, but with time!

2016-07-01 20:15    0
2016-07-01 21:15    1
2016-07-01 22:15    2
2016-07-01 23:15    3
2016-07-02 00:15    4
2016-07-02 01:15    5
2016-07-02 02:15    6
2016-07-02 03:15    7
2016-07-02 04:15    8
Freq: 60T, dtype: int64

In [33]:
#time series for values recorded at a frequency of 60 Business days, indexed by periods
#NOTE(review): business-day periods appear to be deprecated in recent pandas releases - confirm
ts_pd = pd.Series(range(5, numRange + 5), pd.period_range('Aug 11 1937 4:00pm', freq='60B', periods=numRange))
#showing the first three observations, whose periods start 60 business days apart
ts_pd[:3]

#so in the following output, for the 60 business days starting from Aug 11, 1937
#the value recorded was 5 (for all the days falling within those 60 business days).

1937-08-11    5
1937-11-03    6
1938-01-26    7
Freq: 60B, dtype: int64

In [34]:
#here we make a time series indexed by the same values as above, but we use a date_range rather than a period_range
ts_dt = pd.Series(range(5, numRange + 5), pd.date_range('Aug 11 1937 4:00pm', freq='60B', periods=numRange))
#showing the first three observations, which are 60 business days apart
ts_dt[:3]

#so in the following output, the value recorded at 16:00 on Aug 11, 1937 was 5.
#It says nothing about the value for the next 60 business days.
#The next observation was recorded on Nov 03, 1937.

1937-08-11 16:00:00    5
1937-11-03 16:00:00    6
1938-01-26 16:00:00    7
Freq: 60B, dtype: int64

In [35]:
#so even though the two time series above might look the same,
#the first one talks about values over a period of time,
#while the second one refers to values recorded at a specific moment in time

#the same string slice is applied to both series
print(ts_pd['8-11-1937':'08-11-1938'], end='\n\n')
print(ts_dt['8-11-1937':'08-11-1938'])

#the reason why the time is displayed for the date_range and not the period range,
#is because the values recorded for the date_range were made at a "specific" timestamp.
#Those values recorded for the period_range are for the entirety of the period (here, 60 business days)

1937-08-11    5
1937-11-03    6
1938-01-26    7
1938-04-20    8
1938-07-13    9
Freq: 60B, dtype: int64

1937-08-11 16:00:00    5
1937-11-03 16:00:00    6
1938-01-26 16:00:00    7
1938-04-20 16:00:00    8
1938-07-13 16:00:00    9
Freq: 60B, dtype: int64


### 6.1 Converting between DateTimeIndex and PeriodIndex

In [40]:
#(the opposite conversion, timestamp data to period data, would be ts_dt.to_period())
#converting period data to timestamp data
ts_pd2 = ts_pd.to_timestamp()
print(ts_pd2['8-11-1937':'08-11-1938'])
#no time of day is displayed even though the index now holds timestamps;
#presumably because every converted timestamp sits at midnight - the periods carried no time-of-day information

1937-08-11    5
1937-11-03    6
1938-01-26    7
1938-04-20    8
1938-07-13    9
Freq: 12W-WED, dtype: int64


## 7.Exercises

In [50]:
"""
1. How can you create a pd.Timestamp with a European style formatted date string? hint: dayfirst flag
"""
#american format = mm/dd/yyyy (month is written before day)
#european format = dd/mm/yyyy (day is written before month)
american_dates = ['01/31/2014', '02/06/2013']

#each string is parsed twice: once with the default settings and once with dayfirst=True
for date in american_dates:
    #default: the first number is read as the month
    print(pd.to_datetime(date))
    #dayfirst=True: the first number is read as the day whenever that gives a valid date;
    #'01/31/2014' cannot be read day-first (there is no month 31), so its result is unchanged
    print(pd.to_datetime(date, dayfirst=True))

2014-01-31 00:00:00
2014-01-31 00:00:00
2013-02-06 00:00:00
2013-06-02 00:00:00


In [47]:
"""
2.How can you generate string representation in a desired format from a pd.Timestamp?
"""
import datetime

#render 10th of March 2011 (given month-first) in european day/month/year order;
#strftime always hands back a plain str
str_time = pd.Timestamp('03-10-2011').strftime('%d/%m/%Y')
print(str_time, type(str_time))

#a standard-library datetime offers the same strftime interface (%b = abbreviated month name)
time_rn = datetime.datetime.now()
print(time_rn.strftime('%d/%b/%Y'))

10/03/2011 <class 'str'>
13/Jun/2019
