In [1]:
import pandas as pd
import numpy as np

# Generate series of times

In [2]:
# specify with start date & number of periods
rng = pd.date_range('2016 Jul 15 10:15', periods = 10, freq = 'M')
rng

DatetimeIndex(['2016-07-31 10:15:00', '2016-08-31 10:15:00',
               '2016-09-30 10:15:00', '2016-10-31 10:15:00',
               '2016-11-30 10:15:00', '2016-12-31 10:15:00',
               '2017-01-31 10:15:00', '2017-02-28 10:15:00',
               '2017-03-31 10:15:00', '2017-04-30 10:15:00'],
              dtype='datetime64[ns]', freq='M')

In [3]:
# Specify both a start and an end; steps are 8 hours apart.
# tz makes the index timezone-aware (see the +07:00 offsets in the output).
rng = pd.date_range('2016 Jul 15 10:15', '2016 Jul 25', freq='8H', tz='Asia/Bangkok')
rng

# counting backward also works
# NOTE(review): the commented-out call below is the same forward range without tz,
# not a backward count — presumably an end=/periods= variant was intended; confirm.
# rng = pd.date_range('2016 Jul 15 10:15', '2016 Jul 25', freq = '8H')

DatetimeIndex(['2016-07-15 10:15:00+07:00', '2016-07-15 18:15:00+07:00',
               '2016-07-16 02:15:00+07:00', '2016-07-16 10:15:00+07:00',
               '2016-07-16 18:15:00+07:00', '2016-07-17 02:15:00+07:00',
               '2016-07-17 10:15:00+07:00', '2016-07-17 18:15:00+07:00',
               '2016-07-18 02:15:00+07:00', '2016-07-18 10:15:00+07:00',
               '2016-07-18 18:15:00+07:00', '2016-07-19 02:15:00+07:00',
               '2016-07-19 10:15:00+07:00', '2016-07-19 18:15:00+07:00',
               '2016-07-20 02:15:00+07:00', '2016-07-20 10:15:00+07:00',
               '2016-07-20 18:15:00+07:00', '2016-07-21 02:15:00+07:00',
               '2016-07-21 10:15:00+07:00', '2016-07-21 18:15:00+07:00',
               '2016-07-22 02:15:00+07:00', '2016-07-22 10:15:00+07:00',
               '2016-07-22 18:15:00+07:00', '2016-07-23 02:15:00+07:00',
               '2016-07-23 10:15:00+07:00', '2016-07-23 18:15:00+07:00',
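               '2016-07-24 02:15:00+07:00', '2016-07-24 10:15:00+07:00',
               '2016-07-24 18:15:00+07:00'],
              dtype='datetime64[ns, Asia/Bangkok]', freq='8H')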

### let's explore some of the other options for date_range
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html

In particular, let's take a look at 'normalize' and 'end'

In [4]:
rng = pd.date_range('2016 Jul 15', periods = 10, freq = 'M')
rng

DatetimeIndex(['2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',
               '2017-03-31', '2017-04-30'],
              dtype='datetime64[ns]', freq='M')

In [5]:
# each element of the DatetimeIndex is a pandas Timestamp
rng[0]  # evaluated but not echoed: only the last expression of a cell is displayed
type(rng[0])

pandas._libs.tslib.Timestamp

### Which of these formats DON'T work?
'2016 Jul 1', '7/1/2016', '1/7/2016', 'July 1, 2016', '2016-07-01'


#### You'll want to try this with pd.to_datetime
#### All of them work. Note that 1/7/2016 is in January (pandas uses American format if ambiguous)!

In [6]:
# You may encounter the need to deal with European style formatted dates. 
# In that case, you'll want to use pd.to_datetime (with dayfirst=True)

In [7]:
# See the behavior with the following examples. Do you see the pattern?

In [8]:
pd.to_datetime('01/07/2016', dayfirst = True)

Timestamp('2016-07-01 00:00:00')

In [9]:
pd.to_datetime('01/07/2016', dayfirst = False)

Timestamp('2016-01-07 00:00:00')

In [10]:
pd.to_datetime('01/07/2016', dayfirst = True) == pd.to_datetime('01/07/2016', dayfirst = False)

False

In [11]:
pd.to_datetime('01-07-2016', dayfirst = True) == pd.to_datetime('01-07-2016', dayfirst = False)

False

In [12]:
pd.to_datetime('2016-01-07', dayfirst = True) == pd.to_datetime('2016-01-07', dayfirst = False)

True

# Timestamps

In [13]:
# add increasing detail
pd.Timestamp('2016-07-10')

Timestamp('2016-07-10 00:00:00')

In [14]:
pd.Timestamp('2016-07-10 10:15:15')

Timestamp('2016-07-10 10:15:15')

### How much detail can you add?

In [15]:
# max. resolution is ns (extra fractional digits are dropped, as the output shows)
pd.Timestamp('2016-07-10 10:15:15.1231243125895534')

Timestamp('2016-07-10 10:15:15.123124312')

### What are some properties of time stamps? 
Hint: http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-date-components

In [16]:
t = pd.Timestamp('2016-07-10 10:15:15')

In [17]:
t.quarter

3

In [18]:
t.is_quarter_end

False

In [19]:
# Name of the weekday for this timestamp.
# day_name() replaces the weekday_name attribute, which was deprecated and then removed in pandas 1.0.
t.day_name()

'Sunday'

In [20]:
t = pd.Timestamp('2016-07-10 8 pm')
t

Timestamp('2016-07-10 20:00:00')

### How are time stamps lacking?

In [21]:
# starting point
pd.to_datetime(0, unit='ns')

Timestamp('1970-01-01 00:00:00')

# Time offsets

In [22]:
pd.Timedelta('1 day 1us')

Timedelta('1 days 00:00:00.000001')

In [23]:
pd.Timestamp('2016-07-01 8:00') + pd.Timedelta('1.5 hours')

Timestamp('2016-07-01 09:30:00')

In [24]:
# Looks like a bug, but the nanoseconds are not lost: multiplying by 1000
# gives the 15 microseconds shown in the output below.
pd.Timedelta('15ns')*1000 # bug? what kind of bug?
# NOTE(review): presumably the repr of Timedelta('15ns') alone stops at microseconds
# (the original note said "ms") in the pandas version used here — confirm. Try *1000.

Timedelta('0 days 00:00:00.000015')

In [25]:
rng

DatetimeIndex(['2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31', '2017-01-31', '2017-02-28',
               '2017-03-31', '2017-04-30'],
              dtype='datetime64[ns]', freq='M')

In [26]:
rng + pd.Timedelta('1day')

DatetimeIndex(['2016-08-01', '2016-09-01', '2016-10-01', '2016-11-01',
               '2016-12-01', '2017-01-01', '2017-02-01', '2017-03-01',
               '2017-04-01', '2017-05-01'],
              dtype='datetime64[ns]', freq='MS')

# Time spans

In [27]:
p = pd.Period('7/2016')
# feed month/year --> default period = 1month
# feed date/month/year --> default period = 1day, ... and so on ...
p

Period('2016-07', 'M')

In [28]:
t = pd.Timestamp('7/21/2016')
p.start_time < t and p.end_time > t

True

In [29]:
# note the rounding in time
rng = pd.period_range('2016-01-01 12:15', freq = 'H', periods = 10)
rng

PeriodIndex(['2016-01-01 12:00', '2016-01-01 13:00', '2016-01-01 14:00',
             '2016-01-01 15:00', '2016-01-01 16:00', '2016-01-01 17:00',
             '2016-01-01 18:00', '2016-01-01 19:00', '2016-01-01 20:00',
             '2016-01-01 21:00'],
            dtype='period[H]', freq='H')

In [30]:
# With a minute-based frequency the periods stay anchored at the given start (12:15)
# instead of being floored to the full hour as with freq='H'.
# 'min' is used instead of the 'T' alias, which recent pandas releases deprecate.
rng = pd.period_range('2016-01-01 12:15', freq = '60min', periods = 10)
rng

PeriodIndex(['2016-01-01 12:15', '2016-01-01 13:15', '2016-01-01 14:15',
             '2016-01-01 15:15', '2016-01-01 16:15', '2016-01-01 17:15',
             '2016-01-01 18:15', '2016-01-01 19:15', '2016-01-01 20:15',
             '2016-01-01 21:15'],
            dtype='period[60T]', freq='60T')

In [31]:
rng = pd.period_range('2016-01-01 12:15', freq = '1H', periods = 10)
rng

PeriodIndex(['2016-01-01 12:00', '2016-01-01 13:00', '2016-01-01 14:00',
             '2016-01-01 15:00', '2016-01-01 16:00', '2016-01-01 17:00',
             '2016-01-01 18:00', '2016-01-01 19:00', '2016-01-01 20:00',
             '2016-01-01 21:00'],
            dtype='period[H]', freq='H')

### How can you determine whether a timestamp falls within a given period?

In [32]:
# p_temp is built from a date-only string; t_temp lies two months later,
# so the containment check below evaluates to False.
p_temp = pd.Period('2017-06-13')
t_temp = pd.Timestamp('2017-08-13 16:34')
# strict comparison against both ends of the period
p_temp.start_time < t_temp and t_temp < p_temp.end_time

False

In [33]:
# This also works
p_temp.start_time < t_temp < p_temp.end_time

False

In [34]:
# %load snippets/startend.py

### Try out some other functionality with different offset-aliases. Anything interesting?

In [35]:
rng = pd.date_range('2017-06-13 21:14', periods=10, freq='B')
rng

DatetimeIndex(['2017-06-13 21:14:00', '2017-06-14 21:14:00',
               '2017-06-15 21:14:00', '2017-06-16 21:14:00',
               '2017-06-19 21:14:00', '2017-06-20 21:14:00',
               '2017-06-21 21:14:00', '2017-06-22 21:14:00',
               '2017-06-23 21:14:00', '2017-06-26 21:14:00'],
              dtype='datetime64[ns]', freq='B')

### How can you make a pandas Time Series with these aliases?

In [36]:
# Series holding 0..39, indexed by 60-minute periods that start at 11:15.
# 'min' is used instead of the 'T' alias, which recent pandas releases deprecate.
num_periods = 40
period_index = pd.period_range('2016-07-01 11:15', freq = '60min', periods = num_periods)
ts_pd = pd.Series(range(num_periods), index = period_index)

In [37]:
ts_pd['2016-7-1 11':'2016-7-1 18']

2016-07-01 11:15    0
2016-07-01 12:15    1
2016-07-01 13:15    2
2016-07-01 14:15    3
2016-07-01 15:15    4
2016-07-01 16:15    5
2016-07-01 17:15    6
2016-07-01 18:15    7
Freq: 60T, dtype: int32

In [38]:
type(ts_pd.index[0])  # not echoed: only the last expression of a cell is displayed
ts_pd.index[0]

Period('2016-07-01 11:15', '60T')

### How can we index a time series with a date_range?

In [39]:
# Same 0..39 values, this time indexed by Timestamps (date_range) starting at 11:15.
# 'min' is used instead of the 'T' alias, which recent pandas releases deprecate.
num_periods = 40
ts_dt = pd.Series(range(num_periods), pd.date_range('2016-07-01 11:15', freq = '60min', periods = num_periods))
# partial-string slice: the 18:15 entry is part of the result
ts_dt['2016-7-1 11':'2016-7-1 18']

2016-07-01 11:15:00    0
2016-07-01 12:15:00    1
2016-07-01 13:15:00    2
2016-07-01 14:15:00    3
2016-07-01 15:15:00    4
2016-07-01 16:15:00    5
2016-07-01 17:15:00    6
2016-07-01 18:15:00    7
Freq: 60T, dtype: int32

In [40]:
type(ts_dt.index[0])  # not echoed: only the last expression of a cell is displayed
ts_dt.index[0]

Timestamp('2016-07-01 11:15:00', freq='60T')

### What are the use cases for a series with a DatetimeIndex vs a PeriodIndex? Where will we see a difference?

In [41]:
# Timestamp is a point in time
ts_dt['2016-7-1 11']

2016-07-01 11:15:00    0
Freq: 60T, dtype: int32

In [42]:
# A Period is an interval (60T in this case):
# it spans from one Timestamp to another Timestamp.
hour_11 = ts_pd['2016-7-1 11']
print(hour_11)

# boundaries of the first period matched by the lookup
first_period = hour_11.index[0]
print("Starting Timestamp: ", first_period.start_time)
print("Ending Timestamp: ", first_period.end_time)

2016-07-01 11:15    0
2016-07-01 12:15    1
Freq: 60T, dtype: int32
Starting Timestamp:  2016-07-01 11:15:00
Ending Timestamp:  2016-07-01 12:14:59.999999999


### Grab everything on July 1 and July 2

In [140]:
ts_pd['2016-7-1':'2016-7-2']

2016-07-01 11:15     0
2016-07-01 12:15     1
2016-07-01 13:15     2
2016-07-01 14:15     3
2016-07-01 15:15     4
2016-07-01 16:15     5
2016-07-01 17:15     6
2016-07-01 18:15     7
2016-07-01 19:15     8
2016-07-01 20:15     9
2016-07-01 21:15    10
2016-07-01 22:15    11
2016-07-01 23:15    12
2016-07-02 00:15    13
2016-07-02 01:15    14
2016-07-02 02:15    15
2016-07-02 03:15    16
2016-07-02 04:15    17
2016-07-02 05:15    18
2016-07-02 06:15    19
2016-07-02 07:15    20
2016-07-02 08:15    21
2016-07-02 09:15    22
2016-07-02 10:15    23
2016-07-02 11:15    24
2016-07-02 12:15    25
2016-07-02 13:15    26
2016-07-02 14:15    27
2016-07-02 15:15    28
2016-07-02 16:15    29
2016-07-02 17:15    30
2016-07-02 18:15    31
2016-07-02 19:15    32
2016-07-02 20:15    33
2016-07-02 21:15    34
2016-07-02 22:15    35
2016-07-02 23:15    36
Freq: 60T, dtype: int32

In [146]:
# playing with index slicing
ts_pd['2016-7-1':'2016-7-2':3]

2016-07-01 11:15     0
2016-07-01 14:15     3
2016-07-01 17:15     6
2016-07-01 20:15     9
2016-07-01 23:15    12
2016-07-02 02:15    15
2016-07-02 05:15    18
2016-07-02 08:15    21
2016-07-02 11:15    24
2016-07-02 14:15    27
2016-07-02 17:15    30
2016-07-02 20:15    33
2016-07-02 23:15    36
Freq: 60T, dtype: int32

In [152]:
ts_pd['2016-7-2'::-2]

2016-07-02 23:15    36
2016-07-02 21:15    34
2016-07-02 19:15    32
2016-07-02 17:15    30
2016-07-02 15:15    28
2016-07-02 13:15    26
2016-07-02 11:15    24
2016-07-02 09:15    22
2016-07-02 07:15    20
2016-07-02 05:15    18
2016-07-02 03:15    16
2016-07-02 01:15    14
2016-07-01 23:15    12
2016-07-01 21:15    10
2016-07-01 19:15     8
2016-07-01 17:15     6
2016-07-01 15:15     4
2016-07-01 13:15     2
2016-07-01 11:15     0
Freq: 60T, dtype: int32

### How can we convert between a DatetimeIndex and a PeriodIndex?

In [57]:
# DatetimeIndex -> PeriodIndex, reusing the frequency stored on the index itself.
# (The per-element Timestamp.freq attribute is deprecated and removed in pandas 2.0,
# so the frequency is read from ts_dt.index rather than from ts_dt.index[0].)
aa = ts_dt.to_period(freq=ts_dt.index.freq)
# the reverse direction: ts_pd.to_timestamp()
aa

2016-07-01 11:15     0
2016-07-01 12:15     1
2016-07-01 13:15     2
2016-07-01 14:15     3
2016-07-01 15:15     4
2016-07-01 16:15     5
2016-07-01 17:15     6
2016-07-01 18:15     7
2016-07-01 19:15     8
2016-07-01 20:15     9
2016-07-01 21:15    10
2016-07-01 22:15    11
2016-07-01 23:15    12
2016-07-02 00:15    13
2016-07-02 01:15    14
2016-07-02 02:15    15
2016-07-02 03:15    16
2016-07-02 04:15    17
2016-07-02 05:15    18
2016-07-02 06:15    19
2016-07-02 07:15    20
2016-07-02 08:15    21
2016-07-02 09:15    22
2016-07-02 10:15    23
2016-07-02 11:15    24
2016-07-02 12:15    25
2016-07-02 13:15    26
2016-07-02 14:15    27
2016-07-02 15:15    28
2016-07-02 16:15    29
2016-07-02 17:15    30
2016-07-02 18:15    31
2016-07-02 19:15    32
2016-07-02 20:15    33
2016-07-02 21:15    34
2016-07-02 22:15    35
2016-07-02 23:15    36
2016-07-03 00:15    37
2016-07-03 01:15    38
2016-07-03 02:15    39
Freq: 60T, dtype: int32

# Try

(1) How can you create a pd.Timestamp with a European style formatted date string? hint: dayfirst flag

(2) How can you generate string representation in a desired format from a pd.Timestamp? 

In [134]:
# (1) Parse date strings with and without dayfirst=True and compare the results.
default_stamp = pd.Timestamp('07-15-2016')
print('Timestamp default:', default_stamp)

stamp_through_converter = pd.to_datetime(pd.Timestamp('07-15-2016'), dayfirst=True)
print('Feed Timestamp to format converter', stamp_through_converter)

string_through_converter = pd.to_datetime('07-12-2016', dayfirst=True)
print('Feed string to format converter', string_through_converter)

# the second field cannot be a month here (15 > 12)
impossible_month = pd.to_datetime('07-15-2016', dayfirst=True)
print('Feed string (feed month>12) to format converter (It swaps the order for you. Not great!)', impossible_month)

print()

# (2) Format using Python's strftime; both constructors yield a Timestamp.
t1 = pd.to_datetime('07-12-2016', dayfirst=True)
t2 = pd.Timestamp('07-12-2016')
for stamp, suffix in ((t1, '. to_datetime produces '), (t2, '. Timestamp produces ')):
    print(stamp.strftime(format = '%Y/%m/%d-%a') + suffix + str(type(stamp)))

Timestamp default: 2016-07-15 00:00:00
Feed Timestamp to format converter 2016-07-15 00:00:00
Feed string to format converter 2016-12-07 00:00:00
Feed string (feed month>12) to format converter (It swaps the order for you. Not great!) 2016-07-15 00:00:00

2016/12/07-Wed. to_datetime produces <class 'pandas._libs.tslib.Timestamp'>
2016/07/12-Tue. Timestamp produces <class 'pandas._libs.tslib.Timestamp'>


In [137]:
# %load snippets/try1.py
# strftime directives: %M = minute, %D = month/day/year
# Parse first, then format; t1 and t2 end up holding the formatted strings.
parsed_by_to_datetime = pd.to_datetime('2016-06-18 12:15pm', dayfirst = True)
t1 = parsed_by_to_datetime.strftime(format = '%Y/%m/%d')
print(t1)

parsed_by_timestamp = pd.Timestamp('2016-06-18 12:15pm')
t2 = parsed_by_timestamp.strftime(format = '%Y/%m/%d')
print(t2)

2016/06/18
2016/06/18
