In [1]:
import pandas as pd
import numpy as np

# Generate series of timestamps

In [2]:
# Specify a start date and a number of periods.
# With freq='M' every stamp lands on a month end, so the mid-month start
# (Jul 15) yields Jul 31 as the first entry; the 10:15 time of day is kept.
rng = pd.date_range(start = '2016 Jul 15 10:15', periods = 10, freq = 'M')
rng

DatetimeIndex(['2016-07-31 10:15:00', '2016-08-31 10:15:00',
               '2016-09-30 10:15:00', '2016-10-31 10:15:00',
               '2016-11-30 10:15:00', '2016-12-31 10:15:00',
               '2017-01-31 10:15:00', '2017-02-28 10:15:00',
               '2017-03-31 10:15:00', '2017-04-30 10:15:00'],
              dtype='datetime64[ns]', freq='M')

### Let's explore some of the other options for date_range
https://pandas.pydata.org/docs/reference/api/pandas.date_range.html

In particular, let's take a look at 'normalize' and 'end'.

In [3]:
# Weekly frequency: 'W' resolves to W-SUN (see freq in the output), so the
# June 1 start rolls forward to the first Sunday, 2017-06-04.
rng = pd.date_range(start = '2017 June 1', periods = 10, freq = 'W')
rng

DatetimeIndex(['2017-06-04', '2017-06-11', '2017-06-18', '2017-06-25',
               '2017-07-02', '2017-07-09', '2017-07-16', '2017-07-23',
               '2017-07-30', '2017-08-06'],
              dtype='datetime64[ns]', freq='W-SUN')

In [4]:
# Bound the range with start and end instead of a period count.
# NOTE: this 12-entry `rng` is the index reused later in the
# `rng + pd.Timedelta('1day')` example.
rng = pd.date_range(start = '2017 June 1', end = '2017 Aug 21', freq = 'W')
len(rng)

12

### Which of these formats DON'T work?
'2016 Jul 1', '7/1/2016', '1/7/2016', 'July 1, 2016', '2016-07-01'


#### You'll want to try this with pd.to_datetime

In [5]:
# You may need to handle European-style (day-first) dates.
# In that case, use pd.to_datetime with the dayfirst flag.

In [6]:
# See the behavior with the following examples. Do you see the pattern?

In [7]:
# Ambiguous DD/MM vs MM/DD string: dayfirst decides which field is the day,
# so the two parses differ and the comparison is False.
pd.to_datetime('01/07/2016', dayfirst = True) == pd.to_datetime('01/07/2016', dayfirst = False)

False

In [8]:
# Same ambiguity with dashes as separators: the two parses still differ.
pd.to_datetime('01-07-2016', dayfirst = True) == pd.to_datetime('01-07-2016', dayfirst = False)

False

In [9]:
# Year-first (ISO-style) string: dayfirst makes no difference here, so the
# comparison is True.
pd.to_datetime('2016-01-07', dayfirst = True) == pd.to_datetime('2016-01-07', dayfirst = False)

True

# Timestamps

In [10]:
# Add increasing detail, starting from a date-only string
# (which yields a timestamp at midnight).
pd.Timestamp('2017-06-1')

Timestamp('2017-06-01 00:00:00')

In [11]:
# All nine fractional-second digits (nanoseconds) survive in the output.
pd.Timestamp('2017-06-1 17:38:15.123456789')

Timestamp('2017-06-01 17:38:15.123456789')

### How much detail can you add?

### What are some properties of timestamps?
Hint: https://pandas.pydata.org/docs/user_guide/timeseries.html#time-date-components

In [12]:
# Sample timestamp used to explore component attributes.
t = pd.Timestamp('2016-07-10 10:15:15')

In [13]:
# Calendar quarter of the timestamp; a July date falls in quarter 3.
t.quarter

3

In [14]:
# Times may be given in 12-hour form: '8 pm' parses to 20:00.
t = pd.Timestamp('2016-07-10 8 pm')
t

Timestamp('2016-07-10 20:00:00')

In [15]:
# Day of the week as an integer; this date is a Sunday, which maps to 6.
t.dayofweek

6

In [16]:
# Day of the week as a string. day_name() replaces the Timestamp.weekday_name
# attribute, which was deprecated in pandas 0.23 and removed in pandas 1.0.
t.day_name()

'Sunday'

### What do timestamps lack?
Some events have a duration rather than occurring at a single moment in time.

# Time offsets

In [17]:
# A Timedelta string can mix units: 3 days plus 1 microsecond.
pd.Timedelta('3 day 1us')

Timedelta('3 days 00:00:00.000001')

In [18]:
# Adding a Timedelta shifts a Timestamp; fractional units ('1.5 hours') are accepted.
pd.Timestamp('2016-07-01 8:00') + pd.Timedelta('1.5 hours')

Timestamp('2016-07-01 09:30:00')

In [19]:
# Exercise: is the result below a bug? If so, what kind?
# (Hint: 15 ns * 1000 = 15,000 ns.)
pd.Timedelta('15ns')*1000

Timedelta('0 days 00:00:00.000015')

In [20]:
# Adding a Timedelta to a DatetimeIndex shifts every element. `rng` is the
# weekly (Sunday) index built earlier with start/end; after the one-day shift
# the output reports freq='W-MON'.
rng + pd.Timedelta('1day')

DatetimeIndex(['2017-06-05', '2017-06-12', '2017-06-19', '2017-06-26',
               '2017-07-03', '2017-07-10', '2017-07-17', '2017-07-24',
               '2017-07-31', '2017-08-07', '2017-08-14', '2017-08-21'],
              dtype='datetime64[ns]', freq='W-MON')

# Time spans

In [21]:
# A monthly Period covers an interval of time; check whether a timestamp
# lies strictly between the period's first and last instants.
p = pd.Period('7/2017')
t = pd.Timestamp('7/21/2017')
p.start_time < t < p.end_time

True

In [22]:
# With freq='H' the 12:15 start is labelled by its containing hour (12:00).
rng = pd.period_range('2017-06-01 12:15', freq = 'H', periods = 10)
rng

PeriodIndex(['2017-06-01 12:00', '2017-06-01 13:00', '2017-06-01 14:00',
             '2017-06-01 15:00', '2017-06-01 16:00', '2017-06-01 17:00',
             '2017-06-01 18:00', '2017-06-01 19:00', '2017-06-01 20:00',
             '2017-06-01 21:00'],
            dtype='period[H]', freq='H')

In [23]:
# With freq='60T' (60 minutes) the periods keep the :15 offset instead of
# snapping to the top of the hour.
rng = pd.period_range('2016-01-01 12:15', freq = '60T', periods = 10)
rng

PeriodIndex(['2016-01-01 12:15', '2016-01-01 13:15', '2016-01-01 14:15',
             '2016-01-01 15:15', '2016-01-01 16:15', '2016-01-01 17:15',
             '2016-01-01 18:15', '2016-01-01 19:15', '2016-01-01 20:15',
             '2016-01-01 21:15'],
            dtype='period[60T]', freq='60T')

In [24]:
# '1H' behaves like 'H': the output reports freq='H' and hour-aligned labels.
rng = pd.period_range('2016-01-01 12:15', freq = '1H', periods = 10)
rng

PeriodIndex(['2016-01-01 12:00', '2016-01-01 13:00', '2016-01-01 14:00',
             '2016-01-01 15:00', '2016-01-01 16:00', '2016-01-01 17:00',
             '2016-01-01 18:00', '2016-01-01 19:00', '2016-01-01 20:00',
             '2016-01-01 21:00'],
            dtype='period[H]', freq='H')

### How can you determine whether a timestamp falls within a given period?

In [25]:
# %load snippets/startend.py
# A year-month string yields a monthly Period (freq 'M' in the output).
p = pd.Period('2017-06')
p

Period('2017-06', 'M')

In [26]:
# First instant covered by the period.
p.start_time

Timestamp('2017-06-01 00:00:00')

In [27]:
# Last instant covered by the period, shown down to the nanosecond.
p.end_time

Timestamp('2017-06-30 23:59:59.999999999')

In [28]:
# Chained comparison: True when t lies strictly between the period's bounds.
t = pd.Timestamp('Jun 2, 2017')
p.start_time < t < p.end_time

True

### Try out some other functionality with different offset aliases. Anything interesting?

In [29]:
# Lowercase 'w' is accepted as the weekly alias here (freq='W-SUN' in the
# output). Each period is a Monday-Sunday week, starting with the week that
# contains June 1. Note that `p` is rebound from a Period to a PeriodIndex.
p = pd.period_range('2017 june', periods=12, freq='w')
p

PeriodIndex(['2017-05-29/2017-06-04', '2017-06-05/2017-06-11',
             '2017-06-12/2017-06-18', '2017-06-19/2017-06-25',
             '2017-06-26/2017-07-02', '2017-07-03/2017-07-09',
             '2017-07-10/2017-07-16', '2017-07-17/2017-07-23',
             '2017-07-24/2017-07-30', '2017-07-31/2017-08-06',
             '2017-08-07/2017-08-13', '2017-08-14/2017-08-20'],
            dtype='period[W-SUN]', freq='W-SUN')

### How can you make a pandas Time Series with these aliases?

In [30]:
# Build a 40-row series indexed by consecutive 60-minute periods, then
# confirm the resulting index type.
num_periods = 40
ts_pd = pd.Series(
    data=range(num_periods),
    index=pd.period_range(start='2016-07-01 11:15', periods=num_periods, freq='60T'),
)
type(ts_pd.index)

pandas.tseries.period.PeriodIndex

In [31]:
# Partial-string slice on the PeriodIndex; the end label is inclusive
# (the 13:15 row is returned).
ts_pd['2016-7-1 11':'2016-7-1 13']

2016-07-01 11:15    0
2016-07-01 12:15    1
2016-07-01 13:15    2
Freq: 60T, dtype: int32

### How can we index a time series with a date_range?

In [32]:
# Same 40-row series, but indexed by point-in-time stamps spaced 60 minutes
# apart; then confirm the resulting index type.
num_periods = 40
ts_dt = pd.Series(
    data=range(num_periods),
    index=pd.date_range(start='2016-07-01 11:15', periods=num_periods, freq='60T'),
)
type(ts_dt.index)

pandas.tseries.index.DatetimeIndex

### What are the use cases for a series with a DatetimeIndex vs a PeriodIndex? Where will we see a difference?

In [33]:
# Hour-level partial string on the DatetimeIndex: returns the entries that
# fall within hour 11 (here only 11:15).
ts_dt['2016-7-1 11']

2016-07-01 11:15:00    0
Freq: 60T, dtype: int32

In [34]:
# Same key on the PeriodIndex: the recorded output has two rows (11:15 and
# 12:15), one more than the DatetimeIndex lookup returned.
ts_pd['2016-7-1 11']

2016-07-01 11:15    0
2016-07-01 12:15    1
Freq: 60T, dtype: int32

### How can we convert between a DatetimeIndex and a PeriodIndex?

In [35]:
# DatetimeIndex -> PeriodIndex with 60-minute periods; head() limits the
# display to the first five rows.
ts_dt.to_period(freq='60T').head()

2016-07-01 11:15    0
2016-07-01 12:15    1
2016-07-01 13:15    2
2016-07-01 14:15    3
2016-07-01 15:15    4
Freq: 60T, dtype: int32

In [36]:
# PeriodIndex -> timestamps; how='E' uses the end of each period (the period
# starting at 11:15 maps to 12:14:59 in the output).
ts_pd.to_timestamp(how='E').head()

2016-07-01 12:14:59    0
2016-07-01 13:14:59    1
2016-07-01 14:14:59    2
2016-07-01 15:14:59    3
2016-07-01 16:14:59    4
Freq: H, dtype: int32

### Grab everything on July 1 and July 2

In [37]:
# Day-level partial-string slice: keeps every row from July 1 through the
# end of July 2 (the July 3 rows are excluded).
ts_pd['2016-7-1':'2016-7-2']

2016-07-01 11:15     0
2016-07-01 12:15     1
2016-07-01 13:15     2
2016-07-01 14:15     3
2016-07-01 15:15     4
2016-07-01 16:15     5
2016-07-01 17:15     6
2016-07-01 18:15     7
2016-07-01 19:15     8
2016-07-01 20:15     9
2016-07-01 21:15    10
2016-07-01 22:15    11
2016-07-01 23:15    12
2016-07-02 00:15    13
2016-07-02 01:15    14
2016-07-02 02:15    15
2016-07-02 03:15    16
2016-07-02 04:15    17
2016-07-02 05:15    18
2016-07-02 06:15    19
2016-07-02 07:15    20
2016-07-02 08:15    21
2016-07-02 09:15    22
2016-07-02 10:15    23
2016-07-02 11:15    24
2016-07-02 12:15    25
2016-07-02 13:15    26
2016-07-02 14:15    27
2016-07-02 15:15    28
2016-07-02 16:15    29
2016-07-02 17:15    30
2016-07-02 18:15    31
2016-07-02 19:15    32
2016-07-02 20:15    33
2016-07-02 21:15    34
2016-07-02 22:15    35
2016-07-02 23:15    36
Freq: 60T, dtype: int32

# Try

(1) How can you create a pd.Timestamp from a European-style (day-first) date string? Hint: use the dayfirst flag.

(2) How can you generate a string representation in a desired format from a pd.Timestamp?

In [38]:
# (1) dayfirst=True reads '1-6-2017' as 1 June 2017; passing a list returns
# a DatetimeIndex.
pd.to_datetime(['1-6-2017'], dayfirst=True)

DatetimeIndex(['2017-06-01'], dtype='datetime64[ns]', freq=None)

In [39]:
# (2) strftime renders the parsed timestamp as a string in the requested format.
pd.to_datetime('1-6-2017', dayfirst=True).strftime(format = '%Y-%m-%d')

'2017-06-01'

### Neat Indexing Trick

In [40]:
# Plain Python list slicing with a step: every 2nd element.
numbers = list(range(20))
numbers[::2]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [41]:
# Every 4th element.
numbers[::4]

[0, 4, 8, 12, 16]

In [42]:
# Negative step: walk backwards from index 15 down to, but not including, index 1.
numbers[15:1:-1]

[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2]

In [43]:
# The same stepping works on a time series: starting at the first July 1
# entry, take every 5th row.
ts_pd['2016-7-1'::5]

2016-07-01 11:15     0
2016-07-01 16:15     5
2016-07-01 21:15    10
2016-07-02 02:15    15
2016-07-02 07:15    20
2016-07-02 12:15    25
2016-07-02 17:15    30
2016-07-02 22:15    35
Freq: 60T, dtype: int32

In [44]:
# Negative step: start from the last July 3 entry and walk backwards,
# taking every 5th row.
ts_pd['2016-7-3'::-5]

2016-07-03 02:15    39
2016-07-02 21:15    34
2016-07-02 16:15    29
2016-07-02 11:15    24
2016-07-02 06:15    19
2016-07-02 01:15    14
2016-07-01 20:15     9
2016-07-01 15:15     4
Freq: 60T, dtype: int32