In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [4]:
# Five consecutive days in January 2018, each stamped at 10:00:00.
dates = [datetime(2018, 1, day, 10, 0, 0) for day in range(1, 6)]

# Passing datetime objects as the index gives the Series a DatetimeIndex.
s = pd.Series(data=np.random.randn(5), index=dates)

s

2018-01-01 10:00:00    0.017555
2018-01-02 10:00:00    0.296906
2018-01-03 10:00:00    0.470880
2018-01-04 10:00:00    0.714449
2018-01-05 10:00:00    1.184830
dtype: float64

In [13]:
# Pull the first two index labels out by position;
# each label of a datetime-indexed Series is a pandas Timestamp.
ts1, ts2 = s.index[0], s.index[1]

In [14]:
# Show the first index label (a pandas Timestamp).
ts1

Timestamp('2018-01-01 10:00:00')

In [15]:
# Show the second index label (a pandas Timestamp).
ts2

Timestamp('2018-01-02 10:00:00')

In [19]:
# Subtracting one Timestamp from another yields a Timedelta (the elapsed time).
ts2 - ts1

Timedelta('1 days 00:00:00')

In [36]:
# Create a Timedelta from a human-readable string: 2 days plus 12 hours.

d2 = pd.Timedelta("2 days 12 hour")

In [33]:
# Offset the first timestamp by d2; the sum is again a Timestamp.
ts1 + d2

Timestamp('2018-01-03 22:00:00')

In [37]:
# Timedelta.value is the duration as an integer number of nanoseconds
# (1 second = 10^9 nanoseconds); the output below is the value for 2 days 12 hours.
d2.value

216000000000000

## Indexing

In [39]:
# Five consecutive January days at 10:00, followed by two February timestamps,
# so that the month-level selections in the cells below have something to filter out.
dates = [datetime(2018, 1, day, 10, 0, 0) for day in range(1, 6)] + [
    datetime(2018, 2, 1, 14, 0, 0),
    datetime(2018, 2, 2, 15, 0, 0),
]

s = pd.Series(data=np.random.randn(7), index=dates)
s

2018-01-01 10:00:00    0.472417
2018-01-02 10:00:00    2.419081
2018-01-03 10:00:00    0.649052
2018-01-04 10:00:00   -0.362409
2018-01-05 10:00:00    0.303793
2018-02-01 14:00:00   -0.071814
2018-02-02 15:00:00    0.599288
dtype: float64

In [45]:
# Select a span of days with partial date strings.
# Both endpoints are inclusive and a date-only string covers the whole day,
# which is why the 2018-01-03 10:00 row is part of the result.
s.loc['2018-1-1':'2018-1-3']

2018-01-01 10:00:00    0.472417
2018-01-02 10:00:00    2.419081
2018-01-03 10:00:00    0.649052
dtype: float64

In [47]:
# A year-month string selects every row that falls within that month.
s.loc['2018-1']

2018-01-01 10:00:00    0.472417
2018-01-02 10:00:00    2.419081
2018-01-03 10:00:00    0.649052
2018-01-04 10:00:00   -0.362409
2018-01-05 10:00:00    0.303793
dtype: float64

In [49]:
# Slice with datetime objects instead of strings.
# A datetime endpoint is an exact instant (midnight here), so the
# 2018-01-03 10:00 row lies after the end bound and is not returned.
s.loc[datetime(2018, 1, 1):datetime(2018, 1, 3)]

2018-01-01 10:00:00    0.472417
2018-01-02 10:00:00    2.419081
dtype: float64

### Generating a date range

In [55]:
# `freq` sets the spacing between consecutive timestamps.
# Aliases used in this section: 'D' = calendar day, 'H' = hour
# (spelled 'h' from pandas 2.2 on), 'QS' = quarter start.
# See the pandas "offset aliases" documentation for the full list of frequency strings.
pd.date_range(start="2018-1-1", periods=15, freq='D')

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-14', '2018-01-15'],
              dtype='datetime64[ns]', freq='D')

In [57]:
# 15 hourly timestamps starting at midnight.
# The hourly alias is written in lower case: upper-case 'H' is deprecated
# since pandas 2.2 (FutureWarning), while 'h' is also accepted by older releases.
pd.date_range("2018-1-1", periods=15, freq='h')

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00',
               '2018-01-01 10:00:00', '2018-01-01 11:00:00',
               '2018-01-01 12:00:00', '2018-01-01 13:00:00',
               '2018-01-01 14:00:00'],
              dtype='datetime64[ns]', freq='H')

In [61]:
# 'QS' = quarter start: one timestamp on the first day of each quarter
# (reported as the anchored alias 'QS-JAN' in the output).
pd.date_range(start="2018-1-1", periods=5, freq='QS')

DatetimeIndex(['2018-01-01', '2018-04-01', '2018-07-01', '2018-10-01',
               '2019-01-01'],
              dtype='datetime64[ns]', freq='QS-JAN')

### Duplicates in timestamp indices

In [69]:
# Build an index in which 2018-02-02 occurs twice.
d = pd.DatetimeIndex(
    data=['2018-2-1', '2018-2-2', '2018-2-2', '2018-2-3'],
)
d

DatetimeIndex(['2018-02-01', '2018-02-02', '2018-02-02', '2018-02-03'], dtype='datetime64[ns]', freq=None)

In [76]:
# Attach the values 0..3 to the index that contains a repeated date.
s = pd.Series(data=np.arange(4), index=d)
s

2018-02-01    0
2018-02-02    1
2018-02-02    2
2018-02-03    3
dtype: int32

In [77]:
# False here because 2018-02-02 appears twice in the index.
s.index.is_unique

False

## Converting to fixed-frequency series
## Resample

In [32]:
# Irregularly spaced observations: 3-4 Jan and 6-7 Jan are absent.
d = pd.DatetimeIndex(data=['2018-1-1', '2018-1-2', '2018-1-5', '2018-1-8'])
s = pd.Series(data=[1, 2, 5, 8], index=d)
s

2018-01-01    1
2018-01-02    2
2018-01-05    5
2018-01-08    8
dtype: int64

#### Resample to daily data
#### resample('frequency') 

In [33]:
# Group the series into daily bins; the fill strategy for the
# days that have no observation is chosen in the following cells.
resample = s.resample(rule='D')

In [34]:
# Fill the missing days by interpolating between the known observations
# (3.0, 4.0, 6.0 and 7.0 in the output); the result is float64.
resample.interpolate()

2018-01-01    1.0
2018-01-02    2.0
2018-01-03    3.0
2018-01-04    4.0
2018-01-05    5.0
2018-01-06    6.0
2018-01-07    7.0
2018-01-08    8.0
Freq: D, dtype: float64

In [21]:
# Forward fill: each missing day repeats the last observed value,
# so the integer dtype is kept (see the output).
resample.ffill()

2018-01-01    1
2018-01-02    2
2018-01-03    2
2018-01-04    2
2018-01-05    5
2018-01-06    5
2018-01-07    5
2018-01-08    8
Freq: D, dtype: int64

### Shifting data

In [36]:
# Eight uniform random values on a daily index starting 2018-01-01.
s = pd.Series(
    data=np.random.rand(8),
    index=pd.date_range(start='2018-1-1', periods=8, freq='D'),
)
s

2018-01-01    0.591526
2018-01-02    0.283947
2018-01-03    0.163072
2018-01-04    0.978407
2018-01-05    0.008276
2018-01-06    0.329193
2018-01-07    0.884075
2018-01-08    0.655528
Freq: D, dtype: float64

In [40]:
# Move the values two rows down (later in time); the index stays put
# and the two vacated rows at the top become NaN.
s.shift(periods=2)

2018-01-01         NaN
2018-01-02         NaN
2018-01-03    0.591526
2018-01-04    0.283947
2018-01-05    0.163072
2018-01-06    0.978407
2018-01-07    0.008276
2018-01-08    0.329193
Freq: D, dtype: float64

In [42]:
# Move the values two rows up (earlier in time); the index stays put
# and the two vacated rows at the bottom become NaN.
s.shift(periods=-2)

2018-01-01    0.163072
2018-01-02    0.978407
2018-01-03    0.008276
2018-01-04    0.329193
2018-01-05    0.884075
2018-01-06    0.655528
2018-01-07         NaN
2018-01-08         NaN
Freq: D, dtype: float64

### Time zone
* UTC: coordinated universal time
* Time zone is expressed as an offset to the UTC
    * e.g. California is 8 hours behind UTC on standard time (PST, UTC-8) and 7 hours behind UTC on daylight saving time (PDT, UTC-7)

### Time zone names (import pytz)

In [44]:
import pytz

In [55]:
# Peek at the last five entries of pytz's list of common time zone names.
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [59]:
# Look up the time zone object for a zone name.
# NOTE(review): the repr below shows the zone's initial LMT entry, not the
# offset that applies to a given date — presumably a pytz display quirk; confirm.
tz = pytz.timezone('US/Pacific')
tz

<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

##### By default, time series in pandas are time-zone naive (`tz` is `None`)

In [71]:
# Six daily timestamps at 10:00, created without any time zone information.
s = pd.Series(
    data=np.random.randn(6),
    index=pd.date_range(start='2018-1-1 10:00', periods=6, freq='D'),
)
s

2018-01-01 10:00:00   -0.670580
2018-01-02 10:00:00    0.572532
2018-01-03 10:00:00   -0.726298
2018-01-04 10:00:00    2.425179
2018-01-05 10:00:00    0.865788
2018-01-06 10:00:00   -0.069655
Freq: D, dtype: float64

In [74]:
# The index is time-zone naive, so `.tz` is None. print() is used on purpose:
# a bare expression that evaluates to None produces no cell output.
print(s.index.tz)

None


In [83]:
# Pass `tz` to date_range to create a time-zone aware (UTC) index from the start.
s_utc = pd.Series(
    data=np.random.randn(6),
    index=pd.date_range(start='2018-1-1 10:00', periods=6, freq='D', tz='UTC'),
)
s_utc

2018-01-01 10:00:00+00:00   -1.138929
2018-01-02 10:00:00+00:00   -1.901670
2018-01-03 10:00:00+00:00   -1.563052
2018-01-04 10:00:00+00:00   -0.020952
2018-01-05 10:00:00+00:00    0.589968
2018-01-06 10:00:00+00:00    0.026788
Freq: D, dtype: float64

In [85]:
# The index now carries a time zone (UTC) instead of None.
s_utc.index.tz

<UTC>

## Converting to another time zone
* using tz_convert()
* daylight saving is automatically applied
* a series can only be converted to another time zone after it has been localized, i.e. its index is already time-zone aware

In [87]:
# Re-express the same instants in another zone: 10:00 UTC is shown as
# 02:00 with a -08:00 offset in US/Pacific; the values are untouched.
s_utc.tz_convert('US/Pacific')

2018-01-01 02:00:00-08:00   -1.138929
2018-01-02 02:00:00-08:00   -1.901670
2018-01-03 02:00:00-08:00   -1.563052
2018-01-04 02:00:00-08:00   -0.020952
2018-01-05 02:00:00-08:00    0.589968
2018-01-06 02:00:00-08:00    0.026788
Freq: D, dtype: float64

## Localize time zone
Assign a timezone after a series has been created
* tz_localize()

In [90]:
# A time-zone naive series: the index carries no zone information yet.
s = pd.Series(
    data=np.random.randn(6),
    index=pd.date_range(start='2018-1-1 10:00', periods=6, freq="D"),
)
s

2018-01-01 10:00:00   -0.428118
2018-01-02 10:00:00   -1.660617
2018-01-03 10:00:00   -1.917001
2018-01-04 10:00:00   -0.703800
2018-01-05 10:00:00   -1.809462
2018-01-06 10:00:00    1.926103
Freq: D, dtype: float64

In [99]:
# Assign a time zone to the naive series. tz_localize keeps the wall-clock
# time (10:00 stays 10:00) and attaches the zone's offset (-08:00 in the output);
# the result is bound to a new name, `s2`.
s2 = s.tz_localize('US/Pacific')
s2

2018-01-01 10:00:00-08:00   -0.428118
2018-01-02 10:00:00-08:00   -1.660617
2018-01-03 10:00:00-08:00   -1.917001
2018-01-04 10:00:00-08:00   -0.703800
2018-01-05 10:00:00-08:00   -1.809462
2018-01-06 10:00:00-08:00    1.926103
Freq: D, dtype: float64

## Time-zone aware operation

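pandas cannot combine time-zone naive and time-zone aware timestamps in arithmetic. Time-zone aware timestamps in different zones are compared as absolute (UTC) instants; converting them to a common zone first, as below, makes that explicit.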

In [103]:
# The same wall-clock reading (noon on 2018-01-01) interpreted in two zones.
t1 = pd.Timestamp('2018-01-01 12:00:00').tz_localize('US/Pacific')
t2 = pd.Timestamp('2018-01-01 12:00:00').tz_localize('UTC')

In [105]:
# Express t1 in UTC, then subtract: noon Pacific is 8 hours after noon UTC.
t1.tz_convert('UTC') - t2

Timedelta('0 days 08:00:00')