In [1]:
import pandas as pd
import numpy as np

In [2]:
# TIMES
# Ten consecutive calendar days, beginning on 1 July 2016.
rng = pd.date_range(start='2016 Jul 1', periods=10, freq='D')
rng

DatetimeIndex(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
               '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
               '2016-07-09', '2016-07-10'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Which of these formats DON'T work?
#'2016 Jul 1', '7/1/2016', '1/7/2016', 'July 1, 2016', '2016-07-01', '2016/07/01'

In [4]:
# The same ten days, this time from a day-first, comma-separated start string.
rng = pd.date_range(start='01, Jul, 2016', periods=10, freq='D')
rng

DatetimeIndex(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
               '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
               '2016-07-09', '2016-07-10'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# Is '1/7/2016' in January or July?
# pandas reads ambiguous numeric dates month-first by default, so this prints 1 (January 7th).
print(pd.date_range('1/7/2016', periods=1)[0].month)

1


In [6]:
# What is the class of an individual object held in the date_range?
# The container itself is a DatetimeIndex (displayed below); every element
# taken out of it, e.g. rng[0], is a pd.Timestamp.
assert isinstance(rng[0], pd.Timestamp)
type(rng)

pandas.core.indexes.datetimes.DatetimeIndex

In [7]:
# TIME STAMPS VS TIME SPANS
# A Timestamp is a single instant; a date-only string is placed at midnight.
pd.Timestamp('2016-07-10')

Timestamp('2016-07-10 00:00:00')

In [8]:
# You can also add more detail: a trailing number is read as the hour.
pd.Timestamp('2016-07-10 10')

Timestamp('2016-07-10 10:00:00')

In [9]:
# Or even more: hours and minutes.
pd.Timestamp('2016-07-10 10:15')

Timestamp('2016-07-10 10:15:00')

In [10]:
# How much detail can you add?
# Twelve fractional digits are supplied here; the result keeps the first nine (nanoseconds).
pd.Timestamp('2016-07-10 10:15:59.999999999999')

Timestamp('2016-07-10 10:15:59.999999999')

As we can see, a Timestamp resolves down to nanoseconds; any digits beyond the ninth decimal place are simply dropped.

In [11]:
# What are some properties of timestamps? Try them out.
# hint: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-date-components
# t is the sample Timestamp inspected by the cells that follow.
t = pd.Timestamp('2016-07-10 10:15')

In [12]:
# dir() lists every attribute and method name on t, including private and dunder names.
print(dir(t))

['__add__', '__array_priority__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pyx_vtable__', '__radd__', '__reduce__', '__reduce_cython__', '__reduce_ex__', '__repr__', '__rsub__', '__setattr__', '__setstate__', '__setstate_cython__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__weakref__', '_date_repr', '_freq', '_freqstr', '_repr_base', '_round', '_set_freq', '_short_repr', '_time_repr', 'asm8', 'astimezone', 'ceil', 'combine', 'ctime', 'date', 'day', 'day_name', 'day_of_week', 'day_of_year', 'dayofweek', 'dayofyear', 'days_in_month', 'daysinmonth', 'dst', 'floor', 'fold', 'freq', 'freqstr', 'fromisocalendar', 'fromisoformat', 'fromordinal', 'fromtimestamp', 'hour', 'is_leap_year', 'is_month_end', 'is_month_start', 'is_quarter_end', 'is_quarter_start', 'is_year_end', 'is_year_start', 'iso

In [13]:
# Let's try some of these attributes/methods (the printed names above do not tell us which is an attribute and which is a method).
# max is the latest representable Timestamp, not a property of this particular date.
t.max

Timestamp('2262-04-11 23:47:16.854775807')

In [14]:
# 2016 is a leap year.
t.is_leap_year

True

In [15]:
# 10 July is not the last day of the year.
t.is_year_end

False

In [16]:
# dayofweek is an attribute, day_name() is a method; the week is numbered from Monday=0, so Sunday is 6.
t.dayofweek, t.day_name()

(6, 'Sunday')

In [17]:
# Components that were not given in the string (here the seconds) are 0.
t.second

0

In [18]:
# NOTE: leading-underscore attributes are private pandas internals and may change between releases;
# they are shown here only for exploration (t.date() is the public way to get the date part).
t._date_repr, t._time_repr

('2016-07-10', '10:15:00')

In [19]:
# Convert the Timestamp to the equivalent NumPy datetime64 scalar.
t.to_datetime64()

numpy.datetime64('2016-07-10T10:15:00.000000000')

In [20]:
# NOTE: _repr_base is a private attribute; str(t) produces the same text through the public API.
t._repr_base

'2016-07-10 10:15:00'

In [21]:
# Hour component of the timestamp.
t.hour

10

In [22]:
# ctime() is a method: it returns a C-style date string.
t.ctime()

'Sun Jul 10 10:15:00 2016'

In [23]:
# TIME SPANS
# A Period stands for a span of time; a year-month string yields a monthly period.
pd.Period('2016-01')

Period('2016-01', 'M')

In [24]:
# What's that extra info above? How does it get set?

In [25]:
# A full date gives a daily period.
pd.Period('2016-01-01')

Period('2016-01-01', 'D')

In [26]:
# Adding an hour gives an hourly period.
pd.Period('2016-01-01 10')

Period('2016-01-01 10:00', 'H')

In [27]:
# Adding minutes gives a minute-level period.
pd.Period('2016-01-01 10:10')

Period('2016-01-01 10:10', 'T')

In [28]:
# Adding seconds gives a second-level period.
pd.Period('2016-01-01 10:10:10')

Period('2016-01-01 10:10:10', 'S')

It looks like the frequency (the second value in each output above) is inferred from the finest time component present in the input string.

In [29]:
# What's the most detailed Period you can get?
# Three fractional digits: build the period once, then show its frequency
# both as an alias string and as an offset object.
millis_period = pd.Period('2016-07-10 10:15:59.111')
print(millis_period.freqstr)
millis_period.freq

L


<Milli>

In [30]:
# Six fractional digits: microsecond resolution.
micros_period = pd.Period('2016-07-10 10:15:59.111222')
print(micros_period.freqstr)
micros_period.freq

U


<Micro>

In [31]:
# More than six fractional digits: nanosecond resolution.
nanos_period = pd.Period('2016-07-10 10:15:59.11122233')
print(nanos_period.freqstr)
nanos_period.freq

N


<Nano>

In [32]:
# How can you make multiple time periods?
# Hint: look for the analogy with pd.date_range() above.
# period_range() is that analogue: seven periods counted forward from the start date.
pd.period_range('2016-07-10', periods=7)

PeriodIndex(['2016-07-10', '2016-07-11', '2016-07-12', '2016-07-13',
             '2016-07-14', '2016-07-15', '2016-07-16'],
            dtype='period[D]')

In [33]:
# Anchoring on end= instead counts the seven periods backwards from that date.
pd.period_range(end='2016-07-10', periods=7)

PeriodIndex(['2016-07-04', '2016-07-05', '2016-07-06', '2016-07-07',
             '2016-07-08', '2016-07-09', '2016-07-10'],
            dtype='period[D]')

In [34]:
# With both start and end given, no count is needed; the month-only start expands to its first day.
pd.period_range(start='2016-07', end='2016-07-10')

PeriodIndex(['2016-07-01', '2016-07-02', '2016-07-03', '2016-07-04',
             '2016-07-05', '2016-07-06', '2016-07-07', '2016-07-08',
             '2016-07-09', '2016-07-10'],
            dtype='period[D]')

In [35]:
# TIME OFFSETS
# A Timedelta is a fixed duration, parsed here from a plain-English string.
pd.Timedelta('1 day')

Timedelta('1 days 00:00:00')

In [36]:
# Adding a Timedelta shifts the Period forward; the minute-level frequency is kept.
pd.Period('2016-01-01 10:10') + pd.Timedelta('1 day')

Period('2016-01-02 10:10', 'T')

In [37]:
# The same arithmetic works on a Timestamp.
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('1 day')

Timestamp('2016-01-02 10:10:00')

In [38]:
# Timestamps resolve nanoseconds, so even a 15 ns offset is preserved.
pd.Timestamp('2016-01-01 10:10') + pd.Timedelta('15 ns')

Timestamp('2016-01-01 10:10:00.000000015')

In [39]:
# FANCY FREQUENCY SETTING
# Only want business days
# The weekend days are skipped in the result, and the 10:10 time of day is dropped at 'B' resolution.
# NOTE(review): newer pandas releases may warn that business-day Periods are deprecated - confirm against the installed version.
pd.period_range('2016-01-01 10:10', freq = 'B', periods = 10)

PeriodIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
             '2016-01-07', '2016-01-08', '2016-01-11', '2016-01-12',
             '2016-01-13', '2016-01-14'],
            dtype='period[B]')

In [40]:
# It's possible to combine frequencies. What if you want to advance by 25 hours each day? What are the 2 ways to do it?
# First way: a single multiplied alias. The lowercase 'h' spelling is used because
# the uppercase 'H' alias is deprecated in recent pandas releases; older releases accept both.
p1 = pd.period_range('2016-01-01 10:10', freq='25h', periods=10)

In [41]:
# Another way to step 25 hours: chain two aliases (one day plus one hour) and let pandas add them up.
# Lowercase 'h' replaces the uppercase 'H' alias, which is deprecated in recent pandas releases.
p2 = pd.period_range('2016-01-01 10:10', freq='1D1h', periods=10)

In [42]:
# Each step advances 25 hours; the 10:10 start is floored to the hour.
p1

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]')

In [43]:
# Same periods as p1: the chained alias was normalised to a single 25-hour frequency.
p2

PeriodIndex(['2016-01-01 10:00', '2016-01-02 11:00', '2016-01-03 12:00',
             '2016-01-04 13:00', '2016-01-05 14:00', '2016-01-06 15:00',
             '2016-01-07 16:00', '2016-01-08 17:00', '2016-01-09 18:00',
             '2016-01-10 19:00'],
            dtype='period[25H]')

In [44]:
# What are some other combos you can produce?
# hint: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

In [45]:
# Weekly periods: each one runs Monday to Sunday, and the first is the week that contains 1 Jan 2016.
p3 = pd.period_range('2016-01-01', freq = '1W', periods = 10)
p3

PeriodIndex(['2015-12-28/2016-01-03', '2016-01-04/2016-01-10',
             '2016-01-11/2016-01-17', '2016-01-18/2016-01-24',
             '2016-01-25/2016-01-31', '2016-02-01/2016-02-07',
             '2016-02-08/2016-02-14', '2016-02-15/2016-02-21',
             '2016-02-22/2016-02-28', '2016-02-29/2016-03-06'],
            dtype='period[W-SUN]')

In [46]:
# One-minute periods starting at 08:00.
p4 = pd.period_range('2016-01-01 08:00', freq = 'min', periods = 10)
p4

PeriodIndex(['2016-01-01 08:00', '2016-01-01 08:01', '2016-01-01 08:02',
             '2016-01-01 08:03', '2016-01-01 08:04', '2016-01-01 08:05',
             '2016-01-01 08:06', '2016-01-01 08:07', '2016-01-01 08:08',
             '2016-01-01 08:09'],
            dtype='period[T]')

In [47]:
# Ninety-minute periods, written as one hour plus thirty minutes.
# 'h' and 'min' replace the 'H' and 'T' aliases, which are deprecated in recent pandas releases.
p4 = pd.period_range('2016-01-01 08:00', freq='1h30min', periods=10)
p4

PeriodIndex(['2016-01-01 08:00', '2016-01-01 09:30', '2016-01-01 11:00',
             '2016-01-01 12:30', '2016-01-01 14:00', '2016-01-01 15:30',
             '2016-01-01 17:00', '2016-01-01 18:30', '2016-01-01 20:00',
             '2016-01-01 21:30'],
            dtype='period[90T]')

In [48]:
# INDEXING WITH TIME OBJECTS
# You can use these objects for indices
# Let's start with using a date range as above
rng = pd.date_range('2016 Jul 1', periods=10, freq='D')
# One integer per day, labelled by the dates in rng. (A bare `rng` before this line
# was removed: only the last expression of a cell is displayed, so it had no effect.)
pd.Series(range(len(rng)), index=rng)

2016-07-01    0
2016-07-02    1
2016-07-03    2
2016-07-04    3
2016-07-05    4
2016-07-06    5
2016-07-07    6
2016-07-08    7
2016-07-09    8
2016-07-10    9
Freq: D, dtype: int64

In [49]:
# A period index is the better fit when each label stands for a span of time
# (here a whole month) rather than a single instant.
# NOTE: the values are unseeded random draws, so they differ on every run.

periods = [pd.Period(month) for month in ('2016-01', '2016-02', '2016-03')]
ts = pd.Series(data=np.random.randn(len(periods)), index=periods)
ts

2016-01    1.606950
2016-02    1.332032
2016-03   -0.819913
Freq: M, dtype: float64

In [50]:
# What type is the index for ts?
# A list of Period objects passed as the index becomes a PeriodIndex.
type(ts.index)

pandas.core.indexes.period.PeriodIndex

In [51]:
# Experiment with various indices
# Hint: does ts['2016'] work?
# Yes: a year string selects every period that falls inside 2016 (all three months here).
print(ts['2016'])
# Comparing the index with a string builds a boolean mask; this keeps the periods up to and including 2016-02.
ts[ts.index <= '2016-02']

2016-01    1.606950
2016-02    1.332032
2016-03   -0.819913
Freq: M, dtype: float64


2016-01    1.606950
2016-02    1.332032
Freq: M, dtype: float64

In [52]:
# Timestamped data can be converted to period indices with to_period and vice versa with to_timestamp
# Ten hourly timestamps starting at 08:00 on 10 July 2016. The start is written in ISO
# form so it cannot be misread as day-first, and lowercase 'h' is used because the
# uppercase 'H' alias is deprecated in recent pandas releases.
ts = pd.Series(range(10), index=pd.date_range(start='2016-07-10 08:00', periods=10, freq='h'))
ts

2016-07-10 08:00:00    0
2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
2016-07-10 12:00:00    4
2016-07-10 13:00:00    5
2016-07-10 14:00:00    6
2016-07-10 15:00:00    7
2016-07-10 16:00:00    8
2016-07-10 17:00:00    9
Freq: H, dtype: int64

In [53]:
# Called without arguments, to_period() reuses the index's own (hourly) frequency.
ts_period = ts.to_period()
ts_period

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
2016-07-10 12:00    4
2016-07-10 13:00    5
2016-07-10 14:00    6
2016-07-10 15:00    7
2016-07-10 16:00    8
2016-07-10 17:00    9
Freq: H, dtype: int64

In [54]:
ts_period['2016-07-10 08:30':'2016-07-10 11:45'] # period slicing keeps every period that overlaps the requested span, so the 08:00 hour is included

2016-07-10 08:00    0
2016-07-10 09:00    1
2016-07-10 10:00    2
2016-07-10 11:00    3
Freq: H, dtype: int64

In [55]:
ts['2016-07-10 08:30':'2016-07-10 11:45'] # timestamp slicing keeps only the instants that fall inside the span, so 08:00 is excluded

2016-07-10 09:00:00    1
2016-07-10 10:00:00    2
2016-07-10 11:00:00    3
Freq: H, dtype: int64