In [1]:
%matplotlib inline
import numpy as np
import numpy.ma as ma
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import line_profiler

import datetime

from io import StringIO
from pandas.api.types import CategoricalDtype

import pandas._testing as tm

# 2.19 Time series / date functionality

In [10]:
# One calendar day written three ways: a date string, a NumPy datetime64
# and a stdlib datetime. to_datetime folds them into a single DatetimeIndex.
same_day_inputs = [
    '1.1.2018',
    np.datetime64('2018-01-01'),
    datetime.datetime(2018, 1, 1),
]
dti = pd.to_datetime(same_day_inputs)
dti[1]

Timestamp('2018-01-01 00:00:00')

In [11]:
# Three timestamps, four hours apart, starting at midnight on 2018-01-01.
dti = pd.date_range(start='2018-01-01', periods=3, freq='4H')
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 04:00:00',
               '2018-01-01 08:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [12]:
# Attach the Europe/Berlin zone to the naive index (wall-clock times are kept).
# NOTE(review): rebinding `dti` makes this cell non-idempotent; presumably a
# second run fails because the index is already tz-aware. Confirm.
dti = dti.tz_localize('Europe/Berlin')


In [13]:
# Re-express the same instants in US/Central; the echoed index has freq=None.
dti.tz_convert('US/Central')

DatetimeIndex(['2017-12-31 17:00:00-06:00', '2017-12-31 21:00:00-06:00',
               '2018-01-01 01:00:00-06:00'],
              dtype='datetime64[ns, US/Central]', freq=None)

In [14]:
# Hourly series 0..4, downsampled into two-hour bins.
idx = pd.date_range(start="2018-01-01", periods=5, freq="H")
ts = pd.Series(range(idx.size), index=idx)
print(ts)

# One resampler serves all three aggregations; only the printed mean and the
# final median show up in the cell output.
two_hour_bins = ts.resample("2H")
two_hour_bins.count()
print(two_hour_bins.mean())
two_hour_bins.median()

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64
2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64


2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [15]:
# Calendar arithmetic vs. business-day arithmetic, starting from a Friday.
friday = pd.Timestamp("2018-01-05")
friday.day_name()

# A plain one-day Timedelta lands on the weekend.
one_day = pd.Timedelta("1 day")
saturday = friday + one_day
saturday.day_name() # 'Saturday'

# Add 1 business day (Friday --> Monday)
one_business_day = pd.offsets.BDay()
monday = friday + one_business_day
monday.day_name()

'Monday'

In [16]:
# Integer series 0..2 indexed by three consecutive days; the bare year string
# '2020' resolves to 2020-01-01 (see the echoed index).
pd.Series(range(3), index=pd.date_range('2020', freq='D', periods=3))


2020-01-01    0
2020-01-02    1
2020-01-03    2
Freq: D, dtype: int64

In [17]:
# Single-row frame; note that 'date' is still a plain string at this point.
df = pd.DataFrame(
    {
        "type A": [15],
        "type B": [20],
        "date": ["2012-03-01"],
        "station": ["s1"],
    }
)

In [18]:
# Echo the current `df` as the cell's output.
df

Unnamed: 0,type A,type B,date,station
0,15,20,2012-03-01,s1


In [19]:
# A Timedelta of one day and a Day offset both move Friday to Saturday.
friday = pd.Timestamp("2018-01-05")
friday.day_name()
friday + pd.Timedelta('1 Day')
day_after = friday + pd.offsets.Day()
day_after.day_name()

'Saturday'

In [20]:
# Three daily timestamps as a Series, then as a one-column DataFrame.
first_days_2020 = pd.date_range('2020', periods=3)
s = pd.Series(first_days_2020)
df = s.to_frame()

In [21]:
# Six consecutive dates plus two derived columns from the .dt accessor:
# the weekday name and its number (Monday=0 ... Sunday=6, as echoed).
df = (
    pd.Series(pd.date_range('1/13/2021', periods=6))
    .to_frame(name="dates")
    .assign(
        day=lambda frame: frame["dates"].dt.day_name(),
        weekday=lambda frame: frame["dates"].dt.weekday,
    )
)
df

Unnamed: 0,dates,day,weekday
0,2021-01-13,Wednesday,2
1,2021-01-14,Thursday,3
2,2021-01-15,Friday,4
3,2021-01-16,Saturday,5
4,2021-01-17,Sunday,6
5,2021-01-18,Monday,0


In [22]:
# pd.NaT handed to each scalar constructor in turn. Only the last expression
# of the cell (an argument-less DateOffset) is echoed.
pd.Timestamp(pd.NaT)
pd.Timedelta(pd.NaT)
pd.Period(pd.NaT)
pd.DateOffset()

<DateOffset>

In [23]:
# Whole weeks contained in 14 days (floor division gives 2), plus one -> 3.
pd.Timedelta('14 days')//pd.Timedelta(1, 'W')+1

3

In [24]:
# The same Timestamp built three ways: from a string, from a stdlib datetime,
# and from positional year/month/day integers.
pd.Timestamp("2012-05-01")
pd.Timestamp(datetime.datetime(2012, 5, 1))
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [25]:
# With no frequency given, the string alone decides the span (presumably a
# monthly period for "2011-01"; confirm). Passing 'D' positionally or as
# freq= yields the daily period 2011-01-01, as echoed.
pd.Period("2011-01")
pd.Period("2011.01", 'D')
pd.Period("2011.01", freq='D')

Period('2011-01-01', 'D')

In [26]:
# A Series indexed by timestamps gets a DatetimeIndex.
dates = pd.date_range('2020/01/01', periods=3)
ts = pd.Series(data=np.arange(3), index=dates)
type(ts.index) # pandas.core.indexes.datetimes.DatetimeIndex

pandas.core.indexes.datetimes.DatetimeIndex

In [27]:
# Month-end timestamps converted to monthly periods; a Series indexed by
# them gets a PeriodIndex.
periods = pd.date_range('2021-01', periods=3, freq='M').to_period()
type(periods) # pandas.core.indexes.period.PeriodIndex
ts = pd.Series(data=np.arange(3), index=periods)
ts.index # PeriodIndex(['2021-01', '2021-02', '2021-03'], dtype='period[M]', freq='M')
type(ts.index) #pandas.core.indexes.period.PeriodIndex

pandas.core.indexes.period.PeriodIndex

In [28]:
# Same arguments, two index types: date_range builds timestamps, while
# period_range builds the monthly periods themselves (the echoed PeriodIndex).
pd.date_range('2018-01', periods=3, freq='M')
pd.period_range('2018-01', periods=3, freq='M')

PeriodIndex(['2018-01', '2018-02', '2018-03'], dtype='period[M]', freq='M')

In [29]:
# A Period carries a frequency: here the day-long span starting 2011-01-01.
pd.Period("2011-01", 'D')
# A Timestamp is a single instant and takes no frequency. Its `freq` argument
# is deprecated and no longer exists in pandas 2.x, so none is passed here.
pd.Timestamp("2011-01")

Timestamp('2011-01-01 00:00:00', freq='D')

In [31]:
# Generate the timestamps first, then convert them to monthly periods.
pd.date_range('2021-01', periods=3, freq='M').to_period()

PeriodIndex(['2021-01', '2021-02', '2021-03'], dtype='period[M]', freq='M')

In [40]:
# Dotted date strings ('2020.01.01') passed to Timestamp, to_datetime,
# date_range and Period. The printed Timestamp and the final Period are the
# only results that appear in the cell output.
print(pd.Timestamp('2020.01.01')) # 2020-01-01 00:00:00
pd.to_datetime(['2020.01.01', '2020.01.02'])
pd.date_range('2020.01.01', periods=3, freq='D')

pd.Period('2020.01.01')

2020-01-01 00:00:00


Period('2020-01-01', 'D')

In [50]:
# Two differently formatted date strings plus a missing value: the echoed
# DatetimeIndex shows None turning into NaT. The first call takes a Series
# (its result is not echoed), the second a plain list.
pd.to_datetime(pd.Series(["Jul 31, 2020","2010-01-10", None])) 
pd.to_datetime(["Jul 31, 2020","2010-01-10", None]) # DatetimeIndex(['2020-07-31', '2010-01-10', 'NaT'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2020-07-31', '2010-01-10', 'NaT'], dtype='datetime64[ns]', freq=None)

In [55]:
# dayfirst=True is a preference, not a strict format: '04-18-2021' has no
# valid day-first reading (there is no month 18), and the echoed result is
# April 18.
# NOTE(review): the first call presumably yields 4 January 2021. Confirm.
pd.to_datetime('04-01-2021',dayfirst=True)
pd.to_datetime('04-18-2021',dayfirst=True) # Timestamp('2021-04-18 00:00:00')

Timestamp('2021-04-18 00:00:00')

In [61]:
# freq="infer" detects a regular spacing when there is one: evenly spaced
# dates get '2D', irregularly spaced dates get None.
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer") # DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-06"], freq="infer") # DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-06'], dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [63]:
# An explicit strptime-style format removes any day/month ambiguity:
# "12-11-2010" read with %d-%m-%Y is 12 November 2010, as echoed.
pd.to_datetime("2020/11/12", format="%Y/%m/%d")
pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M")

Timestamp('2010-11-12 00:00:00')

In [2]:
# Latest representable Timestamp.
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

In [11]:
# Roll forward to the next business quarter end: 2021-04-30 -> 2021-06-30.
pd.Timestamp('2021.04.30') + pd.offsets.BQuarterEnd()

Timestamp('2021-06-30 00:00:00')

In [16]:
# Despite the variable name, date_range yields timestamps, not periods:
# the dtype is datetime64[ns].
periods = pd.date_range(start='2021-01', periods=3, freq='M')
periods.dtype

dtype('<M8[ns]')

In [19]:
# Latest representable Timestamp.
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

In [27]:
# Date components stored one per column. The 'hours' column is present but
# is not part of the selection below.
df = pd.DataFrame(
    {
        'year': [2021, 2021],
        'month': [4, 5],
        'day': [30, 1],
        'hours': [23, 1],
    }
)
# to_datetime assembles one timestamp per row from the year/month/day columns.
# Omitting one of them (e.g. 'day') raises:
#   ValueError: to assemble mappings requires at least that
#   [year, month, day] be specified: [day] is missing
date_parts = df[['year', 'month', 'day']]
pd.to_datetime(date_parts)

0   2021-04-30
1   2021-05-01
dtype: datetime64[ns]

In [37]:
# The last entry is not a date, so the outcome depends entirely on `errors`.
raw_dates = ['2020/01/01', '2021.01.02', 'asd']

# The default and errors='raise' behave the same: parsing fails. The exception
# is caught and printed so that the rest of the cell (and a full "Run All")
# is not aborted. ParserError is a ValueError subclass, so catching ValueError
# covers it.
for kwargs in ({}, {'errors': 'raise'}):
    try:
        pd.to_datetime(raw_dates, **kwargs)
    except ValueError as exc:
        print(f"{type(exc).__name__}: {exc}")

pd.to_datetime(raw_dates, errors='ignore')  # input is handed back unparsed
coerced = pd.to_datetime(raw_dates, errors='coerce')  # unparseable entries become NaT
coerced

DatetimeIndex(['2020-01-01', '2021-01-02', 'NaT'], dtype='datetime64[ns]', freq=None)

In [46]:
# Epoch numbers interpreted with an explicit unit: seconds, then milliseconds.
pd.to_datetime([1349720105, 1349806505, 1349892905, 
                1349979305, 1350065705], unit="s")
pd.to_datetime([1349720105100, 1349720105200, 1349720105300, 
                1349720105400,1349720105500], unit='ms')
# No unit given: per the echoed result the float lands about 1.6 seconds after
# 1970-01-01 (i.e. it was read as nanoseconds) and the integer lands in 2021.
pd.to_datetime([1618866007.161886600, 1618866007161886601])

DatetimeIndex(['1970-01-01 00:00:01.618866007', '2021-04-19 21:00:07.161886601'], dtype='datetime64[ns]', freq=None)

In [45]:
# A nanosecond epoch count, with the unit stated explicitly.
pd.to_datetime(1490195805433502912, unit="ns")

Timestamp('2017-03-22 15:16:45.433502912')

In [53]:
# Timestamps -> Unix epoch seconds: subtract the epoch, then floor-divide the
# resulting timedeltas by one second.
stamps = pd.date_range(start="2012-10-08 18:15:05", periods=4, freq="D")
unix_epoch = pd.Timestamp("1970-01-01")
one_second = pd.Timedelta("1s")
(stamps - unix_epoch) // one_second

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

In [58]:
# Day counts measured from a custom origin (2000-01-01) instead of the Unix epoch.
pd.to_datetime([0, 1, 2], unit="D", origin=pd.Timestamp("2000-01-01"))

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'], dtype='datetime64[ns]', freq=None)

# 2.19.4 Generating ranges of timestamps

In [4]:
# Three consecutive stdlib datetimes: 3, 4 and 5 January 2020.
dates = [datetime.datetime(2020, 1, day) for day in (3, 4, 5)]
dates

# Explicit construction. Note the frequency information: freq is None.
index = pd.DatetimeIndex(dates)
index # DatetimeIndex(['2020-01-03', '2020-01-04', '2020-01-05'], dtype='datetime64[ns]', freq=None)

# The generic Index constructor converts datetimes to a DatetimeIndex automatically.
index = pd.Index(dates)
index # DatetimeIndex(['2020-01-03', '2020-01-04', '2020-01-05'], dtype='datetime64[ns]', freq=None)



DatetimeIndex(['2020-01-03', '2020-01-04', '2020-01-05'], dtype='datetime64[ns]', freq=None)

In [8]:
# Both endpoints are included; the resulting PeriodIndex is daily (period[D]).
pd.period_range(start='2020.01.01', end='2020.01.03')

PeriodIndex(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='period[D]', freq='D')

In [10]:
# Bounds of the representable Timestamp range; only the last expression
# (the minimum) is echoed.
pd.Timestamp.max # Timestamp('2262-04-11 23:47:16.854775807')
pd.Timestamp.min # Timestamp('1677-09-21 00:12:43.145225')

Timestamp('1677-09-21 00:12:43.145225')

In [66]:
# Window reused by later range examples: 2011-01-01 through 2012-01-01.
start, end = datetime.datetime(2011, 1, 1), datetime.datetime(2012, 1, 1)

# Month-end frequency through three different range builders.
# NOTE(review): `df` is bound to a DatetimeIndex here, not a DataFrame.
df = pd.date_range(start=start, end=end, freq='M')
pd.period_range(start=start, end=end, freq='M')
pd.bdate_range(start=start, end=end, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],
              dtype='datetime64[ns]', freq='M')

In [13]:
# Ranges defined by a start and a count rather than an end: quarter starts,
# business quarter starts, and weekly periods ending on Monday (echoed).
pd.date_range(start=start, periods=5, freq='QS')
pd.bdate_range(start=start, periods=5, freq='BQS')
pd.period_range(start=start, periods=4, freq='W-MON')

PeriodIndex(['2010-12-28/2011-01-03', '2011-01-04/2011-01-10',
             '2011-01-11/2011-01-17', '2011-01-18/2011-01-24'],
            dtype='period[W-MON]', freq='W-MON')

In [17]:
# An explicit freq="M" still means calendar month end, even through
# bdate_range: the echoed dates include 2011-04-30 and 2011-12-31.
pd.bdate_range(start, end, freq="M")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],
              dtype='datetime64[ns]', freq='M')

In [18]:
# "BM" is business month end: the echoed dates show 2011-04-29, 2011-07-29
# and 2011-12-30 rather than the calendar month ends.
pd.date_range(start, end, freq="BM")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [22]:
# With only `end` and `periods`, the range is counted backwards from `end`:
# the last three business month ends.
pd.bdate_range(end=end, periods=3, freq='BM')

DatetimeIndex(['2011-10-31', '2011-11-30', '2011-12-30'], dtype='datetime64[ns]', freq='BM')

In [23]:
# Ingredients for a custom business calendar: the weekdays that count as
# business days, and two dates to exclude as holidays.
weekmask = "Mon Wed Fri"
holidays = [
    datetime.datetime(2011, month, day)
    for month, day in ((1, 5), (3, 14))
]

In [48]:
# freq='C' (custom business day) applies the weekmask and holiday list:
# the echoed range skips 2011-01-05 and 2011-03-14.
pd.bdate_range(
    start=datetime.datetime(2011, 1, 1),
    end=datetime.datetime(2011, 4, 30),
    freq='C',
    weekmask=weekmask,
    holidays=holidays,
)

DatetimeIndex(['2011-01-03', '2011-01-07', '2011-01-10', '2011-01-12',
               '2011-01-14', '2011-01-17', '2011-01-19', '2011-01-21',
               '2011-01-24', '2011-01-26', '2011-01-28', '2011-01-31',
               '2011-02-02', '2011-02-04', '2011-02-07', '2011-02-09',
               '2011-02-11', '2011-02-14', '2011-02-16', '2011-02-18',
               '2011-02-21', '2011-02-23', '2011-02-25', '2011-02-28',
               '2011-03-02', '2011-03-04', '2011-03-07', '2011-03-09',
               '2011-03-11', '2011-03-16', '2011-03-18', '2011-03-21',
               '2011-03-23', '2011-03-25', '2011-03-28', '2011-03-30',
               '2011-04-01', '2011-04-04', '2011-04-06', '2011-04-08',
               '2011-04-11', '2011-04-13', '2011-04-15', '2011-04-18',
               '2011-04-20', '2011-04-22', '2011-04-25', '2011-04-27',
               '2011-04-29'],
              dtype='datetime64[ns]', freq='C')

In [51]:
# Bounds of the representable Timestamp range; only the last expression
# (the maximum) is echoed.
pd.Timestamp.min # Timestamp('1677-09-21 00:12:43.145225')
pd.Timestamp.max # Timestamp('2262-04-11 23:47:16.854775807')

Timestamp('2262-04-11 23:47:16.854775807')

In [53]:
# Codes of the form YYDDD: two-digit year (%y) followed by day of year (%j).
year_day_codes = ['16234', '16003']
df = pd.DataFrame({'Date': pd.to_datetime(year_day_codes, format='%y%j')})
df

Unnamed: 0,Date
0,2016-08-21
1,2016-01-03


In [58]:
# Random values on business month ends between `start` and `end`.
rng = pd.bdate_range(start=start, end=end, freq="BM")
ts = pd.Series(np.random.randn(rng.size), index=rng)

# Positional slices keep a DatetimeIndex; a step of 2 gives freq '2BM' (echoed).
ts[:5]
ts[:5].index
ts[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')

In [60]:
# Exact lookup by date string, then a slice from a datetime onward. The slice
# start (2011-12-25) is not itself in the index; the echoed result begins at
# 2011-12-30.
ts["1/31/2011"]
ts[datetime.datetime(2011, 12, 25):]

2011-12-30   -0.239682
Freq: BM, dtype: float64

In [64]:
# Partial-string indexing: a year string selects that year's entries and a
# year-month string selects that month's entries.
ts['2011']
ts['2011-5']

2011-01-31   -0.100624
2011-02-28   -0.452321
2011-03-31    2.077659
2011-04-29    0.559258
2011-05-31    0.701382
2011-06-30   -0.544770
2011-07-29   -1.489551
2011-08-31   -0.855428
2011-09-30    0.955600
2011-10-31    0.366601
2011-11-30    1.572487
2011-12-30   -0.239682
Freq: BM, dtype: float64

In [74]:
# One-column frame sharing ts's DatetimeIndex; the column is named directly
# instead of renaming the default column 0 in place afterwards.
df = ts.to_frame(name='col')

# df[...] with this date string raises KeyError. The error is caught and shown
# so that the .loc lookup below still runs.
try:
    df['2011-01-31']
except KeyError as exc:
    print(f"KeyError: {exc}")

# Row lookup by date goes through .loc.
df.loc['2011-01-31'] # col   -0.100624

col   -0.100624
Name: 2011-01-31 00:00:00, dtype: float64

In [29]:
# 100,000 consecutive minutes of random data starting 2013-01-01.
n_minutes = 100000
minute_index = pd.date_range("20130101", periods=n_minutes, freq="T")
dft = pd.DataFrame(np.random.randn(n_minutes, 1), columns=["A"], index=minute_index)

# Partial-string selection: a whole year via .loc, then a month-to-month
# slice via [] (the echoed frame runs from January through February).
dft.loc['2013']
dft["2013-1":"2013-2"]

Unnamed: 0,A
2013-01-01 00:00:00,0.390870
2013-01-01 00:01:00,0.350306
2013-01-01 00:02:00,1.538080
2013-01-01 00:03:00,-0.394361
2013-01-01 00:04:00,-1.246669
...,...
2013-02-28 23:55:00,0.122760
2013-02-28 23:56:00,1.239373
2013-02-28 23:57:00,-1.572780
2013-02-28 23:58:00,-0.567776


In [92]:
# Two-level index: ten half-day timestamps crossed with the labels 'a'/'b'.
half_day_stamps = pd.date_range("2020.01.01", periods=10, freq='12H')
dft2 = pd.DataFrame(
    np.random.randn(20, 1),
    columns=['A'],
    index=pd.MultiIndex.from_product([half_day_stamps, ['a', 'b']]),
)
# Partial-string match against the datetime level while it is the outer level.
dft2.loc['20200105']

# Swap so the label is the outer level, then use IndexSlice to address the
# datetime level: first across all labels, then for label 'a' only.
idx = pd.IndexSlice
dft2 = dft2.swaplevel(0, 1).sort_index()
dft2.loc[idx[:, '20200105'], :]
dft2.loc[idx['a', '20200105'], :]

Unnamed: 0,Unnamed: 1,A
a,2020-01-05 00:00:00,-0.922086
a,2020-01-05 12:00:00,0.619447


In [95]:
# One row stamped at midnight US/Pacific.
pacific_index = pd.DatetimeIndex(["2019-01-01"], tz="US/Pacific")
df = pd.DataFrame([0], index=pacific_index)

# Slice bounds carry their own UTC offset (+04:00); the echoed result shows
# the Pacific-stamped row being matched.
df["2019-01-01 12:00:00+04:00":"2019-01-01 13:00:00+04:00"]

Unnamed: 0,0
2019-01-01 00:00:00-08:00,0


In [96]:
# Slice bounds given at +04:00; the echoed row is stamped -08:00 and is still matched.
df["2019-01-01 12:00:00+04:00":"2019-01-01 13:00:00+04:00"]

Unnamed: 0,0
2019-01-01 00:00:00-08:00,0


In [14]:
# Index whose finest detail is the minute.
minute_stamps = pd.DatetimeIndex(
    ["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"]
)
series_minute = pd.Series(np.arange(1, 4), index=minute_stamps)
series_minute.index.resolution # 'minute'
# A timestamp string less accurate than a minute gives a Series object.
series_minute['2011-12-31 23'] # 2011-12-31 23:59:00
# A string at minute accuracy is an exact match and gives a scalar.
series_minute['2012-01-01 00:00'] # 2

2011-12-31 23:59:00    1
dtype: int64

In [24]:
# Two-column frame on the same minute-resolution index as series_minute.
dft_minute = pd.DataFrame(
    data={"a": [1, 2, 3], "b": [4, 5, 6]},
    index=series_minute.index,
)
# .loc with a minute-accurate string returns that single row as a Series (echoed).
dft_minute.loc['2012-01-01 00:02']

a    3
b    6
Name: 2012-01-01 00:02:00, dtype: int64

In [28]:
# Month strings parse to the first day of each month, so the index
# resolution is 'day' even though the spacing is monthly.
month_starts = pd.DatetimeIndex(["2011-12", "2012-01", "2012-02"])
series_monthly = pd.Series([1, 2, 3], index=month_starts)
series_monthly.index.resolution # 'day'
series_monthly["2011-12"] # coarser than 'day': returns a Series (the 2011-12-01 row)
series_monthly["2011-12-01"] # day accuracy, exact match: returns scalar 1

1

In [32]:
# Slicing with datetime objects instead of strings.
# NOTE(review): the end bound is presumably the exact instant 2013-02-28 00:00,
# not the whole day; the echoed tail stops around the end of 27 Feb. Confirm.
dft[datetime.datetime(2013, 1, 1): datetime.datetime(2013, 2, 28)]

Unnamed: 0,A
2013-01-01 00:00:00,0.390870
2013-01-01 00:01:00,0.350306
2013-01-01 00:02:00,1.538080
2013-01-01 00:03:00,-0.394361
2013-01-01 00:04:00,-1.246669
...,...
2013-02-27 23:56:00,-1.037733
2013-02-27 23:57:00,-1.733930
2013-02-27 23:58:00,0.424122
2013-02-27 23:59:00,1.417177


In [33]:
# Datetime slice with minute-level bounds; the echoed frame starts at
# 2013-01-01 10:12.
dft[datetime.datetime(2013, 1, 1, 10, 12, 0): datetime.datetime(2013, 2, 28, 10, 12, 0)]

Unnamed: 0,A
2013-01-01 10:12:00,-1.417470
2013-01-01 10:13:00,-1.125379
2013-01-01 10:14:00,-1.855770
2013-01-01 10:15:00,-1.651701
2013-01-01 10:16:00,1.253949
...,...
2013-02-28 10:08:00,-0.697029
2013-02-28 10:09:00,2.197959
2013-02-28 10:10:00,-0.673779
2013-02-28 10:11:00,1.285071


In [43]:
# Weekly timestamps covering 2020, with random values.
rng2 = pd.date_range("2020-01-01", "2021-01-01", freq="W")
rng2
ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
ts2.truncate(before='2020-1', after='2020-02') # after month is not included
ts2['2020-01':'2020-02']
# Positional selection goes through .iloc. Plain ts2[[0, 2, 6]] relies on the
# integer-as-position fallback of Series.__getitem__, which is deprecated on a
# non-integer index such as this DatetimeIndex.
ts2.iloc[[0, 2, 6]]
ts2.iloc[[0, 2, 6]].index # Freq is lost
ts2.iloc[[0, 2, 4]].index # Freq is present

DatetimeIndex(['2020-01-05', '2020-01-19', '2020-02-02'], dtype='datetime64[ns]', freq='2W-SUN')

In [48]:
# Four days straddling the ISO year boundary: per the echoed table,
# 2019-12-29 is in ISO week 52 of 2019 and the next three days are in
# week 1 of 2020.
idx = pd.date_range(start="2019-12-29", periods=4, freq="D")
idx.isocalendar()
idx_as_series = idx.to_series()
idx_as_series.dt.isocalendar()

Unnamed: 0,year,week,day
2019-12-29,2019,52,7
2019-12-30,2020,1,1
2019-12-31,2020,1,2
2020-01-01,2020,1,3


In [54]:
friday = pd.Timestamp('2021.05.07')
friday.day_name()

# Add 2 business days (Friday --> Tuesday)
two_business_days = 2 * pd.offsets.BDay()
# Offsets are applied with `+`, in either operand order. The older
# `offset.apply(ts)` method is deprecated and absent from pandas 2.x.
friday + two_business_days
two_business_days + friday
(two_business_days + friday).day_name()

'Tuesday'

In [59]:
# 2021-05-08 is a Saturday, so this timestamp is not a valid BusinessHour date.
ts = pd.Timestamp("2021-05-08 00:00:00")

# BusinessHour's valid offset dates are Monday through Friday
offset = pd.offsets.BusinessHour(start="09:00")

# Bring the date to the closest offset date (Monday)
offset.rollforward(ts) # Timestamp('2021-05-10 09:00:00')

# Date is brought to the closest offset date first and then the hour is added
ts + offset # Timestamp('2021-05-10 10:00:00')

Timestamp('2021-05-10 10:00:00')

In [67]:
# Offsets are applied with `+`; the older `offset.apply(ts)` method is
# deprecated and absent from pandas 2.x. normalize() then resets the time of
# day to midnight.
ts = pd.Timestamp("2014-01-01 09:00")
day = pd.offsets.Day()
ts + day # Timestamp('2014-01-02 09:00:00')
(ts + day).normalize() # Timestamp('2014-01-02 00:00:00')

# Adding an hour at 22:00 stays on the same day ...
ts = pd.Timestamp("2014-01-01 22:00")
hour = pd.offsets.Hour()
(ts + hour).normalize() # Timestamp('2014-01-01 00:00:00')

# ... while adding it at 23:30 crosses midnight, so normalize() lands on the next day.
(pd.Timestamp("2014-01-01 23:30") + hour).normalize() # Timestamp('2014-01-02 00:00:00')

Timestamp('2014-01-02 00:00:00')