In [1]:
import numpy as np  
import pandas as pd
from datetime import datetime

In [6]:
# pd.to_datetime accepts heterogeneous inputs: a date string, a NumPy
# datetime64 and a standard-library datetime all end up in one DatetimeIndex.
dti = pd.to_datetime(
    [
        "1/1/2022",
        np.datetime64("2022-05-01"),
        datetime(2018, 1, 1),
    ]
)
# Subtracting two entries produces a pandas Timedelta.
dti[1] - dti[0]

Timedelta('120 days 00:00:00')

In [7]:
# Build a DatetimeIndex of evenly spaced timestamps.
#   periods -- how many timestamps to generate
#   freq    -- spacing between them; "h" is the hourly alias (the uppercase
#              "H" spelling is deprecated as of pandas 2.2)
dti = pd.date_range("2018-01-01", periods=3, freq="h")
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

In [26]:
# Time series conventionally keep the time component in the index of a Series
# or DataFrame, so that operations can be expressed relative to time.
# Here: a three-element Series indexed by consecutive calendar days.
pd.Series(
    range(3),
    index=pd.date_range(start="2000", periods=3, freq="D"),
)

2000-01-01    0
2000-01-02    1
2000-01-03    2
Freq: D, dtype: int64

In [33]:
# When datetimes are the *values* of a Series (not its index), the .dt
# accessor exposes their components -- here, the day of the month.
(
    pd.Series(pd.date_range(start="2000", periods=3, freq="D")),
    pd.Series(pd.date_range(start="2000", periods=3, freq="D")).dt.day,
)

(0   2000-01-01
 1   2000-01-02
 2   2000-01-03
 dtype: datetime64[ns],
 0    1
 1    2
 2    3
 dtype: int64)

In [34]:
# Contrast a Series of monthly periods (whole-month spans) with a Series of
# month-end timestamps (single points in time).
(
    pd.Series(pd.period_range(start="1/1/2011", periods=3, freq="M")),
    pd.Series(pd.date_range(start="2000", periods=3, freq="M")),
)

(0    2011-01
 1    2011-02
 2    2011-03
 dtype: period[M],
 0   2000-01-31
 1   2000-02-29
 2   2000-03-31
 dtype: datetime64[ns])

In [8]:
# Attach a time zone to the naive index: tz_localize labels the existing wall
# times as UTC without shifting them (tz_convert is the call that moves
# between zones).
# The guard keeps the cell re-runnable: calling tz_localize on an index that
# is already tz-aware raises TypeError.
if dti.tz is None:
    dti = dti.tz_localize("UTC")
dti

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

In [9]:
# Express the same instants in another time zone.
dti.tz_convert(tz="US/Pacific")

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

In [13]:
# Six hourly timestamps used as the index of a small demo Series.
# "h" is the hourly alias; the uppercase "H" spelling is deprecated as of
# pandas 2.2.
idx = pd.date_range("2018-01-01", periods=6, freq="h")

# Values 0..5, one per timestamp.
ts = pd.Series(range(len(idx)), index=idx)
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
2018-01-01 05:00:00    5
Freq: H, dtype: int64

In [14]:
# resample() is a time-based groupby: rows are grouped into 2-hour bins and
# mean() then reduces each bin to a single value.
# "2h" uses the lowercase hourly alias; the uppercase "H" spelling is
# deprecated as of pandas 2.2.
ts.resample("2h").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.5
Freq: 2H, dtype: float64

In [20]:
# A single point in time; day_name() reports which weekday it falls on.
day = pd.Timestamp("2023-10-19")
day.day_name()

'Thursday'

In [21]:
# Advance to the following calendar day. The value is rebuilt from the literal
# start date rather than incremented in place (`day += ...`), so re-running the
# cell always yields 2023-10-20 instead of drifting one day further each time.
day = pd.Timestamp("2023-10-19") + pd.Timedelta("1 day")
day.day_name()

'Friday'

In [22]:
# Show the built-in documentation for the business-day offset
# (pd.offsets.BDay is an alias of pd.offsets.BusinessDay).
help(pd.offsets.BusinessDay)

Help on class BusinessDay in module pandas._libs.tslibs.offsets:

class BusinessDay(BusinessMixin)
 |  DateOffset subclass representing possibly n business days.
 |  
 |  Method resolution order:
 |      BusinessDay
 |      BusinessMixin
 |      SingleConstructorOffset
 |      BaseOffset
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __setstate__(...)
 |      Reconstruct an instance from a pickled state
 |  
 |  apply_index = wrapper(self, other)
 |  
 |  is_on_offset(...)
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __pyx_vtable__ = <capsule object NULL>
 |  
 |  ----------------------------------------------------------------------
 |  Met

In [25]:
# Adding one business-day offset moves to the next business day; starting
# from a Friday this skips the weekend.
day + pd.offsets.BDay(1)

Timestamp('2023-10-23 00:00:00')

In [2]:
# Timestamp: a value associated with a single point in time.
# Other accepted constructor forms: pd.Timestamp("2012-05-01"), pd.Timestamp(2012, 5, 1)
time_stamp = pd.Timestamp(datetime(year=2017, month=1, day=1))
time_stamp

Timestamp('2017-01-01 00:00:00')

In [35]:
# Period: a value associated with a span of time rather than a single instant.
# The frequency is inferred from the string unless it is passed explicitly.
(
    pd.Period("2011-01"),
    pd.Period("2012-05", freq="D"),
)

(Period('2011-01', 'M'), Period('2012-05-01', 'D'))

In [38]:
# A plain list of Timestamps passed as a Series index becomes a DatetimeIndex.
dates = [pd.Timestamp(text) for text in ("2012-05-01", "2012-05-02", "2012-05-03")]

# The values are random placeholders; only the resulting index is of interest.
ts = pd.Series(data=np.random.randn(3), index=dates)
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [40]:
# A list of Period objects passed as a Series index becomes a PeriodIndex.
periods = [pd.Period(month) for month in ("2012-01", "2012-02", "2012-03")]

# The values are random placeholders; only the resulting index is of interest.
ts = pd.Series(data=np.random.randn(3), index=periods)
ts.index

PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]')

In [42]:
# to_datetime converts date-like input to datetimes: a Series in gives a
# datetime64 Series out (None becomes NaT); a list gives a DatetimeIndex.
(
    pd.to_datetime(pd.Series(["Jul 31, 2009", "Jan 10, 2010", None])),
    pd.to_datetime(["2005/11/23", "2010/12/31"]),
)

(0   2009-07-31
 1   2010-01-10
 2          NaT
 dtype: datetime64[ns],
 DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None))

In [44]:
# For dates written day-first (European style), pass dayfirst=True.
# The flag is a preference, not a strict rule: the second input cannot be
# day-first (there is no month 14), so it is still parsed as month-first.
(
    pd.to_datetime(["04-01-2012 10:00"], dayfirst=True),
    pd.to_datetime(["04-14-2012 10:00"], dayfirst=True),
)


(DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None),
 DatetimeIndex(['2012-04-14 10:00:00'], dtype='datetime64[ns]', freq=None))

If you pass a single string to `to_datetime`, it returns a single `Timestamp`. The `Timestamp` constructor also accepts string input, but it does not accept string-parsing options such as `dayfirst` or `format`; use `to_datetime` when these are required.

In [45]:
# An explicit format string states the field order outright; both inputs
# below parse to the same Timestamp.
(
    pd.to_datetime("2010/11/12", format="%Y/%m/%d"),
    pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M"),
)

(Timestamp('2010-11-12 00:00:00'), Timestamp('2010-11-12 00:00:00'))

In [46]:
# One column per datetime component; each row describes one timestamp.
df = pd.DataFrame(
    {
        "year": [2015, 2016],
        "month": [2, 3],
        "day": [4, 5],
        "hour": [2, 3],
    }
)
df

Unnamed: 0,year,month,day,hour
0,2015,2,4,2
1,2016,3,5,3


In [47]:
# to_datetime can assemble datetimes from a DataFrame whose columns hold the
# individual components (here year, month, day and hour), one result per row.
pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [49]:
# Handling unparseable input: errors="ignore" hands the input back unchanged,
# whereas errors="coerce" replaces each invalid entry with NaT.
(
    pd.to_datetime(["2009/07/31", "asd"], errors="ignore"),
    pd.to_datetime(["2009/07/31", "asd"], errors="coerce"),
)

(Index(['2009/07/31', 'asd'], dtype='object'),
 DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None))

In [54]:
# Numeric epoch values are interpreted according to `unit`:
# seconds in the first call, milliseconds in the second.
(
    pd.to_datetime(
        [1349720105, 1349806505, 1349892905, 1349979305, 1350065705],
        unit="s",
        origin="unix",
    ),
    pd.to_datetime(
        [1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500],
        unit="ms",
    ),
)


(DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
                '2012-10-10 18:15:05', '2012-10-11 18:15:05',
                '2012-10-12 18:15:05'],
               dtype='datetime64[ns]', freq=None),
 DatetimeIndex(['2012-10-08 18:15:05.100000', '2012-10-08 18:15:05.200000',
                '2012-10-08 18:15:05.300000', '2012-10-08 18:15:05.400000',
                '2012-10-08 18:15:05.500000'],
               dtype='datetime64[ns]', freq=None))

In [55]:
# Reverse direction: timestamps -> whole seconds since 1970-01-01.
stamps = pd.date_range(start="2012-10-08 18:15:05", periods=4, freq="D")
# Subtract the epoch to get Timedeltas, then floor-divide by one second.
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

In [56]:
# A custom origin: the numbers count days elapsed since 1960-01-01.
pd.to_datetime([1, 2, 3], origin=pd.Timestamp("1960-01-01"), unit="D")

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

If we need timestamps at a regular frequency, we can use the `date_range()` and `bdate_range()` functions to create a `DatetimeIndex`. The default frequency for `date_range` is one calendar day, while the default for `bdate_range` is one business day:

In [57]:
# Calendar-day index running from 2022-01-01 through 2023-01-01, both ends
# included.
start, end = datetime(2022, 1, 1), datetime(2023, 1, 1)
index = pd.date_range(start=start, end=end)
index


DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
               '2022-01-09', '2022-01-10',
               ...
               '2022-12-23', '2022-12-24', '2022-12-25', '2022-12-26',
               '2022-12-27', '2022-12-28', '2022-12-29', '2022-12-30',
               '2022-12-31', '2023-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [58]:
# Same start and end as before, but using bdate_range (business-day frequency).
index = pd.bdate_range(start=start, end=end)
index

DatetimeIndex(['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06',
               '2022-01-07', '2022-01-10', '2022-01-11', '2022-01-12',
               '2022-01-13', '2022-01-14',
               ...
               '2022-12-19', '2022-12-20', '2022-12-21', '2022-12-22',
               '2022-12-23', '2022-12-26', '2022-12-27', '2022-12-28',
               '2022-12-29', '2022-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [62]:
# "BM" = business month end: one timestamp per month, on its last business day.
pd.date_range(start=start, end=end, freq="BM")

DatetimeIndex(['2022-01-31', '2022-02-28', '2022-03-31', '2022-04-29',
               '2022-05-31', '2022-06-30', '2022-07-29', '2022-08-31',
               '2022-09-30', '2022-10-31', '2022-11-30', '2022-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [60]:
# "W" = weekly frequency: one timestamp per week between start and end.
pd.date_range(start=start, end=end, freq="W")

DatetimeIndex(['2022-01-02', '2022-01-09', '2022-01-16', '2022-01-23',
               '2022-01-30', '2022-02-06', '2022-02-13', '2022-02-20',
               '2022-02-27', '2022-03-06', '2022-03-13', '2022-03-20',
               '2022-03-27', '2022-04-03', '2022-04-10', '2022-04-17',
               '2022-04-24', '2022-05-01', '2022-05-08', '2022-05-15',
               '2022-05-22', '2022-05-29', '2022-06-05', '2022-06-12',
               '2022-06-19', '2022-06-26', '2022-07-03', '2022-07-10',
               '2022-07-17', '2022-07-24', '2022-07-31', '2022-08-07',
               '2022-08-14', '2022-08-21', '2022-08-28', '2022-09-04',
               '2022-09-11', '2022-09-18', '2022-09-25', '2022-10-02',
               '2022-10-09', '2022-10-16', '2022-10-23', '2022-10-30',
               '2022-11-06', '2022-11-13', '2022-11-20', '2022-11-27',
               '2022-12-04', '2022-12-11', '2022-12-18', '2022-12-25',
               '2023-01-01'],
              dtype='datetime64[ns]', freq='W-S

In [63]:
# Anchor on `end` and count backwards: 20 business-day timestamps, none later
# than `end`.
pd.bdate_range(periods=20, end=end)

DatetimeIndex(['2022-12-05', '2022-12-06', '2022-12-07', '2022-12-08',
               '2022-12-09', '2022-12-12', '2022-12-13', '2022-12-14',
               '2022-12-15', '2022-12-16', '2022-12-19', '2022-12-20',
               '2022-12-21', '2022-12-22', '2022-12-23', '2022-12-26',
               '2022-12-27', '2022-12-28', '2022-12-29', '2022-12-30'],
              dtype='datetime64[ns]', freq='B')

In [64]:
# Anchor on `start` and count forwards: 20 business-day timestamps, none
# earlier than `start`.
pd.bdate_range(periods=20, start=start)

DatetimeIndex(['2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06',
               '2022-01-07', '2022-01-10', '2022-01-11', '2022-01-12',
               '2022-01-13', '2022-01-14', '2022-01-17', '2022-01-18',
               '2022-01-19', '2022-01-20', '2022-01-21', '2022-01-24',
               '2022-01-25', '2022-01-26', '2022-01-27', '2022-01-28'],
              dtype='datetime64[ns]', freq='B')