In [1]:
import pandas as pd
import datetime as dt

# The *pandas* Timestamp Object

In [2]:
pd.Timestamp("2015-07-08")

Timestamp('2015-07-08 00:00:00')

In [3]:
pd.Timestamp("2015-07-08 12:13:14")

Timestamp('2015-07-08 12:13:14')

In [4]:
pd.Timestamp("05/06/2015") # By default MDY

Timestamp('2015-05-06 00:00:00')

In [5]:
pd.Timestamp(dt.date(2017, 4, 3))

Timestamp('2017-04-03 00:00:00')

# The *pandas* DateTimeIndex Object

In [6]:
dates = ["2016-01-02", "2016-05-15", "2009-08-01"]
dt_idx = pd.DatetimeIndex(dates)

In [7]:
values = [100, 200, 300]
s = pd.Series(data=values, index=dt_idx)

In [8]:
s

2016-01-02    100
2016-05-15    200
2009-08-01    300
dtype: int64

# The pd.to_datetime() Method

In [9]:
pd.to_datetime("2001-04-19")

Timestamp('2001-04-19 00:00:00')

In [10]:
pd.to_datetime(["2015-02-03", "05/06/2016"])

DatetimeIndex(['2015-02-03', '2016-05-06'], dtype='datetime64[ns]', freq=None)

In [11]:
pd.to_datetime("04/05/2016", dayfirst=True)

Timestamp('2016-05-04 00:00:00')

In [12]:
times = pd.Series(["2015-02-03", "05/06/2016", "06/07/2017"])

In [13]:
pd.to_datetime(times)

0   2015-02-03
1   2016-05-06
2   2017-06-07
dtype: datetime64[ns]

In [14]:
times = pd.Series(["2015-02-03", "05/06/2016", "06/07/2017", "Errors"])
pd.to_datetime(times, errors="coerce")

0   2015-02-03
1   2016-05-06
2   2017-06-07
3          NaT
dtype: datetime64[ns]

In [15]:
pd.to_datetime(1349720105, unit="s")

Timestamp('2012-10-08 18:15:05')

# Date Ranges pd.date_range()

In [16]:
pd.date_range(start="2016-01-01", end="2016-01-10", freq="D")

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [17]:
pd.date_range(start="2016-01-01", end="2016-01-10", freq="3D")

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-07', '2016-01-10'], dtype='datetime64[ns]', freq='3D')

In [18]:
pd.date_range(start="2016-01-01", end="2016-01-10", freq="B")

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [19]:
pd.date_range(start="2016-01-01", end="2016-01-20", freq="W")

DatetimeIndex(['2016-01-03', '2016-01-10', '2016-01-17'], dtype='datetime64[ns]', freq='W-SUN')

In [20]:
pd.date_range(start="2016-01-01", end="2016-01-20", freq="W-MON")

DatetimeIndex(['2016-01-04', '2016-01-11', '2016-01-18'], dtype='datetime64[ns]', freq='W-MON')

In [21]:
pd.date_range(start="2016-01-01", end="2016-01-5", freq="12H")

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 12:00:00',
               '2016-01-02 00:00:00', '2016-01-02 12:00:00',
               '2016-01-03 00:00:00', '2016-01-03 12:00:00',
               '2016-01-04 00:00:00', '2016-01-04 12:00:00',
               '2016-01-05 00:00:00'],
              dtype='datetime64[ns]', freq='12H')

In [22]:
pd.date_range(start="2016-01-01", end="2016-03-20", freq="M")

DatetimeIndex(['2016-01-31', '2016-02-29'], dtype='datetime64[ns]', freq='M')

In [23]:
pd.date_range(start="2016-01-01", end="2016-03-20", freq="MS")

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01'], dtype='datetime64[ns]', freq='MS')

In [24]:
pd.date_range(start="2016-01-01", end="2018-03-20", freq="A")

DatetimeIndex(['2016-12-31', '2017-12-31'], dtype='datetime64[ns]', freq='A-DEC')

In [25]:
pd.date_range(start="2012-09-09", periods=10)

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18'],
              dtype='datetime64[ns]', freq='D')

In [26]:
pd.date_range(start="2012-09-09", periods=10, freq="3D")

DatetimeIndex(['2012-09-09', '2012-09-12', '2012-09-15', '2012-09-18',
               '2012-09-21', '2012-09-24', '2012-09-27', '2012-09-30',
               '2012-10-03', '2012-10-06'],
              dtype='datetime64[ns]', freq='3D')

In [27]:
pd.date_range(start="2012-09-09", periods=10, freq="MS")

DatetimeIndex(['2012-10-01', '2012-11-01', '2012-12-01', '2013-01-01',
               '2013-02-01', '2013-03-01', '2013-04-01', '2013-05-01',
               '2013-06-01', '2013-07-01'],
              dtype='datetime64[ns]', freq='MS')

In [28]:
pd.date_range(start="2012-09-09", periods=10, freq="B")

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21'],
              dtype='datetime64[ns]', freq='B')

In [29]:
pd.date_range(end="2012-09-09", periods=10)

DatetimeIndex(['2012-08-31', '2012-09-01', '2012-09-02', '2012-09-03',
               '2012-09-04', '2012-09-05', '2012-09-06', '2012-09-07',
               '2012-09-08', '2012-09-09'],
              dtype='datetime64[ns]', freq='D')

In [30]:
pd.date_range(end="2012-09-09", periods=10, freq="3D")

DatetimeIndex(['2012-08-13', '2012-08-16', '2012-08-19', '2012-08-22',
               '2012-08-25', '2012-08-28', '2012-08-31', '2012-09-03',
               '2012-09-06', '2012-09-09'],
              dtype='datetime64[ns]', freq='3D')

In [31]:
pd.date_range(end="2012-09-09", periods=10, freq="B")

DatetimeIndex(['2012-08-27', '2012-08-28', '2012-08-29', '2012-08-30',
               '2012-08-31', '2012-09-03', '2012-09-04', '2012-09-05',
               '2012-09-06', '2012-09-07'],
              dtype='datetime64[ns]', freq='B')

# The .dt Accessor

In [32]:
dates = pd.date_range(start="2000-01-01", end="2010-12-31", freq="24D")

In [33]:
s = pd.Series(dates)
s.head()

0   2000-01-01
1   2000-01-25
2   2000-02-18
3   2000-03-13
4   2000-04-06
dtype: datetime64[ns]

In [34]:
days = s[s.dt.is_month_start]
days.head()

0     2000-01-01
19    2001-04-01
38    2002-07-01
104   2006-11-01
109   2007-03-01
dtype: datetime64[ns]

# Import Financial Data Set with *pandas_datareader*

In [35]:
from pandas_datareader import data

In [36]:
msft_stocks = data.DataReader("MSFT", start="2010-01-01", 
                              end="2017-12-31", data_source="google")

In [37]:
msft_stocks.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.62,31.1,30.59,30.95,38414185
2010-01-05,30.85,31.1,30.64,30.96,49758862
2010-01-06,30.88,31.08,30.52,30.77,58182332
2010-01-07,30.63,30.7,30.19,30.45,50564285
2010-01-08,30.28,30.88,30.24,30.66,51201289


In [38]:
msft_stocks.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1921 entries, 2010-01-04 to 2017-08-21
Data columns (total 5 columns):
Open      1921 non-null float64
High      1921 non-null float64
Low       1921 non-null float64
Close     1921 non-null float64
Volume    1921 non-null int64
dtypes: float64(4), int64(1)
memory usage: 90.0 KB


In [39]:
msft_stocks.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2017-08-08', '2017-08-09', '2017-08-10', '2017-08-11',
               '2017-08-14', '2017-08-15', '2017-08-16', '2017-08-17',
               '2017-08-18', '2017-08-21'],
              dtype='datetime64[ns]', name='Date', length=1921, freq=None)

# Selecting in a DF with DateTimeIndex

In [40]:
msft_stocks.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,30.62,31.1,30.59,30.95,38414185
2010-01-05,30.85,31.1,30.64,30.96,49758862
2010-01-06,30.88,31.08,30.52,30.77,58182332


In [41]:
msft_stocks.loc["2017-05-15"]

Open            68.14
High            68.48
Low             67.57
Close           68.43
Volume    31530301.00
Name: 2017-05-15 00:00:00, dtype: float64

In [42]:
msft_stocks.iloc[60]

Open            29.64
High            29.72
Low             29.17
Close           29.29
Volume    63762620.00
Name: 2010-03-31 00:00:00, dtype: float64

In [43]:
msft_stocks.loc["2017-05-15":"2017-05-25"]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-05-15,68.14,68.48,67.57,68.43,31530301
2017-05-16,68.23,69.44,68.16,69.41,34956038
2017-05-17,68.89,69.1,67.43,67.48,30548781
2017-05-18,67.4,68.13,67.14,67.71,25201274
2017-05-19,67.5,68.1,67.43,67.69,26961119
2017-05-22,67.89,68.5,67.5,68.45,16237550
2017-05-23,68.72,68.75,68.38,68.68,15425824
2017-05-24,68.87,68.88,68.45,68.77,14666865
2017-05-25,68.97,69.88,68.91,69.62,21854095


In [44]:
birthdays = pd.date_range(start="1990-05-15", end="2017-05-15",
                          freq=pd.DateOffset(years=1))

msft_stocks[msft_stocks.index.isin(birthdays)]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-05-15,30.64,30.8,30.15,30.21,61830348
2013-05-15,33.45,33.9,33.43,33.84,46309506
2014-05-15,40.09,40.4,39.51,39.6,37793243
2015-05-15,48.87,48.9,48.05,48.3,28642694
2017-05-15,68.14,68.48,67.57,68.43,31530301


# TimeStamp object attributes

In [45]:
someday = msft_stocks.index[500]
someday

Timestamp('2011-12-28 00:00:00')

In [46]:
someday.day
someday.weekday_name
someday.is_month_start

False

In [47]:
msft_stocks.insert(0, "Weekday", msft_stocks.index.weekday_name)
msft_stocks.head(3)

Unnamed: 0_level_0,Weekday,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,Monday,30.62,31.1,30.59,30.95,38414185
2010-01-05,Tuesday,30.85,31.1,30.64,30.96,49758862
2010-01-06,Wednesday,30.88,31.08,30.52,30.77,58182332


In [48]:
msft_stocks.insert(1, "Is Start of Month", 
                   msft_stocks.index.is_month_start)
msft_stocks.head(3)

Unnamed: 0_level_0,Weekday,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,False,30.62,31.1,30.59,30.95,38414185
2010-01-05,Tuesday,False,30.85,31.1,30.64,30.96,49758862
2010-01-06,Wednesday,False,30.88,31.08,30.52,30.77,58182332


# The .truncate() Method

In [49]:
msft_stocks.truncate(before="2011-02-05", after="2011-02-20")

Unnamed: 0_level_0,Weekday,Is Start of Month,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-02-07,Monday,False,27.8,28.34,27.79,28.2,68980871
2011-02-08,Tuesday,False,28.1,28.34,28.05,28.28,34910467
2011-02-09,Wednesday,False,28.19,28.26,27.91,27.97,52905018
2011-02-10,Thursday,False,27.93,27.94,27.29,27.5,76672349
2011-02-11,Friday,False,27.76,27.81,27.07,27.25,83939643
2011-02-14,Monday,False,27.2,27.27,26.95,27.23,56766112
2011-02-15,Tuesday,False,27.04,27.33,26.95,26.96,44120592
2011-02-16,Wednesday,False,27.05,27.07,26.6,27.02,70817867
2011-02-17,Thursday,False,26.97,27.37,26.91,27.21,57211558
2011-02-18,Friday,False,27.13,27.21,26.99,27.06,68672855


# pd.DateOffset Object

In [50]:
google_stocks = data.DataReader("GOOG", start="2010-01-01", 
                                end=dt.datetime.now(), 
                                data_source="google")

In [51]:
google_stocks.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,313.16,314.44,311.81,313.06,
2010-01-05,313.28,313.61,310.46,311.68,
2010-01-06,312.62,312.62,302.88,303.83,


In [52]:
google_stocks.index[:5]

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [53]:
(google_stocks.index + pd.DateOffset(days=5))[:5]

DatetimeIndex(['2010-01-09', '2010-01-10', '2010-01-11', '2010-01-12',
               '2010-01-13'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [54]:
(google_stocks.index + pd.DateOffset(years=2))[:5]

DatetimeIndex(['2012-01-04', '2012-01-05', '2012-01-06', '2012-01-07',
               '2012-01-08'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [55]:
(google_stocks.index + pd.tseries.offsets.QuarterBegin())[:10]

DatetimeIndex(['2010-03-01', '2010-03-01', '2010-03-01', '2010-03-01',
               '2010-03-01', '2010-03-01', '2010-03-01', '2010-03-01',
               '2010-03-01', '2010-03-01'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [56]:
(google_stocks.index - pd.tseries.offsets.MonthEnd())[:10]

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31'],
              dtype='datetime64[ns]', name='Date', freq=None)

In [57]:
from pandas.tseries.offsets import *

In [58]:
(google_stocks.index - MonthBegin())[:10]

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01'],
              dtype='datetime64[ns]', name='Date', freq=None)

# The *Timedelta* Object

In [59]:
t1 = pd.Timestamp("2016-05-14")
t2 = pd.Timestamp("2017-01-12")

In [60]:
t2 - t1

Timedelta('243 days 00:00:00')

In [61]:
pd.Timedelta(days=2, minutes=12)

Timedelta('2 days 00:12:00')

In [68]:
pd.Timedelta("5 minutes 16 hours 1 day")

Timedelta('1 days 16:05:00')

In [74]:
shipping = pd.read_csv("ecommerce.csv", index_col="ID", 
                        parse_dates=["order_date", "delivery_date"])
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [76]:
shipping["Delivery Time"] = shipping["delivery_date"] - shipping["order_date"]

In [77]:
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [79]:
shipping["delivery_date"] = shipping["delivery_date"] + shipping["Delivery Time"]

In [80]:
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-10-20,257 days
2,1992-04-22,2004-01-18,2144 days
4,1991-02-10,1994-03-12,563 days
5,1992-07-21,2003-03-22,1948 days
7,1993-09-02,2003-03-18,1742 days


In [81]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
Delivery Time    timedelta64[ns]
dtype: object

In [83]:
(shipping["Delivery Time"] > "365 days").head()

ID
1    False
2     True
4     True
5     True
7     True
Name: Delivery Time, dtype: bool

In [84]:
(shipping["Delivery Time"] > pd.Timedelta(days=365)).head()

ID
1    False
2     True
4     True
5     True
7     True
Name: Delivery Time, dtype: bool