In [1]:
import pandas as pd  # pandas library
import datetime as dt # python's inbuilt module

<h3>Review of Python's datetime Module</h3><br>
Inside datetime module there is a method called datetime() to create datetime object

In [2]:
someday = dt.date(2020,2,4)
someday

datetime.date(2020, 2, 4)

In [3]:
#attributes of date
print(someday.year)
print(someday.month)
print(someday.day)

2020
2
4


In [4]:
dt.datetime(2010,1,10) # defaults to midnight 00:00

datetime.datetime(2010, 1, 10, 0, 0)

In [5]:
dt.datetime(2010,1,10, 8,13,57)

datetime.datetime(2010, 1, 10, 8, 13, 57)

In [6]:
dt.datetime(2010,1,10, 17,13,57)

datetime.datetime(2010, 1, 10, 17, 13, 57)

In [7]:
str(dt.datetime(2010,1,10, 17,13,57))

'2010-01-10 17:13:57'

In [8]:
print(someday)

2020-02-04


In [9]:
sometime = dt.datetime(2010,1,10, 17,13,57)

In [10]:
print(sometime.year)
print(sometime.month)
print(sometime.day)
print(sometime.hour)
print(sometime.minute)
print(sometime.second)

2010
1
10
17
13
57


<h3>The pandas Timestamp Object</h3><br>
Timestamp is pandas version of Python's datetime object

In [11]:
pd.Timestamp('2015-03-31')

Timestamp('2015-03-31 00:00:00')

In [12]:
pd.Timestamp('2015/03/31')

Timestamp('2015-03-31 00:00:00')

In [13]:
pd.Timestamp('2013, 11, 04')

Timestamp('2013-11-04 00:00:00')

In [14]:
pd.Timestamp('1/1/2017')

Timestamp('2017-01-01 00:00:00')

In [15]:
pd.Timestamp('19/12/2015')

Timestamp('2015-12-19 00:00:00')

In [16]:
pd.Timestamp('12/19/2015')

Timestamp('2015-12-19 00:00:00')

In [17]:
pd.Timestamp('4/3/2000')

Timestamp('2000-04-03 00:00:00')

In [18]:
pd.Timestamp('2021-03-08 08:35:15')

Timestamp('2021-03-08 08:35:15')

In [19]:
pd.Timestamp('2021-03-08 6:13:15 PM')

Timestamp('2021-03-08 18:13:15')

In [20]:
pd.Timestamp(dt.date(2015,1,1))

Timestamp('2015-01-01 00:00:00')

In [21]:
pd.Timestamp(dt.datetime(2000,2,3,21,34,44))

Timestamp('2000-02-03 21:34:44')

<h3>The pandas DatetimeIndex Object</h3><br>
Collection of Pandas Timestamp

In [22]:
dates = ['2016/01/02','2016/04/12','2009/09/07']
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [23]:
dates = [dt.date(2016,1,10), dt.date(1994, 6, 13), dt.date(2003, 12, 29)]
dt_index = pd.DatetimeIndex(dates)

In [24]:
values = [100,200,300]
pd.Series(values, index=dt_index)

2016-01-10    100
1994-06-13    200
2003-12-29    300
dtype: int64

<h3>The pd.to_datetime() Method</h3>

In [25]:
pd.to_datetime('2020-01-01')
pd.to_datetime(dt.date(2020,1,1))
pd.to_datetime(dt.datetime(2020,1,1,13,34,30))
pd.to_datetime(['2020-01-09','2020/02/08','2016','July 4th, 1998'])

DatetimeIndex(['2020-01-09', '2020-02-08', '2016-01-01', '1998-07-04'], dtype='datetime64[ns]', freq=None)

In [26]:
times = pd.Series(['2020-01-09','2020/02/08','2016','July 4th, 1998'])
times

0        2020-01-09
1        2020/02/08
2              2016
3    July 4th, 1998
dtype: object

In [27]:
pd.to_datetime(times)

0   2020-01-09
1   2020-02-08
2   2016-01-01
3   1998-07-04
dtype: datetime64[ns]

In [28]:
dates = pd.Series(['July 4th 1996', '10/04/1991', 'Hello', '2015-02-31'])
dates

0    July 4th 1996
1       10/04/1991
2            Hello
3       2015-02-31
dtype: object

In [29]:
pd.to_datetime(dates, errors='coerce')
# errors=coerce - very important

0   1996-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

Unix Time : number of seconds since 1st Jan 1970 mid night. This is a very popular timeformat to store into system.

In [30]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

<h3>Create Range of Dates with the pd.date_range() Method</h3>

In [31]:
times = pd.date_range(start='2016-01-01', end='2016-01-10', freq='D')
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [32]:
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

In [33]:
pd.date_range(start='2016-01-01', end='2016-01-10', freq='2D')

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [34]:
pd.date_range(start='2016-01-01', end='2016-01-10', freq='B') # Only business days - skip sat and sun

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [35]:
pd.date_range(start='2016-01-01', end='2016-01-15', freq='W') # Week freq='W-SUN' - default

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [36]:
pd.date_range(start='2016-01-01', end='2016-01-15', freq='W-FRI')

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [37]:
pd.date_range(start='2016-01-01', end='2016-01-15', freq='H') # Each hour

output = None

In [38]:
pd.date_range(start='2016-01-01', end='2016-01-15', freq='10H') # every 10 hour

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 10:00:00',
               '2016-01-01 20:00:00', '2016-01-02 06:00:00',
               '2016-01-02 16:00:00', '2016-01-03 02:00:00',
               '2016-01-03 12:00:00', '2016-01-03 22:00:00',
               '2016-01-04 08:00:00', '2016-01-04 18:00:00',
               '2016-01-05 04:00:00', '2016-01-05 14:00:00',
               '2016-01-06 00:00:00', '2016-01-06 10:00:00',
               '2016-01-06 20:00:00', '2016-01-07 06:00:00',
               '2016-01-07 16:00:00', '2016-01-08 02:00:00',
               '2016-01-08 12:00:00', '2016-01-08 22:00:00',
               '2016-01-09 08:00:00', '2016-01-09 18:00:00',
               '2016-01-10 04:00:00', '2016-01-10 14:00:00',
               '2016-01-11 00:00:00', '2016-01-11 10:00:00',
               '2016-01-11 20:00:00', '2016-01-12 06:00:00',
               '2016-01-12 16:00:00', '2016-01-13 02:00:00',
               '2016-01-13 12:00:00', '2016-01-13 22:00:00',
               '2016-01-

In [39]:
pd.date_range(start='2016-01-01', end='2016-12-31', freq='M') # Month Ends

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [40]:
pd.date_range(start='2016-01-01', end='2016-12-31', freq='MS') # Month Starts

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [41]:
pd.date_range(start='2016-01-01', end='2030-12-31', freq='A') # Year Ends

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

Utilizing periods parameter

In [42]:
pd.date_range(start='2012-09-09', periods=25, freq='D')

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [43]:
pd.date_range(start='2012-09-09', periods=25, freq='B')

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12'],
              dtype='datetime64[ns]', freq='B')

In [44]:
pd.date_range(start='2012-09-09', periods=25, freq='10H')

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 10:00:00',
               '2012-09-09 20:00:00', '2012-09-10 06:00:00',
               '2012-09-10 16:00:00', '2012-09-11 02:00:00',
               '2012-09-11 12:00:00', '2012-09-11 22:00:00',
               '2012-09-12 08:00:00', '2012-09-12 18:00:00',
               '2012-09-13 04:00:00', '2012-09-13 14:00:00',
               '2012-09-14 00:00:00', '2012-09-14 10:00:00',
               '2012-09-14 20:00:00', '2012-09-15 06:00:00',
               '2012-09-15 16:00:00', '2012-09-16 02:00:00',
               '2012-09-16 12:00:00', '2012-09-16 22:00:00',
               '2012-09-17 08:00:00', '2012-09-17 18:00:00',
               '2012-09-18 04:00:00', '2012-09-18 14:00:00',
               '2012-09-19 00:00:00'],
              dtype='datetime64[ns]', freq='10H')

Notice that we can end date and period, pandas will start from a point and move till end to generate DatetimeIndex

In [45]:
pd.date_range(end='1999-12-31', periods=20, freq='D')

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

<h3>The .dt Accessor</h3>

<br>In case pandas also have the method with same name as Python, dt accessor is used before calling that method

In [46]:
bunch_of_dates = pd.date_range(start='2000-01-01', end='2010-12-31', freq='24D')
s=pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [47]:
s.dt.day.head()

0     1
1    25
2    18
3    13
4     6
dtype: int64

In [48]:
s.dt.month.head()

0    1
1    1
2    2
3    3
4    4
dtype: int64

In [49]:
s.dt.weekday.head()

0    5
1    1
2    4
3    0
4    3
dtype: int64

In [50]:
mask = s.dt.is_quarter_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

In [51]:
mask = s.dt.is_month_end
s[mask]

5     2000-04-30
57    2003-09-30
71    2004-08-31
90    2005-11-30
123   2008-01-31
161   2010-07-31
dtype: datetime64[ns]

<h3>Install pandas-datareader - Pandas library to fetch data from web</h3>

In [52]:
# !conda install pandas-datareader

<h3>Import Financial Data Set with pandas_datareader Library</h3>

In [53]:
from pandas_datareader import data

  from pandas.util.testing import assert_frame_equal


In [54]:
company = 'MSFT'  # you can google company stock symbol
start = '2010-01-01'
end = '2017-12-31'

stocks = data.DataReader(name=company, data_source='yahoo', start=start, end=end)
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-12-31,30.99,30.48,30.98,30.48,31929700.0,23.92544
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.294369
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.302216
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.15307
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.901886


In [55]:
stocks.index[0]

Timestamp('2009-12-31 00:00:00')

In [56]:
# stocks.axes

<h3>Selecting Rows from a DataFrame with a DateTimeIndex</h3>

In [57]:
stocks.loc['2014-03-04']

High         3.848000e+01
Low          3.807000e+01
Open         3.820000e+01
Close        3.841000e+01
Volume       2.680240e+07
Adj Close    3.372342e+01
Name: 2014-03-04 00:00:00, dtype: float64

In [58]:
stocks.iloc[300]

High         2.585000e+01
Low          2.536000e+01
Open         2.541000e+01
Close        2.568000e+01
Volume       4.990580e+07
Adj Close    2.069910e+01
Name: 2011-03-11 00:00:00, dtype: float64

In [59]:
stocks.loc['2016-01-01']

KeyError: '2016-01-01'

In [60]:
stocks.loc['2013-10-01':'2013-10-07']

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.610001,33.299999,33.349998,33.580002,36718700.0,29.043051
2013-10-02,34.029999,33.290001,33.360001,33.919998,46946800.0,29.337111
2013-10-03,34.0,33.419998,33.880001,33.860001,38703800.0,29.285215
2013-10-04,33.990002,33.619999,33.689999,33.880001,33008100.0,29.302513
2013-10-07,33.709999,33.200001,33.599998,33.299999,35069300.0,28.800877


In [62]:
birthdays = pd.date_range(start= '1993-02-13', end= '2017-12-31', freq=pd.DateOffset(years = 1))

In [64]:
mask = stocks.index.isin(birthdays)

In [66]:
stocks.loc[mask]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-02-13,30.77,30.43,30.629999,30.58,33319800.0,25.154863
2013-02-13,28.110001,27.879999,27.93,28.030001,41715600.0,23.709335
2014-02-13,37.860001,37.330002,37.330002,37.610001,37635500.0,32.775257
2015-02-13,43.869999,43.150002,43.380001,43.869999,40264900.0,39.277378
2017-02-13,64.860001,64.129997,64.239998,64.720001,22920100.0,61.155426


<h3>Timestamp Object Attributes</h3>

In [71]:
someday = stocks.index[500]
someday.day
someday.month
someday.year
# someday.weekday_name
someday.is_month_end
someday.is_month_start
someday.weekday

<function Timestamp.weekday>

In [72]:
stocks.insert(0,'Year',stocks.index.year)

In [73]:
stocks.head()

Unnamed: 0_level_0,Year,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-12-31,2009,30.99,30.48,30.98,30.48,31929700.0,23.92544
2010-01-04,2010,31.1,30.59,30.620001,30.950001,38409100.0,24.294369
2010-01-05,2010,31.1,30.639999,30.85,30.959999,49749600.0,24.302216
2010-01-06,2010,31.08,30.52,30.879999,30.77,58182400.0,24.15307
2010-01-07,2010,30.700001,30.190001,30.629999,30.450001,50559700.0,23.901886


In [74]:
stocks.insert(1,'Is Start of Month', stocks.index.is_month_start)

In [75]:
stocks.head()

Unnamed: 0_level_0,Year,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2009-12-31,2009,False,30.99,30.48,30.98,30.48,31929700.0,23.92544
2010-01-04,2010,False,31.1,30.59,30.620001,30.950001,38409100.0,24.294369
2010-01-05,2010,False,31.1,30.639999,30.85,30.959999,49749600.0,24.302216
2010-01-06,2010,False,31.08,30.52,30.879999,30.77,58182400.0,24.15307
2010-01-07,2010,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.901886


In [76]:
stocks[stocks['Is Start of Month']]

Unnamed: 0_level_0,Year,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,2010,True,28.480000,27.920000,28.389999,28.410000,85931100.0,22.300581
2010-03-01,2010,True,29.049999,28.530001,28.770000,29.020000,43805400.0,22.885929
2010-04-01,2010,True,29.540001,28.620001,29.350000,29.160000,74768100.0,22.996336
2010-06-01,2010,True,26.309999,25.520000,25.530001,25.889999,76152400.0,20.509659
2010-07-01,2010,True,23.320000,22.730000,23.090000,23.160000,92239400.0,18.346998
...,...,...,...,...,...,...,...,...
2017-06-01,2017,True,70.610001,69.449997,70.239998,70.099998,21603600.0,67.022667
2017-08-01,2017,True,73.419998,72.489998,73.099998,72.580002,22132300.0,69.393814
2017-09-01,2017,True,74.739998,73.639999,74.709999,73.940002,21736200.0,71.070747
2017-11-01,2017,True,83.760002,82.879997,83.680000,83.180000,22307400.0,79.952187


<h3>The .truncate() Method</h3>

In [78]:
stocks.truncate(before='2011-02-05', after='2011-02-28')
# you can use loc, iloc also here

Unnamed: 0_level_0,Year,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011-02-07,2011,False,28.34,27.790001,27.799999,28.200001,68980900.0,22.596769
2011-02-08,2011,False,28.34,28.049999,28.1,28.280001,34904200.0,22.66087
2011-02-09,2011,False,28.26,27.91,28.190001,27.969999,52905100.0,22.412468
2011-02-10,2011,False,27.940001,27.290001,27.93,27.5,76672400.0,22.03586
2011-02-11,2011,False,27.809999,27.07,27.76,27.25,83939700.0,21.835524
2011-02-14,2011,False,27.27,26.950001,27.209999,27.23,56766200.0,21.819502
2011-02-15,2011,False,27.33,26.950001,27.040001,26.959999,44116500.0,21.730837
2011-02-16,2011,False,27.07,26.6,27.049999,27.02,70817900.0,21.779205
2011-02-17,2011,False,27.370001,26.91,26.969999,27.209999,57207300.0,21.932344
2011-02-18,2011,False,27.209999,26.99,27.129999,27.059999,68667800.0,21.811447


<h3>pd.DateOffset Objects</h3>

In [88]:
from pandas.tseries.offsets import *

In [79]:
stocks = data.DataReader(name='GOOG', data_source='yahoo', start= dt.date(2000,1,1), end=dt.datetime.now())
stocks.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-07,1377.599976,1355.27002,1365.939941,1372.560059,1397600.0,1372.560059
2020-05-08,1398.76001,1375.47998,1383.130005,1388.369995,1388100.0,1388.369995
2020-05-11,1416.530029,1377.151978,1378.280029,1403.26001,1410600.0,1403.26001
2020-05-12,1415.0,1374.77002,1407.119995,1375.73999,1390600.0,1375.73999
2020-05-13,1385.482056,1328.400024,1377.050049,1349.329956,1810600.0,1349.329956


In [83]:
stocks.index + pd.DateOffset(days = 5) # add 5 days in each index

output= None

<h3>More Fun with pd.DateOffset Objects</h3>

In [97]:
stocks.index + pd.tseries.offsets.MonthEnd()
# add appropriate offset to every date to make it next month end
stocks.index - pd.tseries.offsets.MonthEnd()
# add appropriate offset to every date to make it last month end
stocks.index + pd.tseries.offsets.MonthBegin()
stocks.index - pd.tseries.offsets.MonthBegin()
stocks.index - MonthBegin()
stocks.index - BMonthEnd()
stocks.index + QuarterEnd()
stocks.index - QuarterBegin()
stocks.index + YearEnd()
stocks.index + YearBegin()

DatetimeIndex(['2005-01-01', '2005-01-01', '2005-01-01', '2005-01-01',
               '2005-01-01', '2005-01-01', '2005-01-01', '2005-01-01',
               '2005-01-01', '2005-01-01',
               ...
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01'],
              dtype='datetime64[ns]', name='Date', length=3961, freq=None)

<h3>The pandas Timedelta Object</h3><br>
Represents a time duration/ time span

In [100]:
timeA = pd.Timestamp('2016-03-31 04:35:16 PM')
timeB = pd.Timestamp('2016-03-20 02:16:49 AM')

In [101]:
timeA- timeB

Timedelta('11 days 14:18:27')

In [102]:
timeB- timeA

Timedelta('-12 days +09:41:33')

In [106]:
pd.Timedelta(days=3, minutes=45, hours=12, weeks=8)

Timedelta('59 days 12:45:00')

In [108]:
pd.Timedelta(days=3, minutes=45, hours=12, weeks=8, years=2)

ValueError: cannot construct a Timedelta from the passed arguments, allowed keywords are [weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds]

In [109]:
pd.Timedelta('5 minutes')

Timedelta('0 days 00:05:00')

In [110]:
pd.Timedelta('6 hours 12 minutes')

Timedelta('0 days 06:12:00')

In [112]:
pd.Timedelta('14 days 6 hours 12 minutes 49 seconds')

Timedelta('14 days 06:12:49')

<h3>Timedeltas in a Dataset</h3>

In [115]:
shipping = pd.read_csv('resources/ecommerce.csv', index_col='ID', parse_dates=['order_date', 'delivery_date'])
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [118]:
shipping['Delivery_time'] = shipping['delivery_date'] - shipping['order_date']

In [119]:
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [122]:
shipping['Twice As Long'] = shipping['delivery_date'] + shipping['Delivery_time']

In [123]:
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery_time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18


In [124]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
Delivery_time    timedelta64[ns]
Twice As Long     datetime64[ns]
dtype: object

In [127]:
mask = shipping['Delivery_time'] > '3000 days'
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,Delivery_time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
32,1990-01-20,1998-07-24,3107 days,2007-01-25
130,1990-04-02,1999-08-16,3423 days,2008-12-29
151,1991-01-29,1999-08-05,3110 days,2008-02-09
229,1990-04-13,1998-11-17,3140 days,2007-06-23
314,1990-03-07,1999-12-25,3580 days,2009-10-13
331,1990-09-18,1999-12-19,3379 days,2009-03-20
348,1990-02-27,1999-01-04,3233 days,2007-11-11
392,1990-12-24,1999-12-04,3267 days,2008-11-13
590,1990-03-25,1998-12-20,3192 days,2007-09-16
634,1991-04-04,1999-07-21,3030 days,2007-11-06


In [128]:
shipping['Delivery_time'].max()

Timedelta('3583 days 00:00:00')

In [129]:
shipping['Delivery_time'].min()

Timedelta('8 days 00:00:00')