In [2]:
import pandas as pd
import datetime as dt

In [7]:
# datetime module: a date object exposes its components as attributes
someday = dt.date(year=2016, month=4, day=12)
someday.year

2016

In [11]:
# str() renders a datetime as "YYYY-MM-DD HH:MM:SS"
str(dt.datetime(year=2010, month=1, day=10, hour=17, minute=13, second=57))

'2010-01-10 17:13:57'

In [14]:
# Timestamp object: built here from date strings in different layouts.
# A cell echoes only its final expression, so just the last Timestamp is shown.
pd.Timestamp("2015-03-31")
pd.Timestamp("12-2-2021")
pd.Timestamp("2-20-2021")  # month-day-year: 20 February 2021

Timestamp('2021-02-20 00:00:00')

In [15]:
# DatetimeIndex object: a whole list of date strings converted in one call
dates = ["2020-01-02", "2012-12-2", "2003-06-12"]
pd.DatetimeIndex(data=dates)

DatetimeIndex(['2020-01-02', '2012-12-02', '2003-06-12'], dtype='datetime64[ns]', freq=None)

In [17]:
# A DatetimeIndex built from datetime.date objects, used as the Series index.
values = [100, 200, 300]
dates = [
    dt.date(year=2020, month=1, day=20),
    dt.date(year=1995, month=2, day=12),
    dt.date(year=2005, month=3, day=4),
]
dtIndex = pd.DatetimeIndex(data=dates)
pd.Series(values, index=dtIndex)

2020-01-20    100
1995-02-12    200
2005-03-04    300
dtype: int64

In [20]:
# pd.to_datetime() function
pd.to_datetime("2001-12-21")
# The three strings below use three different layouts. Converting them one at
# a time keeps the result independent of how a given pandas release infers a
# single shared format for a whole list (pandas 2.x raises ValueError on the
# list form unless format="mixed" is passed, which older releases reject).
parsed_dates = pd.DatetimeIndex(
    [pd.to_datetime(text) for text in ["2015-03-12", "2023/12/1", "June 4th, 1899"]]
)
parsed_dates

DatetimeIndex(['2015-03-12', '2023-12-01', '1899-06-04'], dtype='datetime64[ns]', freq=None)

In [21]:
# The same three layouts, held as plain strings (object dtype) in a Series
times = pd.Series(data=["2015-03-12", "2023/12/1", "June 4th, 1899"])
times

0        2015-03-12
1         2023/12/1
2    June 4th, 1899
dtype: object

In [22]:
# `times` mixes several date layouts. Parse each element on its own so the
# conversion does not depend on one format being inferred for the whole Series
# (pandas 2.x raises ValueError for mixed layouts unless format="mixed").
times.apply(pd.to_datetime)

0   2015-03-12
1   2023-12-01
2   1899-06-04
dtype: datetime64[ns]

In [24]:
# Rebind `times` to values that cannot all be parsed: free text and 31 February
times = pd.Series(data=["Hello", "2023/2/31", "June 4th, 1899"])
times

0             Hello
1         2023/2/31
2    June 4th, 1899
dtype: object

In [26]:
# errors="coerce" turns entries that cannot be parsed into NaT instead of raising
pd.to_datetime(arg=times, errors="coerce")

0          NaT
1          NaT
2   1899-06-04
dtype: datetime64[ns]

In [29]:
# pd.date_range(): the same start/end window at three different frequencies.
# Only the last call is echoed by the cell; see the pandas docs for the full
# list of accepted `freq` aliases.
pd.date_range("2016-01-01", "2016-01-10", freq="D")
pd.date_range("2016-01-01", "2016-01-10", freq="B")
pd.date_range("2016-01-01", "2016-01-10", freq="W")

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [30]:
# Start date plus a number of periods: 25 consecutive days from 1 Jan 2016
pd.date_range("2016-01-01", periods=25, freq="D")

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10', '2016-01-11', '2016-01-12',
               '2016-01-13', '2016-01-14', '2016-01-15', '2016-01-16',
               '2016-01-17', '2016-01-18', '2016-01-19', '2016-01-20',
               '2016-01-21', '2016-01-22', '2016-01-23', '2016-01-24',
               '2016-01-25'],
              dtype='datetime64[ns]', freq='D')

In [31]:
# End date plus a number of periods: the 20 days that finish on 31 Dec 1999
pd.date_range(end="1999-12-31", periods=20, freq="D")

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [35]:
# dt accessor: start from dates spaced 24 days apart, wrapped in a Series
dates = pd.date_range("2000-01-01", "2010-12-31", freq="24D")
s = pd.Series(data=dates)
s.head(n=3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [37]:
# Name the column when the frame is built instead of creating a default
# column labelled 0 and renaming it in place afterwards.
df = pd.DataFrame({"Date": dates})
# .dt exposes datetime helpers element-wise; day_name() gives the weekday name.
df["Date"].dt.day_name()

0       Saturday
1        Tuesday
2         Friday
3         Monday
4       Thursday
         ...    
163       Friday
164       Monday
165     Thursday
166       Sunday
167    Wednesday
Name: Date, Length: 168, dtype: object

In [38]:
# Boolean mask: True where the date is the first day of a quarter
mask = s.dt.is_quarter_start
s.loc[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

In [1]:
# financial dataset
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [5]:
# Download MSFT price history for 2010-2020 from the Stooq data source
stocks = data.DataReader(
    name="MSFT",
    data_source="stooq",
    start="2010-01-01",
    end="2020-12-31",
)
stocks.head(n=3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-31,217.822,219.104,215.838,218.528,21315550.0
2020-12-30,221.277,221.674,217.595,217.802,20633810.0
2020-12-29,222.35,223.214,219.659,220.205,17713530.0


In [6]:
# A single Timestamp label returns that day's row as a Series
stocks.loc[pd.Timestamp(year=2010, month=1, day=4)]

Open      2.408540e+01
High      2.443890e+01
Low       2.403580e+01
Close     2.431780e+01
Volume    4.886916e+07
Name: 2010-01-04 00:00:00, dtype: float64

In [8]:
# A list of Timestamp labels returns the matching rows as a DataFrame
stocks.loc[[pd.Timestamp(day) for day in ("2010-01-04", "2010-01-05")]]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,24.0854,24.4389,24.0358,24.3178,48869160.0
2010-01-05,24.2542,24.4389,24.0765,24.3267,63311740.0


In [10]:
# Label-based slicing includes BOTH endpoints (unlike positional slicing).
# The downloaded frame is ordered newest-first, and string-label slicing on a
# DatetimeIndex that is not sorted ascending is deprecated when a bound (here
# 2013-01-01, which has no row) is missing from the index; newer pandas
# releases raise KeyError for it. Slice an ascending-sorted view instead.
stocks.sort_index().loc["2013-01-01":"2013-02-01"]
# truncate() likewise keeps the rows on both boundary dates.
stocks.truncate(before="2013-01-01", after="2013-01-10")

  stocks.loc["2013-01-01" : "2013-02-01"]


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01-10,21.9729,22.2498,21.6761,21.819,86609750.0
2013-01-09,22.0314,22.0543,21.9024,22.0166,59473700.0
2013-01-08,22.0543,22.0871,21.819,21.8935,54201770.0
2013-01-07,22.0691,22.1654,21.9639,22.0096,45010040.0
2013-01-04,22.4841,22.5427,22.0394,22.0464,63688220.0
2013-01-03,22.785,22.8018,22.3978,22.4693,58569830.0
2013-01-02,22.4693,22.8654,22.3908,22.776,64157100.0


In [12]:
# Position-based slicing with .iloc excludes the stop position:
# 1000:1005 selects the five rows at positions 1000 through 1004.
stocks.iloc[1000:1005]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-11,57.6727,58.2436,57.5069,58.2079,22181170.0
2017-01-10,57.783,58.0976,57.3689,57.6816,18494590.0
2017-01-09,57.8097,58.1075,57.6073,57.7015,20185320.0
2017-01-06,57.3868,58.1701,57.1456,57.8842,19079290.0
2017-01-05,57.2856,57.7193,57.1357,57.3868,23183540.0


In [15]:
# One date per year, stepping from 24 June 2001 in one-year offsets
birthdays = pd.date_range(
    "2001-06-24",
    "2023-12-31",
    freq=pd.DateOffset(years=1),
)

In [16]:
# Boolean array: True for each row whose index date is one of the birthdays
birthday_stocks = stocks.index.isin(values=birthdays)

In [17]:
# Keep only the rows flagged by the boolean array
stocks.loc[birthday_stocks]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-06-24,197.084,198.702,192.159,193.4,37584310.0
2019-06-24,132.295,133.646,132.295,133.041,21362210.0
2016-06-24,45.288,46.3195,45.0279,45.3079,69240240.0
2015-06-24,40.4055,40.9049,40.2993,40.3688,39437770.0
2014-06-24,36.0543,36.1496,35.8229,35.9768,30765060.0
2013-06-24,27.579,28.6334,27.2693,28.2322,67020980.0
2011-06-24,19.3956,19.4106,19.1306,19.2199,127772100.0
2010-06-24,20.0042,20.2097,19.5902,19.6428,108442200.0


In [18]:
# `stocks` has not been modified since it was downloaded earlier in this
# session, so reuse it rather than issuing an identical network request.
stocks.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-31,217.822,219.104,215.838,218.528,21315550.0
2020-12-30,221.277,221.674,217.595,217.802,20633810.0
2020-12-29,222.35,223.214,219.659,220.205,17713530.0


In [19]:
# Index entries of a DatetimeIndex are Timestamp objects; grab one by position
someday = stocks.index[500]

In [21]:
# Month number (1-12) of the Timestamp picked from the index
someday.month

1

In [22]:
# Add the flag column only once: DataFrame.insert raises ValueError when the
# column already exists, so re-running this cell would otherwise fail.
if "Is Start of Month" not in stocks.columns:
    # Position 1 places the flag right after the first column.
    stocks.insert(1, "Is Start of Month", stocks.index.is_month_start)

In [23]:
# Use the boolean flag column itself as the row filter
stocks.loc[stocks["Is Start of Month"]]

Unnamed: 0_level_0,Open,Is Start of Month,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-01,210.7540,True,213.5230,209.6020,212.4220,3.148286e+07
2020-10-01,209.1950,True,209.6820,207.0900,208.2030,2.771505e+07
2020-09-01,220.9700,True,222.8960,219.9170,222.7170,2.631985e+07
2020-07-01,198.5920,True,201.7210,197.2420,200.1020,3.279741e+07
2020-06-01,178.4290,True,178.8860,177.3870,178.7180,2.318935e+07
...,...,...,...,...,...,...
2010-07-01,18.1428,True,18.3235,17.8589,18.1964,1.173523e+08
2010-06-01,20.0906,True,20.6763,20.0559,20.3438,9.687786e+07
2010-04-01,23.0619,True,23.2089,22.4861,22.9120,9.513903e+07
2010-03-01,22.6053,True,22.8277,22.4196,22.8048,5.570803e+07


In [24]:
# Fetch a fresh copy: the frame was changed in place above when the
# "Is Start of Month" column was inserted.
stocks = data.DataReader(
    name="MSFT",
    data_source="stooq",
    start="2010-01-01",
    end="2020-12-31",
)
stocks.head(n=3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-31,217.822,219.104,215.838,218.528,21315550.0
2020-12-30,221.277,221.674,217.595,217.802,20633810.0
2020-12-29,222.35,223.214,219.659,220.205,17713530.0


In [25]:
# Shift every date in the index forward by five calendar days
stocks.index + pd.DateOffset(days=5)

DatetimeIndex(['2021-01-05', '2021-01-04', '2021-01-03', '2021-01-02',
               '2020-12-29', '2020-12-28', '2020-12-27', '2020-12-26',
               '2020-12-23', '2020-12-22',
               ...
               '2010-01-20', '2010-01-19', '2010-01-18', '2010-01-17',
               '2010-01-16', '2010-01-13', '2010-01-12', '2010-01-11',
               '2010-01-10', '2010-01-09'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [26]:
# Subtraction works the same way: every date moves back five calendar days
stocks.index - pd.DateOffset(days=5)

DatetimeIndex(['2020-12-26', '2020-12-25', '2020-12-24', '2020-12-23',
               '2020-12-19', '2020-12-18', '2020-12-17', '2020-12-16',
               '2020-12-13', '2020-12-12',
               ...
               '2010-01-10', '2010-01-09', '2010-01-08', '2010-01-07',
               '2010-01-06', '2010-01-03', '2010-01-02', '2010-01-01',
               '2009-12-31', '2009-12-30'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [27]:
# DateOffset accepts other units too; here every date moves forward two weeks
stocks.index + pd.DateOffset(weeks=2)

DatetimeIndex(['2021-01-14', '2021-01-13', '2021-01-12', '2021-01-11',
               '2021-01-07', '2021-01-06', '2021-01-05', '2021-01-04',
               '2021-01-01', '2020-12-31',
               ...
               '2010-01-29', '2010-01-28', '2010-01-27', '2010-01-26',
               '2010-01-25', '2010-01-22', '2010-01-21', '2010-01-20',
               '2010-01-19', '2010-01-18'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [28]:
# The offset arithmetic above returned new indexes and left `stocks` itself
# untouched, so reuse the frame instead of downloading identical data again.
stocks.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-12-31,217.822,219.104,215.838,218.528,21315550.0
2020-12-30,221.277,221.674,217.595,217.802,20633810.0
2020-12-29,222.35,223.214,219.659,220.205,17713530.0


In [29]:
# MonthBegin moves each date forward to the next first-of-month
stocks.index + pd.offsets.MonthBegin()

DatetimeIndex(['2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01',
               ...
               '2010-02-01', '2010-02-01', '2010-02-01', '2010-02-01',
               '2010-02-01', '2010-02-01', '2010-02-01', '2010-02-01',
               '2010-02-01', '2010-02-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [30]:
from pandas.tseries import offsets

In [31]:
# MonthEnd moves each date forward to a month end; a date already on a month
# end (2020-12-31, the first entry) jumps to the end of the following month.
stocks.index + offsets.MonthEnd(n=1)

DatetimeIndex(['2021-01-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31',
               ...
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [32]:
# Last three rows of the frame, i.e. the oldest dates in this newest-first data
stocks.tail(n=3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-06,24.2642,24.423,23.9852,24.1758,74032540.0
2010-01-05,24.2542,24.4389,24.0765,24.3267,63311740.0
2010-01-04,24.0854,24.4389,24.0358,24.3178,48869160.0


In [33]:
# BMonthEnd is the business-day variant: dates land on the last business day
# of a month (e.g. 2010-01-29 rather than 2010-01-31).
stocks.index + offsets.BMonthEnd(n=1)

DatetimeIndex(['2021-01-29', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31',
               ...
               '2010-01-29', '2010-01-29', '2010-01-29', '2010-01-29',
               '2010-01-29', '2010-01-29', '2010-01-29', '2010-01-29',
               '2010-01-29', '2010-01-29'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [35]:
# Subtracting one Timestamp from another yields a Timedelta
time_a, time_b = pd.Timestamp("2020-03-31"), pd.Timestamp("2020-03-20")
time_a - time_b

Timedelta('11 days 00:00:00')

In [36]:
# Reversing the operands gives a negative Timedelta (earlier minus later)
time_b - time_a

Timedelta('-11 days +00:00:00')

In [38]:
# A Timedelta can combine several units; adding it to a Timestamp shifts it
time_a + pd.Timedelta(weeks=2, days=3, hours=12)

Timestamp('2020-04-17 12:00:00')

In [40]:
# Load the orders file, indexed by ID, parsing both date columns as datetimes
shipping = pd.read_csv(
    "ecommerce.csv",
    parse_dates=["order_date", "delivery_date"],
    index_col="ID",
)
shipping.head(n=3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [42]:
# Subtracting two datetime columns gives a timedelta column (row by row)
shipping["Delivery Time"] = shipping["delivery_date"].sub(shipping["order_date"])
shipping.head(n=3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [43]:
# Adding a timedelta column to a datetime column yields datetimes again
shipping["delivery_date"].add(shipping["Delivery Time"])

ID
1     1999-10-20
2     2004-01-18
4     1994-03-12
5     2003-03-22
7     2003-03-18
         ...    
990   2000-09-12
991   2004-10-18
993   2005-10-06
994   1993-06-23
997   1993-07-01
Length: 501, dtype: datetime64[ns]

In [44]:
# Rows whose delivery took longer than 365 days
mask = shipping["Delivery Time"].gt(pd.Timedelta("365 days"))
shipping.loc[mask]

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days
9,1990-01-25,1994-10-02,1711 days
...,...,...,...
986,1990-12-10,1992-12-16,737 days
990,1991-06-24,1996-02-02,1684 days
991,1991-09-09,1998-03-30,2394 days
993,1990-11-16,1998-04-27,2719 days


In [45]:
# Shortest delivery duration in the data set
shipping.loc[:, "Delivery Time"].min()

Timedelta('8 days 00:00:00')