In [1]:
import pandas as pd
import numpy as np
from datetime import date, time, datetime

#### 231. The Python datetime Module

In [2]:
date_A = date(2022,4,21)

In [3]:
date_A.year

2022

In [4]:
date_A.day

21

In [5]:
date_A.isoformat()

'2022-04-21'

In [6]:
time_A = time(second=9)

In [7]:
time(minute=30).isoformat()

'00:30:00'

In [8]:
time_A = time(23,54,21)

In [9]:
time_A.isoformat()

'23:54:21'

In [10]:
datetime_A = datetime(2022,4,21,23,54,21,323423)

In [11]:
print(datetime_A.microsecond)

323423


#### 232. Parsing Dates From Text

https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

In [12]:
dt_B = datetime.strptime('2019-10-31','%Y-%m-%d')
dt_B.isoformat()

'2019-10-31T00:00:00'

In [13]:
try_this = 'jan 20 2020 4pm'

datetime.strptime(try_this,'%b %d %Y %I%p').isoformat()

'2020-01-20T16:00:00'

#### 233. Even Better: dateutil

In [14]:
!pip show python-dateutil

Name: python-dateutil
Version: 2.8.2
Summary: Extensions to the standard Python datetime module
Home-page: https://github.com/dateutil/dateutil
Author: Gustavo Niemeyer
Author-email: gustavo@niemeyer.net
License: Dual License
Location: C:\Users\161230\AppData\Roaming\Python\Python311\site-packages
Requires: six
Required-by: arrow, celery, jupyter_client, matplotlib, pandas


In [15]:
from dateutil import parser

In [16]:
parser.parse('jan 21st 1990')

datetime.datetime(1990, 1, 21, 0, 0)

In [17]:
parser.parse('22 april 2068 at 4pm and 17 minutes;; 20 seconds')

datetime.datetime(2068, 4, 22, 16, 17, 20)

#### 234. From Datetime to String

In [18]:
dt = datetime.now()
dt

datetime.datetime(2024, 2, 21, 15, 40, 34, 345808)

In [19]:
dt.strftime('Year %Y; Month: %m; Day: %d')

'Year 2024; Month: 02; Day: 21'

In [20]:
dt.strftime('%c')

'Wed Feb 21 15:40:34 2024'

In [21]:
#an alternative approach:
"My date is {:%c}".format(dt)

'My date is Wed Feb 21 15:40:34 2024'

#### 235. Perdormant Datetimes Wiht Numpy

In [22]:
a = np.datetime64('2020-03-04')
b = np.datetime64(datetime.now())

In [23]:
b + 10

numpy.datetime64('2024-02-21T15:40:34.571878')

In [24]:
a + 10

numpy.datetime64('2020-03-14')

In [25]:
np.datetime64(b,'D') + 10

numpy.datetime64('2024-03-02')

In [26]:
# vectorize ops
dates = np.array([
    '2019-02-20',
    '2019-02-10',
    '2019-04-15'
],dtype=np.datetime64)

In [27]:
dates - 10

array(['2019-02-10', '2019-01-31', '2019-04-05'], dtype='datetime64[D]')

In [28]:
# ... what about business day?
print(dates)

np.busday_offset(dates,-10,'backward')

['2019-02-20' '2019-02-10' '2019-04-15']


array(['2019-02-06', '2019-01-25', '2019-04-01'], dtype='datetime64[D]')

#### 236. The Pandas Timestamp

In [29]:
pd.Timestamp("4th of July 1776")

Timestamp('1776-07-04 00:00:00')

In [30]:
pd.Timestamp("4/7/1776")

Timestamp('1776-04-07 00:00:00')

In [31]:
pdts = pd.to_datetime('4/7/1776',dayfirst = True)

In [32]:
pdts.day_name()

'Thursday'

In [33]:
pdts.days_in_month

31

In [34]:
pdts.isocalendar()

datetime.IsoCalendarDate(year=1776, week=27, weekday=4)

#### 237. Our Dataset: Brent Prices

https://andybek.com/pandas-oil

In [39]:
oil_url = 'https://andybek.com/pandas-oil'

brent = pd.read_csv(oil_url)

In [46]:
brent

Unnamed: 0,Date,Price
0,04-Jan-00,23.95
1,05-Jan-00,23.72
2,06-Jan-00,23.55
3,07-Jan-00,23.35
4,10-Jan-00,22.77
...,...,...
5011,24-Sep-19,64.13
5012,25-Sep-19,62.41
5013,26-Sep-19,62.08
5014,27-Sep-19,62.48


In [41]:
brent.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    5016 non-null   object 
 1   Price   5016 non-null   float64
dtypes: float64(1), object(1)
memory usage: 362.6 KB


In [47]:
# brent['Date'] = pd.to_datetime(brent['Date'], format='%Y-%m-%d')
brent['Date'] = pd.to_datetime(brent['Date'], format='%d-%b-%y')


In [51]:
brent.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5016 entries, 0 to 5015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    5016 non-null   datetime64[ns]
 1   Price   5016 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 78.5 KB


In [52]:
brent.set_index('Date',inplace=True)

In [53]:
brent.head()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77


In [54]:
brent.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2019-09-17', '2019-09-18', '2019-09-19', '2019-09-20',
               '2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
               '2019-09-27', '2019-09-30'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

#### 239. A Cool Shortcut: read_csv() With parse_dates

In [55]:
brent2 = pd.read_csv(oil_url,index_col=0,parse_dates = True)

  brent2 = pd.read_csv(oil_url,index_col=0,parse_dates = True)


In [56]:
brent.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2019-09-17', '2019-09-18', '2019-09-19', '2019-09-20',
               '2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
               '2019-09-27', '2019-09-30'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

#### 240.Indexing Dates

In [57]:
brent.loc['2019-01'].count()

Price    22
dtype: int64

In [58]:
brent.loc['2019-01':'2019-02-15'].count()

Price    33
dtype: int64

In [59]:
# this is only when in the index we have a datetime

#### 241. Skill Challenge

##### Using the brent time series, create a shorter dataframe that only contains the dates from 1 December 2015 to 31 March 2016

In [60]:
brent.loc['2015-12':'2016-03']

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2015-12-01,42.97
2015-12-02,41.92
2015-12-03,42.00
2015-12-04,41.44
2015-12-07,39.69
...,...
2016-03-24,38.33
2016-03-28,38.33
2016-03-29,36.75
2016-03-30,36.75


##### Repeat the above, but this time using a partial string indexing instead of specifying full dates. What was the standard deviation of prices during

In [61]:
brent.loc['2015-12':'2016-03'].std()

Price    3.990123
dtype: float64

##### Was the mean price from February 2018 greater or less than the median price from March 2017?

In [62]:
brent.loc['2018-02'].mean() > brent.loc['2017-03'].median()

Price    True
dtype: bool

#### 243. DateTimeIndex Attribuute Accessors

In [63]:
brent['Dayname'] = brent.index.day_name()

In [64]:
brent.head()

Unnamed: 0_level_0,Price,Dayname
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-04,23.95,Tuesday
2000-01-05,23.72,Wednesday
2000-01-06,23.55,Thursday
2000-01-07,23.35,Friday
2000-01-10,22.77,Monday


In [65]:
brent.groupby('Dayname').mean().idxmin()

Price    Monday
dtype: object

In [66]:
brent.drop(columns='Dayname',inplace=True)

In [67]:
brent.head()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77


In [68]:
# bool masks with datetimeindex  attributes

# Q: find the average Brent price in all leap year Februaries

brent[(brent.index.is_leap_year) & (brent.index.month == 2)].mean()

Price    60.696634
dtype: float64

#### Creating Date Ranges

In [69]:
brent.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2019-09-17', '2019-09-18', '2019-09-19', '2019-09-20',
               '2019-09-23', '2019-09-24', '2019-09-25', '2019-09-26',
               '2019-09-27', '2019-09-30'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)

In [72]:
pd.date_range(start='10 may 2020',end= '20 may 2020')

DatetimeIndex(['2020-05-10', '2020-05-11', '2020-05-12', '2020-05-13',
               '2020-05-14', '2020-05-15', '2020-05-16', '2020-05-17',
               '2020-05-18', '2020-05-19', '2020-05-20'],
              dtype='datetime64[ns]', freq='D')

In [75]:
pd.date_range(start='10 may 2020',end= '20/5/2020')

DatetimeIndex(['2020-05-10', '2020-05-11', '2020-05-12', '2020-05-13',
               '2020-05-14', '2020-05-15', '2020-05-16', '2020-05-17',
               '2020-05-18', '2020-05-19', '2020-05-20'],
              dtype='datetime64[ns]', freq='D')

In [81]:
# 10 WEEKS
pd.date_range(start='10 may 2020',periods=10, freq='W')

DatetimeIndex(['2020-05-10', '2020-05-17', '2020-05-24', '2020-05-31',
               '2020-06-07', '2020-06-14', '2020-06-21', '2020-06-28',
               '2020-07-05', '2020-07-12'],
              dtype='datetime64[ns]', freq='W-SUN')

In [83]:
# 10 WEEK 2 WEEK 
pd.date_range(start='10 may 2020',periods=10, freq='2W')

DatetimeIndex(['2020-05-10', '2020-05-24', '2020-06-07', '2020-06-21',
               '2020-07-05', '2020-07-19', '2020-08-02', '2020-08-16',
               '2020-08-30', '2020-09-13'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [87]:
# the 20 month ends before jan 2021
dti = pd.date_range(end='jan 2021',periods=10, freq='M')

In [91]:
pd.DataFrame(index=dti, data=np.random.randint(100,size=(10,4)),columns=list('abcd'))

Unnamed: 0,a,b,c,d
2020-03-31,38,64,88,40
2020-04-30,57,98,26,3
2020-05-31,34,14,57,53
2020-06-30,64,31,58,41
2020-07-31,43,41,21,71
2020-08-31,91,10,28,91
2020-09-30,93,91,84,95
2020-10-31,37,96,35,86
2020-11-30,55,56,25,4
2020-12-31,78,15,33,75


#### 245. Shifting Dates With pd.DateOffset

In [93]:
dob = pd.Timestamp('2020-03-09') 

In [97]:
try:
    dob - 18
except Exception as e:
    print(e)

Addition/subtraction of integers and integer-arrays with Timestamp is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`


In [99]:
dob - pd.DateOffset(days=18)

Timestamp('2020-02-20 00:00:00')

In [101]:
# Dateoffset suports many tempral params
pd.DateOffset(days=5, minutes=10,nanosecond=2)

<DateOffset: days=5, minutes=10, nanosecond=2>

In [102]:
brent.head()

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04,23.95
2000-01-05,23.72
2000-01-06,23.55
2000-01-07,23.35
2000-01-10,22.77


In [105]:
brent.set_index(brent.index + pd.DateOffset(hours=18))

Unnamed: 0_level_0,Price
Date,Unnamed: 1_level_1
2000-01-04 18:00:00,23.95
2000-01-05 18:00:00,23.72
2000-01-06 18:00:00,23.55
2000-01-07 18:00:00,23.35
2000-01-10 18:00:00,22.77
...,...
2019-09-24 18:00:00,64.13
2019-09-25 18:00:00,62.41
2019-09-26 18:00:00,62.08
2019-09-27 18:00:00,62.48


In [107]:
brent.set_index(brent.index + pd.DateOffset(hours=18)).index

DatetimeIndex(['2000-01-04 18:00:00', '2000-01-05 18:00:00',
               '2000-01-06 18:00:00', '2000-01-07 18:00:00',
               '2000-01-10 18:00:00', '2000-01-11 18:00:00',
               '2000-01-12 18:00:00', '2000-01-13 18:00:00',
               '2000-01-14 18:00:00', '2000-01-17 18:00:00',
               ...
               '2019-09-17 18:00:00', '2019-09-18 18:00:00',
               '2019-09-19 18:00:00', '2019-09-20 18:00:00',
               '2019-09-23 18:00:00', '2019-09-24 18:00:00',
               '2019-09-25 18:00:00', '2019-09-26 18:00:00',
               '2019-09-27 18:00:00', '2019-09-30 18:00:00'],
              dtype='datetime64[ns]', name='Date', length=5016, freq=None)