In [2]:
import pandas as pd

1. Convert string to datetime

In [14]:
df = pd.DataFrame({'date': ['3/10/2000', '3/11/2000', '3/12/2000'],
                   'value': [2, 3, 4]})

- Default arguments (month first)

In [13]:
pd.to_datetime(df.date)

0   2000-03-10
1   2000-03-11
2   2000-03-12
Name: date, dtype: datetime64[ns]

- Day first

In [15]:
pd.to_datetime(df.date, dayfirst=True)

0   2000-10-03
1   2000-11-03
2   2000-12-03
Name: date, dtype: datetime64[ns]

- Custom format 

In [23]:
df = pd.DataFrame({'date': ['2016-6-10 20:30:0', 
                            '2016-7-1 19:45:30', 
                            '2013-10-12 4:5:1'],
                   'value': [2, 3, 4]})

df['date'] = pd.to_datetime(df['date'], format="%Y-%d-%m %H:%M:%S")
df

Unnamed: 0,date,value
0,2016-10-06 20:30:00,2
1,2016-01-07 19:45:30,3
2,2013-12-10 04:05:01,4


- Infer the format 

In [24]:
df = pd.DataFrame({'date': ['3/11/2000', '3/12/2000', '3/13/2000'] * 1000 })

pd.to_datetime(df.date, infer_datetime_format=True)

0      2000-03-11
1      2000-03-12
2      2000-03-13
3      2000-03-11
4      2000-03-12
          ...    
2995   2000-03-12
2996   2000-03-13
2997   2000-03-11
2998   2000-03-12
2999   2000-03-13
Name: date, Length: 3000, dtype: datetime64[ns]

2. Assemble a datetime from multiple columns 

In [27]:
df = pd.DataFrame({'year': [2015, 2016],
                   'month': [2, 3],
                   'day': [4, 5], 
                   'sales': [100, 200]})

In [30]:
pd.to_datetime(df[['year', 'month', 'day']])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

3. Get year, month, day

In [31]:
df = pd.DataFrame({'name': ['Tom', 'Andy', 'Lucas'],
                 'DoB': ['08-05-1997', '04-28-1996', '12-16-1995']})
df['DoB'] = pd.to_datetime(df['DoB'])

In [32]:
df

Unnamed: 0,name,DoB
0,Tom,1997-08-05
1,Andy,1996-04-28
2,Lucas,1995-12-16


In [35]:
df['DoB'].dt.year

0    1997
1    1996
2    1995
Name: DoB, dtype: int64

In [36]:
df['DoB'].dt.month

0     8
1     4
2    12
Name: DoB, dtype: int64

In [37]:
df['DoB'].dt.day

0     5
1    28
2    16
Name: DoB, dtype: int64

4. Get week, the day of week, is leap year

In [40]:
df['DoB'].dt.day_of_year

0    217
1    119
2    350
Name: DoB, dtype: int64

In [41]:
## 0: Monday ... 6: Sunday
df['DoB'].dt.day_of_week

0    1
1    6
2    5
Name: DoB, dtype: int64

In [42]:
df['DoB'].dt.is_leap_year

0    False
1     True
2    False
Name: DoB, dtype: bool

5. Calculate age 

In [43]:
df

Unnamed: 0,name,DoB
0,Tom,1997-08-05
1,Andy,1996-04-28
2,Lucas,1995-12-16


In [44]:
today = pd.to_datetime('today')

Timestamp('2023-07-16 00:21:40.657824')

In [49]:
year_diff = today.year - df['DoB'].dt.year

In [53]:
not_full_year = (today.month, today.day) < df['DoB'].apply(lambda x: (x.month, x.day))
not_full_year

0     True
1    False
2     True
Name: DoB, dtype: bool

In [55]:
age = year_diff - not_full_year
age

0    25
1    27
2    27
Name: DoB, dtype: int64

6. Improve performance by setting date as index 

When doing a large scale boolean selection on date column

In [68]:
df = pd.read_csv('data/city_sales.csv',parse_dates=['date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1795144 entries, 0 to 1795143
Data columns (total 3 columns):
 #   Column  Dtype         
---  ------  -----         
 0   date    datetime64[ns]
 1   num     int64         
 2   city    object        
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 41.1+ MB


In [57]:
df = df.set_index(['date'])
df

Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 09:00:00,4,London
2015-01-01 09:01:00,4,London
2015-01-01 09:02:00,3,London
2015-01-01 09:03:00,3,London
2015-01-01 09:04:00,3,London
...,...,...
2019-01-31 15:56:00,3,Cambridge
2019-01-31 15:57:00,3,Cambridge
2019-01-31 15:58:00,3,Cambridge
2019-01-31 15:59:00,3,Cambridge


In [58]:
df.loc['2015-01-10':'2015-01-20']

  df.loc['2015-01-10':'2015-01-20']


Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-12 09:00:00,2,London
2015-01-12 09:01:00,2,London
2015-01-12 09:02:00,3,London
2015-01-12 09:03:00,3,London
2015-01-12 09:04:00,3,London
...,...,...
2015-01-20 15:56:00,4,Cambridge
2015-01-20 15:57:00,2,Cambridge
2015-01-20 15:58:00,4,Cambridge
2015-01-20 15:59:00,4,Cambridge


7. Selecting on year, month, day and time 

In [60]:
df.loc['2018-01']

Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01 09:00:00,2,London
2018-01-01 09:01:00,1,London
2018-01-01 09:02:00,3,London
2018-01-01 09:03:00,3,London
2018-01-01 09:04:00,3,London
...,...,...
2018-01-31 15:56:00,2,Cambridge
2018-01-31 15:57:00,3,Cambridge
2018-01-31 15:58:00,3,Cambridge
2018-01-31 15:59:00,1,Cambridge


In [63]:
df.loc['2018-01-10 10']

Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-10 10:00:00,4,London
2018-01-10 10:01:00,1,London
2018-01-10 10:02:00,4,London
2018-01-10 10:03:00,4,London
2018-01-10 10:04:00,1,London
...,...,...
2018-01-10 10:55:00,3,Cambridge
2018-01-10 10:56:00,3,Cambridge
2018-01-10 10:57:00,3,Cambridge
2018-01-10 10:58:00,3,Cambridge


In [66]:
df.loc['2018-01-10 10' : '2018-01-10 12']

Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-10 10:00:00,4,London
2018-01-10 10:01:00,1,London
2018-01-10 10:02:00,4,London
2018-01-10 10:03:00,4,London
2018-01-10 10:04:00,1,London
...,...,...
2018-01-10 12:55:00,4,Cambridge
2018-01-10 12:56:00,3,Cambridge
2018-01-10 12:57:00,3,Cambridge
2018-01-10 12:58:00,4,Cambridge


In [67]:
df.between_time('10:00', '10:30')

Unnamed: 0_level_0,num,city
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 10:00:00,2,London
2015-01-01 10:01:00,3,London
2015-01-01 10:02:00,3,London
2015-01-01 10:03:00,3,London
2015-01-01 10:04:00,3,London
...,...,...
2019-01-31 10:26:00,4,Cambridge
2019-01-31 10:27:00,4,Cambridge
2019-01-31 10:28:00,2,Cambridge
2019-01-31 10:29:00,1,Cambridge
