```
>>> dates = pd.Series(
...     ["2024-01-01", "2024-01-15", "2024-02-5"], dtype="datetime64[ns]"
... )
>>> dates.dt.day
0     1
1    15
2     5
dtype: int32
>>> dates.dt.month
0    1
1    1
2    2
dtype: int32
```

In [1]:
import numpy as np
import pandas as pd

In [2]:
date = pd.read_csv('orders.csv')
time = pd.read_csv('messages.csv')

**working with date using orders.csv, working with time using messages.csv**

In [3]:
date.head()

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3


In [5]:
time.head()
# chatting dataset

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше


**usually date and time are in object (i.e string) dtype, we need to convert to datetime64 dtype to perform datetime operations**

In [6]:
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [7]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


**WORKING WITH DATES**

In [8]:
# converting to datetime datatype
# This function converts a scalar, array-like, Series or
# DataFrame/dict-like to a pandas datetime object.

#  returns Series of datetime64 dtype


date['date'] = pd.to_datetime(date['date'])

In [9]:
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


**Extract Year**

In [10]:
date['date_year'] = date['date'].dt.year
date.sample(5)

Unnamed: 0,date,product_id,city_id,orders,date_year
902,2019-03-24,5031,0,6,2019
713,2018-10-14,4269,22,2,2018
492,2019-06-24,2306,14,12,2019
529,2019-07-30,5698,0,1,2019
615,2018-10-19,2846,22,3,2018


**Extract month**

In [11]:
date['date_month_no'] = date['date'].dt.month
date.sample(5)

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no
428,2018-07-14,4183,22,8,2018,7
907,2019-08-13,5073,13,1,2019,8
501,2019-01-24,1881,14,14,2019,1
929,2019-12-07,6801,3,7,2019,12
831,2019-06-20,652,25,5,2019,6


In [12]:
type(date['date'].dt)

-> https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DatetimeIndex.html#pandas.DatetimeIndex

In [13]:
date['date_month_name'] = date['date'].dt.month_name()
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name
0,2019-12-10,5628,25,3,2019,12,December
1,2018-08-15,3646,14,157,2018,8,August
2,2018-10-23,1859,25,1,2018,10,October
3,2019-08-17,7292,25,1,2019,8,August
4,2019-01-06,4344,25,3,2019,1,January


**Extract days**

In [14]:
date['date_day'] = date['date'].dt.day
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name,date_day
0,2019-12-10,5628,25,3,2019,12,December,10
1,2018-08-15,3646,14,157,2018,8,August,15
2,2018-10-23,1859,25,1,2018,10,October,23
3,2019-08-17,7292,25,1,2019,8,August,17
4,2019-01-06,4344,25,3,2019,1,January,6


In [15]:
# day of week
date['date_dow'] = date['date'].dt.dayofweek
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month_no,date_month_name,date_day,date_dow
0,2019-12-10,5628,25,3,2019,12,December,10,1
1,2018-08-15,3646,14,157,2018,8,August,15,2
2,2018-10-23,1859,25,1,2018,10,October,23,1
3,2019-08-17,7292,25,1,2019,8,August,17,5
4,2019-01-06,4344,25,3,2019,1,January,6,6


In [16]:
# name of "day of week"
date['date_dow_name'] = date['date'].dt.day_name()
date.drop(columns=['product_id','city_id','orders'],inplace=True)
date.head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name
0,2019-12-10,2019,12,December,10,1,Tuesday
1,2018-08-15,2018,8,August,15,2,Wednesday
2,2018-10-23,2018,10,October,23,1,Tuesday
3,2019-08-17,2019,8,August,17,5,Saturday
4,2019-01-06,2019,1,January,6,6,Sunday


**is a weekend ?**

In [19]:
date['date_is_weekend'] = np.where(
    date['date_dow_name'].isin(['Sunday','Saturday']),
                               1,0)

date.head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend
0,2019-12-10,2019,12,December,10,1,Tuesday,0
1,2018-08-15,2018,8,August,15,2,Wednesday,0
2,2018-10-23,2018,10,October,23,1,Tuesday,0
3,2019-08-17,2019,8,August,17,5,Saturday,1
4,2019-01-06,2019,1,January,6,6,Sunday,1


**Extract week of the year**

In [23]:
date['date_week'] = date['date'].dt.isocalendar().week
date.head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_weekday,date_week
0,2019-12-10,2019,12,December,10,1,Tuesday,0,1,50
1,2018-08-15,2018,8,August,15,2,Wednesday,0,2,33
2,2018-10-23,2018,10,October,23,1,Tuesday,0,1,43
3,2019-08-17,2019,8,August,17,5,Saturday,1,5,33
4,2019-01-06,2019,1,January,6,6,Sunday,1,6,1


**Extract quarter**

In [24]:
date['quarter'] = date['date'].dt.quarter
date.head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_weekday,date_week,quarter
0,2019-12-10,2019,12,December,10,1,Tuesday,0,1,50,4
1,2018-08-15,2018,8,August,15,2,Wednesday,0,2,33,3
2,2018-10-23,2018,10,October,23,1,Tuesday,0,1,43,4
3,2019-08-17,2019,8,August,17,5,Saturday,1,5,33,3
4,2019-01-06,2019,1,January,6,6,Sunday,1,6,1,1


**Extract Semester**

In [25]:
# first 6 months (1,2 quarter)
# next 6 months (3,4 quarter)
date['semsester'] = np.where(
    date['quarter'].isin([1,2]),
    1,2
)
date.head()

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_weekday,date_week,quarter,semsester
0,2019-12-10,2019,12,December,10,1,Tuesday,0,1,50,4,2
1,2018-08-15,2018,8,August,15,2,Wednesday,0,2,33,3,2
2,2018-10-23,2018,10,October,23,1,Tuesday,0,1,43,4,2
3,2019-08-17,2019,8,August,17,5,Saturday,1,5,33,3,2
4,2019-01-06,2019,1,January,6,6,Sunday,1,6,1,1,1


**Extract time elapsed between dates**

In [27]:
import datetime
# datetime class is present in the datetime module
today = datetime.datetime.today()
today

datetime.datetime(2026, 2, 13, 6, 49, 0, 262849)

In [28]:
date.sample(2)

Unnamed: 0,date,date_year,date_month_no,date_month_name,date_day,date_dow,date_dow_name,date_is_weekend,date_weekday,date_week,quarter,semsester
685,2019-02-28,2019,2,February,28,3,Thursday,0,3,9,1,1
276,2019-11-18,2019,11,November,18,0,Monday,0,0,47,4,2


In [29]:
today - date['date']

Unnamed: 0,date
0,2257 days 06:49:00.262849
1,2739 days 06:49:00.262849
2,2670 days 06:49:00.262849
3,2372 days 06:49:00.262849
4,2595 days 06:49:00.262849
...,...
995,2685 days 06:49:00.262849
996,2626 days 06:49:00.262849
997,2474 days 06:49:00.262849
998,2539 days 06:49:00.262849


In [30]:
(today - date['date']).dt.days

Unnamed: 0,date
0,2257
1,2739
2,2670
3,2372
4,2595
...,...
995,2685
996,2626
997,2474
998,2539


**Months passed**

In [36]:
(today-date['date'])/np.timedelta64(30,'D')

Unnamed: 0,date
0,75.242801
1,91.309468
2,89.009468
3,79.076134
4,86.509468
...,...
995,89.509468
996,87.542801
997,82.476134
998,84.642801


In [37]:
2257/30

75.23333333333333

In [41]:
np.round((today-date['date'])/np.timedelta64(30,'D'),0)

Unnamed: 0,date
0,75.0
1,91.0
2,89.0
3,79.0
4,87.0
...,...
995,90.0
996,88.0
997,82.0
998,85.0


**Working with time**

In [42]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [44]:
# convert to datetime64 dtype
time['date'] = pd.to_datetime(time['date'])
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
 1   msg     1000 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 15.8+ KB


In [45]:
time.sample(1)

Unnamed: 0,date,msg
772,2013-10-30 23:13:00,1063 .ника мышонок в доследе отказали дело ве...


In [50]:
time['hour'] = time['date'].dt.hour
time['min'] = time['date'].dt.minute
time['sec'] = time['date'].dt.second
time.head()

Unnamed: 0,date,hour,min,sec
0,2013-12-15 00:50:00,0,50,0
1,2014-04-29 23:40:00,23,40,0
2,2012-12-30 00:21:00,0,21,0
3,2014-11-28 00:31:00,0,31,0
4,2013-10-26 23:11:00,23,11,0


**Extract time part**

In [51]:
time['time'] = time['date'].dt.time
time.head()

Unnamed: 0,date,hour,min,sec,time
0,2013-12-15 00:50:00,0,50,0,00:50:00
1,2014-04-29 23:40:00,23,40,0,23:40:00
2,2012-12-30 00:21:00,0,21,0,00:21:00
3,2014-11-28 00:31:00,0,31,0,00:31:00
4,2013-10-26 23:11:00,23,11,0,23:11:00


**Time difference**

In [52]:
today - time['date']

Unnamed: 0,date
0,4443 days 05:59:00.262849
1,4307 days 07:09:00.262849
2,4793 days 06:28:00.262849
3,4095 days 06:18:00.262849
4,4492 days 07:38:00.262849
...,...
995,5082 days 05:59:00.262849
996,4403 days 07:35:00.262849
997,4868 days 07:12:00.262849
998,4984 days 07:15:00.262849


In [53]:
# in seconds
(today - time['date'])/np.timedelta64(1,'s')

Unnamed: 0,date
0,3.838967e+08
1,3.721505e+08
2,4.141385e+08
3,3.538307e+08
4,3.881363e+08
...,...
995,4.391063e+08
996,3.804465e+08
997,4.206211e+08
998,4.306437e+08


In [54]:
# in minutes
# minutes passed
(today - time['date'])/np.timedelta64(1,'m')

Unnamed: 0,date
0,6.398279e+06
1,6.202509e+06
2,6.902308e+06
3,5.897178e+06
4,6.468938e+06
...,...
995,7.318439e+06
996,6.340775e+06
997,7.010352e+06
998,7.177395e+06


In [55]:
# hours passed
(today - time['date'])/np.timedelta64(1,'h')

Unnamed: 0,date
0,106637.983406
1,103375.150073
2,115038.466740
3,98286.300073
4,107815.633406
...,...
995,121973.983406
996,105679.583406
997,116839.200073
998,119623.250073
