In [1]:
import datetime

import numpy as np
import pandas as pd
# Load the two raw CSV inputs: `date` holds the orders, `time` the messages.
date, time = (pd.read_csv(csv_name) for csv_name in ('orders.csv', 'messages.csv'))


In [2]:
# Preview the first rows of the orders frame.
date.head()

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3


In [3]:
# Preview the first rows of the messages frame.
time.head()

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше


In [4]:
# At this point the 'date' column is stored as plain strings (object dtype),
# so no datetime operations can be performed on it yet.
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


In [5]:
# Here the 'date' column combines a date and a time of day, which is still a
# valid timestamp. It is likewise stored as strings rather than datetime, so
# datetime operations cannot be performed on it yet.
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


### Working with DATES

In [6]:
# Parse the string column into datetime values so the `.dt` accessor can be used.
date['date'] = pd.to_datetime(date['date'])

In [7]:
# Confirm that the 'date' column now has a datetime dtype.
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


### 1. Extracting the year

In [8]:
# Add a 'date_year' column holding the year of each date.
date['date_year'] = date['date'].dt.year
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year
0,2019-12-10,5628,25,3,2019
1,2018-08-15,3646,14,157,2018
2,2018-10-23,1859,25,1,2018
3,2019-08-17,7292,25,1,2019
4,2019-01-06,4344,25,3,2019


### 2. Extracting the month

In [9]:
# Add a 'date_month' column holding the month number of each date.
date['date_month'] = date['date'].dt.month
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month
0,2019-12-10,5628,25,3,2019,12
1,2018-08-15,3646,14,157,2018,8
2,2018-10-23,1859,25,1,2018,10
3,2019-08-17,7292,25,1,2019,8
4,2019-01-06,4344,25,3,2019,1


### 3. Extracting the month name

In [10]:
# Add a 'date_month_name' column holding the name of each date's month
# (e.g. 'December').
date['date_month_name'] = date['date'].dt.month_name()
date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name
0,2019-12-10,5628,25,3,2019,12,December
1,2018-08-15,3646,14,157,2018,8,August
2,2018-10-23,1859,25,1,2018,10,October
3,2019-08-17,7292,25,1,2019,8,August
4,2019-01-06,4344,25,3,2019,1,January


### 4. Extracting days from date

In [11]:
# Add a 'date_day' column holding the day of the month of each date.
date['date_day'] = date['date'].dt.day

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day
0,2019-12-10,5628,25,3,2019,12,December,10
1,2018-08-15,3646,14,157,2018,8,August,15
2,2018-10-23,1859,25,1,2018,10,October,23
3,2019-08-17,7292,25,1,2019,8,August,17
4,2019-01-06,4344,25,3,2019,1,January,6


### 5. Extracting name of the days

In [12]:
# Add a 'date_day_name' column holding the weekday name of each date
# (e.g. 'Tuesday').
date['date_day_name'] = date['date'].dt.day_name()

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day,date_day_name
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday


### 6. Checking whether the day falls on a weekend

In [13]:
# Flag rows whose weekday name is Saturday or Sunday with 1, all others with 0.
date['is_weekend'] = date['date_day_name'].isin(['Sunday', 'Saturday']).astype(int)

date.head()


Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day,date_day_name,is_weekend
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday,0
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday,0
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday,0
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday,1
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday,1


### 7. Extracting the number of week of the year

In [17]:
# Add a 'date_week' column holding the ISO 8601 week number of each date.
# Note: ISO weeks can assign days around New Year to a week of the adjacent year.
date['date_week'] = date['date'].dt.isocalendar().week

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day,date_day_name,is_weekend,date_week
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday,0,50
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday,0,33
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday,0,43
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday,1,33
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday,1,1


### 8. Extracting the quarter in which the date is

In [18]:
# Add a 'quarter' column holding the calendar quarter of each date.
date['quarter'] = date['date'].dt.quarter

date.head()

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day,date_day_name,is_weekend,date_week,quarter
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday,0,50,4
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday,0,33,3
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday,0,43,4
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday,1,33,3
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday,1,1,1


### 9. Extracting the semester of the year according to the date.

In [19]:
# Quarters 1 and 2 form semester 1; every other quarter maps to semester 2.
date['semester'] = np.where(date['quarter'] <= 2, 1, 2)

date.head()


Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,date_month_name,date_day,date_day_name,is_weekend,date_week,quarter,semester
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday,0,50,4,2
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday,0,33,3,2
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday,0,43,4,2
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday,1,33,3,2
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday,1,1,1,1


### 10. Getting today's date and time

In [20]:
# Snapshot of the current local date and time (naive: no timezone attached).
# `datetime` is imported together with the other imports at the top of the
# notebook, so this cell no longer carries its own import.
today = datetime.datetime.today()

today

datetime.datetime(2025, 5, 27, 17, 16, 8, 517877)

### 11. Extracting the difference between the current date and dates from the date table.

In [21]:
# The resulting differences carry days as well as hours, minutes and seconds.
today - date['date']

Unnamed: 0,date
0,1995 days 17:16:08.517877
1,2477 days 17:16:08.517877
2,2408 days 17:16:08.517877
3,2110 days 17:16:08.517877
4,2333 days 17:16:08.517877
...,...
995,2423 days 17:16:08.517877
996,2364 days 17:16:08.517877
997,2212 days 17:16:08.517877
998,2277 days 17:16:08.517877


### 12. Extracting only days in the difference.

In [22]:
# Keep only the whole-day component of each difference.
(today - date['date']).dt.days

Unnamed: 0,date
0,1995
1,2477
2,2408
3,2110
4,2333
...,...
995,2423
996,2364
997,2212
998,2277


### 13. Extracting the number of months passed from the difference

In [24]:
# Approximate elapsed months: 30.44 ~ 365.25 / 12, the average month length in
# days; the result is rounded to the nearest whole month.
((today - date['date']).dt.days / 30.44).round(0)

Unnamed: 0,date
0,66.0
1,81.0
2,79.0
3,69.0
4,77.0
...,...
995,80.0
996,78.0
997,73.0
998,75.0


### Working with TIME

In [25]:
# Parse the string column into datetime values, then confirm the new dtype.
time['date'] = pd.to_datetime(time['date'])
time.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
 1   msg     1000 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 15.8+ KB


### 1. Extracting hours, minutes and seconds

In [26]:
# Split each timestamp into separate hour, minute and second columns.
timestamps = time['date']
time['hour'], time['min'], time['sec'] = (
    timestamps.dt.hour,
    timestamps.dt.minute,
    timestamps.dt.second,
)

time.head()

Unnamed: 0,date,msg,hour,min,sec
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0


### 2. Extracting just the time (the date column here holds both a date and a time)

In [27]:
# Add a 'time' column holding only the time-of-day part of each timestamp.
time['time'] = time['date'].dt.time

time.head()

Unnamed: 0,date,msg,hour,min,sec,time
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0,00:50:00
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0,23:40:00
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0,00:21:00
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0,00:31:00
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0,23:11:00


### 3. Extracting the time difference

In [28]:
# Elapsed time between `today` and each message timestamp.
today - time['date']

Unnamed: 0,date
0,4181 days 16:26:08.517877
1,4045 days 17:36:08.517877
2,4531 days 16:55:08.517877
3,3833 days 16:45:08.517877
4,4230 days 18:05:08.517877
...,...
995,4820 days 16:26:08.517877
996,4141 days 18:02:08.517877
997,4606 days 17:39:08.517877
998,4722 days 17:42:08.517877


In [29]:
# Elapsed time in seconds: dividing by a one-second timedelta yields floats.
(today - time['date'])/np.timedelta64(1,'s')

Unnamed: 0,date
0,3.612976e+08
1,3.495514e+08
2,3.915393e+08
3,3.312315e+08
4,3.655371e+08
...,...
995,4.165072e+08
996,3.578473e+08
997,3.980219e+08
998,4.080445e+08


In [30]:
# Elapsed time in minutes: dividing by a one-minute timedelta yields floats.
(today - time['date'])/np.timedelta64(1,'m')

Unnamed: 0,date
0,6.021626e+06
1,5.825856e+06
2,6.525655e+06
3,5.520525e+06
4,6.092285e+06
...,...
995,6.941786e+06
996,5.964122e+06
997,6.633699e+06
998,6.800742e+06


In [31]:
# Elapsed time in hours: dividing by a one-hour timedelta yields floats.
(today - time['date'])/np.timedelta64(1,'h')

Unnamed: 0,date
0,100360.435699
1,97097.602366
2,108760.919033
3,92008.752366
4,101538.085699
...,...
995,115696.435699
996,99402.035699
997,110561.652366
998,113345.702366
