# Features related to date and time

## Importing and loading data

In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline

print(pd.__version__)
print(np.__version__)

2.1.3
1.26.1


In [2]:
# Load the NO2 readings from a CSV file; the path is relative to the
# notebook's working directory
data = pd.read_csv('datasets/No2 dataset.csv')

# Preview the first rows to see the raw column layout
data.head()

Unnamed: 0,Date_Time,NO2(GT)
0,10/03/2004 18.00.00,166
1,10/03/2004 19.00.00,1174
2,10/03/2004 20.00.00,131
3,10/03/2004 21.00.00,172
4,10/03/2004 22.00.00,131


In [4]:
# Check the max and min date and time.
# NOTE: Date_Time has not been parsed yet and still holds strings, so max()/min()
# compare the text character by character rather than chronologically.
print('Max Date and Time:', data['Date_Time'].max())
print('Min Date and Time:', data['Date_Time'].min())

Max Date and Time: 31/08/2004 23.00.00
Min Date and Time: 01/04/2004 00.00.00


In [5]:
# Print the dtype of every column to see how pandas read the CSV
print(data.dtypes)

Date_Time    object
NO2(GT)       int64
dtype: object


## Convert to datetime

In [6]:
# Parse the Date_Time strings into datetime64 values. The format mirrors the raw
# text: day/month/year, then hour.minute.second separated by dots.
data['Date_Time'] = pd.to_datetime(data['Date_Time'], format= '%d/%m/%Y %H.%M.%S')

# Confirm the column's dtype after the conversion
print(data.dtypes)

Date_Time    datetime64[ns]
NO2(GT)               int64
dtype: object


The `format` string must match the way dates are written in your data. Some common examples:

1. **1/12/2001**: %d/%m/%Y
2. **2001/12/1**: %Y/%m/%d
3. **2001-12-01**: %Y-%m-%d
4. **2001-12-01 09:00**: %Y-%m-%d %H:%M

In [7]:
# Check the max and min date and time again; on a datetime column max()/min()
# return the latest and earliest timestamps
print('Max Date and Time:', data['Date_Time'].max())
print('Min Date and Time:', data['Date_Time'].min())

Max Date and Time: 2004-09-24 02:00:00
Min Date and Time: 2004-03-10 18:00:00


## Extract hours and minutes

In [8]:
# Extract the hour component of each timestamp through the .dt accessor
# (head() previews the first rows)
data['Date_Time'].dt.hour.head()

0    18
1    19
2    20
3    21
4    22
Name: Date_Time, dtype: int32

In [9]:
# Extract the minute component of each timestamp through the .dt accessor
data['Date_Time'].dt.minute.head()

0    0
1    0
2    0
3    0
4    0
Name: Date_Time, dtype: int32

## Extract the day of the week and month

In [11]:
# Day of the week as an integer, from Monday (0) to Sunday (6)
data['Date_Time'].dt.dayofweek.head() 

0    2
1    2
2    2
3    2
4    2
Name: Date_Time, dtype: int32

In [15]:
# Day of the week as a name (e.g. Wednesday)
data['Date_Time'].dt.day_name().head()

0    Wednesday
1    Wednesday
2    Wednesday
3    Wednesday
4    Wednesday
Name: Date_Time, dtype: object

In [16]:
# Month as a number, from January (1) to December (12)
data['Date_Time'].dt.month.head()

0    3
1    3
2    3
3    3
4    3
Name: Date_Time, dtype: int32

In [18]:
# Flag whether each timestamp falls on the last day of its month
data['Date_Time'].dt.is_month_end.head()

0    False
1    False
2    False
3    False
4    False
Name: Date_Time, dtype: bool

## Combine everything together

In [21]:
# Build datetime_data: one column per calendar feature derived from Date_Time.
# Keyword order fixes the column order of the resulting frame.
datetime_data = pd.DataFrame(dict(
    year=data['Date_Time'].dt.year,
    month=data['Date_Time'].dt.month,
    day=data['Date_Time'].dt.day,
    hour=data['Date_Time'].dt.hour,
    dayofyear=data['Date_Time'].dt.dayofyear,
    week=data['Date_Time'].dt.isocalendar().week,  # ISO week number
    dayofweek=data['Date_Time'].dt.dayofweek,      # Monday (0) to Sunday (6)
    dayofweekname=data['Date_Time'].dt.day_name(),
    quarter=data['Date_Time'].dt.quarter,
))

# Preview the derived features
datetime_data.head()

Unnamed: 0,year,month,day,hour,dayofyear,week,dayofweek,dayofweekname,quarter
0,2004,3,10,18,70,11,2,Wednesday,1
1,2004,3,10,19,70,11,2,Wednesday,1
2,2004,3,10,20,70,11,2,Wednesday,1
3,2004,3,10,21,70,11,2,Wednesday,1
4,2004,3,10,22,70,11,2,Wednesday,1


In [23]:
# Place the original columns and the derived datetime features side by side
new_data = pd.concat((data, datetime_data), axis='columns')

# Preview the combined frame
new_data.head()

Unnamed: 0,Date_Time,NO2(GT),year,month,day,hour,dayofyear,week,dayofweek,dayofweekname,quarter
0,2004-03-10 18:00:00,166,2004,3,10,18,70,11,2,Wednesday,1
1,2004-03-10 19:00:00,1174,2004,3,10,19,70,11,2,Wednesday,1
2,2004-03-10 20:00:00,131,2004,3,10,20,70,11,2,Wednesday,1
3,2004-03-10 21:00:00,172,2004,3,10,21,70,11,2,Wednesday,1
4,2004-03-10 22:00:00,131,2004,3,10,22,70,11,2,Wednesday,1


## is_weekday to check if the day is a weekday or not

In [24]:
# Flag weekdays: 0 when the day is Saturday (5) or Sunday (6), else 1.
# A single vectorized expression replaces the row-by-row loop. The loop wrote
# through chained indexing (new_data['is_weekday'][i] = ...), which raises
# SettingWithCopyWarning and stops updating the frame once pandas copy-on-write
# is enabled; it also required the index to be exactly 0..n-1.
new_data['is_weekday'] = (~new_data['dayofweek'].isin([5, 6])).astype('int64')

In [25]:
# Compare dayofweek with the derived is_weekday flag for the first rows
new_data[['dayofweek', 'is_weekday']].head()

Unnamed: 0,dayofweek,is_weekday
0,2,1
1,2,1
2,2,1
3,2,1
4,2,1


## Difference Between Dates

In [27]:
# Load a second dataset, which contains several date columns; the path is
# relative to the notebook's working directory
agent_data = pd.read_csv('datasets/agent_classification.csv')

# Preview the first rows
agent_data.head()

Unnamed: 0,ID,Office_PIN,Application_Receipt_Date,Applicant_City_PIN,Applicant_Gender,Applicant_BirthDate,Applicant_Marital_Status,Applicant_Occupation,Applicant_Qualification,Manager_DOJ,...,Manager_Status,Manager_Gender,Manager_DoB,Manager_Num_Application,Manager_Num_Coded,Manager_Business,Manager_Num_Products,Manager_Business2,Manager_Num_Products2,Business_Sourced
0,FIN1000001,842001,4/16/2007,844120.0,M,12/19/1971,M,Others,Graduate,11/10/2005,...,Confirmation,M,2/17/1978,2.0,1.0,335249.0,28.0,335249.0,28.0,0
1,FIN1000002,842001,4/16/2007,844111.0,M,2/17/1983,S,Others,Class XII,11/10/2005,...,Confirmation,M,2/17/1978,2.0,1.0,335249.0,28.0,335249.0,28.0,1
2,FIN1000003,800001,4/16/2007,844101.0,M,1/16/1966,M,Business,Class XII,5/27/2006,...,Confirmation,M,3/3/1969,0.0,0.0,357184.0,24.0,357184.0,24.0,0
3,FIN1000004,814112,4/16/2007,814112.0,M,2/3/1988,S,Salaried,Class XII,8/21/2003,...,Confirmation,F,8/14/1978,0.0,0.0,318356.0,22.0,318356.0,22.0,0
4,FIN1000005,814112,4/16/2007,815351.0,M,7/4/1985,M,Others,Class XII,5/8/2006,...,Confirmation,M,2/7/1971,2.0,1.0,230402.0,17.0,230402.0,17.0,0


In [28]:
# Preview the two date columns whose difference is computed below
agent_data[['Application_Receipt_Date','Applicant_BirthDate']].head()

Unnamed: 0,Application_Receipt_Date,Applicant_BirthDate
0,4/16/2007,12/19/1971
1,4/16/2007,2/17/1983
2,4/16/2007,1/16/1966
3,4/16/2007,2/3/1988
4,4/16/2007,7/4/1985


### Calculate the difference of the dates

In [29]:
# Parse both date columns into datetime64 values; the raw strings are written
# month/day/year (e.g. 4/16/2007)
agent_data['Application_Receipt_Date'] = pd.to_datetime(agent_data['Application_Receipt_Date'], format = '%m/%d/%Y')
agent_data['Applicant_BirthDate'] = pd.to_datetime(agent_data['Applicant_BirthDate'], format = '%m/%d/%Y')

# Show the same two columns again to confirm the conversion
agent_data[['Application_Receipt_Date','Applicant_BirthDate']].head()

Unnamed: 0,Application_Receipt_Date,Applicant_BirthDate
0,2007-04-16,1971-12-19
1,2007-04-16,1983-02-17
2,2007-04-16,1966-01-16
3,2007-04-16,1988-02-03
4,2007-04-16,1985-07-04


In [35]:
# Print the first applicant's age in years at the application receipt date.
# relativedelta(later, earlier).years is the whole-years part of the calendar
# difference between the two dates.
# NOTE(review): `date` is not used in the visible cells — confirm before removing.
from datetime import date
from dateutil.relativedelta import relativedelta

print(relativedelta(agent_data['Application_Receipt_Date'][0], agent_data['Applicant_BirthDate'][0]).years)

35


In [39]:
# Applicant age at the application receipt date, measured in whole DAYS
# (receipt date minus birth date), not years.
# Subtracting the two datetime columns directly yields a timedelta Series, so no
# row-wise apply is needed; rows with a missing date come out as NaN.
agent_data['Applicant_age'] = (
    agent_data['Application_Receipt_Date'] - agent_data['Applicant_BirthDate']
).dt.days

# Checking the data
agent_data.head()

Unnamed: 0,ID,Office_PIN,Application_Receipt_Date,Applicant_City_PIN,Applicant_Gender,Applicant_BirthDate,Applicant_Marital_Status,Applicant_Occupation,Applicant_Qualification,Manager_DOJ,...,Manager_Gender,Manager_DoB,Manager_Num_Application,Manager_Num_Coded,Manager_Business,Manager_Num_Products,Manager_Business2,Manager_Num_Products2,Business_Sourced,Applicant_age
0,FIN1000001,842001,2007-04-16,844120.0,M,1971-12-19,M,Others,Graduate,11/10/2005,...,M,2/17/1978,2.0,1.0,335249.0,28.0,335249.0,28.0,0,12902.0
1,FIN1000002,842001,2007-04-16,844111.0,M,1983-02-17,S,Others,Class XII,11/10/2005,...,M,2/17/1978,2.0,1.0,335249.0,28.0,335249.0,28.0,1,8824.0
2,FIN1000003,800001,2007-04-16,844101.0,M,1966-01-16,M,Business,Class XII,5/27/2006,...,M,3/3/1969,0.0,0.0,357184.0,24.0,357184.0,24.0,0,15065.0
3,FIN1000004,814112,2007-04-16,814112.0,M,1988-02-03,S,Salaried,Class XII,8/21/2003,...,F,8/14/1978,0.0,0.0,318356.0,22.0,318356.0,22.0,0,7012.0
4,FIN1000005,814112,2007-04-16,815351.0,M,1985-07-04,M,Others,Class XII,5/8/2006,...,M,2/7/1971,2.0,1.0,230402.0,17.0,230402.0,17.0,0,7956.0
