Dataset Link : https://www.kaggle.com/datasets/nextbigwhat/loan-data

# Goal : Basic Datetime Features

df['date'].dt.year  
df['date'].dt.month  
df['date'].dt.day  
df['date'].dt.dayofweek  
df['date'].dt.day_name()  
df['date'].dt.isocalendar().week  
df['date'].dt.quarter  
df['date'].dt.dayofweek >= 5  
df['date'].dt.is_month_start  
df['date'].dt.is_month_end  
df['date'].dt.is_quarter_start  
df['date'].dt.is_quarter_end  
df['date'].dt.is_year_start  
df['date'].dt.is_year_end  


In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('loan_data.csv')
df

Unnamed: 0,customer_id,disbursed_amount,interest,market,employment,time_employed,householder,income,date_issued,target,loan_purpose,number_open_accounts,date_last_payment,number_credit_lines_12
0,0,23201.5,15.4840,C,Teacher,<=5 years,RENT,84600.0,2013-06-11,0,Debt consolidation,4.0,2016-01-14,
1,1,7425.0,11.2032,B,Accountant,<=5 years,OWNER,102000.0,2014-05-08,0,Car purchase,13.0,2016-01-25,
2,2,11150.0,8.5100,A,Statistician,<=5 years,RENT,69840.0,2013-10-26,0,Debt consolidation,8.0,2014-09-26,
3,3,7600.0,5.8656,A,Other,<=5 years,RENT,100386.0,2015-08-20,0,Debt consolidation,20.0,2016-01-26,
4,4,31960.0,18.7392,E,Bus driver,>5 years,RENT,95040.0,2014-07-22,0,Debt consolidation,14.0,2016-01-11,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,23750.0,11.0019,B,Civil Servant,<=5 years,MORTGAGE,98000.0,2010-01-14,0,Other,12.0,2010-11-03,
9996,9996,11880.0,10.4923,B,Civil Servant,<=5 years,RENT,36270.0,2015-03-20,0,Debt consolidation,17.0,2016-01-26,
9997,9997,19950.0,8.4364,B,Accountant,>5 years,OWNER,52250.0,2015-04-03,0,Debt consolidation,9.0,2015-12-12,
9998,9998,4850.0,13.6409,C,Bus driver,>5 years,RENT,80040.0,2014-11-23,0,Debt consolidation,26.0,2016-01-01,


In [3]:
# check data types of columns
df.dtypes

customer_id                 int64
disbursed_amount          float64
interest                  float64
market                     object
employment                 object
time_employed              object
householder                object
income                    float64
date_issued                object
target                      int64
loan_purpose               object
number_open_accounts      float64
date_last_payment          object
number_credit_lines_12    float64
dtype: object

In [4]:
# two columns contains dates - 1) date_issued   2) date_last_payment
# check if missing values in them

df[['date_last_payment','date_issued']].isnull().sum()

date_last_payment    0
date_issued          0
dtype: int64

In [5]:
# check unique values in them
df['date_issued'].unique()

array(['2013-06-11', '2014-05-08', '2013-10-26', ..., '2010-08-22',
       '2009-06-02', '2010-01-14'], dtype=object)

In [6]:
df['date_last_payment'].unique()

array(['2016-01-14', '2016-01-25', '2014-09-26', ..., '2014-01-21',
       '2009-10-19', '2010-11-03'], dtype=object)

In [7]:
# change data type of those two columns to date_time
df['date_issued'] = pd.to_datetime(df['date_issued'], errors='raise') 
df['date_last_payment'] = pd.to_datetime(df['date_last_payment'])

df.dtypes

# errors = raise is default, other parameters are coerce, ignore

customer_id                        int64
disbursed_amount                 float64
interest                         float64
market                            object
employment                        object
time_employed                     object
householder                       object
income                           float64
date_issued               datetime64[ns]
target                             int64
loan_purpose                      object
number_open_accounts             float64
date_last_payment         datetime64[ns]
number_credit_lines_12           float64
dtype: object

**different parameters of errors**

1. **'raise' (default):** Throws an error if any date is invalid.

2. **'coerce':** Replaces invalid parsing with NaT (Not a Time). Useful when you want to handle invalid entries gracefully.

3. **'ignore':** Returns the original values unmodified if any are invalid.

In [8]:
# Year
df['date_issued_Year'] = df['date_issued'].dt.year # adds year column at last
df

Unnamed: 0,customer_id,disbursed_amount,interest,market,employment,time_employed,householder,income,date_issued,target,loan_purpose,number_open_accounts,date_last_payment,number_credit_lines_12,date_issued_Year
0,0,23201.5,15.4840,C,Teacher,<=5 years,RENT,84600.0,2013-06-11,0,Debt consolidation,4.0,2016-01-14,,2013
1,1,7425.0,11.2032,B,Accountant,<=5 years,OWNER,102000.0,2014-05-08,0,Car purchase,13.0,2016-01-25,,2014
2,2,11150.0,8.5100,A,Statistician,<=5 years,RENT,69840.0,2013-10-26,0,Debt consolidation,8.0,2014-09-26,,2013
3,3,7600.0,5.8656,A,Other,<=5 years,RENT,100386.0,2015-08-20,0,Debt consolidation,20.0,2016-01-26,,2015
4,4,31960.0,18.7392,E,Bus driver,>5 years,RENT,95040.0,2014-07-22,0,Debt consolidation,14.0,2016-01-11,,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,23750.0,11.0019,B,Civil Servant,<=5 years,MORTGAGE,98000.0,2010-01-14,0,Other,12.0,2010-11-03,,2010
9996,9996,11880.0,10.4923,B,Civil Servant,<=5 years,RENT,36270.0,2015-03-20,0,Debt consolidation,17.0,2016-01-26,,2015
9997,9997,19950.0,8.4364,B,Accountant,>5 years,OWNER,52250.0,2015-04-03,0,Debt consolidation,9.0,2015-12-12,,2015
9998,9998,4850.0,13.6409,C,Bus driver,>5 years,RENT,80040.0,2014-11-23,0,Debt consolidation,26.0,2016-01-01,,2014


In [9]:
# Month
df['date_issued'].dt.month # gives month number

0        6
1        5
2       10
3        8
4        7
        ..
9995     1
9996     3
9997     4
9998    11
9999     1
Name: date_issued, Length: 10000, dtype: int32

In [10]:
# Day
df['date_issued'].dt.day # gives day in number

0       11
1        8
2       26
3       20
4       22
        ..
9995    14
9996    20
9997     3
9998    23
9999    19
Name: date_issued, Length: 10000, dtype: int32

In [11]:
# Day of week
df['date_issued'].dt.dayofweek  # Monday=0, Sunday=6

0       1
1       3
2       5
3       3
4       1
       ..
9995    3
9996    4
9997    4
9998    6
9999    0
Name: date_issued, Length: 10000, dtype: int32

In [12]:
# Day name
df['date_issued'].dt.day_name()  # gives name of the week day

0        Tuesday
1       Thursday
2       Saturday
3       Thursday
4        Tuesday
          ...   
9995    Thursday
9996      Friday
9997      Friday
9998      Sunday
9999      Monday
Name: date_issued, Length: 10000, dtype: object

**What it does:**
- Returns ISO 8601 calendar components from a datetime column:
    - year: ISO year
    - week: ISO week number (1 to 52/53)
    - weekday: ISO weekday (1=Monday, 7=Sunday)
- ISO Calendar System follows 1-based week day system.    --> Monday=1, Sunday=7
- Python/Pandas Standard follows 0-based week day system. --> Monday=0, Sunday=6

In [13]:
# isocalendar (year,week,day)
df['date_issued'].dt.isocalendar()

Unnamed: 0,year,week,day
0,2013,24,2
1,2014,19,4
2,2013,43,6
3,2015,34,4
4,2014,30,2
...,...,...,...
9995,2010,2,4
9996,2015,12,5
9997,2015,14,5
9998,2014,47,7


In [15]:
# week of the year
df['date_issued'].dt.isocalendar().week

0       24
1       19
2       43
3       34
4       30
        ..
9995     2
9996    12
9997    14
9998    47
9999     4
Name: week, Length: 10000, dtype: UInt32

In [16]:
# Quarter
df['date_issued'].dt.quarter  # gives quarter number

# gives quarter as per calendar year (Jan - Dec)
# to get quarter as per financial year, create custom solution

0       2
1       2
2       4
3       3
4       3
       ..
9995    1
9996    1
9997    2
9998    4
9999    1
Name: date_issued, Length: 10000, dtype: int32

In [17]:
def financial_quarter(month):
    return ((month - 4) % 12) // 3 + 1

df['fin_quarter'] = df['date_issued'].dt.month.apply(financial_quarter)
df

Unnamed: 0,customer_id,disbursed_amount,interest,market,employment,time_employed,householder,income,date_issued,target,loan_purpose,number_open_accounts,date_last_payment,number_credit_lines_12,date_issued_Year,fin_quarter
0,0,23201.5,15.4840,C,Teacher,<=5 years,RENT,84600.0,2013-06-11,0,Debt consolidation,4.0,2016-01-14,,2013,1
1,1,7425.0,11.2032,B,Accountant,<=5 years,OWNER,102000.0,2014-05-08,0,Car purchase,13.0,2016-01-25,,2014,1
2,2,11150.0,8.5100,A,Statistician,<=5 years,RENT,69840.0,2013-10-26,0,Debt consolidation,8.0,2014-09-26,,2013,3
3,3,7600.0,5.8656,A,Other,<=5 years,RENT,100386.0,2015-08-20,0,Debt consolidation,20.0,2016-01-26,,2015,2
4,4,31960.0,18.7392,E,Bus driver,>5 years,RENT,95040.0,2014-07-22,0,Debt consolidation,14.0,2016-01-11,,2014,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9995,23750.0,11.0019,B,Civil Servant,<=5 years,MORTGAGE,98000.0,2010-01-14,0,Other,12.0,2010-11-03,,2010,4
9996,9996,11880.0,10.4923,B,Civil Servant,<=5 years,RENT,36270.0,2015-03-20,0,Debt consolidation,17.0,2016-01-26,,2015,4
9997,9997,19950.0,8.4364,B,Accountant,>5 years,OWNER,52250.0,2015-04-03,0,Debt consolidation,9.0,2015-12-12,,2015,1
9998,9998,4850.0,13.6409,C,Bus driver,>5 years,RENT,80040.0,2014-11-23,0,Debt consolidation,26.0,2016-01-01,,2014,3


In [19]:
# Is weekend
df['date_issued'].dt.dayofweek >= 5  # gives boolean flag

0       False
1       False
2        True
3       False
4       False
        ...  
9995    False
9996    False
9997    False
9998     True
9999    False
Name: date_issued, Length: 10000, dtype: bool

In [21]:
# Is month start / end

df['date_issued'].dt.is_month_start
df['date_issued'].dt.is_month_end

0       False
1       False
2       False
3       False
4       False
        ...  
9995    False
9996    False
9997    False
9998    False
9999    False
Name: date_issued, Length: 10000, dtype: bool

In [22]:
# Is quarter start / end

df['date_issued'].dt.is_quarter_start
df['date_issued'].dt.is_quarter_end

0       False
1       False
2       False
3       False
4       False
        ...  
9995    False
9996    False
9997    False
9998    False
9999    False
Name: date_issued, Length: 10000, dtype: bool

In [23]:
# Is year start / end

df['date_issued'].dt.is_year_start
df['date_issued'].dt.is_year_end

0       False
1       False
2       False
3       False
4       False
        ...  
9995    False
9996    False
9997    False
9998    False
9999    False
Name: date_issued, Length: 10000, dtype: bool