# Examples in pandas for datetime processing

In [1]:
# Get data
import pandas as pd

# Sample records, each a [raw datetime string, value] pair.
# Two of the entries hold placeholder text instead of a datetime.
dat = [
    ["2015-12-21 03:21", 11],
    ["a missing datetime", -1],
    ["another missing datetime", -99],
    ["2016-01-13 05:33", 22],
    ["2016-02-02 06:43", 33],
    ["2017-06-29 07:56", 44],
]

# Load the records into a two-column frame; 'datetime' still holds plain strings here.
column_names = ['datetime', 'val']
df = pd.DataFrame(data=dat, columns=column_names)

## Convert to datetime

In [2]:
# Process date column:
# Parse the raw strings in 'datetime' into pandas datetimes.
# errors='coerce' converts unparseable data to NaT (Not a Time) instead of raising.
# For faster parsing, always specify the date format if possible, see: http://strftime.org
print('Add columns and parse data string to type: datetime')
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M')

# Extract the components with the vectorized .dt accessor rather than a per-row apply().
# Rows holding NaT yield NaN, so the new columns are floating point.
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour
df['minute'] = df['datetime'].dt.minute
print(df)

Add columns and parse data string to type: datetime
             datetime  val   day  hour  minute
0 2015-12-21 03:21:00   11  21.0   3.0    21.0
1                 NaT   -1   NaN   NaN     NaN
2                 NaT  -99   NaN   NaN     NaN
3 2016-01-13 05:33:00   22  13.0   5.0    33.0
4 2016-02-02 06:43:00   33   2.0   6.0    43.0
5 2017-06-29 07:56:00   44  29.0   7.0    56.0


## Remove NaT (Not a Time) rows

In [3]:
# Drop the rows whose datetime could not be parsed.
# The null mask is computed once: it selects the rows shown as removed,
# and its negation selects the rows that are kept.
missing_datetime = df['datetime'].isnull()

print("Remove NaT rows:")
print(df[missing_datetime])
print()
print("Remaining rows with valid datetime:")
df = df[~missing_datetime]
print(df)

Remove NaT rows:
  datetime  val  day  hour  minute
1      NaT   -1  NaN   NaN     NaN
2      NaT  -99  NaN   NaN     NaN

Remaining rows with valid datetime:
             datetime  val   day  hour  minute
0 2015-12-21 03:21:00   11  21.0   3.0    21.0
3 2016-01-13 05:33:00   22  13.0   5.0    33.0
4 2016-02-02 06:43:00   33   2.0   6.0    43.0
5 2017-06-29 07:56:00   44  29.0   7.0    56.0


## Calculate days & months since first entry

In [4]:
# Calculate number of days and months since first row entry.
# The first row's periods are computed once up front instead of once per row.
# Integer period ordinals are subtracted so the new columns hold plain ints;
# subtracting Period objects directly yields DateOffset objects in pandas >= 0.24.
# Assumes df is non-empty (iloc[0] needs a first row).
first_entry = df['datetime'].iloc[0]
first_day = first_entry.to_period('D').ordinal
first_month = first_entry.to_period('M').ordinal

df['days_diff'] = df['datetime'].apply(lambda x: x.to_period('D').ordinal - first_day)
df['month_diff'] = df['datetime'].apply(lambda x: x.to_period('M').ordinal - first_month)
print(df.to_string())
print(">> Note that the above use of 'to_period' results in the calculation of 'month_diff' based \
only on transitions between months, regardless of the day of the month e.g. 22 days are counted as \
a whole month because (simplistically) Jan 2016 comes one month after Dec 2015.")

             datetime  val   day  hour  minute  days_diff  month_diff
0 2015-12-21 03:21:00   11  21.0   3.0    21.0          0           0
3 2016-01-13 05:33:00   22  13.0   5.0    33.0         23           1
4 2016-02-02 06:43:00   33   2.0   6.0    43.0         43           2
5 2017-06-29 07:56:00   44  29.0   7.0    56.0        556          18
>> Note that the above use of 'to_period' results in the calculation of 'month_diff' based only on transitions between months, regardless of the day of the month e.g. 22 days are counted as a whole month because (simplistically) Jan 2016 comes one month after Dec 2015.


## Add a time delta to a datetime object

In [5]:
# Add d days to calculate a new datetime.
# Adding a Timedelta to the whole column is vectorized, so no per-row apply() is needed.
# The column name is derived from d so it cannot drift from the offset actually applied
# (with d = 30 it is 'date_after_30d').
d = 30
df['date_after_{}d'.format(d)] = df['datetime'] + pd.Timedelta(days=d)
print(df.to_string())

             datetime  val   day  hour  minute  days_diff  month_diff      date_after_30d
0 2015-12-21 03:21:00   11  21.0   3.0    21.0          0           0 2016-01-20 03:21:00
3 2016-01-13 05:33:00   22  13.0   5.0    33.0         23           1 2016-02-12 05:33:00
4 2016-02-02 06:43:00   33   2.0   6.0    43.0         43           2 2016-03-03 06:43:00
5 2017-06-29 07:56:00   44  29.0   7.0    56.0        556          18 2017-07-29 07:56:00


## Convert Unix time 

In [6]:
# Unix time counts the time elapsed since the Unix epoch (midnight UTC/GMT on
# January 1, 1970), not counting leap seconds. It is commonly given in seconds;
# the value below is passed with unit='ns', i.e. interpreted as nanoseconds.
unix_time_ns = 1490195805433502912
print(pd.to_datetime(unix_time_ns, unit='ns'))

2017-03-22 15:16:45.433502912
