# Examples in pandas for datetime processing

In [27]:
# Get data
import pandas as pd
import datetime

# Sample input: six (timestamp text, value) records. Two of the timestamp
# entries are deliberately not parseable as dates so that the notebook has
# bad input to work with.
stamps = (
    "2015-12-21 03:21",
    "first missing datetime",
    "second missing datetime",
    "2016-01-13 05:33",
    "2016-02-02 06:43",
    "2017-06-29 07:56",
)
vals = (11, -1, -2, 22, 33, 44)

# Pair each timestamp string with its value as a two-item row.
dat = [[stamp, val] for stamp, val in zip(stamps, vals)]

df = pd.DataFrame(dat, columns=['datetime', 'val'])
print(df)

                  datetime  val
0         2015-12-21 03:21   11
1   first missing datetime   -1
2  second missing datetime   -2
3         2016-01-13 05:33   22
4         2016-02-02 06:43   33
5         2017-06-29 07:56   44


## Convert string fields to datetime fields

In [28]:
# Parse the text column into real timestamps.
# errors='coerce' converts any value that does not match the format into
# NaT (Not a Time) instead of raising.
# For faster parsing, always specify the date format if possible,
# see: http://strftime.org
print('Add columns and parse data string to type: datetime')
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce', format='%Y-%m-%d %H:%M')

# Extract the components with the vectorized .dt accessor instead of calling
# a Python lambda once per row. Rows holding NaT yield NaN, so the three
# columns come out as floats here.
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour
df['minute'] = df['datetime'].dt.minute

print(df.to_string())

Add columns and parse data string to type: datetime
             datetime  val   day  hour  minute
0 2015-12-21 03:21:00   11  21.0   3.0    21.0
1                 NaT   -1   NaN   NaN     NaN
2                 NaT   -2   NaN   NaN     NaN
3 2016-01-13 05:33:00   22  13.0   5.0    33.0
4 2016-02-02 06:43:00   33   2.0   6.0    43.0
5 2017-06-29 07:56:00   44  29.0   7.0    56.0


## Handle missing data, remove NaT (Not a Time) rows

In [29]:
# Show the rows whose datetime is missing (NaT), then drop them.
print("Remove NaT rows:")
print(df[df['datetime'].isnull()])
print()
print("Remaining rows with valid datetime:")

# dropna(subset=...) keeps only rows with a valid datetime, and astype with a
# mapping converts the float component columns to ints in one call. Both
# return a new frame, so nothing is assigned into a filtered slice of the
# old one (which older pandas versions report as SettingWithCopyWarning).
df = df.dropna(subset=['datetime']).astype({'day': int, 'hour': int, 'minute': int})

print(df.to_string())

Remove NaT rows:
  datetime  val  day  hour  minute
1      NaT   -1  NaN   NaN     NaN
2      NaT   -2  NaN   NaN     NaN

Remaining rows with valid datetime:
             datetime  val  day  hour  minute
0 2015-12-21 03:21:00   11   21     3      21
3 2016-01-13 05:33:00   22   13     5      33
4 2016-02-02 06:43:00   33    2     6      43
5 2017-06-29 07:56:00   44   29     7      56


## Calculate days & months since first entry

In [30]:
# Calculate number of days and months since first row entry.
# The reference timestamp is looked up once instead of once per row.
first = df['datetime'].iloc[0]
stamps = df['datetime']

# Plain integer arithmetic is used rather than subtracting Period objects:
# on current pandas versions Period - Period yields an offset object
# (e.g. <23 * Days>) instead of an integer.
df = df.assign(
    # Whole calendar days: time of day is stripped from both sides first.
    days_diff_period=(stamps.dt.normalize() - first.normalize()).dt.days,
    # Calendar months: only year and month are compared (regardless of day),
    # so e.g. Jan 31 -> Feb 1 counts as 1 month.
    month_diff_period=(stamps.dt.year - first.year) * 12 + (stamps.dt.month - first.month),
)

print(df.to_string())

             datetime  val  day  hour  minute  days_diff_period  month_diff_period
0 2015-12-21 03:21:00   11   21     3      21                 0                  0
3 2016-01-13 05:33:00   22   13     5      33                23                  1
4 2016-02-02 06:43:00   33    2     6      43                43                  2
5 2017-06-29 07:56:00   44   29     7      56               556                 18


## Add a time delta to a datetime object

In [31]:
# Add 'd' days to calculate a new datetime.
d = 30

# Adding a Timedelta to the whole column is vectorized, so no per-row lambda
# is needed. assign() returns a new frame with the extra column.
df = df.assign(date_after_30d=df['datetime'] + pd.Timedelta(days=d))
print(df.to_string())

             datetime  val  day  hour  minute  days_diff_period  month_diff_period      date_after_30d
0 2015-12-21 03:21:00   11   21     3      21                 0                  0 2016-01-20 03:21:00
3 2016-01-13 05:33:00   22   13     5      33                23                  1 2016-02-12 05:33:00
4 2016-02-02 06:43:00   33    2     6      43                43                  2 2016-03-03 06:43:00
5 2017-06-29 07:56:00   44   29     7      56               556                 18 2017-07-29 07:56:00


## Convert Unix epoch time to a datetime

In [32]:
# The Unix epoch is the reference instant January 1, 1970 (midnight UTC/GMT).
# Unix time counts the time elapsed since that instant, not counting leap
# seconds. With unit='ns' the integer passed to pd.to_datetime is read as a
# number of nanoseconds since the epoch, so 1000000000 ns is one second.
one_second_ns = 1000000000
example_ns = 1490195805433502912

print("1 second:", pd.to_datetime(one_second_ns, unit='ns'))
print("Example 2:", pd.to_datetime(example_ns, unit='ns'))

1 second: 1970-01-01 00:00:01
Example 2: 2017-03-22 15:16:45.433502912
