In [1]:
# parse dates and times using Pandas' "to_datetime" function
import pandas as pd

# a date-only string parses to midnight (00:00:00) of that day
pd.to_datetime('2019-07-22')

Timestamp('2019-07-22 00:00:00')

In [2]:
# pandas uses the terms "datetime" and "timestamp" interchangeably:
# "to_datetime" returns a "Timestamp" object

In [3]:
# pandas will infer the format if it's not ambiguous;
# here a spelled-out month followed by a 24-hour time is recognized
pd.to_datetime('July 22, 2019 12:34:56')

Timestamp('2019-07-22 12:34:56')

In [4]:
# slash-separated month/day/year; 22 cannot be a month, so there is
# only one way to read this date
pd.to_datetime('7/22/2019')

Timestamp('2019-07-22 00:00:00')

In [5]:
# day-first text with a 12-hour clock: "1:30pm" becomes 13:30
pd.to_datetime('22 Jul 2019 1:30pm')

Timestamp('2019-07-22 13:30:00')

In [6]:
# you should probably be using the UTC timezone for everything;
# to do this, pass utc=True to to_datetime
# note how the result now carries a +0000 offset and tz='UTC'
# (the input has no timezone notation, so 6:30pm is read as 18:30 UTC)
pd.to_datetime('22 July 2019 6:30pm', utc=True)

Timestamp('2019-07-22 18:30:00+0000', tz='UTC')

In [7]:
# represent relative times (durations) with "to_timedelta";
# '2d' is shorthand for two days
pd.to_timedelta('2d')

Timedelta('2 days 00:00:00')

In [8]:
# do date arithmetic
from datetime import datetime, timezone

# datetime.utcnow() is deprecated since Python 3.12 and returns a naive
# datetime; datetime.now(timezone.utc) is timezone-aware from the start
current_time = pd.to_datetime(datetime.now(timezone.utc), utc=True)
# adding a Timedelta to a Timestamp gives a new Timestamp
current_time + pd.to_timedelta('12h')

Timestamp('2019-07-19 07:18:45.413542+0000', tz='UTC')

In [9]:
# subtracting two tz-aware timestamps yields a Timedelta; it is negative
# when current_time is earlier than the timestamp being subtracted
current_time - pd.to_datetime('July 22 2019 16:00', utc=True)

Timedelta('-4 days +03:18:45.413542')

In [10]:
# timestamps in pandas are either "timezone-aware" or not
# ("timezone-naive"). temporal operations won't work between
# timezone-aware and non-timezone-aware timestamps

# here's a non-timezone-aware timestamp: no utc=True and no offset
# in the string, so the result has no tz
time_a = pd.to_datetime('2019-07-22 12:34:56')
time_a

Timestamp('2019-07-22 12:34:56')

In [11]:
# if your string has a timezone notation in it (here "+0000"),
# Pandas will parse it and return a timezone-aware timestamp
time_b = pd.to_datetime('2019-07-22 12:34:56 +0000')
time_b

Timestamp('2019-07-22 12:34:56+0000', tz='UTC')

In [12]:
# if the string has no timezone notation, pass utc=True when parsing
# to get a timezone-aware (UTC) timestamp
time_c = pd.to_datetime('2019-07-22 12:34:56', utc=True)
time_c

Timestamp('2019-07-22 12:34:56+0000', tz='UTC')

In [13]:
# comparisons (and other time operations) only work when both
# timestamps are timezone-aware or both are non-timezone-aware;
# mixing the two kinds raises a TypeError, which is caught here
# so its message can be displayed

try:
    time_a > time_b
except TypeError as err:
    print(err)

Cannot compare tz-naive and tz-aware timestamps


In [14]:
# ^ this comes up in Pandas applications all the time
# because a lot of tabular data uses timestamps with no timezone notation

# always use utc=True, unless you're using a different timezone

In [15]:
# note that to_datetime works for iterables including arrays
# and Pandas Series

# the same call converts a whole collection of date strings at once,
# here with utc=True so every element is timezone-aware
time_strings = ['1969-10-27', '1971-03-14', '2009-07-04']
timestamps = pd.to_datetime(time_strings, utc=True)
timestamps

DatetimeIndex(['1969-10-27 00:00:00+00:00', '1971-03-14 00:00:00+00:00',
               '2009-07-04 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [16]:
# and operations on them are vectorized: adding a single Timedelta
# shifts every timestamp in the index
a_day = pd.to_timedelta('1d')

timestamps + a_day

DatetimeIndex(['1969-10-28 00:00:00+00:00', '1971-03-15 00:00:00+00:00',
               '2009-07-05 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [17]:
# comparisons are element-wise as well and produce a boolean array
timestamps > pd.to_datetime('2000-01-01', utc=True)

array([False, False,  True])