In [1]:
import numpy as np
import pandas as pd

# General examples of manipulating datetime, time stamps, time delta

## Epoch time

https://en.wikipedia.org/wiki/Unix_time

https://www.epochconverter.com/clock

In [2]:
# convert from epoch time (in UTC) to pandas
nparray = np.array([1604962800, 1610146800]).astype('datetime64[s]')
nparray

array(['2020-11-09T23:00:00', '2021-01-08T23:00:00'],
      dtype='datetime64[s]')

In [3]:
# create a pandas date time index
pd.to_datetime(nparray)

DatetimeIndex(['2020-11-09 23:00:00', '2021-01-08 23:00:00'], dtype='datetime64[ns]', freq=None)

In [4]:
# each element is of the type timestamp
pd.to_datetime(nparray)[-1]

Timestamp('2021-01-08 23:00:00')

In [5]:
# so far it was not time zone aware, tell it it was the time in UTC
# careful if you go all the way to 1970s you need to consider the leap seconds
pd.to_datetime(nparray, utc = True)

DatetimeIndex(['2020-11-09 23:00:00+00:00', '2021-01-08 23:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)

In [6]:
pd.to_datetime(nparray, utc = True)[-1]

Timestamp('2021-01-08 23:00:00+0000', tz='UTC')

In [7]:
# it is already time zone (tz) aware, so let's find out the local time at New York for these times
pd.to_datetime(nparray, utc = True).tz_convert("America/New_York")

DatetimeIndex(['2020-11-09 18:00:00-05:00', '2021-01-08 18:00:00-05:00'], dtype='datetime64[ns, America/New_York]', freq=None)

In [8]:
pd.to_datetime(nparray, utc = True).tz_convert("America/New_York")[-1]

Timestamp('2021-01-08 18:00:00-0500', tz='America/New_York')

## The current time on the local computer

In [9]:
# it is indeed the local time, but it is not time zone aware
pd.Timestamp.today()

Timestamp('2021-01-21 23:05:26.944385')

In [10]:
# keep just the date
pd.Timestamp.today().date()

datetime.date(2021, 1, 21)

In [11]:
# convert date to a string
str(pd.Timestamp.today().date())

'2021-01-21'

In [12]:
# create a datetime (Timestamp) from just the date
pd.to_datetime(str(pd.Timestamp.today().date()))

Timestamp('2021-01-21 00:00:00')

In [13]:
# the one liner to do the same thing is
pd.Timestamp.today().normalize()

Timestamp('2021-01-21 00:00:00')

In [14]:
# if this time was at New York
pd.Timestamp.today().normalize().tz_localize("America/New_York")

Timestamp('2021-01-21 00:00:00-0500', tz='America/New_York')

In [15]:
# but it is in Berlin, so if we want to find out what is the current time in new York
# first we let the current time know its time zone, then we convert
pd.Timestamp.today().tz_localize("Europe/Berlin")

Timestamp('2021-01-21 23:05:27.233573+0100', tz='Europe/Berlin')

In [16]:
pd.Timestamp.today().tz_localize("Europe/Berlin").tz_convert("America/New_York")

Timestamp('2021-01-21 17:05:27.272952-0500', tz='America/New_York')

In [17]:
# get the string of the date
str(pd.Timestamp.today().tz_localize("Europe/Berlin").tz_convert("America/New_York").date())

'2021-01-21'

## Adding or subtracting time intervals using timedelta

In [18]:
ts = pd.Timestamp.today().tz_localize("Europe/Berlin").tz_convert("America/New_York")
ts

Timestamp('2021-01-21 17:05:27.349954-0500', tz='America/New_York')

In [19]:
# add one day
ts + pd.Timedelta(1, "d")

Timestamp('2021-01-22 17:05:27.349954-0500', tz='America/New_York')

In [20]:
# subtract one day
ts - pd.Timedelta(1, "d")

Timestamp('2021-01-20 17:05:27.349954-0500', tz='America/New_York')

In [21]:
# add 30 minutes

In [22]:
ts + pd.Timedelta(30, "m")

Timestamp('2021-01-21 17:35:27.349954-0500', tz='America/New_York')

In [23]:
str(ts.normalize())

'2021-01-21 00:00:00-05:00'

In [24]:
pd.Timestamp.today() - pd.Timedelta(30, "d")

Timestamp('2020-12-22 23:05:27.583382')

In [25]:
(pd.Timestamp.today() - pd.Timedelta(30, "d")).tz_localize("Europe/Berlin").tz_convert("America/New_York")

Timestamp('2020-12-22 17:05:27.619647-0500', tz='America/New_York')

In [26]:
nparray = np.array([1607591400, 1607591700]).astype('datetime64[s]')
nparray

array(['2020-12-10T09:10:00', '2020-12-10T09:15:00'],
      dtype='datetime64[s]')

# What info can be extract from a datetime

In [27]:
# public dataset: https://archive.ics.uci.edu/ml/datasets/Seoul+Bike+Sharing+Demand
INPUT_FOLDER_STEM = "/Users/abuzatu/Work/data/geo_spatial_time_series/bike_rental_seoul"
input_file_name = f"{INPUT_FOLDER_STEM}/SeoulBikeData.csv"

# to avoid error of UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 12: invalid start byte
# due to symbol of degree, use encoding = "ISO-8859-1"
df_ = pd.read_csv(input_file_name, encoding = "ISO-8859-1")
df_

Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,30/11/2018,1003,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,Autumn,No Holiday,Yes
8756,30/11/2018,764,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8757,30/11/2018,694,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8758,30/11/2018,712,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,Autumn,No Holiday,Yes


In [28]:
df = df_.copy()
df["date"] = pd.to_datetime(df.Date)
df = df[["date"]]
df.set_index("date", inplace = True)
df.head(1)

2017-01-12


In [29]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timestamp.dst.html
# extract info from Timestamp
#
df["date"] = df.index.date
df["time"] = df.index.time
#
df["year"] = df.index.year
df["month"] = df.index.month
df["day"] = df.index.day
df["hour"] = df.index.hour
df["minute"] = df.index.minute
df["second"] = df.index.second
df["nanosecond"] = df.index.nanosecond
#
df["day_name"] = df.index.day_name()
df["month_name"] = df.index.month_name()
#
df["quarter"] = df.index.quarter
df["dayofyear"] = df.index.dayofyear
df["dayofweek"] = df.index.dayofweek
df["day_of_week"] = df.index.dayofweek
df["days_in_month"] = df.index.days_in_month
df["days_in_month"] = df.index.daysinmonth

df["is_leap_year"] = df.index.is_leap_year
#
df["is_month_start"] = df.index.is_month_start
df["is_month_end"] = df.index.is_month_end
df["is_quarter_start"] = df.index.is_quarter_start
df["is_quarter_end"] = df.index.is_quarter_end
df["is_year_start"] = df.index.is_year_start
df["is_year_end"] = df.index.is_year_end
#

#
df["tz"] = df.index.tz
df["tzinfo"] = df.index.tzinfo
df["resolution"] = df.index.resolution
#
df["now"] = pd.Timestamp.now()
#
df.iloc[0]

date                                2017-01-12
time                                  00:00:00
year                                      2017
month                                        1
day                                         12
hour                                         0
minute                                       0
second                                       0
nanosecond                                   0
day_name                              Thursday
month_name                             January
quarter                                      1
dayofyear                                   12
dayofweek                                    3
day_of_week                                  3
days_in_month                               31
is_leap_year                             False
is_month_start                           False
is_month_end                             False
is_quarter_start                         False
is_quarter_end                           False
is_year_start