https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html  

| Concept      | Scalar Class | Array Class    | pandas Data Type                     | Primary Creation Method         |
| ------------ | ------------ | -------------- | ------------------------------------ | ------------------------------- |
| Date times   | Timestamp    | DatetimeIndex  | datetime64[ns] or datetime64[ns, tz] | to_datetime or date_range       |
| Time deltas  | Timedelta    | TimedeltaIndex | timedelta64[ns]                      | to_timedelta or timedelta_range |
| Time spans   | Period       | PeriodIndex    | period[freq]                         | Period or period_range          |
| Date offsets | DateOffset   | None           | None                                 | DateOffset                      |

In [1]:
import datetime
import numpy as np
import pandas as pd

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
def random_datetimes_or_dates(start, end, out_format='datetime', size=10):
    """Draw random timestamps from the half-open range [start, end).

    Adapted from
    https://stackoverflow.com/questions/50559078/generating-random-dates-within-a-given-range-in-pandas

    ``Timestamp.value`` is the Unix epoch expressed in nanoseconds. Both
    bounds are floor-divided down to a coarser unit (seconds or days),
    random integers are drawn between them, and the integers are converted
    back with ``pd.to_datetime(..., unit=...)``.

    Parameters
    ----------
    start, end : pd.Timestamp
        Lower (inclusive) and upper (exclusive) bound. They are compared
        after truncation to the chosen resolution.
    out_format : str, default 'datetime'
        'datetime' produces values with one-second resolution; any other
        value produces whole days (midnight timestamps).
    size : int, default 10
        Number of values to draw.

    Returns
    -------
    pd.DatetimeIndex
        ``size`` random timestamps, in no particular order.

    Raises
    ------
    ValueError
        Propagated from ``np.random.randint`` when the truncated ``start``
        is not strictly below the truncated ``end``.
    """
    if out_format == 'datetime':
        divide_by, unit = 10**9, 's'                 # ns -> seconds
    else:
        divide_by, unit = 24 * 60 * 60 * 10**9, 'D'  # ns -> days

    start_u = start.value // divide_by
    end_u = end.value // divide_by

    # Ask for int64 explicitly: randint otherwise uses the platform default
    # integer, which is 32-bit on some platforms and cannot hold epoch
    # seconds past 2038.
    draws = np.random.randint(start_u, end_u, size, dtype=np.int64)
    return pd.to_datetime(draws, unit=unit)

# Time

In [4]:
today = pd.to_datetime('today')
now   = pd.to_datetime('now')

In [5]:
pd.Timestamp(year=2017, month=1, day=1, hour=12)

Timestamp('2017-01-01 12:00:00')

In [6]:
pd.to_datetime(32400, unit='s')

Timestamp('1970-01-01 09:00:00')

In [7]:
timestamp = pd.to_datetime('1970-01-01 09:00')
# timestamp = timestamp + pd.Timedelta("1 day")
# timestamp = timestamp + pd.Timedelta(hours=24)
# timestamp = timestamp + pd.Timedelta(hours=1, minutes=1)

In [8]:
timestamp

Timestamp('1970-01-01 09:00:00')

In [9]:
timestamp.strftime('%Y-%m-%d %H:%M:%S')

'1970-01-01 09:00:00'

In [10]:
(timestamp.year, 
timestamp.month, 
timestamp.day, 
timestamp.hour, 
timestamp.minute, 
timestamp.second)

(1970, 1, 1, 9, 0, 0)

In [11]:
timestamp.timestamp()

32400.0

In [12]:
timestamp.date()

datetime.date(1970, 1, 1)

In [13]:
timestamp.time()

datetime.time(9, 0)

In [14]:
timestamp.replace(day=2, 
                  month=10, 
                  year=2022,
                  
                  hour=1,
                  minute=9,
                  second=7)

Timestamp('2022-10-02 01:09:07')

In [15]:
# datetime.datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')
# datetime.datetime.strptime('20211213', '%Y%m%d')
pd.to_datetime('20211213', format='%Y%m%d')

Timestamp('2021-12-13 00:00:00')

In [16]:
str(timestamp)

'1970-01-01 09:00:00'

In [18]:
pd.to_datetime(10, unit='D').isoformat()

'1970-01-11T00:00:00'

# Time Zone

In [19]:
ts = pd.Timestamp('2020-03-14T15:32:52.192548651')
# tz_localize returns a new, timezone-aware Timestamp and leaves `ts` naive,
# so format the returned value directly instead of discarding it and
# re-typing the same instant as a string literal.
ts.tz_localize(tz='Europe/Stockholm').isoformat()

'2020-03-14T15:32:52.192548651+01:00'

# Rounding

In [13]:
timestamp = pd.to_datetime('1907-01-01 09:32')
timestamp = timestamp.round('H')

# Timedelta

https://stackoverflow.com/questions/8906926/formatting-timedelta-objects/8907269#8907269

In [19]:
nb_mins = np.random.normal(60, 10)
timedelta = pd.Timedelta(days=0, hours=1, minutes=nb_mins)

In [20]:
nb_mins

59.2558260884738

In [21]:
timedelta.seconds

7155

In [22]:
# Render the duration as H:MM:SS via the standard-library timedelta.
# NOTE: Timedelta.seconds is only the seconds component within a day
# (0 <= seconds < 86400); whole days are not included. That is fine here
# because the duration built above is roughly two hours.
str(datetime.timedelta(seconds=timedelta.seconds))

'1:59:15'

In [23]:
# duration.length

# Series

In [30]:
# ts.between_time('0:15', '0:45')

# Interval

In [18]:
interval = pd.Interval(pd.to_datetime('1970-01-01 09:00'), 
                       pd.to_datetime('1970-01-01 10:00'), 
                       closed='left')

In [19]:
interval

Interval('1970-01-01 09:00:00', '1970-01-01 10:00:00', closed='left')

# Dataframe

In [29]:
data = pd.DataFrame()
data['datetime'] = random_datetimes_or_dates(start=pd.to_datetime('2021-10-01 09:00'), 
                                             end=pd.to_datetime('2021-10-01 18:00'), 
                                             size=10)
# data['datetime'] = data['datetime'].apply(lambda x: x.time())

In [12]:
data

Unnamed: 0,datetime
0,2021-10-01 10:46:35
1,2021-10-01 12:54:44
2,2021-10-01 13:35:00
3,2021-10-01 17:12:19
4,2021-10-01 14:09:06
5,2021-10-01 11:21:47
6,2021-10-01 11:03:41
7,2021-10-01 13:12:13
8,2021-10-01 13:43:50
9,2021-10-01 15:03:32


In [13]:
for _,df in data.groupby(pd.Grouper(key='datetime', freq='60Min')):
    display(df)

Unnamed: 0,datetime
0,2021-10-01 10:46:35


Unnamed: 0,datetime
6,2021-10-01 11:03:41
5,2021-10-01 11:21:47


Unnamed: 0,datetime
1,2021-10-01 12:54:44


Unnamed: 0,datetime
7,2021-10-01 13:12:13
2,2021-10-01 13:35:00
8,2021-10-01 13:43:50


Unnamed: 0,datetime
4,2021-10-01 14:09:06


Unnamed: 0,datetime
9,2021-10-01 15:03:32


Unnamed: 0,datetime


Unnamed: 0,datetime
3,2021-10-01 17:12:19


# Interval

In [33]:
# Two left-closed, one-hour windows; the second starts 30 minutes after the
# first, so the two share the stretch from 09:30 up to 10:00.
interval_00 = pd.Interval(
    left=pd.to_datetime('1970-01-01 09:00'),
    right=pd.to_datetime('1970-01-01 10:00'),
    closed='left',
)
interval_01 = pd.Interval(
    left=pd.to_datetime('1970-01-01 09:30'),
    right=pd.to_datetime('1970-01-01 10:30'),
    closed='left',
)

In [34]:
interval_00

Interval('1970-01-01 09:00:00', '1970-01-01 10:00:00', closed='left')

In [36]:
pd.Timestamp('1970-01-01 09:30') in interval_00

True

In [38]:
interval_00.overlaps(interval_01)

True

# Date / Time Range  

https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

In [4]:
pd.date_range(start='09:00:00', 
              end='12:00:00',
              freq='H')

DatetimeIndex(['2022-02-16 09:00:00', '2022-02-16 10:00:00',
               '2022-02-16 11:00:00', '2022-02-16 12:00:00'],
              dtype='datetime64[ns]', freq='H')

In [6]:
pd.date_range(start='09:00:00', 
              freq='H', 
              periods=4)

DatetimeIndex(['2022-02-16 09:00:00', '2022-02-16 10:00:00',
               '2022-02-16 11:00:00', '2022-02-16 12:00:00'],
              dtype='datetime64[ns]', freq='H')

In [18]:
pd.date_range(start='1/1/2018', 
              end='1/08/2018',
              freq='D')

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
              dtype='datetime64[ns]', freq='D')

In [19]:
pd.date_range(start='1/1/2018', 
              periods=8)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
              dtype='datetime64[ns]', freq='D')

In [20]:
pd.date_range(start='2018-04-24', 
              end='2018-04-27', 
              periods=3)

DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
               '2018-04-27 00:00:00'],
              dtype='datetime64[ns]', freq=None)

# Resampling

In [4]:
# index = pd.date_range('1/1/1970', periods=9, freq='2T')

index = random_datetimes_or_dates(start=pd.to_datetime('1970-01-01 09:00'), 
                                        end=pd.to_datetime('1970-01-01 09:30'), 
                                        size=9)

series = pd.Series(range(9), index=index)
series = series.sort_index()

In [5]:
series

1970-01-01 09:05:10    3
1970-01-01 09:05:38    7
1970-01-01 09:09:13    2
1970-01-01 09:12:38    5
1970-01-01 09:13:40    6
1970-01-01 09:19:30    8
1970-01-01 09:21:52    0
1970-01-01 09:24:55    4
1970-01-01 09:26:47    1
dtype: int64

In [55]:
series.resample('T').sum() #.head()

1970-01-01 09:03:00    14
1970-01-01 09:04:00     3
1970-01-01 09:05:00     0
1970-01-01 09:06:00     0
1970-01-01 09:07:00     2
1970-01-01 09:08:00     0
1970-01-01 09:09:00     0
1970-01-01 09:10:00     0
1970-01-01 09:11:00     0
1970-01-01 09:12:00     0
1970-01-01 09:13:00     0
1970-01-01 09:14:00     0
1970-01-01 09:15:00     4
1970-01-01 09:16:00     0
1970-01-01 09:17:00     5
1970-01-01 09:18:00     0
1970-01-01 09:19:00     1
1970-01-01 09:20:00     0
1970-01-01 09:21:00     0
1970-01-01 09:22:00     0
1970-01-01 09:23:00     7
Freq: T, dtype: int64

In [47]:
series.resample('4T').sum().head()

2000-01-01 00:00:00     1
2000-01-01 00:04:00     5
2000-01-01 00:08:00     9
2000-01-01 00:12:00    13
2000-01-01 00:16:00     8
Freq: 4T, dtype: int64

Method 2: outer-merge the data onto a regular `date_range` index

In [26]:
index = pd.date_range('1/1/1970', periods=9, freq='T')
series = pd.Series(range(9), index=index, name='datetime')

data = pd.DataFrame()
data['timestamp'] = random_datetimes_or_dates(start=pd.to_datetime('1970-01-01 09:00'), 
                                              end=pd.to_datetime('1970-01-01 11:00'), 
                                              size=10)
data['value']    = np.random.randint(low=0, high=100+1, size=len(data))

In [31]:
# series

In [32]:
# data

In [30]:
data.merge(series, how='outer', left_on='timestamp', right_index=True).sort_values('timestamp')

Unnamed: 0,timestamp,value,datetime
,1970-01-01 00:00:00,,0.0
,1970-01-01 00:01:00,,1.0
,1970-01-01 00:02:00,,2.0
,1970-01-01 00:03:00,,3.0
,1970-01-01 00:04:00,,4.0
,1970-01-01 00:05:00,,5.0
,1970-01-01 00:06:00,,6.0
,1970-01-01 00:07:00,,7.0
,1970-01-01 00:08:00,,8.0
8.0,1970-01-01 09:19:05,10.0,


# Overlaps

https://stackoverflow.com/questions/66661492/identifying-overlapping-events-datetime-records-in-a-pandas-dataframe

In [60]:
def overlap(df, flag=True):
    """Flag every row whose time window overlaps at least one other row's.

    Two windows overlap when each one starts strictly before the other
    ends. Windows that merely touch (one ends exactly where the next one
    starts) are not counted, and a row is never compared with itself.
    This also catches a window that fully contains another one and
    windows with identical endpoints.

    Parameters
    ----------
    df : pd.DataFrame
        With ``flag=True`` the frame must have 'start_datetime' and
        'end_datetime' columns. With ``flag=False`` it must have an
        'interval' column of ``pd.Interval`` objects; only their ``left``
        and ``right`` endpoints are used, the ``closed`` side is not
        consulted.
    flag : bool, default True
        Selects which columns the window endpoints are read from.

    Returns
    -------
    np.ndarray of bool
        One entry per row, in row order; True when the row overlaps any
        other row.
    """
    if flag:
        s, e = df[['start_datetime', 'end_datetime']].to_numpy().T
    else:
        # Read the endpoints from the frame that was passed in, not from a
        # module-level variable.
        s = df['interval'].map(lambda x: x.left).to_numpy()
        e = df['interval'].map(lambda x: x.right).to_numpy()

    # hits[i, j] is True when window i and window j intersect
    # (pairwise comparison via broadcasting).
    hits = (s[:, None] < e) & (e[:, None] > s)
    # Every non-empty window intersects itself; ignore the diagonal.
    np.fill_diagonal(hits, False)

    return hits.any(axis=1)

In [61]:
# Build a small sample of time windows for the overlap check.
# This rebinds the `data` name used by earlier cells.
data = pd.DataFrame()

# Five windows given as time-of-day strings (no date part).
data['start_datetime'] = [ '02:00:00', 
                           '04:00:00', 
                           '06:00:00', 
                           '08:00:00', 
                           '10:00:00', ]
data['end_datetime'] = [ '04:00:00', 
                         '08:00:00', 
                         '10:00:00', 
                         '11:00:00', 
                         '12:00:00', ]

# Alternative sample with full dates (disabled):
# data['start_datetime'] = [ '2000-01-01 02:23:49', 
#                          '1997-12-20 07:22:10', 
#                          '2000-01-05 03:42:29', 
#                          '2002-02-25 17:20:09', 
#                          '1999-06-30 03:33:20' ]
# data['end_datetime'] = [ '2000-01-06 04:50:20', 
#                        '1998-12-20 01:24:12', 
#                        '2000-03-01 11:01:11', 
#                        '2003-02-25 22:05:02', 
#                        '2000-01-01 02:50:30' ]

# Parse the strings into datetimes. In the recorded output the time-only
# strings all land on one calendar date (presumably the day the cell ran).
data['start_datetime'] = pd.to_datetime(data['start_datetime'])
data['end_datetime'] = pd.to_datetime(data['end_datetime'])

# One left-closed pd.Interval per row, built from the two parsed columns.
data['interval'] = data.apply(lambda x: pd.Interval(x['start_datetime'], x['end_datetime'], closed='left'), axis=1)

# Order the rows by window start.
data = data.sort_values('start_datetime')

In [62]:
data['overlap']  = overlap(data, flag=True)
data['overlap_'] = overlap(data, flag=False)

In [63]:
data

Unnamed: 0,start_datetime,end_datetime,interval,overlap,overlap_
0,2021-11-16 02:00:00,2021-11-16 04:00:00,"[2021-11-16 02:00:00, 2021-11-16 04:00:00)",False,False
1,2021-11-16 04:00:00,2021-11-16 08:00:00,"[2021-11-16 04:00:00, 2021-11-16 08:00:00)",True,True
2,2021-11-16 06:00:00,2021-11-16 10:00:00,"[2021-11-16 06:00:00, 2021-11-16 10:00:00)",True,True
3,2021-11-16 08:00:00,2021-11-16 11:00:00,"[2021-11-16 08:00:00, 2021-11-16 11:00:00)",True,True
4,2021-11-16 10:00:00,2021-11-16 12:00:00,"[2021-11-16 10:00:00, 2021-11-16 12:00:00)",True,True
