In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
date_string = np.array(['30-08-2023 07:12 PM', '29-08-2023 05:10 PM', '28-08-2023 03:14 PM'])

In [4]:
# Standard format: YYYYMMDDHHMMSS + TZ

[pd.to_datetime(date,format='%d-%m-%Y %I:%M %p') for date in date_string]

[Timestamp('2023-08-30 19:12:00'),
 Timestamp('2023-08-29 17:10:00'),
 Timestamp('2023-08-28 15:14:00')]

In [5]:
[pd.to_datetime(date, format='%d-%m-%Y %I:%M %p', errors='coerce') for date in date_string]

[Timestamp('2023-08-30 19:12:00'),
 Timestamp('2023-08-29 17:10:00'),
 Timestamp('2023-08-28 15:14:00')]

In [6]:
pd.Timestamp('2023-08-30 19:12:00', tz='Europe/London')

Timestamp('2023-08-30 19:12:00+0100', tz='Europe/London')

In [7]:
pd.Timestamp('2023-08-30 19:12:00', tz='America/Lima')

Timestamp('2023-08-30 19:12:00-0500', tz='America/Lima')

In [8]:
from pytz import all_timezones

In [9]:
all_timezones[500:550]

['Indian/Antananarivo',
 'Indian/Chagos',
 'Indian/Christmas',
 'Indian/Cocos',
 'Indian/Comoro',
 'Indian/Kerguelen',
 'Indian/Mahe',
 'Indian/Maldives',
 'Indian/Mauritius',
 'Indian/Mayotte',
 'Indian/Reunion',
 'Iran',
 'Israel',
 'Jamaica',
 'Japan',
 'Kwajalein',
 'Libya',
 'MET',
 'MST',
 'MST7MDT',
 'Mexico/BajaNorte',
 'Mexico/BajaSur',
 'Mexico/General',
 'NZ',
 'NZ-CHAT',
 'Navajo',
 'PRC',
 'PST8PDT',
 'Pacific/Apia',
 'Pacific/Auckland',
 'Pacific/Bougainville',
 'Pacific/Chatham',
 'Pacific/Chuuk',
 'Pacific/Easter',
 'Pacific/Efate',
 'Pacific/Enderbury',
 'Pacific/Fakaofo',
 'Pacific/Fiji',
 'Pacific/Funafuti',
 'Pacific/Galapagos',
 'Pacific/Gambier',
 'Pacific/Guadalcanal',
 'Pacific/Guam',
 'Pacific/Honolulu',
 'Pacific/Johnston',
 'Pacific/Kanton',
 'Pacific/Kiritimati',
 'Pacific/Kosrae',
 'Pacific/Kwajalein',
 'Pacific/Majuro']

In [10]:
df = pd.DataFrame()

In [11]:
df['date'] = pd.date_range('1/1/2023', periods=10000, freq='H')
df

Unnamed: 0,date
0,2023-01-01 00:00:00
1,2023-01-01 01:00:00
2,2023-01-01 02:00:00
3,2023-01-01 03:00:00
4,2023-01-01 04:00:00
...,...
9995,2024-02-21 11:00:00
9996,2024-02-21 12:00:00
9997,2024-02-21 13:00:00
9998,2024-02-21 14:00:00


In [12]:
df[(df.date > '2023-1-2 01:00:00') & (df.date < '2023-1-2 07:00:00')]

Unnamed: 0,date
26,2023-01-02 02:00:00
27,2023-01-02 03:00:00
28,2023-01-02 04:00:00
29,2023-01-02 05:00:00
30,2023-01-02 06:00:00


In [13]:
df_indexed = df.set_index(df.date).copy()
df_indexed

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2023-01-01 00:00:00,2023-01-01 00:00:00
2023-01-01 01:00:00,2023-01-01 01:00:00
2023-01-01 02:00:00,2023-01-01 02:00:00
2023-01-01 03:00:00,2023-01-01 03:00:00
2023-01-01 04:00:00,2023-01-01 04:00:00
...,...
2024-02-21 11:00:00,2024-02-21 11:00:00
2024-02-21 12:00:00,2024-02-21 12:00:00
2024-02-21 13:00:00,2024-02-21 13:00:00
2024-02-21 14:00:00,2024-02-21 14:00:00


In [14]:
df_indexed.loc['2024-02-21 11:00:00':'2024-02-21 14:00:00']

Unnamed: 0_level_0,date
date,Unnamed: 1_level_1
2024-02-21 11:00:00,2024-02-21 11:00:00
2024-02-21 12:00:00,2024-02-21 12:00:00
2024-02-21 13:00:00,2024-02-21 13:00:00
2024-02-21 14:00:00,2024-02-21 14:00:00


In [15]:
df_break = pd.DataFrame()
df_break['date'] = pd.date_range('1/1/2023', periods=150, freq='W')
df_break

Unnamed: 0,date
0,2023-01-01
1,2023-01-08
2,2023-01-15
3,2023-01-22
4,2023-01-29
...,...
145,2025-10-12
146,2025-10-19
147,2025-10-26
148,2025-11-02


In [16]:
df_break['year'] = df_break['date'].dt.year
df_break['month'] = df_break['date'].dt.month
df_break['day'] = df_break['date'].dt.day
df_break['hour'] = df_break['date'].dt.hour
df_break['minutes'] = df_break['date'].dt.minute

df_break.sample(10)

Unnamed: 0,date,year,month,day,hour,minutes
26,2023-07-02,2023,7,2,0,0
144,2025-10-05,2025,10,5,0,0
125,2025-05-25,2025,5,25,0,0
69,2024-04-28,2024,4,28,0,0
3,2023-01-22,2023,1,22,0,0
126,2025-06-01,2025,6,1,0,0
59,2024-02-18,2024,2,18,0,0
6,2023-02-12,2023,2,12,0,0
31,2023-08-06,2023,8,6,0,0
72,2024-05-19,2024,5,19,0,0


In [17]:
# Deltas

df_delta = pd.DataFrame()
df_delta['arrival'] = [pd.Timestamp('01-01-2023'), pd.Timestamp('01-04-2023')]
df_delta['departure'] = [pd.Timestamp('01-02-2023'), pd.Timestamp('01-06-2023')]

df_delta

Unnamed: 0,arrival,departure
0,2023-01-01,2023-01-02
1,2023-01-04,2023-01-06


In [18]:
df_delta['departure'] - df_delta['arrival']

0   1 days
1   2 days
dtype: timedelta64[ns]

In [19]:
df_delta['difference'] = df_delta['departure'] - df_delta['arrival']

df_delta

Unnamed: 0,arrival,departure,difference
0,2023-01-01,2023-01-02,1 days
1,2023-01-04,2023-01-06,2 days


In [20]:
# Lagging

df_lagged = pd.DataFrame()

df_lagged['dates'] = pd.date_range('1/1/2023', periods=5, freq='D')

df_lagged['stock_price'] = [1.0, 2.0, 3.0, 4.5, 6.0]

df_lagged

Unnamed: 0,dates,stock_price
0,2023-01-01,1.0
1,2023-01-02,2.0
2,2023-01-03,3.0
3,2023-01-04,4.5
4,2023-01-05,6.0


In [21]:
df_lagged['previous_day_stock_price'] = df_lagged['stock_price'].shift(-1)

df_lagged

Unnamed: 0,dates,stock_price,previous_day_stock_price
0,2023-01-01,1.0,2.0
1,2023-01-02,2.0,3.0
2,2023-01-03,3.0,4.5
3,2023-01-04,4.5,6.0
4,2023-01-05,6.0,


In [22]:
df_lagged_indexed = df_lagged.set_index(df_lagged.dates)

df_lagged_indexed

Unnamed: 0_level_0,dates,stock_price,previous_day_stock_price
dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,2023-01-01,1.0,2.0
2023-01-02,2023-01-02,2.0,3.0
2023-01-03,2023-01-03,3.0,4.5
2023-01-04,2023-01-04,4.5,6.0
2023-01-05,2023-01-05,6.0,


In [None]:
df_lagged_indexed.rolling(window=3).mean()