In [1]:
import pandas as pd
import numpy as np

# Render every float with two decimal places in displayed Series/DataFrames.
pd.options.display.float_format = '{:.2f}'.format

## Creating Timestamps in Pandas

In [2]:
timestamp_str = '2023-10-19 14:30:00'
timestamp = pd.Timestamp(timestamp_str)
timestamp

Timestamp('2023-10-19 14:30:00')

**Timestamp Recognition**

Pandas is quite flexible when it comes to recognizing various timestamp formats. It
can handle different formats, including:

YYYY-MM-DD HH:MM:SS

YYYY/MM/DD HH:MM:SS

YYYY.MM.DD HH:MM:SS

YYYY-MM-DD

YYYY/MM/DD

YYYY.MM.DD

In [3]:
timestamp_str = '2023/10/19 14:30:00'
timestamp = pd.Timestamp(timestamp_str)
timestamp

Timestamp('2023-10-19 14:30:00')

In [4]:
# Dot-separated variant of the same timestamp (the dash-separated form is
# already demonstrated in an earlier cell).
timestamp_str = '2023.10.19 14:30:00'
timestamp = pd.Timestamp(timestamp_str)
timestamp

Timestamp('2023-10-19 14:30:00')

In [5]:
timestamp_str = '10-19-2023 14:30:00'
timestamp = pd.Timestamp(timestamp_str)
timestamp

Timestamp('2023-10-19 14:30:00')

In [6]:
timestamp = pd.Timestamp(year=2023, month=10, day=19, hour=14, minute=30, second=0)
timestamp

Timestamp('2023-10-19 14:30:00')

## Timestamp Attributes and Properties

In [7]:
timestamp.year

2023

In [8]:
timestamp.month

10

In [9]:
timestamp.day

19

In [10]:
timestamp.hour

14

In [11]:
timestamp.minute

30

In [12]:
timestamp.second

0

In [13]:
timestamp.microsecond

0

In [14]:
timestamp.dayofweek

3

In [15]:
timestamp.dayofyear

292

In [16]:
timestamp.is_leap_year

False

## Working with Datetimes in Python

In [17]:
from datetime import datetime

In [18]:
dt = datetime(2023, 10, 19, 14, 30, 0)
dt

datetime.datetime(2023, 10, 19, 14, 30)

In [19]:
dt.year

2023

In [20]:
dt.month

10

In [21]:
dt.day

19

In [22]:
dt.hour

14

In [23]:
dt.minute

30

In [24]:
dt.second

0

In [25]:
dt.microsecond

0

In [26]:
# Plain datetime objects do not have pandas' `dayofweek` attribute. Catch the
# error and show it so "Run All" keeps going past this demonstration.
# The standard-library equivalent is dt.weekday() (Monday == 0).
try:
    dt.dayofweek
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'datetime.datetime' object has no attribute 'dayofweek'

In [27]:
# Plain datetime objects do not have pandas' `dayofyear` attribute. Catch the
# error and show it so "Run All" keeps going past this demonstration.
# The standard-library equivalent is dt.timetuple().tm_yday.
try:
    dt.dayofyear
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'datetime.datetime' object has no attribute 'dayofyear'

In [28]:
# Plain datetime objects do not have pandas' `is_leap_year` attribute. Catch
# the error and show it so "Run All" keeps going past this demonstration.
# The standard-library equivalent is calendar.isleap(dt.year).
try:
    dt.is_leap_year
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'datetime.datetime' object has no attribute 'is_leap_year'

In [29]:
formatted_datetime = dt.strftime('%Y-%m-%d %H:%M:%S')
formatted_datetime

'2023-10-19 14:30:00'

## Data Structures in Pandas for Time Series

### Series with Datetime Index

In [30]:
import random

In [31]:
start_date = datetime(2023, 10, 1)
print(start_date)

2023-10-01 00:00:00


In [32]:
end_date = datetime(2023, 10, 15)

In [33]:
date_index = pd.date_range(start=start_date, end=end_date, freq='D')

In [36]:
# Draw from a dedicated, seeded generator so the simulated temperatures are the
# same on every "Restart & Run All"; unseeded draws change on each execution.
rng = random.Random(42)
temperature_data = [rng.uniform(60, 80) for _ in date_index]

# One simulated reading per date in the index.
temperature_series = pd.Series(temperature_data, index=date_index)
temperature_series

2023-10-01   69.98
2023-10-02   62.90
2023-10-03   67.06
2023-10-04   74.07
2023-10-05   77.17
2023-10-06   66.00
2023-10-07   71.15
2023-10-08   61.36
2023-10-09   62.52
2023-10-10   76.75
2023-10-11   78.04
2023-10-12   65.86
2023-10-13   65.53
2023-10-14   62.22
2023-10-15   71.67
Freq: D, dtype: float64

### DataFrame with Datetime Index

In [37]:
# Five consecutive daily timestamps: 2023-01-01 through 2023-01-05.
date_index = pd.date_range('2023-01-01', periods=5, freq='D')

# One reading per day for each measured quantity.
temperatures_c = [20.1, 21.3, 19.8, 22.5, 20.9]
humidity_pct = [50, 48, 52, 47, 49]
data = dict(zip(('Temperature (°C)', 'Humidity (%)'), (temperatures_c, humidity_pct)))

# Rows are labelled by date instead of the default integer positions.
df = pd.DataFrame(data, index=date_index)
df

Unnamed: 0,Temperature (°C),Humidity (%)
2023-01-01,20.1,50
2023-01-02,21.3,48
2023-01-03,19.8,52
2023-01-04,22.5,47
2023-01-05,20.9,49


## Common Operations

### Indexing and Slicing

In [38]:
specific_date_data = df.loc['2023-01-02']
specific_date_data

Temperature (°C)   21.30
Humidity (%)       48.00
Name: 2023-01-02 00:00:00, dtype: float64

In [39]:
# Label-based slice over the DatetimeIndex; both endpoints are included.
date_range_data = df.loc['2023-01-02':'2023-01-04']
date_range_data

Unnamed: 0,Temperature (°C),Humidity (%)
2023-01-02,21.3,48
2023-01-03,19.8,52
2023-01-04,22.5,47


In [40]:
date_index = pd.to_datetime(['2022-12-15', '2023-01-02', '2023-02-10', '2023-03-25', '2024-01-05'])

In [41]:
data = {
    'Temperature (°C)': [18.5, 20.1, 19.8, 21.5, 22.0],
    'Humidity (%)': [55, 50, 48, 53, 49]
}

In [42]:
df = pd.DataFrame(data, index=date_index)
df

Unnamed: 0,Temperature (°C),Humidity (%)
2022-12-15,18.5,55
2023-01-02,20.1,50
2023-02-10,19.8,48
2023-03-25,21.5,53
2024-01-05,22.0,49


In [43]:
january_data = df.loc['2023-01']
january_data

Unnamed: 0,Temperature (°C),Humidity (%)
2023-01-02,20.1,50


In [44]:
df.loc['2023']

Unnamed: 0,Temperature (°C),Humidity (%)
2023-01-02,20.1,50
2023-02-10,19.8,48
2023-03-25,21.5,53


In [45]:
# Restrict to January explicitly: `day == 2` on its own would also match the
# 2nd of any other month present in the index.
is_january_2nd = (df.index.month == 1) & (df.index.day == 2)
january_2nd_data = df[is_january_2nd]
january_2nd_data

Unnamed: 0,Temperature (°C),Humidity (%)
2023-01-02,20.1,50


## Resampling

### Downsampling: Reducing Frequency

In [46]:
date_index = pd.date_range(start='2023-01-01', end='2023-01-15', freq='D')

In [47]:
temperature_data = [20.1 + i * 0.2 for i in range(len(date_index))]

In [48]:
df = pd.DataFrame({'Temperature (°C)': temperature_data}, index=date_index)
df

Unnamed: 0,Temperature (°C)
2023-01-01,20.1
2023-01-02,20.3
2023-01-03,20.5
2023-01-04,20.7
2023-01-05,20.9
2023-01-06,21.1
2023-01-07,21.3
2023-01-08,21.5
2023-01-09,21.7
2023-01-10,21.9


In [50]:
# Downsample to weekly data with mean aggregation
weekly_mean = df['Temperature (°C)'].resample('W').mean()
weekly_mean

2023-01-01   20.10
2023-01-08   20.90
2023-01-15   22.30
Freq: W-SUN, Name: Temperature (°C), dtype: float64

### Upsampling: Increasing Frequency

In [51]:
# Three month-start readings; the dates are still plain strings at this point.
month_starts = ['2023-01-01', '2023-02-01', '2023-03-01']
readings_c = [20.1, 21.5, 22.0]
data = dict([('Date', month_starts), ('Temperature (°C)', readings_c)])

df = pd.DataFrame.from_dict(data)

In [52]:
df

Unnamed: 0,Date,Temperature (°C)
0,2023-01-01,20.1
1,2023-02-01,21.5
2,2023-03-01,22.0


In [53]:
# Parse the 'Date' strings into datetimes and promote that column to the index.
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
df

Unnamed: 0_level_0,Temperature (°C)
Date,Unnamed: 1_level_1
2023-01-01,20.1
2023-02-01,21.5
2023-03-01,22.0


In [54]:
df_upsampled = df.resample('D').asfreq().interpolate(method='linear')
df_upsampled

Unnamed: 0_level_0,Temperature (°C)
Date,Unnamed: 1_level_1
2023-01-01,20.1
2023-01-02,20.15
2023-01-03,20.19
2023-01-04,20.24
2023-01-05,20.28
2023-01-06,20.33
2023-01-07,20.37
2023-01-08,20.42
2023-01-09,20.46
2023-01-10,20.51


In [55]:
df.resample('D').asfreq()

Unnamed: 0_level_0,Temperature (°C)
Date,Unnamed: 1_level_1
2023-01-01,20.1
2023-01-02,
2023-01-03,
2023-01-04,
2023-01-05,
2023-01-06,
2023-01-07,
2023-01-08,
2023-01-09,
2023-01-10,


## Shifting and Lagging

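**Lag**: A lag is the number of time periods by which a series is shifted so that each
row is paired with an *earlier* observation. In pandas, a positive shift such as
`shift(1)` moves the values down the index, so each row shows the value from one period
before; the first row has no predecessor and becomes `NaN`.

**Lead**: A lead is a negative lag: `shift(-1)` moves the values up the index, so each
row shows the value from one period ahead, and the last row becomes `NaN`.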

### Creating Lagged Features

In [56]:
data = {
    'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
    'Temperature (°C)': [20.1, 21.5, 22.0, 23.2, 22.8]
}

In [57]:
# Build the frame, parse 'Date' into datetimes, and index by it in one chain.
df = (
    pd.DataFrame(data)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)
df

Unnamed: 0_level_0,Temperature (°C)
Date,Unnamed: 1_level_1
2023-01-01,20.1
2023-01-02,21.5
2023-01-03,22.0
2023-01-04,23.2
2023-01-05,22.8


In [58]:
# Creating a lagged feature for temperature with a lag of 1 time period
df['Temperature (°C)_lag1'] = df['Temperature (°C)'].shift(1)

In [59]:
df

Unnamed: 0_level_0,Temperature (°C),Temperature (°C)_lag1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,20.1,
2023-01-02,21.5,20.1
2023-01-03,22.0,21.5
2023-01-04,23.2,22.0
2023-01-05,22.8,23.2


### Creating Lead Features

In [60]:
# Creating a lead feature for temperature with a lead of 1 time period
df['Temperature (°C)_lead1'] = df['Temperature (°C)'].shift(-1)
df

Unnamed: 0_level_0,Temperature (°C),Temperature (°C)_lag1,Temperature (°C)_lead1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,20.1,,21.5
2023-01-02,21.5,20.1,22.0
2023-01-03,22.0,21.5,23.2
2023-01-04,23.2,22.0,22.8
2023-01-05,22.8,23.2,


## Rolling Windows

Rolling windows are valuable for:

• **Smoothing Data**: Rolling windows can help smooth out noisy or volatile time
series data by calculating moving averages or other aggregates within the
window.

• **Identifying Trends and Patterns**: They are used to identify trends, patterns,
or anomalies in time series data by analyzing statistics calculated within the
rolling window.

• **Feature Engineering**: Rolling windows can be used to create rolling-statistic
features (such as moving averages) or other time-dependent features for predictive modeling.

### Rolling Mean (Moving Average)

In [61]:
rolling_mean_3d = df['Temperature (°C)'].rolling(window=3).mean()
rolling_mean_3d

Date
2023-01-01     NaN
2023-01-02     NaN
2023-01-03   21.20
2023-01-04   22.23
2023-01-05   22.67
Name: Temperature (°C), dtype: float64

### Rolling Standard Deviation

In [62]:
rolling_std_3d = df['Temperature (°C)'].rolling(window=3).std()
rolling_std_3d

Date
2023-01-01    NaN
2023-01-02    NaN
2023-01-03   0.98
2023-01-04   0.87
2023-01-05   0.61
Name: Temperature (°C), dtype: float64

### Rolling Sum

In [63]:
data = {
    'Date': ['2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01', '2023-05-01'],
    'Monthly Sales': [1000, 1200, 900, 1100, 950]
}

In [64]:
df = pd.DataFrame(data)
# Take the 'Date' column out of the frame, parse it, and use it as the index;
# the index keeps the column's name ('Date').
df = df.set_index(pd.to_datetime(df.pop('Date')))
df

Unnamed: 0_level_0,Monthly Sales
Date,Unnamed: 1_level_1
2023-01-01,1000
2023-02-01,1200
2023-03-01,900
2023-04-01,1100
2023-05-01,950


In [65]:
rolling_sum_3m = df['Monthly Sales'].rolling(window=3).sum()
rolling_sum_3m

Date
2023-01-01       NaN
2023-02-01       NaN
2023-03-01   3100.00
2023-04-01   3200.00
2023-05-01   2950.00
Name: Monthly Sales, dtype: float64

### Exponential Moving Average (EMA)

In [66]:
data = {
    'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
    'Closing Price': [100.2, 101.0, 101.5, 100.8, 101.2]
}

In [67]:
# Parse the date strings first, then index the frame by the parsed column.
price_frame = pd.DataFrame(data)
price_frame['Date'] = price_frame['Date'].pipe(pd.to_datetime)
df = price_frame.set_index('Date')
df

Unnamed: 0_level_0,Closing Price
Date,Unnamed: 1_level_1
2023-01-01,100.2
2023-01-02,101.0
2023-01-03,101.5
2023-01-04,100.8
2023-01-05,101.2


In [68]:
# Calculate a 3-day Exponential Moving Average (EMA) of daily closing prices
ema_3d = df['Closing Price'].ewm(span=3).mean()
ema_3d

Date
2023-01-01   100.20
2023-01-02   100.73
2023-01-03   101.17
2023-01-04   100.97
2023-01-05   101.09
Name: Closing Price, dtype: float64

**ewm** is short for **exponentially weighted** (moving) calculations.

In Pandas, ewm() is the entry point for computing exponentially weighted statistics, such as:

Exponentially Weighted Moving Average (EWMA)

Exponentially Weighted Moving Variance

Exponentially Weighted Covariance

Exponentially Weighted Standard Deviation

## Assembling Datetime from Multiple DataFrame Columns

In [69]:
# Each datetime component lives in its own integer column, one row per timestamp.
component_names = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second']
component_values = [
    [2023, 2023, 2023],
    [1, 2, 3],
    [15, 20, 25],
    [10, 15, 18],
    [30, 45, 0],
    [0, 0, 0],
]
data = dict(zip(component_names, component_values))

df = pd.DataFrame(data)
df

Unnamed: 0,Year,Month,Day,Hour,Minute,Second
0,2023,1,15,10,30,0
1,2023,2,20,15,45,0
2,2023,3,25,18,0,0


In [70]:
# pd.to_datetime assembles one datetime per row from the component columns,
# matching them by name (year, month, day, hour, minute, second). `format`
# only governs string parsing and has no effect on this path, so it is omitted.
datetime_parts = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second']
df['Datetime'] = pd.to_datetime(df[datetime_parts])
df

Unnamed: 0,Year,Month,Day,Hour,Minute,Second,Datetime
0,2023,1,15,10,30,0,2023-01-15 10:30:00
1,2023,2,20,15,45,0,2023-02-20 15:45:00
2,2023,3,25,18,0,0,2023-03-25 18:00:00
