In [6]:
import pandas as pd

# Toy dataset: four consecutive days given as ISO-formatted strings, each
# paired with a numeric value.
data = dict(
    date_str=['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04'],
    value=[10, 20, 30, 40],
)

# Build the frame and append a 'date' column holding the strings parsed into
# pandas datetimes; the original 'date_str' column is kept alongside it.
df = pd.DataFrame(data).assign(
    date=lambda frame: pd.to_datetime(frame['date_str'])
)
df

Unnamed: 0,date_str,value,date
0,2023-01-01,10,2023-01-01
1,2023-01-02,20,2023-01-02
2,2023-01-03,30,2023-01-03
3,2023-01-04,40,2023-01-04


## 1.1 Extracting components from datetime

In [8]:
# Parse 'date_str' into the 'date' column (overwriting it if already present).
df['date'] = pd.to_datetime(df['date_str'])

# Add one column per calendar/clock field exposed by the .dt accessor.
# The source strings carry no time of day, so hour/minute/second are all 0.
for part in ('year', 'month', 'day', 'hour', 'minute', 'second'):
    df[part] = getattr(df['date'].dt, part)
df

Unnamed: 0,date_str,value,date,year,month,day,hour,minute,second
0,2023-01-01,10,2023-01-01,2023,1,1,0,0,0
1,2023-01-02,20,2023-01-02,2023,1,2,0,0,0
2,2023-01-03,30,2023-01-03,2023,1,3,0,0,0
3,2023-01-04,40,2023-01-04,2023,1,4,0,0,0


## 1.2 Filtering data based on datetime

In [9]:
# Parse 'date_str' into the 'date' column (overwriting it if already present).
df['date'] = pd.to_datetime(df['date_str'])

# Keep only rows strictly later than 2023-01-02; the cutoff day itself is
# excluded because the comparison is "greater than", not "greater or equal".
filtered_df = df.loc[df['date'].gt(pd.Timestamp('2023-01-02'))]
filtered_df

Unnamed: 0,date_str,value,date,year,month,day,hour,minute,second
2,2023-01-03,30,2023-01-03,2023,1,3,0,0,0
3,2023-01-04,40,2023-01-04,2023,1,4,0,0,0


## 1.3 Setting a datetime column as the index

In [10]:
# Parse 'date_str' into a 'date' column. set_index below moves 'date' out of
# the columns, so recreating it here keeps the cell safe to re-run.
df['date'] = pd.to_datetime(df['date_str'])

# Promote 'date' to the index. The result is rebound to `df` rather than
# using inplace=True: inplace offers no performance benefit and mutating the
# shared frame makes hidden-state bugs more likely when cells are re-run.
df = df.set_index('date')
df

Unnamed: 0_level_0,date_str,value,year,month,day,hour,minute,second
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-01,2023-01-01,10,2023,1,1,0,0,0
2023-01-02,2023-01-02,20,2023,1,2,0,0,0
2023-01-03,2023-01-03,30,2023,1,3,0,0,0
2023-01-04,2023-01-04,40,2023,1,4,0,0,0


## 1.4 Resampling time series data

In [13]:
# numpy is required for the random sample below. Importing it in this cell
# guarantees the cell runs on a fresh kernel; a repeated import is harmless.
import numpy as np

# One timestamp per calendar day from 2022-01-01 through 2024-01-10 inclusive.
date_rng = pd.date_range(start='2022-01-01', end='2024-01-10', freq='D')
df_time = pd.DataFrame(date_rng, columns=['date'])

# Random integers in [0, 10), one per day. No seed is set, so the values
# (and every output derived from them) differ from run to run.
df_time['data'] = np.random.randint(0, 10, size=len(date_rng))

# Use the dates as the index so resample() and rolling() operate on time.
df_time = df_time.set_index('date')
df_time

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2022-01-01,9
2022-01-02,7
2022-01-03,9
2022-01-04,9
2022-01-05,4
...,...
2024-01-06,9
2024-01-07,4
2024-01-08,8
2024-01-09,0


In [14]:
# Group rows into daily bins and sum each bin. This is a sum, not a moving
# average. df_time was built at daily frequency, so every bin holds exactly
# one row and the result equals the input values.
df_resample = df_time.resample('D').sum()
df_resample

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2022-01-01,9
2022-01-02,7
2022-01-03,9
2022-01-04,9
2022-01-05,4
...,...
2024-01-06,9
2024-01-07,4
2024-01-08,8
2024-01-09,0


### Rolling window calculations

In [16]:
# Trailing mean over a window of 3 rows; rows are daily, so this is a 3-day
# moving average. The first two rows have no full window and come out as NaN.
df_rolling = df_time.rolling(3).mean()
df_rolling

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2022-01-01,
2022-01-02,
2022-01-03,8.333333
2022-01-04,8.333333
2022-01-05,7.333333
...,...
2024-01-06,4.000000
2024-01-07,4.666667
2024-01-08,7.000000
2024-01-09,4.000000
