Import bibliotek

In [29]:
from datetime import datetime, timezone, timedelta
import pandas as pd
import numpy as np

from feature_engine.datetime import DatetimeFeatures

Tworzenie przykładowego df

In [30]:
# tworzymy zakres dat z czasem (co 1 godzina)
daty = pd.date_range(start="2025-10-14 08:00:00", periods=10, freq="h")

# tworzymy DataFrame
df = pd.DataFrame({
    "datetime": daty,
    "value": np.random.randint(10, 100, size=10),
    "category": np.random.choice(["A", "B", "C"], size=10)
})

print(df)

             datetime  value category
0 2025-10-14 08:00:00     56        A
1 2025-10-14 09:00:00     82        B
2 2025-10-14 10:00:00     20        A
3 2025-10-14 11:00:00     90        A
4 2025-10-14 12:00:00     65        B
5 2025-10-14 13:00:00     82        B
6 2025-10-14 14:00:00     87        A
7 2025-10-14 15:00:00     26        A
8 2025-10-14 16:00:00     13        B
9 2025-10-14 17:00:00     44        A


### using `pandas`

In [31]:
df["date"] = df["datetime"].dt.date

In [32]:
df["time"] = df["datetime"].dt.time

In [33]:
for t in ["year", "month", "day"]:
    df[t] = getattr(df["datetime"].dt, t)
df

Unnamed: 0,datetime,value,category,date,time,year,month,day
0,2025-10-14 08:00:00,56,A,2025-10-14,08:00:00,2025,10,14
1,2025-10-14 09:00:00,82,B,2025-10-14,09:00:00,2025,10,14
2,2025-10-14 10:00:00,20,A,2025-10-14,10:00:00,2025,10,14
3,2025-10-14 11:00:00,90,A,2025-10-14,11:00:00,2025,10,14
4,2025-10-14 12:00:00,65,B,2025-10-14,12:00:00,2025,10,14
5,2025-10-14 13:00:00,82,B,2025-10-14,13:00:00,2025,10,14
6,2025-10-14 14:00:00,87,A,2025-10-14,14:00:00,2025,10,14
7,2025-10-14 15:00:00,26,A,2025-10-14,15:00:00,2025,10,14
8,2025-10-14 16:00:00,13,B,2025-10-14,16:00:00,2025,10,14
9,2025-10-14 17:00:00,44,A,2025-10-14,17:00:00,2025,10,14


In [34]:
df["weekday"] = df["datetime"].dt.weekday
df["day_of_week"] = df["datetime"].dt.day_name()
df

Unnamed: 0,datetime,value,category,date,time,year,month,day,weekday,day_of_week
0,2025-10-14 08:00:00,56,A,2025-10-14,08:00:00,2025,10,14,1,Tuesday
1,2025-10-14 09:00:00,82,B,2025-10-14,09:00:00,2025,10,14,1,Tuesday
2,2025-10-14 10:00:00,20,A,2025-10-14,10:00:00,2025,10,14,1,Tuesday
3,2025-10-14 11:00:00,90,A,2025-10-14,11:00:00,2025,10,14,1,Tuesday
4,2025-10-14 12:00:00,65,B,2025-10-14,12:00:00,2025,10,14,1,Tuesday
5,2025-10-14 13:00:00,82,B,2025-10-14,13:00:00,2025,10,14,1,Tuesday
6,2025-10-14 14:00:00,87,A,2025-10-14,14:00:00,2025,10,14,1,Tuesday
7,2025-10-14 15:00:00,26,A,2025-10-14,15:00:00,2025,10,14,1,Tuesday
8,2025-10-14 16:00:00,13,B,2025-10-14,16:00:00,2025,10,14,1,Tuesday
9,2025-10-14 17:00:00,44,A,2025-10-14,17:00:00,2025,10,14,1,Tuesday


In [35]:
data = pd.DataFrame(
    [(x.hour, x.minute, x.second) for x in df["datetime"]], columns=["h", "m", "s"]
)
data

Unnamed: 0,h,m,s
0,8,0,0
1,9,0,0
2,10,0,0
3,11,0,0
4,12,0,0
5,13,0,0
6,14,0,0
7,15,0,0
8,16,0,0
9,17,0,0


In [36]:
[(x.hour, x.minute, x.second) for x in df["datetime"]]

[(8, 0, 0),
 (9, 0, 0),
 (10, 0, 0),
 (11, 0, 0),
 (12, 0, 0),
 (13, 0, 0),
 (14, 0, 0),
 (15, 0, 0),
 (16, 0, 0),
 (17, 0, 0)]

### using `feature-engine`

In [66]:
df = pd.DataFrame({
    "datetime": daty,
    "value": np.random.randint(10, 100, size=10),
    "category": np.random.choice(["A", "B", "C"], size=10)
})
df

Unnamed: 0,datetime,value,category
0,2025-10-14 08:00:00,69,A
1,2025-10-14 09:00:00,26,C
2,2025-10-14 10:00:00,44,C
3,2025-10-14 11:00:00,62,A
4,2025-10-14 12:00:00,76,C
5,2025-10-14 13:00:00,43,A
6,2025-10-14 14:00:00,35,C
7,2025-10-14 15:00:00,76,A
8,2025-10-14 16:00:00,74,B
9,2025-10-14 17:00:00,14,B


In [67]:
dtt = DatetimeFeatures(variables=None, features_to_extract="all", drop_original=False)
df_trans = dtt.fit_transform(df)
df_trans.head()

Unnamed: 0,datetime,value,category,datetime_month,datetime_quarter,datetime_semester,datetime_year,datetime_week,datetime_day_of_week,datetime_day_of_month,...,datetime_month_end,datetime_quarter_start,datetime_quarter_end,datetime_year_start,datetime_year_end,datetime_leap_year,datetime_days_in_month,datetime_hour,datetime_minute,datetime_second
0,2025-10-14 08:00:00,69,A,10,4,2,2025,42,1,14,...,0,0,0,0,0,0,31,8,0,0
1,2025-10-14 09:00:00,26,C,10,4,2,2025,42,1,14,...,0,0,0,0,0,0,31,9,0,0
2,2025-10-14 10:00:00,44,C,10,4,2,2025,42,1,14,...,0,0,0,0,0,0,31,10,0,0
3,2025-10-14 11:00:00,62,A,10,4,2,2025,42,1,14,...,0,0,0,0,0,0,31,11,0,0
4,2025-10-14 12:00:00,76,C,10,4,2,2025,42,1,14,...,0,0,0,0,0,0,31,12,0,0


### teraz się pobawię timezones, jak było w data camp

In [68]:
df["datetime_aware"] = df["datetime"].dt.tz_localize("Europe/Warsaw")
df["datetime_UTC"] = df["datetime_aware"].dt.tz_convert("UTC")
df

Unnamed: 0,datetime,value,category,datetime_aware,datetime_UTC
0,2025-10-14 08:00:00,69,A,2025-10-14 08:00:00+02:00,2025-10-14 06:00:00+00:00
1,2025-10-14 09:00:00,26,C,2025-10-14 09:00:00+02:00,2025-10-14 07:00:00+00:00
2,2025-10-14 10:00:00,44,C,2025-10-14 10:00:00+02:00,2025-10-14 08:00:00+00:00
3,2025-10-14 11:00:00,62,A,2025-10-14 11:00:00+02:00,2025-10-14 09:00:00+00:00
4,2025-10-14 12:00:00,76,C,2025-10-14 12:00:00+02:00,2025-10-14 10:00:00+00:00
5,2025-10-14 13:00:00,43,A,2025-10-14 13:00:00+02:00,2025-10-14 11:00:00+00:00
6,2025-10-14 14:00:00,35,C,2025-10-14 14:00:00+02:00,2025-10-14 12:00:00+00:00
7,2025-10-14 15:00:00,76,A,2025-10-14 15:00:00+02:00,2025-10-14 13:00:00+00:00
8,2025-10-14 16:00:00,74,B,2025-10-14 16:00:00+02:00,2025-10-14 14:00:00+00:00
9,2025-10-14 17:00:00,14,B,2025-10-14 17:00:00+02:00,2025-10-14 15:00:00+00:00
