🟦 1. Import Libraries

In [1]:
import pandas as pd

🟦 2. Sample Dataset With Different Date Formats

In [15]:
# Sample events whose dates arrive as strings in several layouts, plus one
# string that is not a date at all and one missing value.
raw_date_strings = [
    "2024-01-15",    # ISO, dash-separated
    "15/02/2024",    # day/month/year with slashes
    "03-20-2024",    # month-day-year with dashes
    "2024.04.01",    # dot-separated
    "invalid_date",  # not a date
    None,            # missing
]

data = {
    # One sequential id per date string: 1..6.
    "event_id": list(range(1, len(raw_date_strings) + 1)),
    "raw_date": raw_date_strings,
}

df = pd.DataFrame(data)
df

Unnamed: 0,event_id,raw_date
0,1,2024-01-15
1,2,15/02/2024
2,3,03-20-2024
3,4,2024.04.01
4,5,invalid_date
5,6,


🟦 3. Convert Strings to Datetime

In [16]:
# Let pandas choose the format itself. errors="coerce" turns every value it
# cannot parse into NaT instead of raising; in the recorded output only the
# ISO-formatted first row survives, which the next section inspects.
df["converted_date"] = df["raw_date"].pipe(pd.to_datetime, errors="coerce")
df

Unnamed: 0,event_id,raw_date,converted_date
0,1,2024-01-15,2024-01-15
1,2,15/02/2024,NaT
2,3,03-20-2024,NaT
3,4,2024.04.01,NaT
4,5,invalid_date,NaT
5,6,,NaT


🟦 4. Checking Which Rows Failed Conversion

In [17]:
# Keep only the rows where the automatic conversion produced NaT.
df.loc[df["converted_date"].isna()]

Unnamed: 0,event_id,raw_date,converted_date
1,2,15/02/2024,NaT
2,3,03-20-2024,NaT
3,4,2024.04.01,NaT
4,5,invalid_date,NaT
5,6,,NaT


🟦 5. Parsing Different Known Formats Manually

In [18]:
# Parse with one explicit layout: day/month/year separated by slashes.
# With errors="coerce", every row that does not match this layout becomes NaT.
slash_day_month_year = "%d/%m/%Y"
df["date_format_custom"] = pd.to_datetime(df["raw_date"], format=slash_day_month_year, errors="coerce")

df

Unnamed: 0,event_id,raw_date,converted_date,date_format_custom
0,1,2024-01-15,2024-01-15,NaT
1,2,15/02/2024,NaT,2024-02-15
2,3,03-20-2024,NaT,NaT
3,4,2024.04.01,NaT,NaT
4,5,invalid_date,NaT,NaT
5,6,,NaT,NaT


🟦 6. Convert Timestamps (Integers) to Datetime

In [26]:
# Three trips, each stamped with an integer Unix timestamp.
timestamp_data = {
    "trip_id": [101, 102, 103],
    "timestamp_unix": [1_700_000_000, 1_700_100_000, 1_700_200_000],
}

df2 = pd.DataFrame(timestamp_data)

# unit="s" tells pandas the integers count seconds since the Unix epoch.
df2["datetime"] = df2["timestamp_unix"].pipe(pd.to_datetime, unit="s")
df2

Unnamed: 0,trip_id,timestamp_unix,datetime
0,101,1700000000,2023-11-14 22:13:20
1,102,1700100000,2023-11-16 02:00:00
2,103,1700200000,2023-11-17 05:46:40


🟦 7. Convert Float Timestamps

In [27]:
# Float timestamps: seconds since 1970, where the fractional part carries
# sub-second precision. pandas is already imported in the notebook's first
# cell, so it is not imported again here.
df2["timestamp_float"] = pd.Series([
    1704067200.0,   # 2024-01-01 00:00:00
    1704153600.5,   # 2024-01-02 00:00:00.500
    1704240000.0,   # 2024-01-03 00:00:00
])

# Replace the raw floats with their datetime equivalents. The column keeps its
# "timestamp_float" name but holds datetimes from this point on.
df2["timestamp_float"] = pd.to_datetime(df2["timestamp_float"], unit="s")

df2


Unnamed: 0,trip_id,timestamp_unix,datetime,timestamp_float
0,101,1700000000,2023-11-14 22:13:20,2024-01-01 00:00:00.000
1,102,1700100000,2023-11-16 02:00:00,2024-01-02 00:00:00.500
2,103,1700200000,2023-11-17 05:46:40,2024-01-03 00:00:00.000


🟦 8. Handling Various Date Formats Together

In [22]:
# Six date strings, each written in a different layout, attached to df as a
# new column (the Series is aligned to df on its 0..5 index labels).
mixed_date_strings = pd.Series([
    "01-04-2024",
    "2024/04/02",
    "April 3, 2024",
    "2024-04-04 14:30:00",
    "04/05/24",
    "September 22, 1993",
])
df["mixed_date_format"] = mixed_date_strings

# format="mixed" infers the layout separately for every element.
# dayfirst=False reads ambiguous values month-first, so "01-04-2024" is
# January 4 and "04/05/24" is April 5 in the recorded output.
# errors="coerce" would turn anything unparseable into NaT.
df["mixed_date_format"] = pd.to_datetime(
    df["mixed_date_format"],
    format="mixed",
    dayfirst=False,
    errors="coerce",
)
df

Unnamed: 0,event_id,raw_date,converted_date,date_format_custom,mixed_date_format
0,1,2024-01-15,2024-01-15,NaT,2024-01-04 00:00:00
1,2,15/02/2024,NaT,2024-02-15,2024-04-02 00:00:00
2,3,03-20-2024,NaT,NaT,2024-04-03 00:00:00
3,4,2024.04.01,NaT,NaT,2024-04-04 14:30:00
4,5,invalid_date,NaT,NaT,2024-04-05 00:00:00
5,6,,NaT,NaT,1993-09-22 00:00:00


🟦 9. Checking Datetime Type

In [23]:
# Show the dtype of every column to confirm the conversions took effect: in the
# recorded output the three converted columns are datetime64[ns], while
# raw_date is still object (plain strings).
df.dtypes


event_id                       int64
raw_date                      object
converted_date        datetime64[ns]
date_format_custom    datetime64[ns]
mixed_date_format     datetime64[ns]
dtype: object

🟦 10. Convert Column to Datetime Index

In [24]:
# Promote the parsed datetime column to the DataFrame's index.
# The guard makes the cell safe to re-run: after the first run the column has
# already moved into the index, and calling set_index again would raise
# KeyError. If the column is missing for any other reason, set_index still
# raises as before.
if df.index.name != "mixed_date_format":
    df = df.set_index("mixed_date_format")
df


Unnamed: 0_level_0,event_id,raw_date,converted_date,date_format_custom
mixed_date_format,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-04 00:00:00,1,2024-01-15,2024-01-15,NaT
2024-04-02 00:00:00,2,15/02/2024,NaT,2024-02-15
2024-04-03 00:00:00,3,03-20-2024,NaT,NaT
2024-04-04 14:30:00,4,2024.04.01,NaT,NaT
2024-04-05 00:00:00,5,invalid_date,NaT,NaT
1993-09-22 00:00:00,6,,NaT,NaT


# 🟦 Summary Cell (Markdown)

🎯 What You Learned in This Section

✔️ Convert string dates to datetime using `pd.to_datetime()`

✔️ Handle invalid values using `errors='coerce'`

✔️ Convert Unix timestamps (integers and floats) into datetime

✔️ Parse multiple date formats

✔️ Set datetime columns as index