🟦 1. Import & Sample Trip Data

In [9]:
import pandas as pd

# Sample data: five trips on 2024-07-01. Timestamps start out as plain strings;
# the scheduled duration is given in whole minutes.
data = {
    "trip_id": [101, 102, 103, 104, 105],
    "depart_time": [
        "2024-07-01 07:30:00",
        "2024-07-01 08:15:00",
        "2024-07-01 09:00:00",
        "2024-07-01 16:45:00",
        "2024-07-01 18:10:00"
    ],
    "arrival_time": [
        "2024-07-01 08:05:00",
        "2024-07-01 08:50:00",
        "2024-07-01 09:50:00",
        "2024-07-01 17:40:00",
        "2024-07-01 18:55:00"
    ],
    "scheduled_duration_min": [30, 35, 45, 50, 40]
}

# Build the frame and parse both timestamp columns in one chain, so later
# cells can subtract them directly. `assign` overwrites the existing columns
# in place, which keeps the original column order.
df = pd.DataFrame(data).assign(
    depart_time=lambda frame: pd.to_datetime(frame["depart_time"]),
    arrival_time=lambda frame: pd.to_datetime(frame["arrival_time"]),
)

df

Unnamed: 0,trip_id,depart_time,arrival_time,scheduled_duration_min
0,101,2024-07-01 07:30:00,2024-07-01 08:05:00,30
1,102,2024-07-01 08:15:00,2024-07-01 08:50:00,35
2,103,2024-07-01 09:00:00,2024-07-01 09:50:00,45
3,104,2024-07-01 16:45:00,2024-07-01 17:40:00,50
4,105,2024-07-01 18:10:00,2024-07-01 18:55:00,40


🟦 2. Calculate Time Difference

In [10]:
# Actual travel time per trip: arrival minus departure, stored as a timedelta.
df["trip_duration"] = df["arrival_time"].sub(df["depart_time"])

df

Unnamed: 0,trip_id,depart_time,arrival_time,scheduled_duration_min,trip_duration
0,101,2024-07-01 07:30:00,2024-07-01 08:05:00,30,0 days 00:35:00
1,102,2024-07-01 08:15:00,2024-07-01 08:50:00,35,0 days 00:35:00
2,103,2024-07-01 09:00:00,2024-07-01 09:50:00,45,0 days 00:50:00
3,104,2024-07-01 16:45:00,2024-07-01 17:40:00,50,0 days 00:55:00
4,105,2024-07-01 18:10:00,2024-07-01 18:55:00,40,0 days 00:45:00


🟦 3. Convert Duration to Minutes & Seconds

In [11]:
# Total elapsed seconds per trip, computed once and reused for both columns.
elapsed_seconds = df["trip_duration"].dt.total_seconds()

# Columns are added in the same order as before: minutes first, then seconds.
df["duration_minutes"] = elapsed_seconds / 60
df["duration_seconds"] = elapsed_seconds

df

Unnamed: 0,trip_id,depart_time,arrival_time,scheduled_duration_min,trip_duration,duration_minutes,duration_seconds
0,101,2024-07-01 07:30:00,2024-07-01 08:05:00,30,0 days 00:35:00,35.0,2100.0
1,102,2024-07-01 08:15:00,2024-07-01 08:50:00,35,0 days 00:35:00,35.0,2100.0
2,103,2024-07-01 09:00:00,2024-07-01 09:50:00,45,0 days 00:50:00,50.0,3000.0
3,104,2024-07-01 16:45:00,2024-07-01 17:40:00,50,0 days 00:55:00,55.0,3300.0
4,105,2024-07-01 18:10:00,2024-07-01 18:55:00,40,0 days 00:45:00,45.0,2700.0


🟦 4. Compare Scheduled vs Actual Trip Durations

In [12]:
# Express the scheduled minutes as timedeltas so they have the same type as
# trip_duration and the two can be subtracted.
df["scheduled_timedelta"] = pd.to_timedelta(df["scheduled_duration_min"], unit="m")

# Delay = actual duration minus scheduled duration (zero means exactly on schedule).
df["delay"] = df["trip_duration"].sub(df["scheduled_timedelta"])

df.loc[:, ["trip_id", "trip_duration", "scheduled_timedelta", "delay"]]

Unnamed: 0,trip_id,trip_duration,scheduled_timedelta,delay
0,101,0 days 00:35:00,0 days 00:30:00,0 days 00:05:00
1,102,0 days 00:35:00,0 days 00:35:00,0 days 00:00:00
2,103,0 days 00:50:00,0 days 00:45:00,0 days 00:05:00
3,104,0 days 00:55:00,0 days 00:50:00,0 days 00:05:00
4,105,0 days 00:45:00,0 days 00:40:00,0 days 00:05:00


🟦 5. Late vs On-time Classification

In [13]:
# A trip is "Late" only when its delay is strictly positive; a zero or
# negative delay is "On Time". The comparison runs on the whole column at once
# instead of calling a Python lambda per row. A missing delay compares as
# False and is therefore labelled "On Time", as the lambda version did.
df["status"] = (
    df["delay"]
    .gt(pd.Timedelta(0))
    .map({True: "Late", False: "On Time"})
)

df[["trip_id", "trip_duration", "delay", "status"]]

Unnamed: 0,trip_id,trip_duration,delay,status
0,101,0 days 00:35:00,0 days 00:05:00,Late
1,102,0 days 00:35:00,0 days 00:00:00,On Time
2,103,0 days 00:50:00,0 days 00:05:00,Late
3,104,0 days 00:55:00,0 days 00:05:00,Late
4,105,0 days 00:45:00,0 days 00:05:00,Late


🟦 6. Create Timedelta from Raw Values

In [14]:
# Example: raw delay values in seconds, one per trip in row order.
extra_stop_seconds = [120, 0, 300, 60, 180]

# Convert the plain numbers to timedeltas; the list must have one value per row.
df["extra_stop_delay"] = pd.to_timedelta(extra_stop_seconds, unit="s")

df

Unnamed: 0,trip_id,depart_time,arrival_time,scheduled_duration_min,trip_duration,duration_minutes,duration_seconds,scheduled_timedelta,delay,status,extra_stop_delay
0,101,2024-07-01 07:30:00,2024-07-01 08:05:00,30,0 days 00:35:00,35.0,2100.0,0 days 00:30:00,0 days 00:05:00,Late,0 days 00:02:00
1,102,2024-07-01 08:15:00,2024-07-01 08:50:00,35,0 days 00:35:00,35.0,2100.0,0 days 00:35:00,0 days 00:00:00,On Time,0 days 00:00:00
2,103,2024-07-01 09:00:00,2024-07-01 09:50:00,45,0 days 00:50:00,50.0,3000.0,0 days 00:45:00,0 days 00:05:00,Late,0 days 00:05:00
3,104,2024-07-01 16:45:00,2024-07-01 17:40:00,50,0 days 00:55:00,55.0,3300.0,0 days 00:50:00,0 days 00:05:00,Late,0 days 00:01:00
4,105,2024-07-01 18:10:00,2024-07-01 18:55:00,40,0 days 00:45:00,45.0,2700.0,0 days 00:40:00,0 days 00:05:00,Late,0 days 00:03:00


🟦 7. Cumulative Travel Time

In [15]:
# Running total of travel time, accumulated in row order.
running_total = df["trip_duration"].cumsum()
df["cumulative_travel_time"] = running_total

df.loc[:, ["trip_id", "trip_duration", "cumulative_travel_time"]]

Unnamed: 0,trip_id,trip_duration,cumulative_travel_time
0,101,0 days 00:35:00,0 days 00:35:00
1,102,0 days 00:35:00,0 days 01:10:00
2,103,0 days 00:50:00,0 days 02:00:00
3,104,0 days 00:55:00,0 days 02:55:00
4,105,0 days 00:45:00,0 days 03:40:00


🟦 8. Extract Time Difference Components

In [16]:
# Split each duration into its parts once, then pick the pieces we need.
# These are per-unit parts, not totals: the minutes part is what remains
# after whole hours are taken out (0 hours / 35 minutes for a 35-minute trip).
duration_parts = df["trip_duration"].dt.components

df["duration_hours"] = duration_parts["hours"]
df["duration_minutes_part"] = duration_parts["minutes"]

df.loc[:, ["trip_duration", "duration_hours", "duration_minutes_part"]]

Unnamed: 0,trip_duration,duration_hours,duration_minutes_part
0,0 days 00:35:00,0,35
1,0 days 00:35:00,0,35
2,0 days 00:50:00,0,50
3,0 days 00:55:00,0,55
4,0 days 00:45:00,0,45


## 🟦 Summary

🕒 What You Learned in This Section

✅ Calculate time differences between timestamps

✅ Use pd.to_timedelta() for duration conversions

✅ Convert differences into seconds, minutes, and hours

✅ Compare scheduled vs actual travel times

✅ Detect late arrivals using time deltas

✅ Work with cumulative time intervals