Feature Engineering 

In [1]:
import pandas as pd
import numpy as np

# Read the cleaned sessions file; both timestamp columns are parsed as datetimes
df = pd.read_csv(
    "ev_charging_sessions.csv",
    parse_dates=["start_time", "end_time"],
)

# Quick summary of what was loaded
print("Cleaned dataset loaded for feature engineering.")
print("Shape:", df.shape)
print("Columns:", list(df.columns))

# Earliest and latest session start timestamps
session_starts = df["start_time"]
print("\nDate range:")
print("Start:", session_starts.min())
print("End:  ", session_starts.max())


Cleaned dataset loaded for feature engineering.
Shape: (3500, 10)
Columns: ['session_id', 'user_id', 'vehicle_id', 'station_id', 'start_time', 'end_time', 'duration_min', 'energy_kWh', 'session_day', 'session_type']

Date range:
Start: 2024-11-01 06:03:00
End:   2024-11-28 22:32:00


Making time-based features

In [2]:
# Derive calendar features from the session start timestamp, plus the
# session duration expressed in hours instead of minutes.
# Later keyword arguments may reference columns created by earlier ones.
df = df.assign(
    hour=lambda d: d["start_time"].dt.hour,
    day_of_week=lambda d: d["start_time"].dt.day_name(),
    is_weekend=lambda d: d["day_of_week"].isin(["Saturday", "Sunday"]),
    session_length_hours=lambda d: d["duration_min"] / 60.0,
)

time_feature_cols = ["start_time", "hour", "day_of_week", "is_weekend", "session_length_hours"]
print(df[time_feature_cols].head())


           start_time  hour day_of_week  is_weekend  session_length_hours
0 2024-11-11 12:09:00    12      Monday       False              1.283333
1 2024-11-10 19:51:00    19      Sunday        True              1.616667
2 2024-11-26 18:46:00    18     Tuesday       False              1.950000
3 2024-11-28 19:53:00    19    Thursday       False              1.816667
4 2024-11-27 13:09:00    13   Wednesday       False              1.316667


Making cyclical encodings for time features

In [3]:
# Map each day name to an index 0-6 (Monday = 0, ..., Sunday = 6)
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
day_to_idx = dict(zip(day_order, range(len(day_order))))
df["day_of_week_idx"] = df["day_of_week"].map(day_to_idx)

# Express each periodic value as an angle on its cycle (24 hours, 7 days)
# so that the sin/cos pair places the end of a cycle next to its start.
hour_angle = 2 * np.pi * df["hour"] / 24
dow_angle = 2 * np.pi * df["day_of_week_idx"] / 7

# Cyclical encoding for hour
df["hour_sin"] = np.sin(hour_angle)
df["hour_cos"] = np.cos(hour_angle)

# Cyclical encoding for day of week
df["dow_sin"] = np.sin(dow_angle)
df["dow_cos"] = np.cos(dow_angle)

cyclical_preview_cols = [
    "hour", "hour_sin", "hour_cos",
    "day_of_week", "day_of_week_idx", "dow_sin", "dow_cos",
]
df[cyclical_preview_cols].head()


Unnamed: 0,hour,hour_sin,hour_cos,day_of_week,day_of_week_idx,dow_sin,dow_cos
0,12,1.224647e-16,-1.0,Monday,0,0.0,1.0
1,19,-0.9659258,0.258819,Sunday,6,-0.781831,0.62349
2,18,-1.0,-1.83697e-16,Tuesday,1,0.781831,0.62349
3,19,-0.9659258,0.258819,Thursday,3,0.433884,-0.900969
4,13,-0.258819,-0.9659258,Wednesday,2,0.974928,-0.222521


Making station-level aggregate features

In [4]:
# Aggregate stats per station and attach them to every session row.
#
# NOTE(review): the aggregates are computed over every row currently in df.
# If a train/test split happens downstream, consider recomputing them on the
# training portion only to avoid leaking held-out information - confirm
# against the modeling notebook.
station_feature_cols = [
    "station_total_sessions",
    "station_mean_duration_min",
    "station_mean_energy_kwh",
    "station_weekend_share",
]

station_agg = df.groupby("station_id").agg(
    station_total_sessions=("session_id", "count"),
    station_mean_duration_min=("duration_min", "mean"),
    station_mean_energy_kwh=("energy_kWh", "mean"),
    station_weekend_share=("is_weekend", "mean"),  # mean of a boolean = share of True
).reset_index()

print("Station-level aggregate features (head):")
print(station_agg.head())

# Merge back onto main df.
# Any aggregate columns left over from a previous run of this cell are dropped
# first; otherwise the merge would suffix the duplicates with _x/_y and the
# column selection below would raise a KeyError on re-execution.
# validate="many_to_one" asserts that station_agg has one row per station_id.
df = df.drop(columns=station_feature_cols, errors="ignore").merge(
    station_agg,
    on="station_id",
    how="left",
    validate="many_to_one",
)

df[["station_id"] + station_feature_cols].head()


Station-level aggregate features (head):
  station_id  station_total_sessions  station_mean_duration_min  \
0       S001                      29                  73.758621   
1       S002                      38                  75.842105   
2       S003                      36                  70.000000   
3       S004                      39                  74.794872   
4       S005                      39                  75.282051   

   station_mean_energy_kwh  station_weekend_share  
0                44.110345               0.241379  
1                41.329211               0.315789  
2                40.220556               0.222222  
3                40.866154               0.410256  
4                39.797949               0.384615  


Unnamed: 0,station_id,station_total_sessions,station_mean_duration_min,station_mean_energy_kwh,station_weekend_share
0,S091,37,75.675676,41.285676,0.27027
1,S025,40,80.2,45.0465,0.25
2,S007,41,74.853659,42.567805,0.268293
3,S008,27,78.407407,43.597037,0.185185
4,S037,43,75.930233,44.731163,0.302326


Save the new features to a new CSV file

In [5]:
# Write the feature-enhanced frame to CSV, leaving out the row index
output_path = "ev_charging_sessions_features_v1.csv"
df.to_csv(path_or_buf=output_path, index=False)

# Confirm where the file went and how large the final frame is
print(f"Feature-enhanced dataset saved to: {output_path}")
print("Final shape:", df.shape)


Feature-enhanced dataset saved to: ev_charging_sessions_features_v1.csv
Final shape: (3500, 23)


Final engineered features

In [7]:
# Display the column index of df so the original and engineered columns can be reviewed
df.columns

Index(['session_id', 'user_id', 'vehicle_id', 'station_id', 'start_time',
       'end_time', 'duration_min', 'energy_kWh', 'session_day', 'session_type',
       'hour', 'day_of_week', 'is_weekend', 'session_length_hours',
       'day_of_week_idx', 'hour_sin', 'hour_cos', 'dow_sin', 'dow_cos',
       'station_total_sessions', 'station_mean_duration_min',
       'station_mean_energy_kwh', 'station_weekend_share'],
      dtype='object')