In [1]:
import pandas as pd
import numpy as np

# Read the raw EV charging sessions export into a DataFrame. Timestamps are
# not parsed here, so start_time / end_time stay as plain strings in this frame.
df = pd.read_csv('ev_charging_sessions.csv')

print("EV Charging Sessions Dataset Loaded Successfully!")
print(f"\nDataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")

# Each overview is emitted as a heading line followed by its plain-text table.
for heading, overview in (
    ("\nFirst 5 records:", df.head()),
    ("\nData types:", df.dtypes),
    ("\nBasic statistics:", df.describe()),
):
    print(heading)
    print(overview)

EV Charging Sessions Dataset Loaded Successfully!

Dataset shape: (3500, 10)
Columns: ['session_id', 'user_id', 'vehicle_id', 'station_id', 'start_time', 'end_time', 'duration_min', 'energy_kWh', 'session_day', 'session_type']

First 5 records:
  session_id user_id vehicle_id station_id           start_time  \
0     CS0001    U339       V347       S091  2024-11-11 12:09:00   
1     CS0002    U286       V463       S025  2024-11-10 19:51:00   
2     CS0003    U092       V419       S007  2024-11-26 18:46:00   
3     CS0004    U369       V070       S008  2024-11-28 19:53:00   
4     CS0005    U185       V298       S037  2024-11-27 13:09:00   

              end_time  duration_min  energy_kWh session_day session_type  
0  2024-11-11 13:26:00            77       26.87     Weekday   Occasional  
1  2024-11-10 21:28:00            97       67.47     Weekend    Emergency  
2  2024-11-26 20:43:00           117       60.16     Weekend      Regular  
3  2024-11-28 21:42:00           109       39.19

In [3]:
# Summarise the categorical columns: the distinct labels each one takes and
# how many sessions fall under each label.
print("Unique values in categorical columns:")
for column, values_label, counts_label in (
    ("session_type", "Session Types", "Session Type Counts"),
    ("session_day", "Session Days", "Session Day Counts"),
):
    labels = df[column]
    print(f"\n{values_label}: {labels.unique()}")
    print(f"{counts_label}:\n{labels.value_counts()}")

# Cardinality of the identifier columns, preceded by a blank separator line.
print()
for entity, id_column in (
    ("Users", "user_id"),
    ("Vehicles", "vehicle_id"),
    ("Stations", "station_id"),
):
    print(f"Unique {entity}: {df[id_column].nunique()}")


Unique values in categorical columns:

Session Types: ['Occasional' 'Emergency' 'Regular']
Session Type Counts:
session_type
Emergency     1198
Occasional    1152
Regular       1150
Name: count, dtype: int64

Session Days: ['Weekday' 'Weekend']
Session Day Counts:
session_day
Weekday    1760
Weekend    1740
Name: count, dtype: int64

Unique Users: 500
Unique Vehicles: 499
Unique Stations: 100


In [2]:
# Show the dataframe as this cell's rich output. The recorded output is
# abbreviated with a "..." row, so this is a head/tail preview rather than
# every record.
df

Unnamed: 0,session_id,user_id,vehicle_id,station_id,start_time,end_time,duration_min,energy_kWh,session_day,session_type
0,CS0001,U339,V347,S091,2024-11-11 12:09:00,2024-11-11 13:26:00,77,26.87,Weekday,Occasional
1,CS0002,U286,V463,S025,2024-11-10 19:51:00,2024-11-10 21:28:00,97,67.47,Weekend,Emergency
2,CS0003,U092,V419,S007,2024-11-26 18:46:00,2024-11-26 20:43:00,117,60.16,Weekend,Regular
3,CS0004,U369,V070,S008,2024-11-28 19:53:00,2024-11-28 21:42:00,109,39.19,Weekday,Emergency
4,CS0005,U185,V298,S037,2024-11-27 13:09:00,2024-11-27 14:28:00,79,61.71,Weekend,Occasional
...,...,...,...,...,...,...,...,...,...,...
3495,CS3496,U357,V387,S094,2024-11-12 19:19:00,2024-11-12 21:00:00,101,72.29,Weekday,Regular
3496,CS3497,U328,V308,S076,2024-11-20 18:50:00,2024-11-20 20:04:00,74,57.34,Weekend,Emergency
3497,CS3498,U358,V110,S011,2024-11-05 13:46:00,2024-11-05 15:33:00,107,29.87,Weekend,Regular
3498,CS3499,U396,V475,S092,2024-11-25 22:08:00,2024-11-25 23:20:00,72,30.67,Weekday,Emergency


In [4]:
# Count nulls per column once and reuse the counts for the overall verdict.
# "Clean" here only means "no missing values"; no other validation is done.
missing_per_column = df.isnull().sum()

print("Checking for any missing values:")
print(missing_per_column)
print(f"\nDataset is clean: {missing_per_column.sum() == 0}")

Checking for any missing values:
session_id      0
user_id         0
vehicle_id      0
station_id      0
start_time      0
end_time        0
duration_min    0
energy_kWh      0
session_day     0
session_type    0
dtype: int64

Dataset is clean: True


In [5]:
import pandas as pd

# Reload the sessions with both timestamp columns parsed as datetimes so the
# .dt accessors used by the feature-engineering cells work on start_time.
# read_csv already hands back a brand-new frame, so no trailing .copy() is
# needed to protect any other object.
df = pd.read_csv("ev_charging_sessions.csv", parse_dates=["start_time", "end_time"])


In [6]:
# Derive time-based features from the parsed start timestamp plus an average
# charging-rate feature. assign() returns a new frame, so the cell can be
# re-run safely and no explicit df.copy() is required.
df = df.assign(
    start_hour=df["start_time"].dt.hour,
    day_of_week=df["start_time"].dt.dayofweek,  # Monday=0
    # Derived feature: energy delivered per minute of charging. A zero-minute
    # session would divide by zero and produce inf, so a zero denominator is
    # masked to NaN and the rate is reported as missing instead.
    energy_per_min=df["energy_kWh"]
    / df["duration_min"].where(df["duration_min"] != 0),
)


In [7]:
# One-hot encode the two categorical columns. drop_first=True omits the first
# level of each column, which leaves the indicator columns the feature list
# relies on (session_type_Occasional, session_type_Regular,
# session_day_Weekend). get_dummies builds a new frame, so no trailing
# .copy() is needed.
df_encoded = pd.get_dummies(
    df,
    columns=["session_type", "session_day"],
    drop_first=True,
)


In [8]:
# NOTE(review): this cell repeats the feature-engineering step that already
# ran before df_encoded was built. It only refreshes the columns on df and
# does not change df_encoded, so it has no effect on the model inputs.
# Consider removing it.
df = df.assign(
    start_hour=df["start_time"].dt.hour,
    day_of_week=df["start_time"].dt.dayofweek,  # Monday=0
    # Derived feature: energy delivered per minute of charging. A zero-minute
    # session would divide by zero and produce inf, so a zero denominator is
    # masked to NaN and the rate is reported as missing instead.
    energy_per_min=df["energy_kWh"]
    / df["duration_min"].where(df["duration_min"] != 0),
)


In [9]:
# Model inputs: session length and start-time features, followed by the
# one-hot indicator columns present in df_encoded.
# NOTE(review): in the printed sample rows session_day does not always agree
# with the calendar day of start_time (e.g. 2024-11-26, a Tuesday, is labelled
# "Weekend"), so day_of_week and session_day_Weekend may carry conflicting
# signals. Confirm which one is authoritative.
numeric_features = ["duration_min", "start_hour", "day_of_week"]
indicator_features = [
    "session_type_Occasional",
    "session_type_Regular",
    "session_day_Weekend",
]
features = numeric_features + indicator_features

# The target is the energy delivered per session. energy_per_min is computed
# from that target, so it must stay out of the feature list to avoid leakage.
X = df_encoded.loc[:, features].copy()
y = df_encoded.loc[:, "energy_kWh"].copy()


In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the sessions for evaluation; the fixed random_state keeps
# the split reproducible. train_test_split returns new train/test objects and
# leaves its inputs alone, so X and y are passed directly instead of as
# throwaway copies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [11]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 300 trees with no depth cap and the smallest possible
# split/leaf sizes, i.e. fully grown trees. random_state pins the bootstrap
# and feature sampling; n_jobs=-1 asks scikit-learn to use all available cores.
rf = RandomForestRegressor(
    n_estimators=300,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
    n_jobs=-1,
)

# The training data is passed directly; defensive copies are unnecessary here.
# Leaving the call as the last expression makes the fitted estimator the
# cell's output.
rf.fit(X_train, y_train)


0,1,2
,n_estimators,300
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the held-out sessions. Prediction and the metric functions only read
# their inputs, so X_test / y_test are passed as-is rather than being copied
# for every call.
y_pred = rf.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error, which puts it back in
# the same units as the target (energy_kWh).
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("RMSE:", rmse)
print("R²:", r2)


MAE: 11.298719356575964
RMSE: 14.303424933158714
R²: 0.48348424047369776
