In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
data = pd.read_csv("/content/sample_data/Electric Vehicle Trip Energy Consumption Data.csv")

In [4]:
data.head()

Unnamed: 0,Trip Energy Consumption,Vehicle ID,Trip Distance,Time of Day,Day of the Week,Longitude,Latitude,Speed,Current,Total Voltage,Maximum Cell Temperature of Battery,Minimum Cell Temperature of Battery,Trip Time Length
0,0.672,1,6,10.333333,4,121.497948,31.281574,246.0,2.583348,308.283333,31.0,30.833333,13
1,0.896,1,6,16.0,4,121.587564,31.25607,393.714286,2.985729,304.485714,29.0,28.0,18
2,1.344,1,7,16.090909,2,121.576968,31.262034,192.0,2.35456,308.463636,31.272727,30.0,21
3,1.344,1,8,19.0,5,121.549709,31.257796,369.24,1.540015,308.06,30.0,30.0,16
4,0.896,1,6,14.166667,6,121.58228,31.21503,413.450617,9.659892,304.473457,28.0,28.0,129


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10151 entries, 0 to 10150
Data columns (total 13 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Trip Energy Consumption              10151 non-null  float64
 1   Vehicle ID                           10151 non-null  int64  
 2   Trip Distance                        10151 non-null  int64  
 3   Time of Day                          10151 non-null  float64
 4   Day of the Week                      10151 non-null  int64  
 5   Longitude                            10151 non-null  float64
 6   Latitude                             10151 non-null  float64
 7   Speed                                10151 non-null  float64
 8   Current                              10151 non-null  float64
 9   Total Voltage                        10151 non-null  float64
 10  Maximum Cell Temperature of Battery  10151 non-null  float64
 11  Minimum Cell Temperature of 

In [6]:

# ---- Drop Faltu Columns ----
df = data.drop(columns=[
    "Vehicle ID",
    "Time of Day",
    "Day of the Week",
    "Longitude",
    "Latitude"
], errors="ignore")

# ---- Feature Engineering ----

# 1. Average Consumption per km
df["Energy_per_km"] = df["Trip Energy Consumption"] / df["Trip Distance"].replace(0, 1)

# 2. Average Speed
df["Avg_Speed"] = df["Trip Distance"] / df["Trip Time Length"].replace(0, 1)

# 3. Traffic Condition (categorical)
def traffic_condition(speed):
    if speed > 50:
        return "Light"
    elif speed > 30:
        return "Medium"
    else:
        return "Heavy"

df["Traffic_Condition"] = df["Avg_Speed"].apply(traffic_condition)

# 4. Battery Temperature Delta
df["Battery_Delta_T"] = (
    df["Maximum Cell Temperature of Battery"] - df["Minimum Cell Temperature of Battery"]
)

# 5. Power Draw (Watts)
df["Power_Draw"] = df["Current"] * df["Total Voltage"]

# 6. Efficiency (km per unit energy)
df["Efficiency"] = df["Trip Distance"] / df["Trip Energy Consumption"].replace(0, 1)

# ---- Save Cleaned Dataset ----
df.to_csv("processed_ev_data.csv", index=False)

print("✅ Dataset processed and saved as 'processed_ev_data.csv'")
print("Columns now available:", df.columns.tolist())


✅ Dataset processed and saved as 'processed_ev_data.csv'
Columns now available: ['Trip Energy Consumption', 'Trip Distance', 'Speed', 'Current', 'Total Voltage', 'Maximum Cell Temperature of Battery', 'Minimum Cell Temperature of Battery', 'Trip Time Length', 'Energy_per_km', 'Avg_Speed', 'Traffic_Condition', 'Battery_Delta_T', 'Power_Draw', 'Efficiency']


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Load processed dataset
df = pd.read_csv("/content/processed_ev_data.csv")

# -------- Target & Features --------
target = "Energy_per_km"   # ya "Trip Energy Consumption"
X = df.drop(columns=[target])
y = df[target]

# Identify categorical & numerical features
categorical = ["Traffic_Condition", "Mode"] if "Mode" in df.columns else ["Traffic_Condition"]
numerical = [col for col in X.columns if col not in categorical]

# Preprocessing
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical)
])

# Model
models = {
    "RandomForest": RandomForestRegressor(n_estimators=200, random_state=42),
    "GradientBoosting": GradientBoostingRegressor(n_estimators=200, random_state=42)
}

# Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

results = {}

for name, model in models.items():
    pipe = Pipeline([
        ("preprocess", preprocessor),
        ("model", model)
    ])
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)

    results[name] = {
        "R2": r2_score(y_test, y_pred),
        "MAE": mean_absolute_error(y_test, y_pred),
        "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)) # Removed squared=False and added np.sqrt
    }

# Show Results
for model, metrics in results.items():
    print(f"\n📊 {model} Performance:")
    for m, v in metrics.items():
        print(f"{m}: {v:.4f}")


📊 RandomForest Performance:
R2: 0.9313
MAE: 0.0006
RMSE: 0.0193

📊 GradientBoosting Performance:
R2: 0.9383
MAE: 0.0008
RMSE: 0.0183


In [9]:
import joblib
joblib.dump(pipe, "ev_energy_model.pkl")


['ev_energy_model.pkl']