In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Load dataset
df = pd.read_csv("3ae033f50fa345051652.csv")

# Clean column names
df.columns = df.columns.str.strip()

# Convert 'Date' into datetime
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Month'] = df['Date'].dt.month
df['Year'] = df['Date'].dt.year

# Handle non-numeric columns
df['Vehicle Primary Use'] = LabelEncoder().fit_transform(df['Vehicle Primary Use'].astype(str))
df['Vehicle'] = LabelEncoder().fit_transform(df['Vehicle'].astype(str))

# Convert Non-Electric Vehicle Total to numeric (remove commas)
df['Non-Electric Vehicle Total'] = df['Non-Electric Vehicle Total'].astype(str).str.replace(',', '')
df['Non-Electric Vehicle Total'] = pd.to_numeric(df['Non-Electric Vehicle Total'], errors='coerce')

# Convert Target column to numeric
df['Electric Vehicle (EV) Total'] = pd.to_numeric(df['Electric Vehicle (EV) Total'], errors='coerce')

# Drop missing values
df.dropna(subset=['Electric Vehicle (EV) Total', 'Non-Electric Vehicle Total', 'Month', 'Year'], inplace=True)

# Features and Target
X = df[['Vehicle', 'Vehicle Primary Use', 'Non-Electric Vehicle Total', 'Month', 'Year']]
y = df['Electric Vehicle (EV) Total']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluation
print("R² Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

# Plot Actual vs Predicted
plt.figure(figsize=(10,5))
plt.plot(y_test.values, label='Actual EV Count', marker='o')
plt.plot(y_pred, label='Predicted EV Count', marker='x')
plt.title("EV Count: Actual vs Predicted")
plt.xlabel("Sample")
plt.ylabel("EV Total")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


  df['Date'] = pd.to_datetime(df['Date'], errors='coerce')


KeyError: 'Vehicle'