In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import datetime

In [None]:
# Read the EV population history CSV into `df`. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv(
    filepath_or_buffer="Electric_Vehicle_Population_Size_History_By_County_.csv",
)

In [None]:
# Clean the two columns the rest of the notebook relies on.

# Dates that cannot be parsed become NaT instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# The totals may be stored as text with thousands separators (e.g. "1,234"):
# strip the commas, then convert to numbers. errors='coerce' turns any entry
# that is still not numeric into NaN, so it can be dropped like any other
# missing value, rather than aborting the cell with a ValueError. This mirrors
# how unparseable dates are handled above. The trailing astype(float) keeps the
# column float64 even when every value is a whole number.
ev_total_no_commas = df['Electric Vehicle (EV) Total'].replace(',', '', regex=True)
df['Electric Vehicle (EV) Total'] = pd.to_numeric(ev_total_no_commas, errors='coerce').astype(float)

In [None]:
# Keep only the rows where both the date and the EV total are present.
df = df.dropna(
    axis='index',
    how='any',
    subset=['Date', 'Electric Vehicle (EV) Total'],
)

In [None]:
# Collapse the data to one row per calendar year: derive the year from Date,
# sum the EV totals within each year, and expose the sum as `ev_count`.
# NOTE(review): every row sharing a year is added together — confirm this is
# the intended yearly figure if Date holds several snapshots per year.
df = (
    df.assign(year=df['Date'].dt.year)
    .groupby('year')['Electric Vehicle (EV) Total']
    .sum()
    .reset_index()
    .rename(columns={'Electric Vehicle (EV) Total': 'ev_count'})
)

In [None]:
# Feature matrix (year, kept 2-D for scikit-learn) and target (yearly EV
# count), followed by a seeded random split that holds out 20% of the rows.
# NOTE(review): the split is random, not chronological — confirm that this is
# the evaluation wanted for a model used to extrapolate forward in time.
X, y = df[['year']], df['ev_count']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression of EV count on year using the training rows only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Build the forecast horizon 2025-2030 (the range's upper bound, 2031, is
# exclusive) as a one-column frame named like the training feature, then
# ask the fitted model for its predictions.
future_years = pd.DataFrame({'year': [*range(2025, 2031)]})
future_preds = model.predict(X=future_years)

In [None]:
# Draw the yearly totals and the model's forecast on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['year'], df['ev_count'], marker='o', label="Historical EV Count")
ax.plot(
    future_years['year'],
    future_preds,
    marker='x',
    linestyle='--',
    color='red',
    label="Predicted EV Count",
)
ax.set_xlabel("Year")
ax.set_ylabel("EV Count")
ax.set_title("EV Adoption Forecast")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Score the model on the held-out test rows and report the R² value.
y_pred = model.predict(X=X_test)
test_r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R² Score:", test_r2)