In [36]:
# Import only the most needed libraries
import numpy as np
import pandas as pd  # For handling data
from sklearn.model_selection import train_test_split  # To split data
from sklearn.ensemble import RandomForestClassifier  # Simple, accurate model
from sklearn.metrics import f1_score  # To check how good our model is

In [37]:
# Step 1: Read the EV population CSV into a DataFrame.
# The relative path is resolved against the notebook's working directory.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which usually
# means the CSV was written with its index -- consider read_csv(..., index_col=0).
csv_path = "Electric_Vehicle_Population_Data.csv"
data = pd.read_csv(csv_path)

In [38]:
# Preview the first five rows to sanity-check the load
data.head(5)

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
0,5YJ3E1EB5K,Yakima,Yakima,WA,98901.0,2019.0,TESLA,MODEL 3,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,220.0,0.0,15.0,347724772.0,POINT (-120.50729 46.60464),PACIFICORP,53077000000.0
1,1C4RJXU67R,Kitsap,Port Orchard,WA,98367.0,2024.0,JEEP,WRANGLER,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,21.0,0.0,35.0,272165288.0,POINT (-122.68471 47.50524),PUGET SOUND ENERGY INC,53035090000.0
2,KNDCD3LD0N,Snohomish,Lynnwood,WA,98036.0,2022.0,KIA,NIRO,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,26.0,0.0,32.0,203182584.0,POINT (-122.29245 47.82557),PUGET SOUND ENERGY INC,53061050000.0
3,5UXKT0C37H,King,Auburn,WA,98001.0,2017.0,BMW,X5,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,14.0,0.0,30.0,349010287.0,POINT (-122.23035 47.3074),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),53033030000.0
4,1N4AZ0CP1D,Skagit,Mount Vernon,,98273.0,2013.0,NISSAN,LEAF,Battery Electric Vehicle (BEV),Clean Alternative Fuel Vehicle Eligible,75.0,0.0,40.0,131684150.0,POINT (-122.33891 48.41644),PUGET SOUND ENERGY INC,53057950000.0


In [39]:
# Preview the last five rows (also reveals the total row count via the index)
data.tail(5)

Unnamed: 0,VIN (1-10),County,City,State,Postal Code,Model Year,Make,Model,Electric Vehicle Type,Clean Alternative Fuel Vehicle (CAFV) Eligibility,Electric Range,Base MSRP,Legislative District,DOL Vehicle ID,Vehicle Location,Electric Utility,2020 Census Tract
257630,2C4RC1L73L,Snohomish,Mukilteo,WA,98275.0,2020.0,CHRYSLER,PACIFICA,Plug-in Hybrid Electric Vehicle (PHEV),Clean Alternative Fuel Vehicle Eligible,32.0,0.0,21.0,161576068.0,POINT (-122.29196 47.89908),PUGET SOUND ENERGY INC,53061040000.0
257631,7SAYGDEE8N,King,Kirkland,WA,98033.0,2022.0,TESLA,MODEL Y,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,48.0,194978738.0,POINT (-122.2066 47.67887),PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA),53033020000.0
257632,WBAJB1C53K,Clark,Vancouver,WA,98683.0,2019.0,BMW,530E,Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,15.0,55700.0,18.0,278137940.0,POINT (-122.49212 45.60365),BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF C...,53011040000.0
257633,JTDKARFP9K,Whatcom,Bellingham,WA,98229.0,2019.0,TOYOTA,PRIUS PRIME (PHEV),Plug-in Hybrid Electric Vehicle (PHEV),Not eligible due to low battery range,25.0,0.0,40.0,177812232.0,POINT (-122.45486 48.7449),PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM CO...,53073000000.0
257634,7YAKRDDC1S,King,Seattle,WA,98121.0,2025.0,HYUNDAI,IONIQ 5,Battery Electric Vehicle (BEV),Eligibility unknown as battery range has not b...,0.0,0.0,43.0,280545338.0,POINT (-122.34468 47.61578),CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA),53033010000.0


In [40]:
# Step 2: Discard every row that has a missing value in ANY column.
# NOTE(review): this also removes rows whose only gap is in a column the
# model never reads (e.g. the blank State in preview row 4) -- consider
# dropna(subset=[...]) limited to the modelling columns.
data = data.dropna(axis="index", how="any")

In [29]:
# Step 3: Add a binary target column for CAFV eligibility (1 = Yes, 0 = No).
# The match is deliberately case-sensitive: the negative label
# "Not eligible due to low battery range" contains a lowercase "eligible"
# and must map to 0; "Eligibility unknown ..." does not contain the exact
# substring "Eligible" either, so it also maps to 0.
# A vectorized substring test replaces the per-row lambda, and assign()
# returns a new frame instead of writing a column into the frame produced by
# dropna(), which avoids SettingWithCopyWarning and keeps the cell re-runnable.
cafv_labels = data['Clean Alternative Fuel Vehicle (CAFV) Eligibility'].astype(str)
data = data.assign(
    CAFV_Eligible=cafv_labels.str.contains('Eligible', case=True, regex=False).astype('int64')
)

In [30]:
# Step 4: Choose the model inputs and the prediction target.
# Three columns serve as inputs; the target is the 0/1 CAFV_Eligible column.
features = [
    'Model Year',
    'Electric Vehicle Type',
    'Electric Range',
]
X = data.loc[:, features]  # feature matrix
y = data.loc[:, 'CAFV_Eligible']  # target vector

In [31]:
# Step 5: One-hot encode the text column (Electric Vehicle Type).
# drop_first=True drops the first category in sorted order. Assuming only the
# two types seen in the preview are present, the BEV column is dropped and a
# single indicator column remains:
#   'Plug-in Hybrid Electric Vehicle (PHEV)' -> 1
#   'Battery Electric Vehicle (BEV)'         -> 0
# dtype=int keeps the indicator numeric (0/1) rather than boolean.
# Encoding from data[features] (not from X itself) keeps this cell re-runnable:
# re-encoding an already-encoded X would fail because the text column is gone.
X = pd.get_dummies(
    data[features],
    columns=['Electric Vehicle Type'],
    drop_first=True,
    dtype=int,
)

In [32]:
# Step 6: Hold out 20% of the rows for testing; the remaining 80% train the model.
# random_state is fixed so the same split is produced on every run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [33]:

# Step 7: Build a random forest (50 trees, fixed seed) and fit it
# on the training portion of the data.
rf_params = {"n_estimators": 50, "random_state": 42}
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)

In [34]:
# Step 8: Predict on the held-out rows and report the F1-score (higher is better).
# NOTE(review): a perfect 1.00 score suggests the target may be directly
# derivable from the inputs (e.g. Electric Range) -- confirm this is not
# label leakage before trusting the result.
y_pred = model.predict(X_test)
score = f1_score(y_true=y_test, y_pred=y_pred)
print(f"CAFV Eligibility F1-Score: {score:.2f}")

CAFV Eligibility F1-Score: 1.00
