In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import re
import os

# Every file name below is resolved relative to this folder
data_path = "/content/drive/MyDrive/CS/215"
os.chdir(data_path)

# EV registrations: only the CSV columns at these positions are read.
# Later cells look up 'Model Year' and 'Vehicle Location' by name, so both
# must be among the selected positions.
dfEv = pd.read_csv(
    'Electric_Vehicle_Population_Data.csv',
    usecols=[1, 2, 5, 6, 14],
)

# Charging stations: keep just the opening date and the geometry
dfCh = gpd.read_file(
    'EV-Charging-Stations.geojson',
    columns=['open_date', 'geometry'],
)

# Preview the first few station rows
dfCh.head()


Unnamed: 0,open_date,geometry
0,2018-01-15,MULTIPOINT ((-122.8224 47.04401))
1,2010-03-01,MULTIPOINT ((-122.29623 47.44338))
2,2009-08-15,MULTIPOINT ((-122.93896 47.03542))
3,2010-04-15,MULTIPOINT ((-117.38893 47.67335))
4,2010-04-15,MULTIPOINT ((-117.42502 47.655))


In [None]:
# Charger dataset clean-up
# Each station geometry is stored as a MultiPoint; explode() splits it into
# plain Point rows so the x / y coordinates can be read directly.
dfCh = dfCh.explode(index_parts=False)

station_points = dfCh.geometry
opened_on = pd.to_datetime(dfCh['open_date'], errors='coerce')  # unparseable dates -> NaT

dfCh['longitude'] = station_points.x
dfCh['latitude'] = station_points.y
dfCh['year'] = opened_on.dt.year

# Keep only rows where the year and both coordinates are present
wanted = ['year', 'latitude', 'longitude']
dfCh_parsed = dfCh.dropna(subset=wanted)[wanted].copy()
# With the NaN rows gone, the year can be stored as an integer
dfCh_parsed['year'] = dfCh_parsed['year'].astype(int)

In [None]:
# Cleaning up the data from the EV dataset
# 'Model Year' is coerced to a number; unparseable values become NaN and the
# affected rows are removed by the dropna() below.
dfEv['year'] = pd.to_numeric(dfEv['Model Year'], errors='coerce')

# Parse coordinates out of the WKT-style text, e.g. "POINT (-122.3 47.6)".
# The first captured number is stored as longitude, the second as latitude;
# rows whose text does not match the pattern end up as NaN.
wkt = dfEv['Vehicle Location'].astype(str)
coords = wkt.str.extract(r'POINT \((-?\d+\.\d+)\s+(-?\d+\.\d+)\)')

dfEv['long'] = pd.to_numeric(coords[0], errors='coerce')
dfEv['lat'] = pd.to_numeric(coords[1], errors='coerce')

# Columns kept in the cleaned frame (lowercase names)
cols = ['year', 'make', 'city', 'county', 'lat', 'long']

# This cell reads the raw CSV columns by capitalised header ('Model Year',
# 'Vehicle Location'), but the selection above uses lowercase names, so
# selecting `cols` straight from dfEv raises KeyError.
# NOTE(review): assumes the remaining headers are 'Make', 'City' and 'County'
# -- confirm against the CSV. rename() ignores labels that are absent, so this
# is a no-op if the columns are already lowercase.
# The rename is applied to a derived frame; dfEv keeps its original headers.
lowercase_names = {'Make': 'make', 'City': 'city', 'County': 'county'}
dfEv_parsed = (
    dfEv.rename(columns=lowercase_names)[cols]
    .dropna()
    .reset_index(drop=True)
)
# No NaN years remain after dropna(), so the column can be cast to int
dfEv_parsed['year'] = dfEv_parsed['year'].astype(int)

In [None]:
# Multi line chart comparing the year-over-year growth of EVs and chargers
all_years = pd.DataFrame({'year': range(2015, 2026)})


def _yearly_growth(records, years, count_name):
    """Count rows per year and derive the year-over-year percentage change.

    Parameters
    ----------
    records : DataFrame with a 'year' column (one row per item).
    years : DataFrame with a single 'year' column listing every year to report.
    count_name : name given to the per-year count column.

    Returns
    -------
    (counts, yearly) : the raw per-year counts, and the counts joined onto
        `years` with an added 'Growth_Rate' column (in percent).
    """
    counts = records.groupby('year').size().reset_index(name=count_name)
    # Left-join onto the full year range; years without any rows count as 0
    yearly = years.merge(counts, on='year', how='left').fillna(0)
    # % growth rate: (current - previous) / previous
    yearly['Growth_Rate'] = yearly[count_name].pct_change() * 100
    # A zero count in the previous year makes the ratio infinite; turn those
    # into NaN so the chart stays clean
    yearly = yearly.replace([np.inf, -np.inf], np.nan)
    return counts, yearly


def _growth_trace(yearly, count_name, trace_name, hover_label, line_style):
    """Build the line+marker trace for one growth-rate series."""
    hover_text = (
        '<b>Year: %{x}</b><br>'
        'Growth Rate: %{y:.1f}%<br>'
        + 'Actual ' + hover_label + ': %{customdata:,}'
        + '<extra></extra>'
    )
    return go.Scatter(
        x=yearly['year'],
        y=yearly['Growth_Rate'],
        name=trace_name,
        mode='lines+markers',
        line=line_style,
        customdata=yearly[count_name],  # raw count shown in the hover box
        hovertemplate=hover_text,
    )


# New EVs and new chargers per year, each with its growth rate
ev_counts, ev_data = _yearly_growth(dfEv_parsed, all_years, 'New_EVs')
ch_counts, ch_data = _yearly_growth(dfCh_parsed, all_years, 'New_Chargers')

# Create visualization
fig = go.Figure()

# Line 1: EV growth rate (solid blue)
fig.add_trace(_growth_trace(
    ev_data, 'New_EVs', 'EV Growth Rate (%)', 'EVs',
    dict(color='blue', width=2),
))

# Line 2: charger growth rate (dashed green)
fig.add_trace(_growth_trace(
    ch_data, 'New_Chargers', 'Charger Growth Rate (%)', 'Chargers',
    dict(color='green', width=2, dash='dash'),
))

# Layout: one tick per year, visible zero line, shared hover box per year
layout_options = dict(
    title='Growth Rate of EVs and Chargers by Year in WA',
    xaxis_title='Year',
    yaxis_title='Growth Rate (%)',
    xaxis=dict(tickmode='linear', dtick=1),
    yaxis=dict(zeroline=True, zerolinecolor='black'),
    template="plotly_white",
    hovermode="x unified",
)
fig.update_layout(**layout_options)

fig.show()