## Author: Aditya Sundar
## Email: aditya.1094@gmail.com

## <font color='blue'>Electric vehicles (EVs) are predicted to be an integral part of modern, sustainable economies. Replacing combustion engines with batteries reduces our carbon footprint and improves air quality. However, the current costs associated with researching and manufacturing batteries make EVs more expensive. As such, it is crucial for buyers to be informed about the pros and cons of purchasing EVs. This project is intended to help vehicle buyers make informed selections of EVs by providing </font><font color='red'>1) interactive dashboards to explore the performance metrics of commercial EVs </font><font color='blue'>and</font> <font color='red'>2) models to estimate vehicle costs.</font>
    
### Note: There is a heading before each cell or group of cells. Headings in black font denote code for data retrieval and cleaning. Headings in <font color='red'>red</font> font denote visualization code that can be run directly using the processed data.

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import time
import numpy as np
import requests
from bs4 import BeautifulSoup
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
import unicodedata
import re
import seaborn as sns
import inspect
import matplotlib.pyplot as plt
import os
    
from raceplotly.plots import barplot
    
from DataCleaning import *
from BarPlot import *
from DataCleaning import *

# Web scraping to get data for electric vehicle (EV) sales in the USA
 Data includes various types of electric vehicles, manufactured by several automobile companies, showing purchasing trends from 2000 to 2022.

In [3]:
# Scrape the Atlas EV Hub page for links to the per-state registration CSVs.
url = 'https://www.atlasevhub.com/materials/state-ev-registration-data/#data'
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
reqs = requests.get(url, timeout=30)
reqs.raise_for_status()
soup = BeautifulSoup(reqs.text, 'html.parser')

# href=True skips anchors that have no href attribute; for those,
# link.get('href') returns None and the .split() calls below would fail.
links = [link.get('href') for link in soup.find_all('a', href=True)]
# Keep links whose second '/'-separated component is 'public' ...
data = [i for i in links if len(i.split('/')) > 1 and i.split('/')[1] == 'public']
# ... and whose extension is 'csv'.
data = [i for i in data if i.split('.')[-1] == 'csv']
# State code: text before the first '_' of the fourth path component, upper-cased.
state_name = [i.split('/')[3].split('_')[0].upper() for i in data]

print(state_name)

# Download every CSV into a DataFrame; dt[k] corresponds to state_name[k].
dt = [pd.read_csv('https://www.atlasevhub.com/' + i) for i in data]

['CA', 'CO', 'CT', 'FL', 'MT', 'MI', 'MN', 'NJ', 'NY', 'OR', 'TN', 'TX', 'VT', 'VA', 'WA', 'WI']


In [None]:
# Normalise every downloaded table in place (dt[k] is replaced by its cleaned version).
for idx, frame in enumerate(dt):
    # Drop columns containing missing values, then unify the date column name.
    frame = frame.dropna(axis=1).rename(columns={'Registration Valid Date': 'Date'})
    if 'Vehicle Name' in frame.columns:
        # The first whitespace-separated token of the vehicle name is used as the make.
        frame['Make'] = frame['Vehicle Name'].str.split().str[0]
        frame = frame.rename(columns={'Vehicle Name': 'Model'})
    dt[idx] = frame

## Save data as a JSON file

In [None]:
# Write one JSON file per state, named <STATE>_EVdata.json.
out_dir = 'data/USA/'
# to_json does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)
for i, name in enumerate(state_name):
    dt[i].to_json(out_dir + name + '_EVdata.json', orient='records')

## Visualize data for states (cumulative number of EV sales)

In [3]:
# Collect the state codes from the saved '<STATE>_EVdata.json' files.
# Any other entry in the folder (hidden files, checkpoints, ...) is skipped:
# the file name is rebuilt later as <STATE>_EVdata.json, so a stray entry
# would otherwise turn into a bogus state code and a failed read.
states = [
    i.split('_')[0]
    for i in os.listdir('../dataUSA/USA')
    if i.endswith('_EVdata.json')
]

In [14]:
# Build one long table holding the total_sales() result of every state,
# tagged with the state code, for the choropleth below.
frames = []
for i in states:
    df = pd.read_json('../dataUSA/USA/' + i + '_EVdata.json', orient='records')
    # Files without a 'Date' column are skipped.
    if 'Date' in df.columns:
        p = patch_year(i, df)
        t = p.total_sales()
        t['State'] = p.state
        frames.append(t)
# Concatenate once instead of growing the table inside the loop, which would
# re-copy all earlier rows on every iteration. pd.concat rejects an empty
# list, so fall back to an empty frame when nothing was loaded.
data_map = pd.concat(frames) if frames else pd.DataFrame()

In [25]:
# Animated US choropleth of data_map: states coloured by 'Count',
# with one animation frame per 'Date' value.
choropleth_options = {
    "locations": 'State',
    "locationmode": "USA-states",
    "scope": "usa",
    "color": 'Count',
    "color_continuous_scale": "Viridis_r",
    "animation_frame": 'Date',
}
fig = px.choropleth(data_map, **choropleth_options)

# Title, map scope, figure size and margins.
layout_options = {
    "title_text": "Cumulative Electric Vehicle sales in the United States",
    "title_xanchor": "center",
    "title_font": {"size": 20},
    "title_x": 0.5,
    "title_y": 0.95,
    "geo": {"scope": 'usa'},
    "width": 700,
    "height": 500,
    "margin": {"t": 100, "b": 0, "r": 100, "l": 0},
}
fig.update_layout(**layout_options)

# Label the animation slider's current value with a "Year=" prefix.
slider_label = {"currentvalue": {"prefix": "Year="}}
fig.update_layout(sliders=[slider_label])
fig.show()

## Visualize data for states (top 5 EV makes per year)

In [4]:
# State selector used by the interactive plot.
# Default to 'TN' when it is available; otherwise fall back to the first
# state (or to no selection), because Dropdown rejects a value that is not
# one of its options.
if 'TN' in states:
    default_state = 'TN'
elif states:
    default_state = states[0]
else:
    default_state = None

dropdown_s = widgets.Dropdown(
    options=states,
    value=default_state,
    description='State:',
    disabled=False,
)

In [5]:
def plot_func(s):
    """Return the animated bar-chart figure for the state code ``s``.

    The data comes from ``bp(s).top_five()`` and is plotted with raceplotly's
    ``barplot`` using its 'Make', 'Count' and 'Year' columns.
    """
    top_makes = bp(s).top_five()
    race = barplot(
        top_makes,
        item_column='Make',
        value_column='Count',
        time_column='Year',
    )
    figure = race.plot(
        item_label='Car Make',
        value_label='EVs sold',
        frame_duration=800,
    )
    # Label the animation slider's current value with a "Year=" prefix.
    slider_label = {"currentvalue": {"prefix": "Year="}}
    figure.update_layout(sliders=[slider_label])
    return figure


# Redraw the chart whenever a different state is picked in the dropdown.
widgets.interact(plot_func, s=dropdown_s);

interactive(children=(Dropdown(description='State:', index=2, options=('FL', 'MN', 'TN', 'NJ', 'CA', 'NY', 'CO…