In [None]:
import pandas as pd

# Load the electric-vehicle population dataset from the working directory.
DATA_FILE = 'Electric_Vehicle_Population_Data.csv'
df = pd.read_csv(DATA_FILE)

# Preview the first five rows to confirm the file parsed as expected.
df.head(5)

In [None]:
# Column dtypes and non-null counts (printed to stdout by pandas).
df.info()

# Summary statistics for every column, numeric and non-numeric alike;
# as the cell's last expression it is rendered as the cell output.
summary_stats = df.describe(include='all')
summary_stats

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory charts. Each figure is built on an explicit Axes (rather than
# the implicit pyplot "current figure") so titles and labels always land on
# the chart they belong to.

# Distribution of Electric Vehicle Types
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Electric Vehicle Type', data=df, ax=ax)
ax.set_title('Distribution of Electric Vehicle Types')
ax.set_ylabel('Number of vehicles')
plt.show()

# Top 10 Makes
fig, ax = plt.subplots(figsize=(10, 6))
make_counts = df['Make'].value_counts().head(10)  # value_counts sorts descending
make_counts.plot(kind='bar', ax=ax)
ax.set_title('Top 10 Vehicle Makes')
ax.set_xlabel('Make')
ax.set_ylabel('Number of vehicles')
plt.show()

# Distribution of Model Years
fig, ax = plt.subplots(figsize=(10, 6))
# discrete=True gives every year its own unit-width bar centred on the year.
# A fixed bins=30 splits the range into equal-width bins whose edges do not
# line up with whole years, which produces empty or doubled-up bars.
# (assumes 'Model Year' holds whole-number years -- confirm against the data)
sns.histplot(df['Model Year'], kde=False, discrete=True, ax=ax)
ax.set_title('Distribution of Model Years')
ax.set_ylabel('Number of vehicles')
plt.show()

In [None]:
import geopandas as gpd
from shapely import wkt

# Convert the 'Vehicle Location' column (WKT text) to shapely geometries.
# - Rows without a location are dropped first: wkt.loads raises on NaN.
# - The work happens on a copy, so `df` keeps its original column and later
#   cells that parse it themselves still see the raw values.
# - Values that are not strings (e.g. already-parsed geometries) are passed
#   through unchanged, so re-running this cell does not fail.
located = df.dropna(subset=['Vehicle Location']).copy()
located['Vehicle Location'] = located['Vehicle Location'].apply(
    lambda loc: wkt.loads(loc) if isinstance(loc, str) else loc
)
gdf = gpd.GeoDataFrame(located, geometry='Vehicle Location')

# Plot the data
# NOTE: gpd.datasets was deprecated in geopandas 0.14 and removed in 1.0.
# On newer versions, point gpd.read_file at a local copy of the Natural Earth
# "admin 0 countries" layer instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
fig, ax = plt.subplots(1, 1)
# Pick the US outline by ISO code rather than by display name: the bundled
# data names the country 'United States of America' in recent releases, so a
# filter on 'United States' selects nothing and the basemap comes out empty.
world[world.iso_a3 == 'USA'].plot(ax=ax, color='white', edgecolor='black')
gdf.plot(ax=ax, color='red')
plt.show()

In [None]:
# Peek at the first few 'Vehicle Location' values to see what the column holds.
location_preview = df['Vehicle Location'].head()
location_preview

In [None]:
# Remove rows with missing 'Vehicle Location' data.
# .copy() makes the filtered frame independent of the original, so the column
# assignment below cannot trigger pandas' "setting on a copy" warning.
df = df.dropna(subset=['Vehicle Location']).copy()

# Convert the 'Vehicle Location' column from WKT text to shapely geometries.
# Only strings are parsed; values that are already geometries (from a previous
# run of this or an earlier cell) are kept as-is, so the cell can be re-run.
# Parse errors are deliberately NOT caught: printing the error and carrying on
# would leave unparsed text in the column and only fail later, less clearly,
# when the GeoDataFrame is built.
df['Vehicle Location'] = df['Vehicle Location'].apply(
    lambda loc: wkt.loads(loc) if isinstance(loc, str) else loc
)

df['Vehicle Location'].head()

In [None]:
# Wrap the frame as a GeoDataFrame using 'Vehicle Location' as its geometry.
# The column must already hold shapely geometries at this point.
gdf = gpd.GeoDataFrame(df, geometry='Vehicle Location')

# Plot the data
# NOTE: gpd.datasets was deprecated in geopandas 0.14 and removed in 1.0.
# On newer versions, point gpd.read_file at a local copy of the Natural Earth
# "admin 0 countries" layer instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
fig, ax = plt.subplots(1, 1)
# Pick the US outline by ISO code rather than by display name: the bundled
# data names the country 'United States of America' in recent releases, so a
# filter on 'United States' selects nothing and the basemap comes out empty.
world[world.iso_a3 == 'USA'].plot(ax=ax, color='white', edgecolor='black')
gdf.plot(ax=ax, color='red')
ax.set_title('Electric Vehicle Locations')
plt.show()

In [None]:
def _five_most_common(models):
    """Return the five most frequent values of `models` with their counts."""
    return models.value_counts().head(5)


# For every city, count each model and keep the five most common ones.
# The result is a Series indexed by (City, Model).
models_by_city = df.groupby('City')['Model']
top_models_per_city = models_by_city.apply(_five_most_common)
top_models_per_city

In [None]:
# Remove rows with missing 'City' data.
# (groupby already skips NaN keys by default; the drop keeps `df` itself free
# of city-less rows for the cells that follow.)
df = df.dropna(subset=['City'])

# Top 5 most popular electric vehicle models in each city, as a Series
# indexed by (City, Model).
# Errors are deliberately NOT caught here: with a print-and-continue handler,
# a failure would leave `top_models_per_city` holding the result of an earlier
# cell (or undefined), and the display below would show stale data.
models_by_city = df.groupby('City')['Model']
top_models_per_city = models_by_city.apply(lambda x: x.value_counts().head(5))

top_models_per_city

In [None]:
# Tidy table of the five most common models per city: one row per
# (City, Model) pair with its vehicle count in 'Counts'.
model_counts = df.groupby(['City', 'Model']).size().reset_index(name='Counts')

# Take the top 5 per city without groupby.apply: applying a function to a
# frame that still contains the grouping column is deprecated in pandas 2.2,
# and once grouping columns are excluded the 'City' column would be lost after
# reset_index(drop=True).
# Ordering matches nlargest(5, 'Counts') per city:
#   1. stable sort by Counts (descending) -- ties keep their Model order;
#   2. head(5) keeps the first five rows of each city in that order;
#   3. stable sort by City regroups the rows city by city.
top_models_per_city = (
    model_counts
    .sort_values('Counts', ascending=False, kind='stable')
    .groupby('City', sort=False)
    .head(5)
    .sort_values('City', kind='stable')
    .reset_index(drop=True)
)
top_models_per_city