In [None]:
import pandas as pd
# Read the CSV that sits next to the notebook into a DataFrame and display it.
dataset = pd.read_csv("dataset.csv")
dataset

In [None]:
# Show the column labels of the loaded DataFrame.
dataset.columns

In [None]:
# Summary statistics of the DataFrame's columns. `describe` is a method, so it
# must be called; the bare attribute only displays the bound-method repr.
dataset.describe()

In [None]:
# Per-column dtype and non-null count summary. `info` is a method, so it must
# be called; the bare attribute only displays the bound-method repr.
dataset.info()

In [None]:
# Task 1: This is an open-ended problem. Perform exploratory data analysis (univariate and bivariate) on the dataset loaded above.


In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# ------------- Univariate Analysis -------------
# 1. Distribution of Model Year
# Histogram (30 bins) with a KDE overlay, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=dataset, x='Model Year', bins=30, kde=True, color='skyblue', ax=ax)
ax.set_title('Distribution of Model Year')
ax.set_xlabel('Model Year')
ax.set_ylabel('Count')
plt.show()

In [None]:
# 2. Distribution of Electric Vehicle Type
# One bar per vehicle type; tick labels are rotated 45 degrees.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Electric Vehicle Type', data=dataset, palette='Set2', ax=ax)
ax.set_title('Count of Electric Vehicle Types')
ax.set_xlabel('Electric Vehicle Type')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:
# 3. Distribution of Electric Range
# Histogram (30 bins) with a KDE overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=dataset, x='Electric Range', bins=30, kde=True, color='purple', ax=ax)
ax.set_title('Distribution of Electric Range')
ax.set_xlabel('Electric Range (miles)')
ax.set_ylabel('Count')
plt.show()

In [None]:
# ------------- Bivariate Analysis -------------
# 4. Electric Range vs. Model Year
# Scatter of range against model year, coloured by vehicle type.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='Model Year',
    y='Electric Range',
    hue='Electric Vehicle Type',
    palette='coolwarm',
    data=dataset,
    ax=ax,
)
ax.set_title('Electric Range vs. Model Year')
ax.set_xlabel('Model Year')
ax.set_ylabel('Electric Range (miles)')
ax.legend(title='Electric Vehicle Type')
plt.show()


In [None]:
# 5. Electric Range by Make
# One box per make; tick labels are turned vertical.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x='Make', y='Electric Range', data=dataset, palette='Set3', ax=ax)
ax.set_title('Electric Range by Vehicle Make')
ax.set_xlabel('Make')
ax.set_ylabel('Electric Range (miles)')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# 6. Base MSRP vs Electric Range
# Scatter of range against base MSRP, coloured by vehicle type.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    x='Base MSRP',
    y='Electric Range',
    hue='Electric Vehicle Type',
    palette='viridis',
    data=dataset,
    ax=ax,
)
ax.set_title('Base MSRP vs Electric Range')
ax.set_xlabel('Base MSRP (USD)')
ax.set_ylabel('Electric Range (miles)')
ax.legend(title='Electric Vehicle Type')
plt.show()


In [None]:
# 7. Count of Clean Alternative Fuel Vehicle Eligibility
# One bar per eligibility category; tick labels are rotated 45 degrees.
cafv_column = 'Clean Alternative Fuel Vehicle (CAFV) Eligibility'

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x=cafv_column, data=dataset, palette='Set1', ax=ax)
ax.set_title('Count of CAFV Eligibility')
ax.set_xlabel('CAFV Eligibility')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Missing Data Check
# Boolean mask of missing cells, reused for both the per-column totals and the heatmap.
missing_mask = dataset.isnull()
print(missing_mask.sum())

# Heatmap to visualize missing data
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(missing_mask, cmap='viridis', cbar=False, ax=ax)
ax.set_title('Missing Data Heatmap')
plt.show()

In [None]:
# Boxplots for outlier detection in Electric Range and Base MSRP.
# Each column gets its own figure, drawn in the order listed.
for column in ('Electric Range', 'Base MSRP'):
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=dataset, y=column)
    plt.title(f'Boxplot of {column} (Outlier Detection)')
    plt.show()


In [None]:
# Correlation matrix and heatmap
# Pairwise correlations between the three selected columns, annotated in each cell.
correlation_columns = ['Electric Range', 'Model Year', 'Base MSRP']
corr_matrix = dataset[correlation_columns].corr()

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()


In [None]:
# Boxplot for Electric Range by CAFV Eligibility
# One box per eligibility category; tick labels are rotated 45 degrees.
cafv_column = 'Clean Alternative Fuel Vehicle (CAFV) Eligibility'

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x=cafv_column, y='Electric Range', data=dataset, ax=ax)
ax.set_title('Electric Range by CAFV Eligibility')
ax.set_xlabel('CAFV Eligibility')
ax.set_ylabel('Electric Range (miles)')
ax.tick_params(axis='x', labelrotation=45)
plt.show()


In [None]:

# Electric Range by City (Top 10 Cities)
# Keep only rows whose city is among the ten most frequent values of 'City'.
city_counts = dataset['City'].value_counts()
top_cities = city_counts.nlargest(10).index
in_top_city = dataset['City'].isin(top_cities)
filtered_data = dataset[in_top_city]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x='City', y='Electric Range', data=filtered_data, palette='Set3', ax=ax)
ax.set_title('Electric Range by City (Top 10 Cities)')
ax.set_xlabel('City')
ax.set_ylabel('Electric Range (miles)')
ax.tick_params(axis='x', labelrotation=90)
plt.show()


In [None]:
# Task 2: Create a choropleth using plotly.express to display the number of electric vehicles (EVs) by location.

In [None]:
!pip install plotly

In [None]:
import pandas as pd
# Re-read the same "dataset.csv" file into `dataset`, replacing whatever the
# name was bound to by earlier cells.
dataset = pd.read_csv("dataset.csv")


In [None]:
import plotly.express as px


In [None]:
# Interactive scatter of base MSRP (y) against electric range (x).
scatter_plot = px.scatter(
    data_frame=dataset,
    x="Electric Range",
    y="Base MSRP",
    title="Scatter Plot: Electric Range vs Base MSRP",
)
scatter_plot.show()


In [None]:
# Interactive box plot of electric range, one box per vehicle type.
box_plot = px.box(
    data_frame=dataset,
    x="Electric Vehicle Type",
    y="Electric Range",
    title="Box Plot: Electric Vehicle Type vs Electric Range",
)
box_plot.show()


In [None]:
# Tally the rows per vehicle type into a two-column frame, then draw the shares as a pie.
vehicle_type_count = (
    dataset['Electric Vehicle Type']
    .value_counts()
    .rename_axis('Electric Vehicle Type')
    .reset_index(name='Count')
)

pie_chart = px.pie(
    data_frame=vehicle_type_count,
    names='Electric Vehicle Type',
    values='Count',
    title="Pie Chart: Distribution of Electric Vehicle Types",
)
pie_chart.show()


In [None]:
# Number of rows per state, as a two-column frame ('State', 'Vehicle Count').
state_tally = dataset['State'].value_counts().reset_index()
vehicle_count_by_state = state_tally.set_axis(['State', 'Vehicle Count'], axis=1)

# Colour each US state by its vehicle count.
choropleth_options = dict(
    locations="State",
    locationmode="USA-states",
    color="Vehicle Count",
    scope="usa",
    title="Choropleth Map: Number of EV Vehicles by State",
)
choropleth = px.choropleth(vehicle_count_by_state, **choropleth_options)
choropleth.show()


In [None]:
# Animated choropleth: mean electric range per state, one frame per model year.
#
# A choropleth draws one location per input row, so the per-vehicle table is
# first reduced to a single row per (Model Year, State). Passing the raw table
# would repeat every state once per vehicle instead of showing one summary
# value per state.
range_by_year_state = (
    dataset
    .groupby(["Model Year", "State"], as_index=False)["Electric Range"]
    .mean()
    # Animation frames follow the order in which values first appear in the
    # data, so sort to make the frames play in chronological order.
    .sort_values("Model Year")
)

# Pin the colour scale to the overall range so colours are comparable between frames.
mean_range = range_by_year_state["Electric Range"]

animated_choropleth = px.choropleth(range_by_year_state,
                                    locations="State",
                                    locationmode="USA-states",
                                    color="Electric Range",
                                    animation_frame="Model Year",
                                    range_color=[mean_range.min(), mean_range.max()],
                                    labels={"Electric Range": "Mean Electric Range"},
                                    scope="usa",
                                    title="Animated Choropleth: Electric Range over Model Year by State")
animated_choropleth.show()


In [None]:
# Task 3: Create a racing bar plot that animates the vehicle count of each EV make by year.

In [None]:
!pip install bar_chart_race

In [None]:
import pandas as pd

# Requires `dataset` to be loaded already.
# Rows are model years, columns are makes, cells are vehicle counts
# (combinations that never occur are filled with 0).
pivot_data = dataset.pivot_table(
    index="Model Year",
    columns="Make",
    aggfunc="size",
    fill_value=0,
)

# Reorder the make columns from the largest overall total to the smallest.
make_totals = pivot_data.sum(axis=0)
makes_by_total = make_totals.sort_values(ascending=False).index
pivot_data = pivot_data.loc[:, makes_by_total]


In [None]:
import bar_chart_race as bcr


In [None]:
import pandas as pd
import plotly.express as px

# Count vehicles for every (Model Year, Make) pair; pairs that never occur get 0.
pivot_data = dataset.pivot_table(
    index="Model Year",
    columns="Make",
    aggfunc="size",
    fill_value=0,
)

# Turn 'Model Year' back into a regular column, then reshape to long form:
# one row per (Model Year, Make) with its count.
pivot_data = pivot_data.reset_index()
melted_data = pd.melt(
    pivot_data,
    id_vars=["Model Year"],
    var_name="Make",
    value_name="Count",
)

# Same x-axis limits on every frame, with a little headroom above the largest count.
count_axis_range = [0, melted_data['Count'].max() + 10]

# Horizontal bar chart animated over model year, one coloured bar per make.
fig = px.bar(
    data_frame=melted_data,
    x='Count',
    y='Make',
    color='Make',
    animation_frame='Model Year',
    range_x=count_axis_range,
    title='Year-wise EV Make Sales Animation',
    orientation='h',
)

# Font sizes, canvas size and spacing between bars.
layout_options = {
    "title_font": dict(size=30),
    "xaxis_title_font": dict(size=20),
    "yaxis_title_font": dict(size=20),
    "width": 1000,
    "height": 600,
    "bargap": 0.1,
}
fig.update_layout(**layout_options)

fig.show()
