In [1]:
# Section 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Enable inline plotting in Google Colab
%matplotlib inline


In [2]:


# Load the dataset into a DataFrame
df = pd.read_csv('EV_Dataset.csv')

# Preview the dataset
print("Dataset Preview:")
print(df.head())

# Section 3: Explore the Dataset
# Check for missing values and data types
print("\nDataset Info:")
print(df.info())

print("\nSummary Statistics:")
print(df.describe())


Dataset Preview:
     Year Month_Name      Date           State         Vehicle_Class  \
0  2014.0        jan  1/1/2014  Andhra Pradesh       ADAPTED VEHICLE   
1  2014.0        jan  1/1/2014  Andhra Pradesh  AGRICULTURAL TRACTOR   
2  2014.0        jan  1/1/2014  Andhra Pradesh             AMBULANCE   
3  2014.0        jan  1/1/2014  Andhra Pradesh   ARTICULATED VEHICLE   
4  2014.0        jan  1/1/2014  Andhra Pradesh                   BUS   

  Vehicle_Category Vehicle_Type  EV_Sales_Quantity  
0           Others       Others                0.0  
1           Others       Others                0.0  
2           Others       Others                0.0  
3           Others       Others                0.0  
4              Bus          Bus                0.0  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96845 entries, 0 to 96844
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year    

In [5]:

# Check for unique values in key columns
print("\nUnique States:", df['State'].nunique())
print("Unique Vehicle Types:", df['Vehicle_Type'].unique())
print("Unique Vehicle Categories:", df['Vehicle_Category'].unique())



Unique States: 34
Unique Vehicle Types: ['Others' 'Bus' 'Institution Bus' '2W_Personal' '4W_Shared' '4W_Personal'
 '3W_Shared' '3W_Personal' '3W_Goods' '2W_Shared' '3W_Shared_LowSpeed'
 '3W_Goods_LowSpeed']
Unique Vehicle Categories: ['Others' 'Bus' '2-Wheelers' '4-Wheelers' '3-Wheelers']


In [6]:

# Section 4: Analyze and Visualize Data
# Plot total EV sales by state
state_sales = df.groupby('State')['Electric_Vehicle_Sales_Quantity'].sum().sort_values(ascending=False)
plt.figure(figsize=(12, 6))
state_sales.plot(kind='bar', color='skyblue')
plt.title('Total EV Sales by State', fontsize=16)
plt.ylabel('Sales Quantity', fontsize=12)
plt.xlabel('State', fontsize=12)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

# Plot EV sales distribution by vehicle type
plt.figure(figsize=(8, 6))
sns.boxplot(x='Vehicle Type', y='Electric_Vehicle_Sales_Quantity', data=df)
plt.title('EV Sales Distribution by Vehicle Type', fontsize=16)
plt.xlabel('Vehicle Type', fontsize=12)
plt.ylabel('Sales Quantity', fontsize=12)
plt.show()

KeyError: 'Column not found: Electric_Vehicle_Sales_Quantity'

In [7]:
# Section 5: Advanced Insights
# Analyze sales by vehicle category
category_sales = df.groupby('Vehicle Category')['Electric_Vehicle_Sales_Quantity'].sum()
plt.figure(figsize=(8, 6))
category_sales.plot(kind='pie', autopct='%1.1f%%', startangle=140, colors=['#ff9999','#66b3ff'])
plt.title('Sales Distribution by Vehicle Category', fontsize=16)
plt.ylabel('')
plt.tight_layout()
plt.show()

# Correlation Heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap', fontsize=16)
plt.show()

# Section 6: Save Cleaned Data
# Check for and handle missing values (if any)
if df.isnull().values.any():
    df = df.dropna()
    print("\nMissing values detected and dropped.")
else:
    print("\nNo missing values detected.")

# Save the cleaned dataset
output_file = 'Cleaned_EV_Dataset.csv'
df.to_csv(output_file, index=False)
print(f"\nCleaned dataset saved as {output_file}.")

KeyError: 'Vehicle Category'