# EV Market Data Analysis
This notebook implements the detailed analysis plan for the India EV Market datasets.

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import geopandas as gpd
from datetime import datetime

# Apply seaborn's default theme with the "whitegrid" axes style to every figure below.
# `sns.set` is only a legacy alias of `sns.set_theme`; call the preferred name directly.
sns.set_theme(style="whitegrid")

## 1. Data Loading and Cleaning

In [None]:
# Load the five raw India EV Market CSV files (paths are relative to the notebook).
ev_maker_path = '../data/raw/India-EV-Market/EV Maker by Place.csv'
ev_cat_path = '../data/raw/India-EV-Market/ev_cat_01-24.csv'
ev_sales_path = '../data/raw/India-EV-Market/ev_sales_by_makers_and_cat_15-24.csv'
operational_pc_path = '../data/raw/India-EV-Market/OperationalPC.csv'
vehicle_class_path = '../data/raw/India-EV-Market/Vehicle Class - All.csv'

ev_maker = pd.read_csv(ev_maker_path)
ev_cat = pd.read_csv(ev_cat_path)
ev_sales = pd.read_csv(ev_sales_path)
operational_pc = pd.read_csv(operational_pc_path)
vehicle_class = pd.read_csv(vehicle_class_path)


def _strip_thousands(values):
    """Return `values` with ',' thousands separators removed from its entries.

    A column that pandas already parsed as numeric is returned unchanged, so
    the cleaning works whether or not the CSV actually contained separators.
    Any other column is cast to text first, which keeps mixed object columns
    (strings alongside numbers or NaN) from being blanked out by `.str`.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values
    return values.astype(str).str.replace(',', '', regex=False)


# Clean Vehicle Class - All dataset: remove commas and convert to int.
# Still raises ValueError if a value is missing or not an integer.
vehicle_class['Total Registration'] = _strip_thousands(vehicle_class['Total Registration']).astype(int)

# Convert Date column in ev_cat to datetime (day-first).
# NOTE: errors='coerce' turns unparseable dates into NaT without any warning.
ev_cat['Date'] = pd.to_datetime(ev_cat['Date'], errors='coerce', dayfirst=True)

# Remove commas from the remaining ev_cat columns, then convert to numeric.
# Without the comma stripping, a value such as "1,234" would be coerced to NaN.
# Assumes 'Date' is the first column of ev_cat, as the [1:] slice implies.
for col in ev_cat.columns[1:]:
    ev_cat[col] = pd.to_numeric(_strip_thousands(ev_cat[col]), errors='coerce')

# Preview cleaned data
ev_maker.head(), ev_cat.head(), ev_sales.head(), operational_pc.head(), vehicle_class.head()

## 2. Exploratory Data Analysis (EDA)

In [None]:
# EV Maker by Place: how many makers are recorded for each state.
# value_counts() returns the states ordered from most to fewest makers.
state_counts = ev_maker['State'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    x=state_counts.index,
    y=state_counts.values,
    palette='viridis',
    ax=ax,
)
# Tilt the state names so neighbouring labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title('Number of EV Makers by State')
ax.set_ylabel('Count')
ax.set_xlabel('State')
plt.show()

In [None]:
# ev_cat_01-24: one line per vehicle category, plotted against the Date column.
# Only columns 1..5 are drawn (column 0 is skipped) to keep the chart readable.
fig, ax = plt.subplots(figsize=(14, 8))
for col in ev_cat.columns[1:6]:
    ax.plot(ev_cat['Date'], ev_cat[col], label=col)
ax.set_title('Trends of Vehicle Categories Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.legend()
plt.show()

In [None]:
# ev_sales_by_makers_and_cat_15-24: rank makers by their summed yearly sales.
# The label slice takes every column from '2015' through '2024' inclusive.
yearly_sales = ev_sales.loc[:, '2015':'2024']
ev_sales['Total Sales'] = yearly_sales.sum(axis=1)

# A maker can appear on several rows, so aggregate per maker before ranking.
sales_per_maker = ev_sales.groupby('Maker')['Total Sales'].sum()
top_makers = sales_per_maker.sort_values(ascending=False).head(10)

fig, ax = plt.subplots(figsize=(12, 6))
# Horizontal bars: sales on the x-axis, maker names on the y-axis.
sns.barplot(x=top_makers.values, y=top_makers.index, palette='magma', ax=ax)
ax.set_title('Top 10 EV Makers by Total Sales (2015-2024)')
ax.set_xlabel('Total Sales')
ax.set_ylabel('Maker')
plt.show()

In [None]:
# OperationalPC: bar chart of the 'No. of Operational PCS' column for each state.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=operational_pc,
    x='State',
    y='No. of Operational PCS',
    palette='coolwarm',
    ax=ax,
)
# Tilt the state names so neighbouring labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title('Number of Operational PCs by State')
ax.set_ylabel('No. of Operational PCs')
ax.set_xlabel('State')
plt.show()

In [None]:
# Vehicle Class - All: bar chart of 'Total Registration' for each vehicle class.
fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(
    data=vehicle_class,
    x='Vehicle Class',
    y='Total Registration',
    palette='cubehelix',
    ax=ax,
)
# Class labels are drawn vertically (90 degrees).
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title('Total Registrations by Vehicle Class')
ax.set_ylabel('Total Registration')
ax.set_xlabel('Vehicle Class')
plt.show()

## 3. Combined Analysis
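This section is a placeholder and is not implemented yet. Combined analysis and correlation across the datasets (for example, comparing per-state EV maker counts with per-state operational PCS counts) can be built on the initial insights above.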