In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Load the dataset
file_path = 'D:\Job Assignments\Statewise Analysis of Electric Vehicles and Charging Stations in India.csv'
df = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(df.head())

# Select relevant features for clustering
features = [
    'Two Wheeler', 'Three Wheeler', 'Four Wheeler', 
    'Goods Vehicles', 'Public Service Vehicle', 'Ambulance/Hearses', 
    'Other', 'Grand Total', 'total-charging-stations'
]

# Handle missing values by filling them with the mean of the column
df.fillna(df.mean(), inplace=True)

# Standardize the data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[features])

# Determine the number of clusters using the Elbow method
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=42)
    kmeans.fit(scaled_data)
    wcss.append(kmeans.inertia_)

# Plot the Elbow Method
plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Apply K-means to the dataset
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=300, n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(scaled_data)

# Add cluster information to the original data
df['Cluster'] = y_kmeans

# Visualize the clusters using PCA
pca = PCA(n_components=2)
principal_components = pca.fit_transform(scaled_data)
pc_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
pc_df['Cluster'] = y_kmeans

plt.figure(figsize=(8, 6))
colors = ['red', 'blue', 'green']
for cluster in range(3):
    plt.scatter(pc_df[pc_df['Cluster'] == cluster]['PC1'], pc_df[pc_df['Cluster'] == cluster]['PC2'], 
                s=100, c=colors[cluster], label=f'Cluster {cluster}')

plt.title('Clusters of Electric Vehicles in India')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.show()

# Save the clustered data to a new CSV file
output_file_path = 'D:\Job Assignments\EV_market_segmentation_output.csv'
df.to_csv(output_file_path, index=False)
print(f'Segmented data saved to {output_file_path}')


   Unnamed: 0                  State Name  Two Wheeler  Three Wheeler  \
0           0  Andaman and Nicobar Island            1           30.0   
1           1           Arunachal Pradesh           14            0.0   
2           2                       Assam          721        47041.0   
3           3                       Bihar         5003        59079.0   
4           4                  Chandigarh          298         1410.0   

   Four Wheeler  Goods Vehicles  Public Service Vehicle  \
0            81             0.0                    40.0   
1             5             0.0                     0.0   
2           161             7.0                    15.0   
3           114            11.0                    26.0   
4           182             0.0                    40.0   

   Special Category Vehicles  Ambulance/Hearses  \
0                        0.0                0.0   
1                        0.0                0.0   
2                        0.0                0.0   
3 

TypeError: Could not convert ['Andaman and Nicobar IslandArunachal PradeshAssamBiharChandigarhChhattisgarhDelhiGoaGujaratHaryanaHimachal PradeshJammu and KashmirJharkhandKarnatakaKeralaLadakhMaharashtraManipurMeghalayaMizoramNagalandOdishaPuducherryPunjabRajasthanSikkimTamil NaduTripuraDadra and Nagar Haveli and Daman and DiuUttar PradeshUttarakhandWest Bengal'] to numeric