In [None]:
import matplotlib
matplotlib.use("TkAgg")
import pandas as pd
from sklearn.impute import KNNImputer
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import geopandas as gpd
from geopy.geocoders import Nominatim
from shapely.geometry import Point
from geopy.extra.rate_limiter import RateLimiter
import geodatasets
import mplcursors

In [None]:
# Read the used-car listings into `cars` and preview the first ten rows.
cars = pd.read_csv(filepath_or_buffer="Used_Car_Price_Prediction.csv")
cars.head(n=10)

## KNN Imputation

In [None]:
# Fill missing `original_price` values with scikit-learn's KNNImputer
# (5 neighbours, distance-weighted).
# NOTE(review): only the `original_price` column itself is passed to the imputer, so a
# row with a missing price has no other observed feature to measure distance on --
# presumably this degenerates to filling with the column mean. Confirm, and consider
# fitting on several numeric columns if neighbour-based imputation is intended.
imputer = KNNImputer(n_neighbors=5, weights='distance')
cars['original_price'] = imputer.fit_transform(cars[['original_price']])

In [None]:
# Replace missing transmission values with the placeholder label 'unknown'.
cars['transmission'] = cars['transmission'].mask(cars['transmission'].isna(), 'unknown')

First, we will **create a GeoPandas GeoDataFrame** so we can make map plots. We start by geocoding each of the cities in the dataframe.


In [None]:
# Nominatim geocoder client; the user_agent string identifies this notebook to the service.
geolocator = Nominatim(user_agent="city_mapper")

# Rate-limited wrapper around geolocator.geocode; every lookup below goes through it.
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=3,       # at least 3 seconds between calls (stays within Nominatim's rate limits)
    max_retries=2,             # retry a couple of times if it fails
    error_wait_seconds=3,      # wait 3 seconds before retrying after an error
    swallow_exceptions=False   # once retries are exhausted, raise instead of returning a fallback value
)

def geocode_city(city, country="India"):
    """Look up "<city>, <country>" with the rate-limited geocoder.

    Returns a (latitude, longitude) tuple, or (None, None) when the
    geocoder has no result for the query.
    """
    location = geocode(f"{city}, {country}")
    if not location:
        return None, None
    return location.latitude, location.longitude

# Build the table of distinct, title-cased city names. Missing values are dropped
# *before* the string conversion: otherwise NaN becomes the literal "Nan" and is
# sent to the geocoder as "Nan, India".
cities = pd.DataFrame(
    cars["city"].dropna().astype('str').str.title().unique(), columns=["city"]
)

# Geocode each distinct city once. Calls are rate-limited, so expect a few seconds per city.
coordinates = []
for city in cities["city"]:
    print(f"Geocoding: {city}...")
    coordinates.append(geocode_city(city))

# Attach the results column-wise; cities the geocoder could not resolve get missing values.
cities["lat"] = [lat for lat, _ in coordinates]
cities["lon"] = [lon for _, lon in coordinates]

# Save geocoded cities
cities.to_csv("cities_geocoded_india.csv", index=False)

Now, we can create a GeoDataFrame that includes latitude and longitude information for each city.


In [None]:
# Reload the saved coordinates so the geocoding cell does not have to be re-run.
cities = pd.read_csv("cities_geocoded_india.csv")

# Title-case the join key in `cars`, then attach lat/lon to every listing.
cars["city"] = cars["city"].astype("str").str.title()
df = cars.merge(cities, on="city", how="outer")

# One point per row, built as (x, y) = (lon, lat), wrapped in a GeoDataFrame tagged EPSG:4326.
geometry = [Point(lon, lat) for lon, lat in zip(df["lon"], df["lat"])]
map_cars = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

Now, we can load in a basic map of India.

In [None]:
# Country polygons read from the local ne_110m_admin_0_countries shapefile.
world = gpd.read_file("ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp")
world = world.set_crs("EPSG:4326")

# Extract India
india = world[world["ADMIN"] == "India"]

fig, ax = plt.subplots(figsize=(10, 6))

india.plot(ax=ax, color='lightblue', edgecolor='gray')
map_cars.plot(ax=ax, markersize=40)

# map_cars holds one row per listing, so labelling every row would stack the same
# city name many times on one spot. Label each geocoded city exactly once, and
# skip rows without coordinates (their text position would be non-finite).
city_labels = map_cars.dropna(subset=["lat", "lon"]).drop_duplicates(subset="city")
for x, y, label in zip(city_labels.geometry.x, city_labels.geometry.y, city_labels["city"]):
    ax.text(x + 0.3, y + 0.3, label, fontsize=7)

# Coordinates carry no meaning for the reader here, so hide both axes.
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

plt.tight_layout()
plt.show()

#### Top Five Car Brands in Each City (interactive)

In [None]:
# Count brands per city
# Brand = first space-separated word of `car_name`, title-cased.
map_cars["brand"] = map_cars["car_name"].astype("str").str.split(" ").str[0].str.title()

# For every city: the (up to) five most frequent brands as a list, most frequent first.
top5_list = (
    map_cars.groupby(["city", "brand"]).size()
    .groupby(level=0, group_keys=False).nlargest(5)
    .reset_index(name="count")
    .groupby("city")["brand"].agg(list)
    .reset_index()
    .rename(columns={"brand": "top5"})
)

# Drop any "top5" column left by a previous run of this cell before merging.
# Without this, re-running the cell yields "top5_x"/"top5_y" and the later
# row["top5"] lookup fails.
map_cars = (
    map_cars.drop(columns="top5", errors="ignore")
    .merge(top5_list, on="city", how="left")
)

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

india.plot(ax=ax, color='lightblue', edgecolor='gray')

# map_cars has one row per listing. Plotting and labelling every row stacks
# many identical markers and text artists on each city, which is what makes
# the hover tooltip sluggish. Reduce to one row per geocoded city first; the
# "top5" value is identical for all rows of a city, so nothing is lost.
city_points = (
    map_cars.dropna(subset=["lat", "lon"])
    .drop_duplicates(subset="city")
    .reset_index(drop=True)
)
points = ax.scatter(city_points.geometry.x, city_points.geometry.y, s=40)

for x, y, label in zip(city_points.geometry.x, city_points.geometry.y, city_points["city"]):
    ax.text(x + 0.3, y + 0.3, label, fontsize=7)

# Hover tooltips on the city markers only.
cursor = mplcursors.cursor(points, hover=True)
cursor.annotation_kwargs["animated"] = False

ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

# An attempt to reduce tooltip lag
for c in ax.collections:
    if c is not points:
        c.set_picker(False)

@cursor.connect("add")
def on_add(sel):
    """Fill the hover annotation with the city's name and its top car brands.

    `sel.index` is the position of the hovered marker in the scatter, which
    matches the row position in `city_points`.
    """
    i = sel.index
    row = city_points.iloc[i]

    brands = row["top5"]
    if isinstance(brands, list):
        brands_text = "\n".join(f"{j+1}. {b}" for j, b in enumerate(brands[:5]))
    else:
        # No brand list for this city (e.g. a missing value): show it as-is.
        brands_text = brands

    sel.annotation.set_text(
        f"{row['city']}\n\nTop Car Brands:\n{brands_text}"
    )


plt.tight_layout()
plt.show(block=False)

In [None]:
# Collapse the per-listing rows into one row per city: geometries are dissolved
# by city and sale_price is averaged, then rounded to two decimals.
map2 = (
    map_cars
    .dissolve(by="city", aggfunc={"sale_price": "mean"})
    .reset_index()
    .assign(sale_price=lambda frame: frame["sale_price"].round(2))
)
map2.head()

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

india.plot(ax=ax, color='lightblue', edgecolor='gray')
# GeoDataFrame.plot returns the Axes, not the marker artist. Draw the markers
# with ax.scatter so `points` is the scatter collection itself: the cursor then
# attaches only to the city markers, and sel.index maps to a row of map2.
points = ax.scatter(map2.geometry.x, map2.geometry.y, s=40)

for x, y, label in zip(map2.geometry.x, map2.geometry.y, map2["city"]):
    ax.text(x + 0.3, y + 0.3, label, fontsize=7)

cursor = mplcursors.cursor(points, hover=True)
cursor.annotation_kwargs["animated"] = False

ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

# Disable picking on everything except the city markers (e.g. the India polygon).
for c in ax.collections:
    if c is not points:
        c.set_picker(False)

@cursor.connect("add")
def on_add(sel):
    """Fill the hover annotation with the city's name and its average sale price.

    `sel.index` is the position of the hovered marker in the scatter, which
    matches the row position in `map2`.
    """
    i = sel.index
    row = map2.iloc[i]

    price = row["sale_price"]
    sel.annotation.set_text(f"{row['city']}\n\nAverage Sale Price (rupees):\n{price}")

plt.tight_layout()
plt.show(block=False)

In [None]:
# Number of listings for every (city, fuel_type) pair.
fuel_counts = map_cars.groupby(["city", "fuel_type"]).size().reset_index(name="count")

# Keep the MOST frequent fuel type per city: sort counts descending within each
# city and keep the first row. (An ascending sort here would select the least
# common fuel type instead.)
top_fuel = (
    fuel_counts.sort_values(["city", "count"], ascending=[True, False])
    .drop_duplicates(subset="city")
    .drop(columns="count")
)

# Attach each city's point by joining on the city name rather than by row
# position, so a city missing from one table cannot shift geometries onto the
# wrong rows.
map3 = top_fuel.merge(map2[["city", "geometry"]], on="city", how="left")
map3 = gpd.GeoDataFrame(map3, geometry="geometry", crs="EPSG:4326")

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

india.plot(ax=ax, color='lightblue', edgecolor='gray')
# GeoDataFrame.plot returns the Axes, not the marker artist. Draw the markers
# with ax.scatter so `points` is the scatter collection itself: the cursor then
# attaches only to the city markers, and sel.index maps to a row of map3.
points = ax.scatter(map3.geometry.x, map3.geometry.y, s=40)

for point, label in zip(map3.geometry, map3["city"]):
    ax.text(point.x + 0.3, point.y + 0.3, label, fontsize=7)

cursor = mplcursors.cursor(points, hover=True)
cursor.annotation_kwargs["animated"] = False

ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

# Disable picking on everything except the city markers (e.g. the India polygon).
for c in ax.collections:
    if c is not points:
        c.set_picker(False)

@cursor.connect("add")
def on_add(sel):
    """Fill the hover annotation with the city's name and its fuel-type entry.

    `sel.index` is the position of the hovered marker in the scatter, which
    matches the row position in `map3`.
    """
    i = sel.index
    row = map3.iloc[i]

    fuel = row["fuel_type"]
    sel.annotation.set_text(f"{row['city']}\n\nMost Popular Fuel Type:\n{fuel}")

plt.tight_layout()
plt.show(block=False)