## 🧠 Analyzing Urban Mobility Patterns - EDA

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the trip records from the project-relative CSV into `df`, then preview
# the first five rows (the bare last expression is what the notebook renders).
df = pd.read_csv(filepath_or_buffer='data/urban_mobility_data.csv')
df.head(n=5)

In [None]:
# Structural overview of the frame: dimensions first, then per-column
# dtypes / non-null counts (printed by .info()), then summary statistics.
n_rows, n_cols = df.shape
print("Shape of dataset:", (n_rows, n_cols))
df.info()
# Kept as the final expression so the notebook displays the statistics table.
df.describe()

In [None]:
# Count the null entries in every column of `df`.
missing = df.isnull().sum()
# Use the newline as print's separator: embedding "\n" in the label and relying
# on the default " " separator puts a stray space in front of the first row of
# the Series, which misaligns it against the remaining rows.
print("Missing Values:", missing, sep="\n")

In [None]:
# Histogram of trip distance with a KDE curve overlaid, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(data=df, x='trip_distance_km', kde=True, color='skyblue', ax=ax)
ax.set(
    title="Distribution of Trip Distance (km)",
    xlabel="Distance (km)",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Histogram of fare amount with a KDE curve overlaid, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 4))
sns.histplot(data=df, x='fare_amount', kde=True, color='orange', ax=ax)
ax.set(
    title="Distribution of Fare Amount",
    xlabel="Fare (₹)",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Bar chart of the number of rows per vehicle type, one palette colour per bar.
#
# Passing `palette` without `hue` is deprecated as of seaborn 0.13 (FutureWarning,
# scheduled for removal in 0.14), so the x variable is mapped to `hue` as well.
# `dodge=False` keeps a single full-width bar per category, and any legend that
# gets drawn is removed because it would only repeat the x tick labels. Written
# this way the cell renders the same chart on seaborn releases before and after 0.13.
# NOTE(review): assumes `vehicle_type` holds categorical labels (strings) — confirm;
# a numeric column would receive a continuous hue mapping on seaborn >= 0.13.
ax = sns.countplot(
    data=df,
    x='vehicle_type',
    hue='vehicle_type',
    palette='Set2',
    dodge=False,
)
redundant_legend = ax.get_legend()
if redundant_legend is not None:
    redundant_legend.remove()
ax.set_title("Count of Vehicle Types")
plt.show()

In [None]:
# Bar chart of how many rows fall under each passenger count.
# NOTE(review): seaborn >= 0.13 emits a FutureWarning for `palette` without `hue`.
# Left unchanged here because `passengers` is presumably numeric (confirm), and
# mapping a numeric column to `hue` would alter the bar colours.
ax = sns.countplot(data=df, x='passengers', palette='Set1')
ax.set_title("Passenger Count Distribution")
plt.show()

In [None]:
# Scatter of fare against trip distance, points coloured by vehicle type.
ax = sns.scatterplot(
    data=df,
    x='trip_distance_km',
    y='fare_amount',
    hue='vehicle_type',
)
ax.set_title("Fare vs. Trip Distance by Vehicle Type")
plt.show()

In [None]:
# Box plot of the fare distribution, one box per vehicle type.
ax = sns.boxplot(data=df, x='vehicle_type', y='fare_amount')
ax.set_title("Fare Distribution by Vehicle Type")
plt.show()

In [None]:
# Pairwise correlations between the numeric columns only (non-numeric columns
# are excluded by `numeric_only=True`), shown as an annotated heatmap.
corr_matrix = df.corr(numeric_only=True)
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

### 📝 Key Observations
- Cabs typically have higher fares for similar distances.
- Autos and Bikes are used for shorter trips.
- Passenger count is mostly between 1 and 3.