# Calgary On-Street Parking Zones: Data Cleaning, EDA, and Storytelling

In [None]:
import pandas as pd

# Download the CSV export of the dataset straight into a DataFrame
url = "https://data.calgary.ca/api/views/45az-7kh9/rows.csv?accessType=DOWNLOAD"
df = pd.read_csv(filepath_or_buffer=url)

# Quick orientation: table dimensions and column names, then the first rows
for label, value in (
    ("Dataset Shape:", df.shape),
    ("Column Names:", list(df.columns)),
):
    print(label, value)
df.head()


## Data Cleaning

In [None]:
# Check for missing values (per-column count of nulls in the raw frame)
missing_values = df.isnull().sum()
print("Missing Values:\n", missing_values)

# Build the cleaned frame as one chain derived from `df`.
# Why a chain: assigning a column on the result of `dropna` (a filtered frame)
# can raise SettingWithCopyWarning; `.assign` always works on a fresh copy, and
# avoiding `inplace=True` keeps this cell re-runnable without touching `df`.
df_clean = (
    df
    # Drop rows with missing 'PRICE_ZONE' or 'HOURLY_RATE'
    .dropna(subset=['PRICE_ZONE', 'HOURLY_RATE'])
    # Convert 'HOURLY_RATE' to numeric; unparseable values become NaN
    .assign(
        HOURLY_RATE=lambda frame: pd.to_numeric(frame['HOURLY_RATE'], errors='coerce')
    )
    # Drop rows whose 'HOURLY_RATE' could not be parsed
    .dropna(subset=['HOURLY_RATE'])
    # Renumber rows 0..n-1 after the drops
    .reset_index(drop=True)
)
df_clean.head()


## Exploratory Data Analysis

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram (20 bins) of hourly rates with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_clean['HOURLY_RATE'], bins=20, kde=True, ax=ax)
ax.set(
    title='Distribution of Hourly Parking Rates',
    xlabel='Hourly Rate ($)',
    ylabel='Frequency',
)
plt.show()


In [None]:
# Mean hourly rate per price zone, ordered from most to least expensive
avg_rate_by_zone = (
    df_clean
    .groupby('PRICE_ZONE')['HOURLY_RATE']
    .mean()
    .sort_values(ascending=False)
)

# Bar chart: one bar per zone, in the sorted order computed above
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=avg_rate_by_zone.index, y=avg_rate_by_zone.values, ax=ax)
ax.set_title('Average Hourly Rate by Price Zone')
ax.set_xlabel('Price Zone')
ax.set_ylabel('Average Hourly Rate ($)')
plt.xticks(rotation=45)  # tilt zone labels so they do not overlap
plt.show()


## Geospatial Visualization

In [None]:
import folium

# Base map centred on Calgary
calgary_map = folium.Map(location=[51.0447, -114.0719], zoom_start=12)

# One blue circle marker per row that has both coordinates.
# `.get` returns None when a column is absent, so such rows are skipped too.
for _, zone in df_clean.iterrows():
    lat = zone.get('LATITUDE')
    lon = zone.get('LONGITUDE')
    if pd.isnull(lat) or pd.isnull(lon):
        continue  # nothing to plot without a full coordinate pair

    marker = folium.CircleMarker(
        location=[lat, lon],
        radius=5,
        popup=f"Zone: {zone['PRICE_ZONE']}, Rate: ${zone['HOURLY_RATE']}",
        color='blue',
        fill=True,
        fill_color='blue',
    )
    marker.add_to(calgary_map)

# Last expression of the cell: renders the interactive map
calgary_map


## Storytelling Insights

- **Rate Distribution**: Most hourly parking rates cluster within a narrow range (see the histogram above), indicating consistent pricing across zones.
- **High-Rate Zones**: Some zones have higher average rates, likely reflecting downtown or high-demand areas.
- **Geospatial Patterns**: Mapping shows high-rate clusters in central Calgary, highlighting the spatial logic of pricing.
