This notebook analyzes the service alerts data from the City of Cape Town, both planned and unplanned, to provide a summary of alerts by service area.

In [11]:
import json
import pandas as pd
from pathlib import Path
import glob

# Find the latest service alerts files
service_alerts_dir = Path('service-alerts-data')
planned_file = max(service_alerts_dir.glob('service_alerts_planned_*.json'))
unplanned_file = max(service_alerts_dir.glob('service_alerts_unplanned_*.json'))

# Read the JSON files
with open(planned_file) as f:
    planned_alerts = json.load(f)
    
with open(unplanned_file) as f:
    unplanned_alerts = json.load(f)

# Convert to DataFrames
planned_df = pd.DataFrame(planned_alerts)
unplanned_df = pd.DataFrame(unplanned_alerts)

# Add a column to identify the alert type
planned_df['alert_type'] = 'Planned'
unplanned_df['alert_type'] = 'Unplanned'

# Combine the dataframes
all_alerts = pd.concat([planned_df, unplanned_df], ignore_index=True)

# Count all alerts by planned / unplanned
print(all_alerts['planned'].value_counts())

print()

# Count all alerts by service_area
print(all_alerts['service_area'].value_counts())

print()

publish_date
# Count all alerts by date
print(all_alerts['publish_date'].value_counts())


planned
False    42
True      7
Name: count, dtype: int64

service_area
Water & Sanitation           19
Electricity                  18
Refuse                        9
Roads and Stormwater          2
Drivers Licence Enquiries     1
Name: count, dtype: int64



NameError: name 'publish_date' is not defined

Let's analyze the distribution of alerts across different service areas, split by planned vs unplanned:

In [None]:
# Create a pivot table
service_area_summary = pd.pivot_table(
    all_alerts,
    values='Id',
    index='service_area',
    columns='alert_type',
    aggfunc='count',
    fill_value=0
)

# Add a total column
service_area_summary['Total'] = service_area_summary.sum(axis=1)

# Sort by total number of alerts
service_area_summary = service_area_summary.sort_values('Total', ascending=False)

print("Service Alerts Summary:")
print("-" * 60)
print(service_area_summary)
print("\nTotal Alerts:", len(all_alerts))
print("Planned Alerts:", len(planned_df))
print("Unplanned Alerts:", len(unplanned_df))

Let's create a bar chart to visualize the distribution of alerts by service area: