# Bus Location Data API Exploration

This notebook explores the bus location data using the MOT SIRI API directly.

In [None]:
import pandas as pd
import datetime
from dateutil import tz
import requests
import plotly.express as px
import plotly.graph_objects as go
import json

# Raise pandas' display limits (column count and cell width) and enable
# East Asian width-aware alignment when rendering frames.
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_colwidth', 1000)
pd.options.display.unicode.east_asian_width = True

# Root URL that every helper below appends its endpoint path to.
SIRI_BASE_URL = "https://gtfs.mot.gov.il/api/v1"

## Helper Functions

In [None]:
def localize_dates(data, dt_columns=None, timezone='Israel'):
    """Return a copy of ``data`` with the given columns converted to a timezone.

    Args:
        data: DataFrame holding the columns to convert. It is not modified.
        dt_columns: Iterable of column names to convert. ``None`` (the
            default) converts nothing.
        timezone: Timezone name handed to ``tz_convert``. Defaults to
            ``'Israel'``.

    Returns:
        A copy of ``data`` in which every column listed in ``dt_columns`` is a
        timezone-aware datetime column expressed in ``timezone``.

    Raises:
        KeyError: If a name in ``dt_columns`` is not a column of ``data``.
    """
    if dt_columns is None:
        dt_columns = []

    data = data.copy()
    for c in dt_columns:
        # utc=True makes parsing robust: tz-naive values are interpreted as
        # UTC (tz_convert would otherwise raise TypeError on them) and values
        # with differing UTC offsets are normalised into one tz-aware dtype
        # instead of an object column that has no ``.dt`` accessor.
        data[c] = pd.to_datetime(data[c], utc=True).dt.tz_convert(timezone)

    return data

def get_routes(date=None, timeout=30):
    """Fetch the route list from the ``/gtfs/routes`` endpoint.

    Args:
        date: Accepted for backward compatibility. It is not sent to the API;
            the request is made without any query parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A DataFrame built from the JSON response body, or an empty DataFrame
        if the request fails, returns a non-200 status, or the body is not
        valid JSON. Failures are reported with ``print``.
    """
    try:
        response = requests.get(f"{SIRI_BASE_URL}/gtfs/routes", timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts follow the same best-effort
        # contract as HTTP errors: report and return an empty frame.
        print(f"Error fetching routes: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error fetching routes: {response.status_code}")
        return pd.DataFrame()

    try:
        payload = response.json()
    except ValueError as exc:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        print(f"Error fetching routes: {exc}")
        return pd.DataFrame()

    return pd.DataFrame(payload)

def get_vehicle_locations(route_id=None, limit=1000, timeout=30):
    """Fetch vehicle locations from the ``/siri/vehicle-locations`` endpoint.

    Args:
        route_id: Value sent as the ``route_id`` query parameter. ``None``
            leaves the parameter out of the request.
        limit: Value sent as the ``limit`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        A DataFrame built from the ``locations`` entry of the JSON response
        (or from the response itself when it is a bare list), or an empty
        DataFrame if the request fails, returns a non-200 status, or the body
        is not valid JSON. Failures are reported with ``print``.
    """
    params = {
        'route_id': route_id,
        'limit': limit
    }

    try:
        response = requests.get(
            f"{SIRI_BASE_URL}/siri/vehicle-locations",
            params=params,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection problems and timeouts follow the same best-effort
        # contract as HTTP errors: report and return an empty frame.
        print(f"Error fetching vehicle locations: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error fetching vehicle locations: {response.status_code}")
        return pd.DataFrame()

    try:
        payload = response.json()
    except ValueError as exc:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        print(f"Error fetching vehicle locations: {exc}")
        return pd.DataFrame()

    # Only a JSON object has .get(); a bare JSON list is used as the records.
    records = payload.get('locations', []) if isinstance(payload, dict) else payload
    return pd.DataFrame(records)

## 1. Exploring Bus Routes

Let's first look at the available bus route data.

In [None]:
# Get routes
routes = get_routes()

print(f"Total number of routes: {len(routes)}")
# Check for the column as well as for rows: indexing a missing
# 'agency_name' column would raise KeyError and stop the notebook.
if not routes.empty and 'agency_name' in routes.columns:
    print("\nUnique agencies:")
    print(routes['agency_name'].value_counts())

### 1.1 Analyzing Route Details

In [None]:
# Display key information about routes
if not routes.empty:
    # Select only the summary columns that are actually present, so a
    # missing column does not raise KeyError.
    summary_columns = ['route_short_name', 'route_long_name', 'agency_name']
    available_columns = [c for c in summary_columns if c in routes.columns]
    route_summary = routes[available_columns].head(10)
    display(route_summary)

## 2. Vehicle Location Analysis

Now let's explore real-time vehicle location data

In [None]:
# Fetch the locations reported for one example route
vehicle_locations = get_vehicle_locations(route_id='15531')  # Example route ID

if not vehicle_locations.empty:
    # Every column whose name contains "time" (case-insensitive) is treated
    # as a datetime column and passed to localize_dates
    dt_columns = list(
        filter(lambda name: 'time' in name.lower(), vehicle_locations.columns)
    )
    vehicle_locations = localize_dates(vehicle_locations, dt_columns)

    print(f"Number of location records: {len(vehicle_locations)}")
    display(vehicle_locations.head())

### 2.1 Visualizing Vehicle Locations

In [None]:
# Create a scatter map of vehicle locations
# Both coordinate columns are required; checking only 'latitude' would let
# the plot fail when 'longitude' is missing.
if not vehicle_locations.empty and {'latitude', 'longitude'}.issubset(vehicle_locations.columns):
    # Include each hover column only if it exists, instead of assuming
    # 'vehicle_ref' is present whenever 'recorded_at_time' is.
    hover_columns = [
        c for c in ('recorded_at_time', 'vehicle_ref')
        if c in vehicle_locations.columns
    ]

    fig = px.scatter_mapbox(
        vehicle_locations,
        lat='latitude',
        lon='longitude',
        hover_data=hover_columns or None,
        zoom=11
    )

    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r":0,"t":0,"l":0,"b":0}
    )

    fig.show()

## 3. Advanced Analysis

Let's look at how the vehicle location records are distributed across the hours of the day.

In [None]:
if 'recorded_at_time' in vehicle_locations.columns and not vehicle_locations.empty:
    # Derive the hour of day of each record and store it as an 'hour' column
    recorded_at = pd.to_datetime(vehicle_locations['recorded_at_time'])
    vehicle_locations['hour'] = recorded_at.dt.hour

    # Number of records per hour, ordered by hour
    hourly_counts = vehicle_locations['hour'].value_counts().sort_index()

    fig = px.bar(
        x=hourly_counts.index,
        y=hourly_counts.values,
        labels={'x': 'Hour of Day', 'y': 'Number of Records'},
        title='Distribution of Vehicle Location Records by Hour',
    )
    fig.show()

## 4. API Endpoints Reference

Main endpoints used in this analysis:
- `/gtfs/routes`: Get information about bus routes
- `/siri/vehicle-locations`: Get real-time vehicle location data

Key parameters:
- `route_id`: Filter locations by specific route
- `limit`: Maximum number of records to return 