# Tel Aviv Junctions - Data Exploration

This notebook explores the panel dataset of Tel Aviv junctions with temporal features.

## Overview

- Load panel dataset (junction × year)
- Visualize temporal feature changes
- Explore accident patterns
- Analyze infrastructure changes over time


In [1]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

from tel_aviv_junctions.panel import (
    build_full_pipeline,
    load_panel_dataset,
    get_junctions_for_year,
    get_junction_history,
    compute_temporal_changes,
)
from tel_aviv_junctions.config import YEARS, OUTPUT_DIR

# Set up plotting
# 'seaborn-v0_8-darkgrid' is the darkgrid style sheet bundled with matplotlib;
# NOTE(review): the 'seaborn-v0_8-' prefix is only recognised by matplotlib >= 3.6 - confirm the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
# Make seaborn's "husl" palette the default colour cycle for the plots below.
sns.set_palette("husl")
# IPython magic: render figures inline in the notebook output.
%matplotlib inline


ModuleNotFoundError: No module named 'geopandas'

## Load Panel Dataset

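Load the panel dataset from `OUTPUT_DIR`. If the CSV does not exist yet, it can be built with `build_full_pipeline()`; note that this is slow because it queries the ohsome API for historical data.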


In [None]:
# Try to load existing panel dataset
# Expected location of the panel CSV inside OUTPUT_DIR.
panel_path = Path(OUTPUT_DIR) / "tel_aviv_junctions_panel.csv"

# Building the panel is slow (it queries the ohsome API for historical data),
# so it is opt-in: set BUILD_IF_MISSING = True to build when the CSV is absent.
BUILD_IF_MISSING = False
ACCIDENTS_CSV = None  # Add path to accidents CSV if available

if panel_path.exists():
    print(f"Loading existing panel dataset from {panel_path}")
    df = load_panel_dataset(str(panel_path))
elif BUILD_IF_MISSING:
    print("Panel dataset not found. Building it now...")
    print("(This will take a while - queries ohsome API for historical data)")
    df = build_full_pipeline(
        accidents_csv=ACCIDENTS_CSV,
        use_cache=True,
    )
else:
    # Nothing is built unless explicitly enabled, so do not claim otherwise.
    print(f"Panel dataset not found at {panel_path}.")
    print("Please run build_full_pipeline() or provide path to existing panel CSV")
    # Downstream cells check `df is not None` before doing any work.
    df = None

if df is not None:
    print(f"\nDataset shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print("\nFirst few rows:")
    display(df.head())


## Basic Statistics


In [None]:
if df is not None:
    # Computed once; reused for the per-junction ratios below.
    n_rows = len(df)
    n_junctions = df['junction_id'].nunique()

    print("Dataset Info:")
    print(f"  Total rows: {n_rows:,}")
    print(f"  Unique junctions: {n_junctions:,}")
    print(f"  Years: {sorted(df['year'].unique())}")
    if n_junctions:
        print(f"  Years per junction: {n_rows / n_junctions:.1f}")
    else:
        # An empty panel would otherwise raise ZeroDivisionError here.
        print("  Years per junction: n/a (no junctions)")

    print("\nStatic Features (should be constant per junction):")
    static_cols = ['latitude', 'longitude', 'degree', 'node_count']
    for col in static_cols:
        if col in df.columns:
            values_per_junction = df.groupby('junction_id')[col].nunique()
            # nunique() ignores NaN, so a junction whose value is missing in
            # every year reports 0 distinct values. That is not "varying",
            # hence `<= 1` rather than `== 1`.
            if (values_per_junction <= 1).all():
                print(f"  ✓ {col}: constant per junction")
            else:
                print(f"  ✗ {col}: varies! {values_per_junction.max()} unique values")

    print("\nTime-Varying Features:")
    time_varying = ['has_cycleway', 'has_traffic_signal', 'max_speed', 'total_lanes']
    for col in time_varying:
        if col in df.columns:
            values_per_junction = df.groupby('junction_id')[col].nunique()
            # A junction "changed" if it shows more than one distinct non-missing value.
            changed = (values_per_junction > 1).sum()
            pct_changed = 100 * changed / n_junctions if n_junctions else 0.0
            print(f"  {col}: {changed} junctions changed ({pct_changed:.1f}%)")


## Temporal Feature Changes

Visualize how infrastructure features changed over time.


In [None]:
if df is not None:
    # How each feature is aggregated per year: counts for boolean flags,
    # means for numeric attributes.
    yearly_agg_spec = {
        'has_cycleway': 'sum',
        'has_traffic_signal': 'sum',
        'has_crossing': 'sum',
        'max_speed': 'mean',
        'total_lanes': 'mean',
    }
    # Only aggregate columns that exist; a missing column would otherwise
    # raise KeyError inside .agg() and abort the whole cell.
    available_agg = {col: how for col, how in yearly_agg_spec.items() if col in df.columns}
    missing_cols = [col for col in yearly_agg_spec if col not in df.columns]
    if missing_cols:
        print(f"Skipping columns missing from the panel: {missing_cols}")

    # One entry per subplot: (column, title, y-axis label, line colour).
    # A colour of None keeps the default colour from the active palette.
    panel_specs = [
        ('has_cycleway', 'Junctions with Cycleways Over Time', 'Count', None),
        ('has_traffic_signal', 'Junctions with Traffic Signals Over Time', 'Count', 'orange'),
        ('max_speed', 'Average Max Speed Over Time', 'Speed (km/h)', 'green'),
        ('total_lanes', 'Average Total Lanes Over Time', 'Lanes', 'red'),
    ]

    if not available_agg:
        print("None of the expected feature columns are present; nothing to plot.")
    else:
        # Aggregate features by year
        yearly_stats = df.groupby('year').agg(available_agg).reset_index()

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        for ax, (col, title, ylabel, color) in zip(axes.flat, panel_specs):
            if col not in yearly_stats.columns:
                # Leave the slot empty rather than failing on the lookup.
                ax.set_visible(False)
                continue

            line_kwargs = {'marker': 'o', 'linewidth': 2}
            if color is not None:
                line_kwargs['color'] = color

            ax.plot(yearly_stats['year'], yearly_stats[col], **line_kwargs)
            ax.set_title(title)
            ax.set_xlabel('Year')
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()


## Infrastructure Changes Analysis

Identify which junctions had infrastructure changes (for example, a cycleway being added) and in which years.


In [None]:
if df is not None:
    # Summarise feature changes across years.
    # NOTE(review): presumably one row per junction - confirm against compute_temporal_changes().
    changes = compute_temporal_changes(df)
    change_cols = changes.columns

    print("Junctions with Infrastructure Changes:")
    print(f"  Total junctions: {len(changes)}")

    if 'has_cycleway_changed' in change_cols:
        n_cycleway_changed = changes['has_cycleway_changed'].sum()
        print(f"  Cycleways added: {n_cycleway_changed} junctions")

        if 'has_cycleway_added_year' in change_cols:
            # Keep only junctions that actually have an addition year recorded.
            added_years = changes['has_cycleway_added_year'].dropna()
            if not added_years.empty:
                print(f"  Cycleway addition years: {sorted(added_years.unique())}")

    display(changes.head(10))


## Accident Analysis (if available)

If accident data was joined, explore accident patterns.


In [None]:
def _plot_mean_accidents_by_flag(ax, data, flag_col, labels, colors):
    """Draw mean ``accident_count`` per junction-year for each value of a flag.

    Args:
        ax: Matplotlib axes to draw on.
        data: Panel DataFrame containing ``accident_count``.
        flag_col: Name of the flag column to group by (e.g. ``'has_cycleway'``).
        labels: Mapping ``{False: label, True: label}`` for the bar tick labels.
        colors: Mapping ``{False: colour, True: colour}`` for the bars.

    Only the groups actually present in the data are drawn, so a flag that is
    all-True or all-False no longer causes a label/height length mismatch.
    Rows where the flag is missing are excluded; remaining values are
    interpreted as booleans (non-zero counts as True).
    """
    ax.set_title('Average Accidents per Junction-Year')
    ax.set_ylabel('Accidents')
    ax.grid(True, alpha=0.3, axis='y')

    if flag_col not in data.columns:
        ax.text(0.5, 0.5, f"'{flag_col}' not available",
                ha='center', va='center', transform=ax.transAxes)
        return

    flags = data[flag_col].dropna().astype(bool)
    mean_by_flag = data.loc[flags.index, 'accident_count'].groupby(flags).mean()
    # Plain-bool keys make the presence check independent of the index dtype.
    means = {bool(flag): value for flag, value in mean_by_flag.items()}
    present = [flag for flag in (False, True) if flag in means]

    ax.bar(
        [labels[flag] for flag in present],
        [means[flag] for flag in present],
        color=[colors[flag] for flag in present],
    )


if df is not None and 'accident_count' in df.columns:
    has_accident = df['accident_count'] > 0

    print("Accident Statistics:")
    print(f"  Total accidents: {df['accident_count'].sum():,}")
    print(f"  Junction-years with accidents: {has_accident.sum():,}")
    # Guard the ratio so an empty panel prints 0.0% instead of nan.
    pct_with_accidents = 100 * has_accident.sum() / len(df) if len(df) else 0.0
    print(f"  Percentage with accidents: {pct_with_accidents:.1f}%")

    # Accidents by year
    accidents_by_year = df.groupby('year')['accident_count'].sum()
    print("\nAccidents by Year:")
    for year, count in accidents_by_year.items():
        print(f"  {int(year)}: {count}")

    # Accidents vs infrastructure
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Accidents vs cycleway
    _plot_mean_accidents_by_flag(
        axes[0], df, 'has_cycleway',
        labels={False: 'No Cycleway', True: 'Has Cycleway'},
        colors={False: 'red', True: 'green'},
    )

    # Accidents vs traffic signal
    _plot_mean_accidents_by_flag(
        axes[1], df, 'has_traffic_signal',
        labels={False: 'No Signal', True: 'Has Signal'},
        colors={False: 'orange', True: 'blue'},
    )

    plt.tight_layout()
    plt.show()
else:
    print("No accident data available. Join accidents using join_accidents_temporal()")


## Example: Single Junction History

Look at how a specific junction's features changed over time.


In [None]:
if df is not None:
    # Pick a junction that had changes; `changes` is produced by the
    # infrastructure-changes cell above.
    changed_junction = None
    if 'has_cycleway_changed' in changes.columns:
        # .eq(True) yields a clean boolean mask even if the column holds NaN.
        changed_mask = changes['has_cycleway_changed'].eq(True)
        if changed_mask.any():
            # Select the column before the row so the id keeps its own dtype.
            changed_junction = changes.loc[changed_mask, 'junction_id'].iloc[0]

    # Fall back to the first junction when nothing changed (or the flag is absent).
    if changed_junction is None and not df.empty:
        changed_junction = df['junction_id'].iloc[0]

    if changed_junction is None:
        print("Panel dataset is empty - no junction history to show.")
    else:
        junction_history = get_junction_history(df, changed_junction)

        # accident_count only exists when accident data was joined, so show
        # whichever of the expected columns are actually present.
        history_cols = ['year', 'has_cycleway', 'has_traffic_signal', 'max_speed', 'accident_count']
        shown_cols = [col for col in history_cols if col in junction_history.columns]

        print(f"History for Junction {changed_junction}:")
        display(junction_history[shown_cols])
