# Trip Assignment Analysis - Node-Level Overview

This notebook analyzes the trip assignment data from the last episode of the multi-agent ride-hailing simulation.
We'll examine how passengers are assigned to each agent based on prices, utilities, and probabilities for each origin node.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Shared look for every figure drawn below: seaborn grid theme and a
# wide default canvas.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Libraries imported successfully")

## 2. Load Episode Data

In [None]:
# Configuration - UPDATE THESE VALUES BASED ON YOUR EXPERIMENT
city = 'san_francisco'  # Change this to your city
mode = 2  # Change this to your mode (0, 1, or 2)
fix_agent = 2  # Change this to your fix_agent setting (0, 1, or 2)

# Construct file path
data_dir = Path('saved_files/trip_data')

# Try to load experiment data first, fall back to test data
filename = f'trip_assignments_{city}_mode{mode}_fixagent{fix_agent}_last_episode.csv'
filepath = data_dir / filename

if not filepath.exists():
    print(f"Experiment data not found: {filepath}")
    print("Using test data instead...")
    filepath = data_dir / 'test_trip_assignments.csv'

# Fail fast when neither file is available. Every later cell dereferences
# `df`, so continuing with `df = None` would only surface as an opaque
# AttributeError further down the notebook.
if not filepath.exists():
    raise FileNotFoundError(
        f"File not found: {filepath}. "
        "Please run the training script with --test flag to generate trip data"
    )

# Load data
df = pd.read_csv(filepath)
print(f"✓ Loaded {len(df)} trip records from {filepath.name}")
print(f"\nDataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

## 3. Data Overview

In [None]:
# Preview the leading records of the loaded trip table
print("First 10 rows of trip data:")
display(df.iloc[:10])

# Per-column descriptive statistics
print("\nBasic statistics:")
display(df.describe())

## 4. Node-Level Analysis

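Aggregate the trip records by origin node to understand demand patterns and agent assignments. Demand columns are summed per node, while prices, travel times, utilities, and choice probabilities are averaged (unweighted) over each node's records.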

In [None]:
# Build the per-origin table in one chain:
#   1. sum the demand columns and average everything else,
#   2. derive each outcome's share of the node's demand and the price gap,
#   3. order the nodes from busiest to quietest.
node_summary = (
    df.groupby('origin')
    .agg({
        **dict.fromkeys(
            ['total_demand', 'demand_agent0', 'demand_agent1', 'demand_rejected'],
            'sum',
        ),
        **dict.fromkeys(
            ['price_agent0', 'price_agent1', 'travel_time',
             'utility_agent0', 'utility_agent1', 'utility_reject',
             'prob_agent0', 'prob_agent1', 'prob_reject'],
            'mean',
        ),
    })
    .reset_index()
    .assign(
        agent0_share=lambda nodes: nodes['demand_agent0'] / nodes['total_demand'],
        agent1_share=lambda nodes: nodes['demand_agent1'] / nodes['total_demand'],
        rejection_rate=lambda nodes: nodes['demand_rejected'] / nodes['total_demand'],
        price_difference=lambda nodes: nodes['price_agent0'] - nodes['price_agent1'],
    )
    .sort_values('total_demand', ascending=False)
)

print(f"\nNode-level summary for {len(node_summary)} origin nodes:")
display(node_summary)

## 5. Overall System Metrics

In [None]:
# Calculate overall metrics
# These four totals are reused by the market-share pie chart and the final
# summary report further down.
total_demand = df['total_demand'].sum()
total_served_agent0 = df['demand_agent0'].sum()
total_served_agent1 = df['demand_agent1'].sum()
total_rejected = df['demand_rejected'].sum()

print("=" * 80)
print("OVERALL SYSTEM METRICS")
print("=" * 80)
print(f"\nTotal Demand: {total_demand:.0f}")
print(f"  Agent 0 served: {total_served_agent0:.0f} ({total_served_agent0/total_demand*100:.1f}%)")
print(f"  Agent 1 served: {total_served_agent1:.0f} ({total_served_agent1/total_demand*100:.1f}%)")
print(f"  Rejected: {total_rejected:.0f} ({total_rejected/total_demand*100:.1f}%)")

# System-wide averages are taken over all trip records. Averaging the
# per-node means in `node_summary` would weight a node with one record the
# same as a node with hundreds, and would disagree with the figures printed
# by the summary report, which are computed from `df`.
print(f"\nAverage Prices:")
print(f"  Agent 0: ${df['price_agent0'].mean():.2f}")
print(f"  Agent 1: ${df['price_agent1'].mean():.2f}")

print(f"\nAverage Travel Time: {df['travel_time'].mean():.1f} minutes")

print(f"\nAverage Choice Probabilities:")
print(f"  Agent 0: {df['prob_agent0'].mean():.3f}")
print(f"  Agent 1: {df['prob_agent1'].mean():.3f}")
print(f"  Reject: {df['prob_reject'].mean():.3f}")

## 6. Visualizations: Demand Distribution by Node

In [None]:
# Two panels: raw demand per origin (left) and how that demand was split
# between the two agents and rejections (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# node_summary is already sorted by demand, so the head is the top 15.
# top_nodes is reused by the plotting cells that follow.
top_nodes = node_summary.head(15)
origin_labels = top_nodes['origin'].astype(str)

# Left panel: total demand
ax1.bar(origin_labels, top_nodes['total_demand'])
ax1.set(
    xlabel='Origin Node',
    ylabel='Total Demand',
    title='Total Demand by Origin Node (Top 15)',
)
ax1.tick_params(axis='x', rotation=45)

# Right panel: stacked bars, each layer resting on the ones drawn before it
served_by_agent0 = top_nodes['demand_agent0']
served_by_agent1 = top_nodes['demand_agent1']
ax2.bar(origin_labels, served_by_agent0, label='Agent 0')
ax2.bar(origin_labels, served_by_agent1,
        bottom=served_by_agent0, label='Agent 1')
ax2.bar(origin_labels, top_nodes['demand_rejected'],
        bottom=served_by_agent0 + served_by_agent1, label='Rejected')
ax2.set(
    xlabel='Origin Node',
    ylabel='Demand',
    title='Demand Assignment by Agent (Top 15 Nodes)',
)
ax2.tick_params(axis='x', rotation=45)
ax2.legend()

plt.tight_layout()
plt.show()

## 7. Visualizations: Price and Utility Comparison

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# One slot per node on the x axis; slots are 1 unit apart.
x = np.arange(len(top_nodes))

# Price comparison: two bars per node, so the group is 2 * 0.35 = 0.7 wide
# and stays inside its slot.
width = 0.35
ax1.bar(x - width/2, top_nodes['price_agent0'], width, label='Agent 0')
ax1.bar(x + width/2, top_nodes['price_agent1'], width, label='Agent 1')
ax1.set_xlabel('Origin Node')
ax1.set_ylabel('Average Price ($)')
ax1.set_title('Average Prices by Agent (Top 15 Nodes)')
ax1.set_xticks(x)
ax1.set_xticklabels(top_nodes['origin'].astype(str), rotation=45)
ax1.legend()

# Utility comparison: three bars per node. Reusing the 0.35 width here would
# make each group 1.05 units wide, so the outer bars would overlap the
# neighbouring nodes' bars. A 0.25 width keeps the group at 0.75 units.
utility_width = 0.25
ax2.bar(x - utility_width, top_nodes['utility_agent0'], utility_width, label='Agent 0')
ax2.bar(x, top_nodes['utility_agent1'], utility_width, label='Agent 1')
ax2.bar(x + utility_width, top_nodes['utility_reject'], utility_width, label='Reject')
ax2.set_xlabel('Origin Node')
ax2.set_ylabel('Average Utility')
ax2.set_title('Average Utilities by Choice (Top 15 Nodes)')
ax2.set_xticks(x)
ax2.set_xticklabels(top_nodes['origin'].astype(str), rotation=45)
ax2.legend()

plt.tight_layout()
plt.show()

## 8. Visualizations: Choice Probabilities

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: grouped bars of the mean choice probability per node.
# Each series is shifted by a whole number of bar widths around the slot.
x = np.arange(len(top_nodes))
width = 0.25
for shift, column, legend_name in (
    (-1, 'prob_agent0', 'Agent 0'),
    (0, 'prob_agent1', 'Agent 1'),
    (1, 'prob_reject', 'Reject'),
):
    ax1.bar(x + shift * width, top_nodes[column], width, label=legend_name)
ax1.set(
    xlabel='Origin Node',
    ylabel='Average Probability',
    title='Average Choice Probabilities (Top 15 Nodes)',
)
ax1.set_xticks(x)
ax1.set_xticklabels(top_nodes['origin'].astype(str), rotation=45)
ax1.legend()

# Right panel: system-wide split of demand between the three outcomes
labels = ['Agent 0', 'Agent 1', 'Rejected']
colors = ['#ff9999', '#66b3ff', '#99ff99']
total_shares = [total_served_agent0, total_served_agent1, total_rejected]
ax2.pie(total_shares, labels=labels, colors=colors,
        autopct='%1.1f%%', startangle=90)
ax2.set_title('Overall Market Share')

plt.tight_layout()
plt.show()

## 9. Detailed Node Analysis

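Drill into individual origin nodes. The cell below defines `analyze_node` and runs it on the three nodes with the highest total demand; call it with any other node ID to inspect that node.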

In [None]:
def analyze_node(node_id, df):
    """Print summary statistics and draw diagnostic plots for one origin node.

    Parameters
    ----------
    node_id : scalar
        Value matched against the ``origin`` column of ``df``.
    df : pandas.DataFrame
        Trip-level table. The function reads the columns ``origin``,
        ``destination``, ``total_demand``, ``demand_agent0``,
        ``demand_agent1``, ``demand_rejected``, ``price_agent0``,
        ``price_agent1``, ``utility_agent0``, ``utility_agent1``,
        ``utility_reject``, ``prob_agent0``, ``prob_agent1``,
        ``prob_reject`` and ``travel_time``.

    Returns
    -------
    None
        Output is printed and shown as a 2x2 matplotlib figure. If no row
        has ``origin == node_id``, a short notice is printed and nothing
        is plotted.
    """
    node_data = df[df['origin'] == node_id]

    # Without matching rows every statistic would be NaN and every plot
    # empty, so report that and stop.
    if node_data.empty:
        print(f"No trip records found for node {node_id}; skipping analysis.")
        return

    print("="*80)
    print(f"DETAILED ANALYSIS FOR NODE {node_id}")
    print("="*80)

    # Summary statistics
    print(f"\nTotal demand: {node_data['total_demand'].sum():.0f}")
    print(f"  Agent 0 served: {node_data['demand_agent0'].sum():.0f}")
    print(f"  Agent 1 served: {node_data['demand_agent1'].sum():.0f}")
    print(f"  Rejected: {node_data['demand_rejected'].sum():.0f}")

    print(f"\nAverage prices:")
    print(f"  Agent 0: ${node_data['price_agent0'].mean():.2f} (std: ${node_data['price_agent0'].std():.2f})")
    print(f"  Agent 1: ${node_data['price_agent1'].mean():.2f} (std: ${node_data['price_agent1'].std():.2f})")

    print(f"\nAverage utilities:")
    print(f"  Agent 0: {node_data['utility_agent0'].mean():.2f}")
    print(f"  Agent 1: {node_data['utility_agent1'].mean():.2f}")
    print(f"  Reject: {node_data['utility_reject'].mean():.2f}")

    print(f"\nAverage choice probabilities:")
    print(f"  Agent 0: {node_data['prob_agent0'].mean():.3f}")
    print(f"  Agent 1: {node_data['prob_agent1'].mean():.3f}")
    print(f"  Reject: {node_data['prob_reject'].mean():.3f}")

    print(f"\nAverage travel time: {node_data['travel_time'].mean():.1f} minutes")
    print(f"Number of O-D pairs: {len(node_data)}")

    # Visualizations
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Price distribution
    axes[0, 0].hist([node_data['price_agent0'], node_data['price_agent1']],
                    label=['Agent 0', 'Agent 1'], bins=15, alpha=0.7)
    axes[0, 0].set_xlabel('Price ($)')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].set_title(f'Price Distribution - Node {node_id}')
    axes[0, 0].legend()

    # Demand by destination (ten largest)
    dest_demand = node_data.groupby('destination')['total_demand'].sum().sort_values(ascending=False).head(10)
    axes[0, 1].bar(dest_demand.index.astype(str), dest_demand.values)
    axes[0, 1].set_xlabel('Destination Node')
    axes[0, 1].set_ylabel('Total Demand')
    axes[0, 1].set_title(f'Top 10 Destinations from Node {node_id}')
    axes[0, 1].tick_params(axis='x', rotation=45)

    # Price vs Travel Time scatter
    axes[1, 0].scatter(node_data['travel_time'], node_data['price_agent0'],
                       alpha=0.5, label='Agent 0')
    axes[1, 0].scatter(node_data['travel_time'], node_data['price_agent1'],
                       alpha=0.5, label='Agent 1')
    axes[1, 0].set_xlabel('Travel Time (minutes)')
    axes[1, 0].set_ylabel('Price ($)')
    axes[1, 0].set_title(f'Price vs Travel Time - Node {node_id}')
    axes[1, 0].legend()

    # Choice probability distribution
    prob_data = [node_data['prob_agent0'].values,
                 node_data['prob_agent1'].values,
                 node_data['prob_reject'].values]
    # Label the boxes through the axis rather than boxplot's `labels`
    # keyword: Matplotlib 3.9 deprecated that keyword in favour of
    # `tick_labels`, and this form works on both older and newer versions.
    axes[1, 1].boxplot(prob_data)
    axes[1, 1].set_xticklabels(['Agent 0', 'Agent 1', 'Reject'])
    axes[1, 1].set_ylabel('Probability')
    axes[1, 1].set_title(f'Choice Probability Distribution - Node {node_id}')

    plt.tight_layout()
    plt.show()

# node_summary is sorted by total demand, so its first three origins are
# the busiest nodes; run the detailed report on each of them.
top_3_nodes = node_summary['origin'].head(3).values
print(f"\nAnalyzing top 3 nodes by demand: {top_3_nodes}\n")

for origin_id in top_3_nodes:
    analyze_node(origin_id, df)

## 10. Correlation Analysis

In [None]:
# Variables included in the pairwise correlation table
corr_columns = [
    'travel_time',
    'price_agent0', 'price_agent1',
    'utility_agent0', 'utility_agent1', 'utility_reject',
    'prob_agent0', 'prob_agent1', 'prob_reject',
    'total_demand',
]

# Correlation of every selected column against every other
corr_matrix = df.loc[:, corr_columns].corr()

# Annotated heatmap, colour scale centred on zero
plt.figure(figsize=(12, 10))
heatmap_ax = sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
heatmap_ax.set_title('Correlation Matrix of Trip Assignment Variables')
plt.tight_layout()
plt.show()

# Pull out the pairs of most interest for the pricing / choice model
print("\nKey Correlations:")
print(f"Price Agent 0 vs Utility Agent 0: {corr_matrix.loc['price_agent0', 'utility_agent0']:.3f}")
print(f"Price Agent 1 vs Utility Agent 1: {corr_matrix.loc['price_agent1', 'utility_agent1']:.3f}")
print(f"Utility Agent 0 vs Prob Agent 0: {corr_matrix.loc['utility_agent0', 'prob_agent0']:.3f}")
print(f"Utility Agent 1 vs Prob Agent 1: {corr_matrix.loc['utility_agent1', 'prob_agent1']:.3f}")
print(f"Travel Time vs Price Agent 0: {corr_matrix.loc['travel_time', 'price_agent0']:.3f}")
print(f"Travel Time vs Price Agent 1: {corr_matrix.loc['travel_time', 'price_agent1']:.3f}")

## 11. Summary Report

In [None]:
# Final text report. Each section pulls the columns it needs into short
# names first, then prints the same figures the earlier cells computed.
rule = "=" * 80
print(rule)
print("TRIP ASSIGNMENT ANALYSIS SUMMARY")
print(rule)

# --- Dataset ---------------------------------------------------------------
time_steps = df['time']
print("\nDataset Information:")
print(f"  City: {city}")
print(f"  Mode: {mode}")
print(f"  Fixed Agent: {fix_agent}")
print(f"  Total trip records: {len(df)}")
print(f"  Number of origin nodes: {df['origin'].nunique()}")
print(f"  Number of destination nodes: {df['destination'].nunique()}")
print(f"  Time steps covered: {time_steps.min()} to {time_steps.max()}")

# --- Demand (totals come from the overall-metrics cell) --------------------
agent0_pct = total_served_agent0/total_demand*100
agent1_pct = total_served_agent1/total_demand*100
rejected_pct = total_rejected/total_demand*100
print("\nDemand Summary:")
print(f"  Total demand: {total_demand:.0f}")
print(f"  Agent 0 market share: {agent0_pct:.1f}%")
print(f"  Agent 1 market share: {agent1_pct:.1f}%")
print(f"  Rejection rate: {rejected_pct:.1f}%")

# --- Prices ----------------------------------------------------------------
price0 = df['price_agent0']
price1 = df['price_agent1']
mean_price0 = price0.mean()
mean_price1 = price1.mean()
print("\nPrice Summary:")
print(f"  Agent 0 - Mean: ${mean_price0:.2f}, Std: ${price0.std():.2f}")
print(f"  Agent 1 - Mean: ${mean_price1:.2f}, Std: ${price1.std():.2f}")
print(f"  Average price difference: ${mean_price0 - mean_price1:.2f}")

# --- Utilities -------------------------------------------------------------
print("\nUtility Summary:")
print(f"  Agent 0 - Mean: {df['utility_agent0'].mean():.2f}")
print(f"  Agent 1 - Mean: {df['utility_agent1'].mean():.2f}")
print(f"  Reject - Mean: {df['utility_reject'].mean():.2f}")

# --- Choice probabilities --------------------------------------------------
print("\nChoice Probability Summary:")
print(f"  Agent 0 - Mean: {df['prob_agent0'].mean():.3f}")
print(f"  Agent 1 - Mean: {df['prob_agent1'].mean():.3f}")
print(f"  Reject - Mean: {df['prob_reject'].mean():.3f}")

# --- Travel time -----------------------------------------------------------
travel = df['travel_time']
print("\nTravel Time Summary:")
print(f"  Mean: {travel.mean():.1f} minutes")
print(f"  Std: {travel.std():.1f} minutes")
print(f"  Min: {travel.min():.1f} minutes")
print(f"  Max: {travel.max():.1f} minutes")

# --- Busiest origins (node_summary is sorted by total demand) --------------
print("\nTop 5 Nodes by Demand:")
for node_row in node_summary.head(5).itertuples(index=False):
    print(f"  Node {int(node_row.origin)}: {node_row.total_demand:.0f} trips "
          f"(Agent 0: {node_row.agent0_share*100:.1f}%, "
          f"Agent 1: {node_row.agent1_share*100:.1f}%, "
          f"Rejected: {node_row.rejection_rate*100:.1f}%)")

print("\n" + "="*80)
print("Analysis complete!")