This notebook shows vizuals of the data analyzed in the deficit_action_paln_notebook

In [3]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
import numpy as np

from IPython.display import display, HTML
import seaborn as sns

1. Equipment flow, conversion rate, unused action, regional and subregional amalysis

In [2]:
#visuals under  ↓ ↓ ↓

def load_data():
    """Load all required data files"""
    try:
        # Read JSON files
        with open(r"C:\Users\adeba\Desktop\hapag-lloyd\output\metrics_locode.json", 'r') as f:
            metrics_locode = json.load(f)
        
        # Read CSVs
        locations_df = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\locations_locode.csv") #replace with approprite file path
        flows_df = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\flows_locode.csv") #replace with approprite file path
        unused_actions_df = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\unused_actions_locode.csv") #replace with approprite file path
        conversion_rates_df = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\conversion_rates_locode.csv") #replace with approprite file path
        
        return metrics_locode, locations_df, flows_df, unused_actions_df, conversion_rates_df
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        raise

def plot_equipment_flows(flows_df):
    """Create equipment flow analysis visualization at the region level"""
    try:
        # Aggregate flows by region
        region_flows = flows_df.groupby(
            ['region_from', 'region_to']
        )['volume_teu_proposal'].sum().reset_index()
        
        # Create Sankey diagram
        fig = go.Figure(data=[go.Sankey(
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=list(set(region_flows['region_from'].unique()) | 
                         set(region_flows['region_to'].unique())),
                color="blue"
            ),
            link=dict(
                source=[list(set(region_flows['region_from'].unique())).index(x) 
                       for x in region_flows['region_from']],
                target=[list(set(region_flows['region_to'].unique())).index(x) 
                       for x in region_flows['region_to']],
                value=region_flows['volume_teu_proposal'],
                color=region_flows['volume_teu_proposal'].map(lambda x: 'rgba(63, 81, 181, {})'.format(x/region_flows['volume_teu_proposal'].max()))
            )
        )])
        
        fig.update_layout(
            title="Aggregated Equipment Flows Between Regions",
            font_size=10,
            height=600
        )
        
        return fig
    except Exception as e:
        print(f"Error creating equipment flow analysis: {str(e)}")
        raise

def plot_top_flow_matrix(flows_df, level='subregion', top_n=20):
    """
    Create a heatmap visualization of top N flows at different geographic levels
    
    Parameters:
    flows_df: DataFrame with flow data
    level: str, one of 'subregion' or 'locode'
    top_n: int, number of top flows to show
    """
    try:
        # Select appropriate columns based on level
        from_col = f'{level}_from'
        to_col = f'{level}_to'
        
        # Aggregate flows and get top N pairs
        flow_matrix = flows_df.groupby(
            [from_col, to_col]
        )['volume_teu_proposal'].sum().reset_index()
        
        # Sort by volume and get top N pairs
        flow_matrix = flow_matrix.nlargest(top_n, 'volume_teu_proposal')
        
        # Get unique locations that appear in top N flows
        unique_origins = flow_matrix[from_col].unique()
        unique_destinations = flow_matrix[to_col].unique()
        
        # Create a complete pivot table with only the selected locations
        pivot_data = flow_matrix.pivot(
            index=from_col,
            columns=to_col,
            values='volume_teu_proposal'
        ).fillna(0)
        
        # Create heatmap
        fig = go.Figure(data=go.Heatmap(
            z=pivot_data.values,
            x=pivot_data.columns,
            y=pivot_data.index,
            colorscale='Blues',
            colorbar=dict(
                title='TEU Volume',
                tickformat=',.0f'
            ),
            hoverongaps=False,
            hovertemplate=
            f"From: %{{y}}<br>" +
            f"To: %{{x}}<br>" +
            "Volume: %{z:,.0f} TEU<br>" +
            "<extra></extra>"
        ))
        
        # Update layout
        fig.update_layout(
            title=f"Top {top_n} Equipment Flows - {level.capitalize()} Level",
            xaxis_title="Destination",
            yaxis_title="Origin",
            height=700,
            width=900,
            xaxis={'tickangle': 45},
            font=dict(size=10)
        )
        
        # Add value annotations for all flows since we're only showing top N
        for i in range(len(pivot_data.index)):
            for j in range(len(pivot_data.columns)):
                value = pivot_data.values[i, j]
                if value > 0:  # Only show non-zero flows
                    fig.add_annotation(
                        x=j,
                        y=i,
                        text=f"{value:,.0f}",
                        font=dict(
                            size=9,
                            color='black' if value < pivot_data.values.max() * 0.7 else 'white'
                        ),
                        showarrow=False
                    )
        
        # Add a text box with total flow information
        total_flow = flow_matrix['volume_teu_proposal'].sum()
        total_all_flow = flows_df['volume_teu_proposal'].sum()
        percentage = (total_flow / total_all_flow) * 100
        
        fig.add_annotation(
            xref="paper",
            yref="paper",
            x=1.0,
            y=1.1,
            text=f"Top {top_n} flows represent {percentage:.1f}% of total volume<br>" +
                 f"Total volume shown: {total_flow:,.0f} TEU",
            showarrow=False,
            font=dict(size=10),
            align="right"
        )
        
        return fig
    
    except Exception as e:
        print(f"Error creating flow matrix visualization: {str(e)}")
        raise

def create_flow_summary(flows_df, top_n=20):
    """
    Create a summary of top N flows with additional statistics
    """
    try:
        # Top flows by volume
        flow_summary = flows_df.groupby(
            ['subregion_from', 'subregion_to']
        ).agg({
            'volume_teu_proposal': ['sum', 'count'],
            'eq_type': lambda x: len(set(x))  # number of unique equipment types
        }).reset_index()
        
        # Clean up column names
        flow_summary.columns = ['Origin', 'Destination', 'Total_TEU', 'Number_of_Flows', 'Equipment_Types']
        flow_summary = flow_summary.sort_values('Total_TEU', ascending=False).head(top_n)
        
        # Create summary visualization
        fig = make_subplots(
            rows=1, cols=2,
            specs=[[{"type": "table"}, {"type": "bar"}]],
            subplot_titles=("Top Flow Pairs", "Flow Volumes")
        )
        
        # Top pairs table
        fig.add_trace(
            go.Table(
                header=dict(
                    values=list(flow_summary.columns),
                    fill_color='paleturquoise',
                    align='left',
                    font=dict(size=12)
                ),
                cells=dict(
                    values=[flow_summary[col] for col in flow_summary.columns],
                    fill_color='lavender',
                    align=['left', 'left', 'right', 'right', 'right'],
                    format=[None, None, ",.0f", ",d", ",d"],
                    font=dict(size=11)
                )
            ),
            row=1, col=1
        )
        
        # Flow volumes bar chart
        fig.add_trace(
            go.Bar(
                x=list(range(len(flow_summary))),
                y=flow_summary['Total_TEU'],
                text=flow_summary.apply(lambda x: f"{x['Origin']} → {x['Destination']}", axis=1),
                hovertemplate="<br>".join([
                    "Origin: %{text}",
                    "Volume: %{y:,.0f} TEU",
                    "<extra></extra>"
                ])
            ),
            row=1, col=2
        )
        
        # Update layout
        fig.update_layout(
            height=600,
            width=1200,
            showlegend=False,
            title_text=f"Top {top_n} Equipment Flows Summary"
        )
        
        # Update bar chart axis
        fig.update_xaxes(showticklabels=False, title_text="", row=1, col=2)
        fig.update_yaxes(title_text="TEU Volume", row=1, col=2)
        
        return fig
    
    except Exception as e:
        print(f"Error creating flow summary: {str(e)}")
        raise



def create_surplus_deficit_dashboard(locations_df):
    """
    Create a comprehensive dashboard for surplus/deficit analysis
    """
    try:
        # Create subplot layout
        fig = make_subplots(
            rows=2, cols=2,
            specs=[
                [{"type": "choropleth"}, {"type": "bar"}],
                [{"type": "table"}, {"type": "bar"}]
            ],
            subplot_titles=(
                "Container Surplus/Deficit by Region",
                "Top 10 Regions by Absolute Net Flow",
                "Detailed Regional Balance",
                "Regional Flow Comparison"
            )
        )

        # 1. Choropleth map
        fig.add_trace(
            go.Choropleth(
                locations=locations_df['region'],
                z=locations_df['net_flow'],
                colorscale='RdBu',
                colorbar_title="Net Flow (TEU)",
                zmin=-locations_df['net_flow'].abs().max(),  # Symmetric scale
                zmax=locations_df['net_flow'].abs().max(),
                hovertemplate="<br>".join([
                    "Region: %{location}",
                    "Net Flow: %{z:,.0f} TEU",
                    "<extra></extra>"
                ])
            ),
            row=1, col=1
        )

        # 2. Top regions by absolute net flow
        top_regions = locations_df.groupby('region').agg({
            'net_flow': 'sum'
        }).reset_index()
        top_regions['abs_net_flow'] = abs(top_regions['net_flow'])
        top_regions = top_regions.nlargest(10, 'abs_net_flow')

        fig.add_trace(
            go.Bar(
                x=top_regions['region'],
                y=top_regions['net_flow'],
                marker_color=top_regions['net_flow'].map(
                    lambda x: 'red' if x < 0 else 'blue'
                ),
                hovertemplate="<br>".join([
                    "Region: %{x}",
                    "Net Flow: %{y:,.0f} TEU",
                    "<extra></extra>"
                ])
            ),
            row=1, col=2
        )
        fig.update_layout(font=dict(size=12), margin=dict(t=100, b=50, l=100, r=50))

        # 3. Detailed table
        region_summary = locations_df.groupby('region').agg({
            'outflow': 'sum',
            'inflow': 'sum',
            'net_flow': 'sum'
        }).reset_index()
        region_summary['turnover'] = region_summary['outflow'] + region_summary['inflow']
        region_summary = region_summary.sort_values('turnover', ascending=False)

        fig.add_trace(
            go.Table(
                header=dict(
                    values=['Region', 'Outflow', 'Inflow', 'Net Flow', 'Total Turnover'],
                    fill_color='paleturquoise',
                    align='left'
                ),
                cells=dict(
                    values=[
                        region_summary['region'],
                        region_summary['outflow'].map('{:,.0f}'.format),
                        region_summary['inflow'].map('{:,.0f}'.format),
                        region_summary['net_flow'].map(lambda x: f'<span style="color:red">{x:,.0f}</span>' if x < 0 else f'{x:,.0f}'),
                        region_summary['turnover'].map('{:,.0f}'.format)
                    ],
                    fill_color=[
                        'white',
                        'white',
                        'white',
                        region_summary['net_flow'].map(
                            lambda x: 'rgba(255,200,200,0.6)' if x < 0 else 'rgba(200,200,255,0.6)'
                        ),
                        'white'
                    ],
                    align='left'
                )
            ),
            row=2, col=1
        )

        # 4. Regional flow comparison
        fig.add_trace(
            go.Bar(
                name='Outflow',
                x=region_summary['region'][:10],
                y=region_summary['outflow'][:10],
                marker_color='lightblue'
            ),
            row=2, col=2
        )

        fig.add_trace(
            go.Bar(
                name='Inflow',
                x=region_summary['region'][:10],
                y=region_summary['inflow'][:10],
                marker_color='lightgreen'
            ),
            row=2, col=2
        )

        fig.update_layout(
            height=1200,
            width=1600,
            showlegend=True,
            title=dict(
                text="Container Balance Analysis Dashboard",
                font=dict(size=18)
            ),
            font=dict(size=14),
            template="plotly_white",
            margin=dict(t=120, b=50, l=50, r=50),
            grid=dict(rows=2, columns=2, pattern='independent')
        )

        return fig

    except Exception as e:
        print(f"Error creating dashboard: {str(e)}")
        raise

def create_subregion_analysis(locations_df):
    """Create detailed subregion level analysis"""
    try:
        # Aggregate by subregion
        subregion_summary = locations_df.groupby(['region', 'subregion']).agg({
            'outflow': 'sum',
            'inflow': 'sum',
            'net_flow': 'sum'
        }).reset_index()

        # Calculate total throughput
        subregion_summary['total_volume'] = subregion_summary['outflow'] + subregion_summary['inflow']

        # Create visualization
        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=(
                "Top 20 Subregions by Volume",
                "Subregion Net Flow Analysis"
            ),
            specs=[[{"type": "bar"}, {"type": "scatter"}]]
        )

        # Top subregions by volume
        top_subregions = subregion_summary.nlargest(20, 'total_volume')

        fig.add_trace(
            go.Bar(
                name='Outflow',
                x=top_subregions['subregion'],
                y=top_subregions['outflow'],
                marker_color='lightblue'
            ),
            row=1, col=1
        )

        fig.add_trace(
            go.Bar(
                name='Inflow',
                x=top_subregions['subregion'],
                y=top_subregions['inflow'],
                marker_color='lightgreen'
            ),
            row=1, col=1
        )

        # Net flow analysis with improved label placement
        max_val = max(top_subregions['outflow'].max(), top_subregions['inflow'].max())
        
        # Add diagonal line first (so it's in the background)
        fig.add_trace(
            go.Scatter(
                x=[0, max_val],
                y=[0, max_val],
                mode='lines',
                line=dict(dash='dash', color='gray'),
                name='Balanced Flow',
                hovertemplate=None,
                showlegend=True
            ),
            row=1, col=2
        )

        # Add scatter plot with improved labels
        fig.add_trace(
            go.Scatter(
                x=top_subregions['outflow'],
                y=top_subregions['inflow'],
                mode='markers+text',
                text=top_subregions['subregion'],
                textposition='top right',  # Consistent label placement
                marker=dict(
                    size=(top_subregions['net_flow'].abs()/top_subregions['net_flow'].abs().max() * 30) + 10,  # Scaled marker size
                    color=top_subregions['net_flow'],
                    colorscale='RdBu',
                    showscale=True,
                    colorbar=dict(
                        title="Net Flow",
                        x=1.15  # Move colorbar further right
                    )
                ),
                hovertemplate="<br>".join([
                    "Subregion: %{text}",
                    "Outflow: %{x:,.0f} TEU",
                    "Inflow: %{y:,.0f} TEU",
                    "<extra></extra>"
                ])
            ),
            row=1, col=2
        )

        # Update layout with improved spacing and formatting
        fig.update_layout(
            height=700,  # Increased height
            width=1400,  # Increased width
            title_text="Subregional Container Flow Analysis",
            barmode='group',
            showlegend=True,
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            ),
            margin=dict(r=150),  # More right margin for colorbar
            font=dict(size=10)
        )

        # Update axes
        fig.update_xaxes(
            tickangle=45,
            title_text="Subregion",
            row=1, 
            col=1
        )
        fig.update_yaxes(
            title_text="Volume (TEU)",
            row=1,
            col=1
        )
        fig.update_xaxes(
            title_text="Outflow (TEU)",
            range=[-max_val*0.05, max_val*1.05],  # Add some padding
            row=1,
            col=2
        )
        fig.update_yaxes(
            title_text="Inflow (TEU)",
            range=[-max_val*0.05, max_val*1.05],  # Add some padding
            row=1,
            col=2
        )

        return fig

    except Exception as e:
        print(f"Error creating subregion analysis: {str(e)}")
        raise


def create_surplus_deficit_dashboard(locations_df):
    """Create a comprehensive dashboard for surplus/deficit analysis"""
    try:
        # Create subplot layout - replacing choropleth with treemap
        fig = make_subplots(
            rows=2, cols=2,
            specs=[
                [{"type": "treemap"}, {"type": "bar"}],
                [{"type": "table"}, {"type": "bar"}]
            ],
            subplot_titles=(
                "Container Surplus/Deficit by Region",
                "Top 10 Regions by Absolute Net Flow",
                "Detailed Regional Balance",
                "Regional Flow Comparison"
            )
        )

        # 1. Treemap instead of Choropleth
        region_data = locations_df.groupby('region').agg({
            'net_flow': 'sum',
            'outflow': 'sum',
            'inflow': 'sum'
        }).reset_index()
        region_data['total_volume'] = region_data['outflow'] + region_data['inflow']
        
        fig.add_trace(
            go.Treemap(
                labels=region_data['region'],
                parents=[""] * len(region_data),
                values=region_data['total_volume'],
                customdata=region_data['net_flow'],
                marker=dict(
                    colors=region_data['net_flow'],
                    colorscale='RdBu',
                    cmid=0  # Center the colorscale at 0
                ),
                hovertemplate="<br>".join([
                    "Region: %{label}",
                    "Total Volume: %{value:,.0f}",
                    "Net Flow: %{customdata:,.0f}",
                    "<extra></extra>"
                ])
            ),
            row=1, col=1
        )

        # 2. Top regions by absolute net flow (unchanged)
        top_regions = region_data.copy()
        top_regions['abs_net_flow'] = abs(top_regions['net_flow'])
        top_regions = top_regions.nlargest(10, 'abs_net_flow')

        fig.add_trace(
            go.Bar(
                x=top_regions['region'],
                y=top_regions['net_flow'],
                marker_color=top_regions['net_flow'].map(
                    lambda x: 'red' if x < 0 else 'blue'
                ),
                text=top_regions['net_flow'].round(0),
                textposition='auto'
            ),
            row=1, col=2
        )

        # 3. Detailed table - Fix the color formatting
        fig.add_trace(
            go.Table(
                header=dict(
                    values=['Region', 'Outflow', 'Inflow', 'Net Flow', 'Total Turnover'],
                    fill_color='paleturquoise',
                    align='left'
                ),
                cells=dict(
                    values=[
                        region_data['region'],
                        region_data['outflow'].map('{:,.0f}'.format),
                        region_data['inflow'].map('{:,.0f}'.format),
                        region_data['net_flow'].round(0),  # Just show numbers
                        (region_data['outflow'] + region_data['inflow']).map('{:,.0f}'.format)
                    ],
                    fill_color=[
                        'white',
                        'white',
                        'white',
                        region_data['net_flow'].map(
                            lambda x: 'rgba(255,200,200,0.6)' if x < 0 else 'rgba(200,200,255,0.6)'
                        ),
                        'white'
                    ],
                    align='left',
                    font=dict(
                        color=[
                            'black',
                            'black',
                            'black',
                            region_data['net_flow'].map(  # Color the text directly
                                lambda x: 'red' if x < 0 else 'blue'
                            ),
                            'black'
                        ]
                    )
                )
            ),
            row=2, col=1
        )

        # 4. Regional flow comparison (unchanged)
        fig.add_trace(
            go.Bar(
                name='Outflow',
                x=region_data['region'][:10],
                y=region_data['outflow'][:10],
                marker_color='lightblue',
                text=region_data['outflow'][:10].round(0),
                textposition='auto'
            ),
            row=2, col=2
        )

        fig.add_trace(
            go.Bar(
                name='Inflow',
                x=region_data['region'][:10],
                y=region_data['inflow'][:10],
                marker_color='lightgreen',
                text=region_data['inflow'][:10].round(0),
                textposition='auto'
            ),
            row=2, col=2
        )

        # Update layout
        fig.update_layout(
            height=1200,
            width=1600,
            title=dict(
                text="Container Balance Analysis Dashboard",
                font=dict(size=18)
            ),
            template="plotly_white"
        )

        return fig

    except Exception as e:
        print(f"Error creating dashboard: {str(e)}")
        raise

def plot_conversion_rates(metrics_locode):
    """Create conversion rate analysis visualization"""
    try:
        # Extract data
        equipment = metrics_locode['equipment']
        source = metrics_locode['source']
        transport = metrics_locode['transport']
        
        # Create subplots
        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{'type': 'domain'}, {'type': 'domain'}],
                  [{'type': 'xy', 'colspan': 2}, None]],
            subplot_titles=('Equipment Type Conversion', 'Source Conversion',
                          'Transport Mode Conversion')
        )
        
        # Equipment type conversion (Sunburst)
        eq_labels = list(equipment['volume_teu_proposal'].keys())
        eq_parents = [''] * len(eq_labels)
        eq_values = [equipment['volume_teu_actual'][k] / equipment['volume_teu_proposal'][k] * 100 
                    for k in eq_labels]
        
        fig.add_trace(go.Sunburst(
            labels=eq_labels,
            parents=eq_parents,
            values=eq_values,
            branchvalues='total'
        ), row=1, col=1)
        
        # Source conversion (Pie)
        fig.add_trace(go.Pie(
            labels=['System', 'User'],
            values=[source['volume_teu_actual']['S'] / source['volume_teu_proposal']['S'] * 100,
                   source['volume_teu_actual']['U'] / source['volume_teu_proposal']['U'] * 100],
            hole=.3
        ), row=1, col=2)
        
        # Transport mode conversion (Bar)
        transport_modes = list(transport['volume_teu_proposal'].keys())
        conversion_rates = [transport['volume_teu_actual'][k] / transport['volume_teu_proposal'][k] * 100 
                          for k in transport_modes]
        
        fig.add_trace(go.Bar(
            x=transport_modes,
            y=conversion_rates,
            text=[f"{v:.1f}%" for v in conversion_rates],
            textposition='auto',
        ), row=2, col=1)
        
        fig.update_layout(height=800, title_text="Conversion Rate Analysis")
        return fig
    except Exception as e:
        print(f"Error creating conversion rate analysis: {str(e)}")
        raise

def plot_unused_actions(unused_actions_df):
    """Create unused actions analysis visualization"""
    try:
        # Create subplots
        fig = make_subplots(
            rows=2, cols=2,
            specs=[[{'type': 'xy'}, {'type': 'xy'}],
                   [{'type': 'domain'}, {'type': 'xy'}]],
            subplot_titles=('Unused Actions by Region',
                          'Top Equipment Types in Unused Actions',
                          'Transport Mode Distribution',
                          'Timeline of Unused Proposals')
        )
        
        # Unused actions heatmap
        unused_matrix = unused_actions_df.groupby(
            ['region_from', 'region_to']
        )['unused_count'].sum().reset_index()
        pivot_matrix = unused_matrix.pivot(
            index='region_from',
            columns='region_to',
            values='unused_count'
        )
        
        # Top equipment types
        eq_unused = unused_actions_df.groupby('equipment_type')[
            'unused_count'].sum().sort_values(ascending=False)
        
        # Transport mode distribution
        mot_unused = unused_actions_df.groupby('mode_of_transportation')[
            'unused_count'].sum()
        
        # Timeline analysis
        timeline = pd.to_datetime(unused_actions_df['first_proposed'])
        timeline_counts = timeline.value_counts().sort_index()
        
        # Add traces
        fig.add_trace(
            go.Heatmap(
                z=pivot_matrix.values,
                x=pivot_matrix.columns,
                y=pivot_matrix.index,
                colorscale='Viridis'
            ),
            row=1, col=1
        )
        
        fig.add_trace(
            go.Bar(x=eq_unused.index[:10], y=eq_unused.values[:10]),
            row=1, col=2
        )
        
        fig.add_trace(
            go.Pie(labels=mot_unused.index, values=mot_unused.values),
            row=2, col=1
        )
        
        fig.add_trace(
            go.Scatter(x=timeline_counts.index, y=timeline_counts.values),
            row=2, col=2
        )
        
        fig.update_layout(height=1000, title_text="Unused Actions Analysis")
        return fig
    except Exception as e:
        print(f"Error creating unused actions analysis: {str(e)}")
        raise

def plot_regional_analysis(flows_df, locations_df, region):
    """Create detailed analysis for a specific region"""
    try:
        # Filter data for the region
        region_flows = flows_df[
            (flows_df['region_from'] == region) | 
            (flows_df['region_to'] == region)
        ]
        region_locations = locations_df[locations_df['region'] == region]
        
        # Create subplots
        fig = make_subplots(
            rows=2, cols=2,
            specs=[
                [{'type': 'domain'}, {'type': 'xy'}],
                [{'type': 'sankey'}, {'type': 'xy'}]
            ],
            subplot_titles=(
                f'{region} - Equipment Distribution',
                f'{region} - Transport Mode Split',
                f'{region} - Top Subregional Flows',
                f'{region} - Net Flow by Subregion'
            )
        )
        
        # Equipment distribution
        eq_dist = region_flows.groupby('eq_type')['volume_teu_proposal'].sum()
        fig.add_trace(
            go.Pie(labels=eq_dist.index, values=eq_dist.values),
            row=1, col=1
        )
        
        # Transport mode split
        transport_split = region_flows.groupby(
            'eq_type'
        )['volume_teu_proposal'].sum()
        fig.add_trace(
            go.Bar(x=transport_split.index, y=transport_split.values),
            row=1, col=2
        )
        
        # Get top 10 subregional flows
        top_subregion_flows = region_flows.groupby(
            ['subregion_from', 'subregion_to']
        )['volume_teu_proposal'].sum().reset_index().nlargest(10, 'volume_teu_proposal')
        
        # Calculate percentage of total flow
        total_flow = region_flows['volume_teu_proposal'].sum()
        flows_included = top_subregion_flows['volume_teu_proposal'].sum()
        coverage_pct = (flows_included / total_flow) * 100
        
        # Only add Sankey if there are flows to show
        if not top_subregion_flows.empty:
            unique_subregions = list(set(top_subregion_flows['subregion_from'].unique()) | 
                                   set(top_subregion_flows['subregion_to'].unique()))
            
            fig.add_trace(
                go.Sankey(
                    node=dict(
                        pad=15,
                        thickness=20,
                        line=dict(color="black", width=0.5),
                        label=unique_subregions
                    ),
                    link=dict(
                        source=[unique_subregions.index(x) for x in top_subregion_flows['subregion_from']],
                        target=[unique_subregions.index(x) for x in top_subregion_flows['subregion_to']],
                        value=top_subregion_flows['volume_teu_proposal'],
                        # Add color gradient based on volume
                        color=top_subregion_flows['volume_teu_proposal'].map(
                            lambda x: f'rgba(63, 81, 181, {x/top_subregion_flows["volume_teu_proposal"].max():.2f})'
                        )
                    )
                ),
                row=2, col=1
            )
            
            # Add annotation showing coverage at figure level
            fig.add_annotation(
                xref="paper",
                yref="paper",
                x=0.25,  # Center of left subplot in second row
                y=0.35,  # Position above the Sankey diagram
                text=f"Top 10 flows represent {coverage_pct:.1f}% of total regional volume",
                showarrow=False,
                font=dict(size=10)
            )
        
        # Net flow by subregion
        subregion_net = region_locations.groupby('subregion')['net_flow'].sum()
        fig.add_trace(
            go.Bar(x=subregion_net.index, y=subregion_net.values),
            row=2, col=2
        )
        
        fig.update_layout(height=1000, title_text=f"Regional Analysis - {region}")
        return fig
    except Exception as e:
        print(f"Error creating regional analysis for {region}: {str(e)}")
        raise

# Load data
metrics_locode, locations_df, flows_df, unused_actions_df, conversion_rates_df = load_data()

# Create and display all visualizations
print("Equipment Flow Analysis:")
plot_equipment_flows(flows_df).show()

# For subregion level
subregion_viz = plot_top_flow_matrix(flows_df, level='subregion', top_n=20)
subregion_viz.show()

# For locode level
locode_viz = plot_top_flow_matrix(flows_df, level='locode', top_n=20)
locode_viz.show()

# For summary dashboard
summary_viz = create_flow_summary(flows_df, top_n=20)
summary_viz.show()

# Create main dashboard
dashboard = create_surplus_deficit_dashboard(locations_df)
dashboard.show()

# Create subregional analysis
subregion_viz = create_subregion_analysis(locations_df)
subregion_viz.show()

print("\nConversion Rate Analysis:")
plot_conversion_rates(metrics_locode).show()

print("\nUnused Actions Analysis:")
plot_unused_actions(unused_actions_df).show()

print("\nRegional Analyses:")
for region in locations_df['region'].unique():
    print(f"\nAnalyzing {region}:")
    plot_regional_analysis(flows_df, locations_df, region).show()

Equipment Flow Analysis:



Conversion Rate Analysis:



Unused Actions Analysis:



Regional Analyses:

Analyzing M. EAST:



Analyzing S. EUROPE:



Analyzing L.AMERICA:



Analyzing EUROPE:



Analyzing ASIA:



Analyzing N.AMERICA:


2. circular and ienficcient routing analysis

In [20]:
def yearweek_to_date(yearweek):
    """Convert yearweek to date"""
    year = int(str(yearweek)[:4])
    week = int(str(yearweek)[4:])
    return pd.to_datetime(f"{year}-W{week:02d}-1", format="%Y-W%W-%w")

class LogisticsAnalysisDashboard:
    def __init__(self, df_circular, df_inefficient):
        """
        Initialize dashboard with both circular movements and inefficient routes dataframes
        """
        self.df_circular = df_circular.copy()
        self.df_inefficient = df_inefficient.copy()
        
        # Calculate basic statistics
        self._calculate_stats()

    def _calculate_stats(self):
        """Calculate basic statistics for both datasets"""
        # Circular movements stats
        self.circular_stats = {
            'total_circuits': len(self.df_circular),
            'unique_circuits': self.df_circular['cycle_path'].nunique(),
            'total_movements': self.df_circular['movement_count'].sum(),
            'unique_equipment': self.df_circular['eq_type'].nunique(),
            'unique_locations': len(set([loc for locs in self.df_circular['locations'] for loc in locs]))
        }
        
        # Inefficient routes stats
        self.inefficient_stats = {
            'total_routes': len(self.df_inefficient),
            'total_volume': self.df_inefficient['volume'].sum(),
            'avg_extra_stops': self.df_inefficient['extra_stops'].mean(),
            'unique_equipment': self.df_inefficient['eq_type'].nunique(),
            'unique_locations': len(set(self.df_inefficient['from_location'].unique()) | 
                                  set(self.df_inefficient['to_location'].unique()))
        }

    def display_summary_stats(self):
        """Display summary statistics in a formatted table"""
        summary_html = """
        <div style='display: flex; justify-content: space-around; margin: 20px;'>
            <div style='border: 1px solid #ddd; padding: 20px; border-radius: 10px; width: 45%;'>
                <h3 style='text-align: center;'>Circular Movements</h3>
                <table style='width: 100%;'>
                    <tr><td>Total Circuits:</td><td>{}</td></tr>
                    <tr><td>Unique Circuits:</td><td>{}</td></tr>
                    <tr><td>Total Movements:</td><td>{}</td></tr>
                    <tr><td>Equipment Types:</td><td>{}</td></tr>
                    <tr><td>Unique Locations:</td><td>{}</td></tr>
                </table>
            </div>
            <div style='border: 1px solid #ddd; padding: 20px; border-radius: 10px; width: 45%;'>
                <h3 style='text-align: center;'>Inefficient Routes</h3>
                <table style='width: 100%;'>
                    <tr><td>Total Routes:</td><td>{}</td></tr>
                    <tr><td>Total Volume:</td><td>{:,.0f} TEU</td></tr>
                    <tr><td>Avg Extra Stops:</td><td>{:.2f}</td></tr>
                    <tr><td>Equipment Types:</td><td>{}</td></tr>
                    <tr><td>Unique Locations:</td><td>{}</td></tr>
                </table>
            </div>
        </div>
        """.format(
            self.circular_stats['total_circuits'],
            self.circular_stats['unique_circuits'],
            self.circular_stats['total_movements'],
            self.circular_stats['unique_equipment'],
            self.circular_stats['unique_locations'],
            self.inefficient_stats['total_routes'],
            self.inefficient_stats['total_volume'],
            self.inefficient_stats['avg_extra_stops'],
            self.inefficient_stats['unique_equipment'],
            self.inefficient_stats['unique_locations']
        )
        display(HTML(summary_html))

    def plot_equipment_analysis(self):
        """Create equipment type analysis plots"""
        # Equipment counts and movements
        eq_counts_circular = self.df_circular['eq_type'].value_counts()
        eq_movements = self.df_circular.groupby('eq_type')['movement_count'].sum()
        
        # Inefficient routes analysis
        eq_inefficient_volume = self.df_inefficient.groupby('eq_type')['volume'].sum()
        eq_inefficient_stops = self.df_inefficient.groupby('eq_type')['extra_stops'].mean()

        # Create subplot
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Equipment Distribution - Circular Movements',
                'Total Movements by Equipment Type',
                'Volume by Equipment - Inefficient Routes',
                'Average Extra Stops by Equipment'
            )
        )

        # Plot 1: Equipment Distribution (Circular)
        fig.add_trace(
            go.Bar(
                x=eq_counts_circular.index,
                y=eq_counts_circular.values,
                name='Circuit Count'
            ),
            row=1, col=1
        )

        # Plot 2: Movements by Equipment
        fig.add_trace(
            go.Bar(
                x=eq_movements.index,
                y=eq_movements.values,
                name='Total Movements',
                marker_color='lightgreen'
            ),
            row=1, col=2
        )

        # Plot 3: Volume by Equipment (Inefficient)
        fig.add_trace(
            go.Bar(
                x=eq_inefficient_volume.index,
                y=eq_inefficient_volume.values,
                name='Volume (Inefficient)',
                marker_color='coral'
            ),
            row=2, col=1
        )

        # Plot 4: Average Extra Stops
        fig.add_trace(
            go.Bar(
                x=eq_inefficient_stops.index,
                y=eq_inefficient_stops.values,
                name='Avg Extra Stops',
                marker_color='lightblue'
            ),
            row=2, col=2
        )

        fig.update_layout(
            height=800,
            showlegend=False,
            title_text="Equipment Analysis Dashboard",
            title_x=0.5
        )
        
        return fig

    def plot_time_series_analysis(self):
        """Create time series analysis plots"""
        try:
            # Convert yearweek to date for circular movements
            self.df_circular['start_date'] = self.df_circular['start_yearweek'].apply(yearweek_to_date)
            
            # For inefficient routes, use existing start_date
            # If start_date column doesn't exist, convert from yearweek
            if 'start_date' not in self.df_inefficient.columns:
                self.df_inefficient['start_date'] = self.df_inefficient['start_yearweek'].apply(yearweek_to_date)
            
            # Daily circuits - using temporary date column
            daily_circuits = self.df_circular.groupby('start_date')['cycle_path'].count()
            
            # Daily inefficient routes
            daily_inefficient = self.df_inefficient.groupby('start_date').agg({
                'volume': 'sum',
                'extra_stops': 'mean'
            })

            # Create subplot
            fig = make_subplots(
                rows=2, cols=1,
                subplot_titles=(
                    'Circular Movements Over Time',
                    'Inefficient Routes - Volume and Extra Stops'
                ),
                specs=[[{"secondary_y": False}],
                    [{"secondary_y": True}]],
                vertical_spacing=0.15
            )

            # Plot 1: Circular Movements
            fig.add_trace(
                go.Scatter(
                    x=daily_circuits.index,
                    y=daily_circuits.values,
                    name='Circuits',
                    mode='lines+markers',
                    line=dict(color='blue'),
                    marker=dict(size=6)
                ),
                row=1, col=1
            )

            # Plot 2: Inefficient Routes (left y-axis for volume)
            fig.add_trace(
                go.Scatter(
                    x=daily_inefficient.index,
                    y=daily_inefficient['volume'],
                    name='Volume',
                    mode='lines+markers',
                    line=dict(color='green'),
                    marker=dict(size=6)
                ),
                row=2, col=1
            )

            # Plot 3: Extra Stops (right y-axis)
            fig.add_trace(
                go.Scatter(
                    x=daily_inefficient.index,
                    y=daily_inefficient['extra_stops'],
                    name='Extra Stops',
                    mode='lines+markers',
                    line=dict(color='red'),
                    marker=dict(size=6)
                ),
                row=2, col=1,
                secondary_y=True
            )

            # Update layout
            fig.update_layout(
                height=800,
                showlegend=True,
                title_text="Time Series Analysis Dashboard",
                title_x=0.5,
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1
                )
            )

            # Update axes titles and formatting
            fig.update_xaxes(title_text="Date", row=1, col=1)
            fig.update_xaxes(title_text="Date", row=2, col=1)
            
            fig.update_yaxes(title_text="Number of Circuits", row=1, col=1)
            fig.update_yaxes(title_text="Volume (TEU)", row=2, col=1)
            fig.update_yaxes(title_text="Average Extra Stops", secondary_y=True, row=2, col=1)

            # Remove temporary date columns
            self.df_circular.drop('start_date', axis=1, inplace=True)
            
            return fig
        
        except Exception as e:
            print(f"Error in time series analysis: {str(e)}")
            print("Available columns in circular movements:", self.df_circular.columns.tolist())
            print("Available columns in inefficient routes:", self.df_inefficient.columns.tolist())
            raise

    def plot_route_analysis(self):
        """Create route analysis visualizations"""
        # Circuit size analysis
        circuit_sizes = self.df_circular['locations'].apply(len).value_counts().sort_index()
        
        # Route matrix for inefficient routes
        route_matrix = pd.crosstab(
            self.df_inefficient['from_location'],
            self.df_inefficient['to_location'],
            values=self.df_inefficient['volume'],
            aggfunc='sum'
        )
        
        # Take top 15 locations
        top_locations = (
            self.df_inefficient.groupby('from_location')['volume'].sum() +
            self.df_inefficient.groupby('to_location')['volume'].sum()
        ).nlargest(15).index
        
        route_matrix_filtered = route_matrix.loc[
            route_matrix.index.isin(top_locations),
            route_matrix.columns.isin(top_locations)
        ]

        # Create subplot
        fig = make_subplots(
            rows=1, cols=2,
            subplot_titles=(
                'Circuit Size Distribution',
                'Top Routes Heatmap'
            ),
            specs=[[{"type": "bar"}, {"type": "heatmap"}]]
        )

        # Plot 1: Circuit Size Distribution
        fig.add_trace(
            go.Bar(
                x=circuit_sizes.index,
                y=circuit_sizes.values,
                name='Circuit Sizes'
            ),
            row=1, col=1
        )

        # Plot 2: Route Heatmap
        fig.add_trace(
            go.Heatmap(
                z=route_matrix_filtered.values,
                x=route_matrix_filtered.columns,
                y=route_matrix_filtered.index,
                colorscale='YlOrRd',
                name='Route Volume'
            ),
            row=1, col=2
        )

        fig.update_layout(
            height=600,
            title_text="Route Analysis Dashboard",
            title_x=0.5
        )

        return fig

    def plot_transport_mode_analysis(self):
        """Create transport mode analysis visualization"""
        try:
                # Analyze transport modes for circular movements
                circular_modes = []
                for modes in self.df_circular['transport_modes']:
                    # Handle case where modes might be string representation of list
                    if isinstance(modes, str):
                        modes = eval(modes)  # Convert string representation to list
                    circular_modes.extend(modes)
                circular_mode_counts = pd.Series(circular_modes).value_counts()

                # Analyze transport modes for inefficient routes
                inefficient_modes = []
                for modes in self.df_inefficient['transport_modes']:
                    # Handle case where modes might be string representation of list
                    if isinstance(modes, str):
                        modes = eval(modes)  # Convert string representation to list
                    inefficient_modes.extend(modes)
                inefficient_mode_counts = pd.Series(inefficient_modes).value_counts()

                # Create subplot with domain type for pie charts
                fig = make_subplots(
                    rows=1, cols=2,
                    specs=[[{'type': 'domain'}, {'type': 'domain'}]],
                    subplot_titles=(
                        'Transport Modes - Circular Movements',
                        'Transport Modes - Inefficient Routes'
                    )
                )

                # Plot 1: Circular Movements Transport Modes
                fig.add_trace(
                    go.Pie(
                        labels=circular_mode_counts.index,
                        values=circular_mode_counts.values,
                        name='Circular',
                        domain={'x': [0, 0.45]},  # Domain in Pie constructor
                        textinfo='label+percent'  # Added to show more information
                    ),
                    row=1, col=1
                )

                # Plot 2: Inefficient Routes Transport Modes
                fig.add_trace(
                    go.Pie(
                        labels=inefficient_mode_counts.index,
                        values=inefficient_mode_counts.values,
                        name='Inefficient',
                        domain={'x': [0.55, 1]},  # Domain in Pie constructor
                        textinfo='label+percent'  # Added to show more information
                    ),
                    row=1, col=2
                )

                fig.update_layout(
                    height=500,
                    title_text="Transport Mode Analysis",
                    title_x=0.5
                )

                return fig
        except Exception as e:
                print(f"Error in transport mode analysis: {str(e)}")
                print("Sample circular modes:", self.df_circular['transport_modes'].iloc[0])
                print("Sample inefficient modes:", self.df_inefficient['transport_modes'].iloc[0])
                raise
    

    def display_full_dashboard(self):
        """Display the complete dashboard"""
        # Display summary statistics
        self.display_summary_stats()
        
        # Display equipment analysis
        equipment_fig = self.plot_equipment_analysis()
        equipment_fig.show()
        
        # Display time series analysis
        time_series_fig = self.plot_time_series_analysis()
        time_series_fig.show()
        
        # Display route analysis
        route_fig = self.plot_route_analysis()
        route_fig.show()
        
        # Display transport mode analysis
        transport_fig = self.plot_transport_mode_analysis()
        transport_fig.show()

In [21]:
df_circular = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\networkx_circular_movements.csv")
df_inefficient = pd.read_csv(r"C:\Users\adeba\Desktop\hapag-lloyd\output\routing_inefficiencies.csv")


# Create and display dashboard
dashboard = LogisticsAnalysisDashboard(df_circular, df_inefficient)
dashboard.display_full_dashboard()

0,1
Total Circuits:,3198
Unique Circuits:,1558
Total Movements:,25632
Equipment Types:,8
Unique Locations:,31

0,1
Total Routes:,4612
Total Volume:,"251,646 TEU"
Avg Extra Stops:,1.32
Equipment Types:,10
Unique Locations:,234
