In [None]:
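Plan:

1. Filter all tables to the three most recent days; pull only from slam_leg for now.
2. First filter for the final leg, but also for the second-to-last leg (see the slam-leg step further down).
3. Create a table for the node mapping.
4. Create a table from the filtered slam_leg data (the original note was left unfinished here; confirm the intended source).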

In [None]:
-- Step 1: Node mapping (locations)
-- Session-scoped lookup of active US/CA nodes: every delivery station
-- (location_type = 'DS') plus a fixed allow-list of extra location ids.
-- Each row also carries the run clock so downstream steps share one reference time.
-- NOTE(review): the *_utc column names assume the session time zone is UTC
-- (GETDATE() returns session-local time) -- confirm.
DROP TABLE IF EXISTS amzl_mapping;
CREATE TEMP TABLE amzl_mapping AS
SELECT
    nm.location_id                                       AS node,
    nm.timezone,
    nm.region,
    nm.country,
    -- Statement start time and the midnight that opens the current day.
    GETDATE()                                            AS run_time_utc,
    CAST(CAST(GETDATE() AS DATE) AS TIMESTAMP)           AS day_start_utc,
    -- The same two instants expressed in the node's own time zone.
    CONVERT_TIMEZONE('UTC', nm.timezone, GETDATE())      AS run_time_local,
    CONVERT_TIMEZONE('UTC', nm.timezone, CAST(CAST(GETDATE() AS DATE) AS TIMESTAMP)) AS local_offset_utc
FROM "amzlanalytics"."perfectmile"."d_perfectmile_node_mapping_mdm" AS nm
WHERE nm.location_status = 'A'
  AND nm.country IN ('US', 'CA')
  -- Optional region restriction, e.g.:
  --   AND UPPER(nm.region) IN ('ROCKIES', 'UPSTATE NY')
  AND (
        nm.location_type = 'DS'
        OR nm.location_id IN ('MCO5', 'ZYG1', 'ZYN9', 'XVV2', 'XVV3', 'XYT6', 'XLC1', 'XVC1', 'XNK2')
      );

In [None]:
Get unique shipments from slam_leg. The query below currently keeps only the final leg per shipment (`leg_rn = 1`); the second-to-last leg is planned but not yet extracted.

In [None]:

-- Step 2: Create a temp table for slam legs
--
-- Keeps ONE row per shipment_id: the most recent leg seen in the last three days
-- (latest request_timestamp, then latest pickup_date, then highest leg_sequence_id),
-- restricted to legs whose leg_warehouse_id is a node present in amzl_mapping.
-- The node's timezone/region/country and the run-clock columns are copied from
-- amzl_mapping onto every row.
--
-- Columns added for the downstream consolidated step, which reads
-- sl.shipment_key and sl.source_table:
--   shipment_key : NOTE(review) derived from shipment_id on the assumption that it is
--                  the same identifier (and type) as gmp.shipment_key -- confirm.
--   source_table : constant 'slam_leg', the value the sample queries filter on.

DROP TABLE IF EXISTS slam_leg;
-- SORTKEY needs at least one column; shipment_key is the join column used downstream.
CREATE TEMP TABLE slam_leg SORTKEY(shipment_key) AS (
    SELECT a.*,
           a.shipment_id AS shipment_key,
           CAST('slam_leg' AS VARCHAR(16)) AS source_table,
           m.timezone,
           m.region,
           m.country,
           m.run_time_utc,
           m.day_start_utc,
           m.run_time_local,
           m.local_offset_utc
    FROM (
        SELECT
            shipment_id,
            package_id,
            route_id,
            route_warehouse_id,
            route_ship_method,
            route_internal_sort_code,
            route_external_sort_code,
            processing_date,
            leg_sequence_id,
            leg_id,
            leg_warehouse_id,
            leg_ship_method,
            leg_internal_sort_code,
            leg_external_sort_code,
            leg_destination_warehouse_id,
            ship_option,
            pickup_date,
            estimated_arrival_date,
            transit_time_in_hours,
            "zone",
            request_timestamp,
            dw_creation_date,
            unloaded_at,
            -- leg_rn = 1 marks the leg kept per shipment. leg_sequence_id is the final
            -- tiebreaker so the pick is deterministic when timestamps are equal.
            ROW_NUMBER() OVER (
                PARTITION BY shipment_id
                ORDER BY request_timestamp DESC, pickup_date DESC, leg_sequence_id DESC
            ) AS leg_rn
        FROM backlog_datasets.ATROPS.o_slam_packages_leg_live
        -- Rolling three-day window measured from statement start.
        WHERE request_timestamp > GETDATE() - INTERVAL '3 DAYS'
    ) a
    -- Inner join: shipments whose selected leg is not at a mapped node are dropped.
    JOIN amzl_mapping m ON m.node = a.leg_warehouse_id
    WHERE a.leg_rn = 1
);

In [ ]:
-- Step 3: Create a consolidated table that joins slam_leg, gmp, and pse tables
--
-- One wide row per matched combination of:
--   slam_leg (sl)  <-> gmp (g)  on shipment_key
--   gmp (g)        <-> pse (p)  on tracking id
-- Both are FULL OUTER JOINs, so rows without a partner on either side are kept
-- with the other side's columns NULL.
-- gmp rows are then enriched (LEFT JOIN) with nc_load_summary_snapshot via
-- parent_container_id = vrid, and with node_type location types.
--
-- Requirements on inputs (not all defined in this cell):
--   * slam_leg must expose shipment_key and source_table.
--   * gmp, pse, nc_load_summary_snapshot and node_type must already exist.
-- NOTE(review): shipment_key is coalesced from slam_leg and gmp only, so a pse row
-- with no gmp match has a NULL shipment_key.
-- NOTE(review): if nc_load_summary_snapshot holds several rows per vrid, gmp rows
-- will fan out -- confirm vrid uniqueness.

DROP TABLE IF EXISTS consolidated;
CREATE TEMP TABLE consolidated DISTKEY(shipment_key) SORTKEY(shipment_key) AS (
    SELECT
        -- Coalesced key fields
        COALESCE(sl.shipment_key, g.shipment_key) AS shipment_key,
        COALESCE(sl.source_table, g.source_table, p.source_table) AS source_table,
        COALESCE(sl.request_timestamp, g.status_date, p.state_time) AS event_timestamp,
        COALESCE(sl.leg_rn, g.duplicate_rn, p.duplicate_rn) AS duplicate_rn,
        
        -- slam_leg fields
        sl.shipment_id,
        sl.package_id,
        sl.route_id,
        sl.route_warehouse_id,
        sl.route_ship_method,
        sl.route_internal_sort_code,
        sl.route_external_sort_code,
        sl.processing_date,
        sl.leg_sequence_id,
        sl.leg_id,
        sl.leg_warehouse_id,
        sl.leg_ship_method,
        sl.leg_internal_sort_code,
        sl.leg_external_sort_code,
        sl.leg_destination_warehouse_id,
        sl.ship_option AS sl_ship_option,
        sl.pickup_date,
        sl.estimated_arrival_date,
        sl.transit_time_in_hours,
        sl."zone",
        sl.request_timestamp,
        sl.dw_creation_date,
        sl.unloaded_at AS sl_unloaded_at,
        sl.timezone,
        sl.region,
        sl.country,
        sl.run_time_utc,
        sl.day_start_utc,
        sl.run_time_local,
        sl.local_offset_utc,
        
        -- gmp fields
        g.shipment_type,
        g.sender_id,
        g.tracking_id,
        g.ship_track_event_code,
        g.standard_carrier_alpha_code,
        g.supplement_code,
        g.tcda_container_id,
        g.parent_container_id,
        g.parent_container_type,
        g.status_node_id,
        g.load_id,
        g.status_date,
        g.status_date_timezone,
        g.edi_standard_name,
        g.status_code,
        g.reason_code,
        g.fulfillment_reference_id,
        g.marketplace_id,
        g.amazon_bar_code,
        g.fulfillment_shipment_id,
        g.package_id AS gmp_package_id,
        g.estimated_arrival_date AS gmp_estimated_arrival_date,
        g.promised_arrival_date,
        g.tss_ship_date,
        g.business_unit,
        g.ship_option AS gmp_ship_option,
        g.ship_method AS gmp_ship_method,
        g.actual_delivery_date,
        g.attempted_delivery_date,
        g.is_export_charge_prepaid,
        g.is_virtual_scan,
        g.access_point_id,
        g.seller_id,
        g.transport_shipment_id,
        g.amazon_reference_number,
        g.additional_reference_number,
        g.status_ref_target_type,
        g.predicted_delivery_date,
        g.estimated_delivery_date,
        g.origin_address_country_code,
        g.manifest_id,
        g.service_type,
        g.dw_created_time AS gmp_dw_created_time,
        g.unloaded_at AS gmp_unloaded_at,
        g.shipment_tracking_key,
        g.status_date_rn,
        
        -- pse fields
        p.type,
        p.package_id AS pse_package_id,
        p.package_id_type,
        p.forward_amazon_barcode,
        p.forward_tracking_id,
        p.forward_tcda_container_id,
        p.state_location_type,
        p.state_location_id,
        p.state_location_destination_id,
        p.state_location_source_id,
        p.state_status,
        p.state_time,
        p.triggerer_id,
        p.triggerer_id_type,
        p.dw_created_time AS pse_dw_created_time,
        p.state_sub_status,
        p.comp_type,
        p.comp_reason,
        p.comp_state,
        p.reverse_amazon_barcode,
        p.reverse_tcda_container_id,
        p.reverse_tracking_id,
        p.execution_id,
        p.execution_id_type,
        p.state_location_destination_type,
        p.state_location_source_type,
        p.state_time_rn,
        p.forward_tcda_rn,
        p.unloaded_at AS pse_unloaded_at,
        
        -- nc_load_summary fields
        v.origin,
        v.final_destination,
        v.origin_type,
        v.destination_type,
        v.lane,
        v.cpt,
        v.miles,
        v.origin_local_timezone,
        v.origin_scheduled_arrival,
        v.origin_calc_arrival,
        v.origin_arrival_late_group,
        v.origin_arrival_late_hrs,
        v.origin_arrival_reason,
        v.origin_scheduled_depart,
        v.origin_calc_depart,
        v.origin_departure_late_group,
        v.origin_depart_late_hrs,
        v.dest_local_timezone,
        v.dest_scheduled_arrival,
        v.dest_calc_arrival,
        v.dest_arrival_late_group,
        v.dest_arrival_late_hrs,
        v.dest_arrival_reason,
        
        -- node_type mappings
        origin_nt.location_type AS origin_location_type,
        status_nt.location_type AS status_node_location_type,
        dest_nt.location_type AS destination_location_type
    FROM 
        slam_leg sl
    FULL OUTER JOIN 
        gmp g ON sl.shipment_key = g.shipment_key
    -- Plain equality on purpose: NULL tracking ids must NOT match each other.
    -- Coalescing both sides to '' would pair every row lacking a gmp tracking id
    -- (including slam-only rows) with every pse row lacking a forward tracking id.
    -- Unmatched rows from either side are still preserved by the FULL OUTER JOIN.
    FULL OUTER JOIN 
        pse p ON g.tracking_id = p.forward_tracking_id
    -- Join with nc_load_summary_snapshot using parent_container_id (vrid)
    LEFT JOIN 
        nc_load_summary_snapshot v ON g.parent_container_id = v.vrid
    -- Join with node_type for location type mappings
    LEFT JOIN 
        node_type origin_nt ON v.origin = origin_nt.location_id
    LEFT JOIN 
        node_type status_nt ON g.status_node_id = status_nt.location_id
    LEFT JOIN 
        node_type dest_nt ON v.final_destination = dest_nt.location_id
);

## Consolidated Data Model

The consolidated table brings together data from three primary sources, along with additional reference data:

1. **slam_leg** - Shipment legs from SLAM (Sort, Label, and Manifest) system
2. **gmp** - GMP (Global Marketplace Platform) shipment tracking events
3. **pse** - Package Systems Events 
4. **nc_load_summary_snapshot** - Vehicle routing data
5. **node_type** - Location type reference data

### Key Design Elements:

- **Full Outer Join**: Preserves all records from all three main tables, even when there's no match
- **Coalesced Key Fields**: 
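  - `shipment_key`: Shipment identifier coalesced from `slam_leg` and `gmp` (`pse` is joined to `gmp` by tracking ID and does not contribute a key, so PSE-only rows have a NULL `shipment_key`)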
  - `source_table`: Identifies which system the record came from ('slam_leg', 'GMP', or 'PSE')
  - `event_timestamp`: Normalized timestamp from different source fields
  - `duplicate_rn`: Row number for handling duplicates
  
- **Location Type Mapping**:
  - Join with `node_type` for each location type (origin, status_node, destination)
  - Enhances data with location type information for richer analysis
  
- **Vehicle Routing Integration**:
  - Left join with `nc_load_summary_snapshot` on `gmp.parent_container_id = nc_load_summary_snapshot.vrid`
  - Provides vehicle routing details, including origin/destination info and timing

### Optimization:

- **SORTKEY(shipment_key)**: Improves query performance for filtering and joining on shipment_key
- **DISTKEY(shipment_key)**: Distributes data efficiently across Redshift nodes

### Common Use Cases:

1. Tracking a shipment's complete journey across multiple systems
2. Analyzing delivery performance metrics
3. Identifying where packages may be getting delayed or lost
4. Creating comprehensive timelines of shipment events
5. Analyzing location-specific performance based on location type
6. Correlating shipment events with vehicle routing data

The example queries below demonstrate how to extract useful insights from this consolidated view.

In [ ]:
-- Step 4: Example query to show how to use the consolidated table

-- Sample query 1: Event timeline, ordered per shipment
-- Returns the first 100 rows in (shipment_key, event_timestamp) order. No shipment
-- filter is applied here; add "WHERE shipment_key = ..." to inspect a single shipment.
SELECT
    c.shipment_key,
    c.source_table,
    c.event_timestamp,
    -- Label each row: a fixed tag for SLAM, a decoded event code for GMP
    -- (raw code when not one of the three known ones), the state status for PSE.
    CASE c.source_table
        WHEN 'slam_leg' THEN 'SLAM_LEG'
        WHEN 'GMP' THEN
            CASE c.ship_track_event_code
                WHEN 'EVENT_301' THEN 'DELIVERY'
                WHEN 'EVENT_302' THEN 'DISPATCH'
                WHEN 'EVENT_304' THEN 'DELIVERY_ATTEMPT'
                ELSE c.ship_track_event_code
            END
        WHEN 'PSE' THEN c.state_status
        ELSE 'UNKNOWN'
    END AS event_type,
    c.leg_warehouse_id,
    c.status_node_id,
    c.state_location_id,
    c.timezone,
    c.region,
    c.country
FROM consolidated AS c
ORDER BY c.shipment_key, c.event_timestamp
LIMIT 100;

-- Sample query 2: Row counts per source_table value
-- One output row per distinct source_table (NULL forms its own group).
SELECT
    c.source_table,
    COUNT(*) AS event_count
FROM consolidated AS c
GROUP BY c.source_table;

-- Sample query 3: Shipments whose rows carry all three source_table values
-- The has_* flags are 1 when at least one row for the shipment_key has that source.
-- There is no ORDER BY, so which 10 shipments come back is arbitrary.
SELECT
    src.shipment_key,
    MAX(CASE src.source_table WHEN 'slam_leg' THEN 1 ELSE 0 END) AS has_slam_leg,
    MAX(CASE src.source_table WHEN 'GMP'      THEN 1 ELSE 0 END) AS has_gmp,
    MAX(CASE src.source_table WHEN 'PSE'      THEN 1 ELSE 0 END) AS has_pse,
    COUNT(DISTINCT src.source_table)                             AS source_count
FROM consolidated AS src
GROUP BY src.shipment_key
HAVING COUNT(DISTINCT src.source_table) = 3
LIMIT 10;

In [ ]:
-- Sample query 4: Row and shipment counts per location-type combination
-- Rows where all three location types are NULL are excluded; largest groups first.
SELECT
    c.origin_location_type,
    c.status_node_location_type,
    c.destination_location_type,
    COUNT(*)                       AS event_count,
    COUNT(DISTINCT c.shipment_key) AS shipment_count
FROM consolidated AS c
WHERE NOT (
        c.origin_location_type IS NULL
    AND c.status_node_location_type IS NULL
    AND c.destination_location_type IS NULL
)
GROUP BY
    c.origin_location_type,
    c.status_node_location_type,
    c.destination_location_type
ORDER BY shipment_count DESC;

-- Sample query 5: Shipment journey with vehicle routing information
-- Only rows that have a parent_container_id (the key used to attach routing data)
-- are returned; first 100 rows in (shipment_key, event_timestamp) order.
SELECT
    j.shipment_key,
    j.event_timestamp,
    j.source_table,
    -- Same labelling as the timeline query: fixed tag for SLAM, decoded or raw
    -- event code for GMP, state status for PSE.
    CASE j.source_table
        WHEN 'slam_leg' THEN 'SLAM_LEG'
        WHEN 'GMP' THEN
            CASE j.ship_track_event_code
                WHEN 'EVENT_301' THEN 'DELIVERY'
                WHEN 'EVENT_302' THEN 'DISPATCH'
                WHEN 'EVENT_304' THEN 'DELIVERY_ATTEMPT'
                ELSE j.ship_track_event_code
            END
        WHEN 'PSE' THEN j.state_status
        ELSE 'UNKNOWN'
    END AS event_type,
    j.origin,
    j.origin_location_type,
    j.final_destination,
    j.destination_location_type,
    j.lane,
    j.miles,
    j.origin_scheduled_depart,
    j.origin_calc_depart,
    j.origin_depart_late_hrs,
    j.dest_scheduled_arrival,
    j.dest_calc_arrival,
    j.dest_arrival_late_hrs
FROM consolidated AS j
WHERE j.parent_container_id IS NOT NULL
ORDER BY j.shipment_key, j.event_timestamp
LIMIT 100;