# ⏱️ Processing Time Analysis Dashboard

**Purpose**: Analyze cycle times, processing durations, and operational efficiency metrics.

## Key Metrics
- Time to Acknowledge
- Time to Submit
- Time to Approve
- Total Cycle Time
- Bottleneck Identification

---

## 1. Setup & Data Loading

In [None]:
# Install required packages
!pip install pandas openpyxl plotly seaborn matplotlib -q

import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pd.set_option('display.max_columns', None)
print("✅ Libraries loaded successfully!")

In [None]:
# File path - update this to match your file location
# NOTE(review): this is a hard-coded absolute Windows path; the load cell below
# only works on a machine where this exact file exists.
filename = r"C:\Users\bmalaraju\Documents\WP-OP Agent\JIRA-Agent\11.25.WP Orders_25-11-2025_v01.xlsx"
print(f"📁 Using file: {filename}")

In [None]:
# Load data
# Reads the workbook at `filename` into `df` with the openpyxl engine. No
# sheet_name is passed, so pandas' default sheet selection applies.
df = pd.read_excel(filename, engine='openpyxl')
print(f"📊 Dataset loaded: {len(df):,} rows, {len(df.columns)} columns")
# The timestamp printed here is the time the notebook was run, not a value from the data.
print(f"📅 Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}")

## 2. Data Preparation

In [None]:
# Column mapping for date fields
# Keys are short internal names (later used to build '<key>_date' columns);
# values are the column headers looked up in the loaded workbook.
DATE_COLUMNS = {
    'added': 'Added Date',
    'acknowledged': 'Acknowledgement Date',
    'submitted': 'Submitted Date',
    'approved': 'Approved Date',
    'cancelled': 'Cancelled Date',
    'po_start': 'PO StartDate',
    'po_end': 'PO EndDate',
    'requested': 'WP Requested Delivery Date',
    'readiness': 'WP Readiness Date',
    'updated': 'Updated Date'
}

# Non-date columns referenced by later cells, keyed the same way
# (internal name -> workbook column header).
OTHER_COLUMNS = {
    'status': 'WP Order Status',
    'product': 'Product',
    'customer': 'Customer',
    'order_id': 'WP Order ID',
    'employee': 'Employee Name',
    'std': 'STD'
}

# Report which of the expected date columns are present in the loaded sheet
available_dates = {
    key: header
    for key, header in DATE_COLUMNS.items()
    if header in df.columns
}
print(f"✅ Available date columns: {len(available_dates)} of {len(DATE_COLUMNS)}")
for header in available_dates.values():
    print(f"   - {header}")

In [None]:
# Prepare analysis dataframe with parsed dates
# Work on a copy so the derived columns added below never touch the raw `df`.
analysis_df = df.copy()

# Helper: parse a column as datetimes and discard any timezone
def parse_date(series):
    """Return *series* converted to timezone-naive datetimes.

    Values that cannot be parsed are coerced to NaT. If the parsed result
    carries a timezone, the timezone is removed with ``tz_localize(None)``;
    otherwise the parsed values are returned as they are.
    """
    parsed = pd.to_datetime(series, errors='coerce')
    is_tz_aware = parsed.dt.tz is not None
    return parsed.dt.tz_localize(None) if is_tz_aware else parsed

# Build one '<key>_date' column per expected date field. Headers missing from
# the sheet still get a column (all NaT) so later cells can reference it.
for key, col in DATE_COLUMNS.items():
    target = f'{key}_date'
    analysis_df[target] = (
        parse_date(analysis_df[col]) if col in analysis_df.columns else pd.NaT
    )

# STD (Standard Time in Days): numeric where parseable, otherwise 0
std_col = OTHER_COLUMNS.get('std')
std_present = bool(std_col) and std_col in analysis_df.columns
analysis_df['std_days'] = (
    pd.to_numeric(analysis_df[std_col], errors='coerce').fillna(0)
    if std_present
    else 0
)

print("\n✅ Dates parsed successfully!")

In [None]:
# Calculate time intervals (in days)
# `.dt.days` keeps only the whole-day component of each difference; rows where
# either date is missing (NaT) yield NaN.

# Time to Acknowledge: Acknowledged - Added
analysis_df['time_to_ack'] = (analysis_df['acknowledged_date'] - analysis_df['added_date']).dt.days

# Time to Submit: Submitted - Acknowledged
analysis_df['time_to_submit'] = (analysis_df['submitted_date'] - analysis_df['acknowledged_date']).dt.days

# Time to Approve: Approved - Submitted
analysis_df['time_to_approve'] = (analysis_df['approved_date'] - analysis_df['submitted_date']).dt.days

# Total Cycle Time: Approved/Cancelled - Added
# Use the latest of approved/cancelled as terminal date
analysis_df['terminal_date'] = analysis_df[['approved_date', 'cancelled_date']].max(axis=1)
analysis_df['total_cycle_time'] = (analysis_df['terminal_date'] - analysis_df['added_date']).dt.days

# PO Duration
analysis_df['po_duration'] = (analysis_df['po_end_date'] - analysis_df['po_start_date']).dt.days

# Clean up negative values (data quality issues).
# This must run BEFORE std_variance is derived: otherwise the variance would be
# computed from negative cycle times that are blanked out here.
time_cols = ['time_to_ack', 'time_to_submit', 'time_to_approve', 'total_cycle_time', 'po_duration']
for col in time_cols:
    # mask() replaces the flagged rows with NaN and leaves existing NaN untouched
    analysis_df[col] = analysis_df[col].mask(analysis_df[col] < 0)

# STD Variance: Actual - Standard, based on the cleaned cycle time
analysis_df['std_variance'] = analysis_df['total_cycle_time'] - analysis_df['std_days']

print("✅ Time intervals calculated!")

## 3. Processing Time Summary

In [None]:
# Summary statistics for one duration column
def calc_stats(series):
    """Summarise the non-missing values of *series*.

    Returns a dict with the keys 'count', 'mean', 'median', 'p25', 'p75' and
    'max'. When no non-missing values remain, every entry is 0.
    """
    values = series.dropna()
    if values.empty:
        return dict.fromkeys(('count', 'mean', 'median', 'p25', 'p75', 'max'), 0)

    lower_quartile = values.quantile(0.25)
    upper_quartile = values.quantile(0.75)
    return {
        'count': len(values),
        'mean': values.mean(),
        'median': values.median(),
        'p25': lower_quartile,
        'p75': upper_quartile,
        'max': values.max(),
    }

# Display name -> derived duration column; each column is summarised with calc_stats
metric_columns = {
    'Time to Acknowledge': 'time_to_ack',
    'Time to Submit': 'time_to_submit',
    'Time to Approve': 'time_to_approve',
    'Total Cycle Time': 'total_cycle_time',
    'PO Duration': 'po_duration',
}
time_metrics = {
    label: calc_stats(analysis_df[column])
    for label, column in metric_columns.items()
}

heavy_rule = "=" * 80
print(heavy_rule)
print("⏱️ PROCESSING TIME SUMMARY (in days)")
print(heavy_rule)
print(f"{'Metric':<25} {'Count':>8} {'Mean':>8} {'Median':>8} {'P25':>8} {'P75':>8} {'Max':>8}")
print("-" * 80)
for name, stats in time_metrics.items():
    # Metrics with no usable rows are left out of the table
    if stats['count'] <= 0:
        continue
    print(f"{name:<25} {stats['count']:>8,} {stats['mean']:>8.1f} {stats['median']:>8.1f} {stats['p25']:>8.1f} {stats['p75']:>8.1f} {stats['max']:>8.0f}")
print(heavy_rule)

In [None]:
# KPI tiles: one numeric indicator per average duration
ack_mean = time_metrics['Time to Acknowledge']['mean']
submit_mean = time_metrics['Time to Submit']['mean']
approve_mean = time_metrics['Time to Approve']['mean']
cycle_mean = time_metrics['Total Cycle Time']['mean']

# (tile title, value, font colour) in left-to-right order
kpi_tiles = [
    ('Avg Acknowledge', ack_mean, '#3498DB'),
    ('Avg Submit', submit_mean, '#2ECC71'),
    ('Avg Approve', approve_mean, '#E67E22'),
    ('Avg Total Cycle', cycle_mean, '#9B59B6'),
]

fig = make_subplots(
    rows=1, cols=len(kpi_tiles),
    specs=[[{'type': 'indicator'}] * len(kpi_tiles)],
    subplot_titles=[title for title, _, _ in kpi_tiles]
)

for position, (_, tile_value, tile_color) in enumerate(kpi_tiles, start=1):
    indicator = go.Indicator(
        mode="number",
        value=tile_value,
        number={'suffix': ' days', 'font': {'size': 30, 'color': tile_color}}
    )
    fig.add_trace(indicator, row=1, col=position)

fig.update_layout(height=200, title={'text': 'Average Processing Times', 'x': 0.5})
fig.show()

## 4. Stage-by-Stage Analysis

In [None]:
# Waterfall: the three stage averages stacked up to their sum
stages = ['Time to Acknowledge', 'Time to Submit', 'Time to Approve']
avg_times = [ack_mean, submit_mean, approve_mean]

bar_labels = [f"{t:.1f}d" for t in avg_times]
total_label = f"{sum(avg_times):.1f}d"

waterfall = go.Waterfall(
    name="Processing Time",
    orientation="v",
    # one 'relative' step per stage, then a 'total' bar; the total's y is a placeholder
    measure=['relative'] * len(stages) + ['total'],
    x=stages + ['Total (Sum)'],
    y=avg_times + [0],
    connector={"line": {"color": "rgb(63, 63, 63)"}},
    text=bar_labels + [total_label],
    textposition="outside",
    increasing={"marker": {"color": "#3498DB"}},
    totals={"marker": {"color": "#9B59B6"}}
)
fig = go.Figure(waterfall)

fig.update_layout(
    title=dict(text='Cumulative Processing Time Breakdown', x=0.5, font=dict(size=20)),
    yaxis_title='Days',
    height=400
)

fig.show()

In [None]:
# Box plots comparing the distribution of each duration
fig = go.Figure()

# (trace name, source column, colour)
box_specs = [
    ('Time to Ack', 'time_to_ack', '#3498DB'),
    ('Time to Submit', 'time_to_submit', '#2ECC71'),
    ('Time to Approve', 'time_to_approve', '#E67E22'),
    ('Total Cycle', 'total_cycle_time', '#9B59B6'),
]
time_data = [
    (label, analysis_df[column].dropna(), colour)
    for label, column, colour in box_specs
]

for label, values, colour in time_data:
    if values.empty:
        continue
    # Values above the 99th percentile are left out of the plot only
    upper_limit = values.quantile(0.99)
    trimmed = values[values <= upper_limit]
    fig.add_trace(go.Box(y=trimmed, name=label, marker_color=colour, boxmean=True))

fig.update_layout(
    title=dict(text='Processing Time Distribution (Box Plots)', x=0.5, font=dict(size=20)),
    yaxis_title='Days',
    height=450,
    showlegend=False
)

fig.show()

## 5. Cycle Time Distribution

In [None]:
# Histogram of total cycle time, with mean/median markers
cycle_data = analysis_df['total_cycle_time'].dropna()

if cycle_data.empty:
    print("⚠️ No cycle time data available")
else:
    # Only values up to the 99th percentile are drawn
    upper_limit = cycle_data.quantile(0.99)
    cycle_capped = cycle_data[cycle_data <= upper_limit]

    histogram = go.Histogram(x=cycle_capped, nbinsx=30, marker_color='#9B59B6')
    fig = go.Figure(data=[histogram])

    fig.update_layout(
        title=dict(text='Total Cycle Time Distribution', x=0.5, font=dict(size=20)),
        xaxis_title='Days',
        yaxis_title='Number of Orders',
        height=400
    )

    # The reference lines use the uncapped data, so they match the summary table
    mean_val = cycle_data.mean()
    median_val = cycle_data.median()
    reference_lines = [
        (mean_val, "red", f"Mean: {mean_val:.1f}d"),
        (median_val, "green", f"Median: {median_val:.1f}d"),
    ]
    for position, line_colour, label in reference_lines:
        fig.add_vline(x=position, line_dash="dash", line_color=line_colour,
                      annotation_text=label)

    fig.show()

## 6. Processing Time by Product

In [None]:
# Average durations per product, charted for the 15 slowest products
product_col = OTHER_COLUMNS.get('product')

if product_col and product_col in analysis_df.columns:
    per_product = analysis_df.groupby(product_col).agg(
        count=('total_cycle_time', 'count'),
        avg_cycle=('total_cycle_time', 'mean'),
        avg_ack=('time_to_ack', 'mean'),
        avg_submit=('time_to_submit', 'mean'),
        avg_approve=('time_to_approve', 'mean')
    ).reset_index()

    # Keep products with at least 5 measured orders, take the 15 largest
    # averages, then sort ascending so the slowest ends up at the top of the chart
    product_times = (
        per_product.loc[per_product['count'] >= 5]
        .nlargest(15, 'avg_cycle')
        .sort_values('avg_cycle', ascending=True)
    )

    cycle_bars = go.Bar(
        y=product_times[product_col],
        x=product_times['avg_cycle'],
        orientation='h',
        marker_color='#9B59B6',
        text=product_times['avg_cycle'].map('{:.1f}d'.format),
        textposition='outside'
    )
    fig = go.Figure(data=[cycle_bars])

    fig.update_layout(
        title=dict(text='Average Cycle Time by Product (Top 15 Slowest)', x=0.5, font=dict(size=20)),
        xaxis_title='Average Days',
        yaxis_title='Product',
        height=500,
        margin=dict(l=200)
    )

    fig.show()

In [None]:
# Stacked bars: how each stage contributes per product
if product_col and product_col in analysis_df.columns and len(product_times) > 0:
    # product_times is sorted ascending, so the tail holds the 10 slowest products
    top_products = product_times.tail(10)

    # (source column, legend label, colour) in stacking order
    stage_layers = [
        ('avg_ack', 'Acknowledge', '#3498DB'),
        ('avg_submit', 'Submit', '#2ECC71'),
        ('avg_approve', 'Approve', '#E67E22'),
    ]

    fig = go.Figure()
    for column, legend_label, colour in stage_layers:
        fig.add_trace(go.Bar(
            y=top_products[product_col],
            x=top_products[column].fillna(0),
            name=legend_label,
            orientation='h',
            marker_color=colour
        ))

    fig.update_layout(
        title=dict(text='Processing Time Breakdown by Product', x=0.5, font=dict(size=20)),
        barmode='stack',
        xaxis_title='Days',
        yaxis_title='Product',
        height=450,
        margin=dict(l=200),
        legend=dict(orientation='h', y=1.1)
    )

    fig.show()

## 7. Bottleneck Analysis

In [None]:
# Identify bottleneck stage: the stage with the largest average duration
stage_means = {
    'Acknowledge': ack_mean,
    'Submit': submit_mean,
    'Approve': approve_mean
}

bottleneck = max(stage_means, key=stage_means.get)
total_avg = sum(stage_means.values())

print("\n🔍 BOTTLENECK ANALYSIS")
print("="*60)
for stage, avg_time in sorted(stage_means.items(), key=lambda x: x[1], reverse=True):
    pct = (avg_time / total_avg * 100) if total_avg > 0 else 0
    indicator = "🚨" if stage == bottleneck else "  "
    print(f"{indicator} {stage:<20} {avg_time:>8.1f} days ({pct:>5.1f}% of cycle)")

# All stage means are 0 when no stage has usable data; dividing by that total
# would raise ZeroDivisionError, so the share is only reported when it is defined.
if total_avg > 0:
    bottleneck_share = stage_means[bottleneck] / total_avg * 100
    print(f"\n⚠️ Bottleneck identified: {bottleneck} stage")
    print(f"   Contributes {bottleneck_share:.1f}% of total processing time")
else:
    print("\n⚠️ No stage timing data available - bottleneck cannot be determined")

In [None]:
# Donut chart of stage averages, with the bottleneck stage highlighted
stage_names = list(stage_means)
colors = ['#FF6B6B' if stage == bottleneck else '#4ECDC4' for stage in stage_names]

donut = go.Pie(
    labels=stage_names,
    values=[stage_means[stage] for stage in stage_names],
    hole=0.4,
    marker_colors=colors,
    textinfo='percent+label'
)
fig = go.Figure(data=[donut])

# The bottleneck name is written into the hole of the donut
centre_note = {'text': f'🚨<br>{bottleneck}', 'x': 0.5, 'y': 0.5, 'font_size': 14, 'showarrow': False}
fig.update_layout(
    title=dict(text='Time Distribution Across Stages', x=0.5, font=dict(size=20)),
    annotations=[centre_note],
    height=400
)

fig.show()

## 8. STD Variance Analysis

In [None]:
# STD (Standard Time) variance analysis
# Missing STD values were filled with 0 during data preparation, so a positive
# std_days is the only reliable sign that an order has a standard at all.
# Orders without one are excluded; otherwise they would all count as overruns.
has_std_target = analysis_df['std_days'] > 0
std_variance = analysis_df.loc[has_std_target, 'std_variance'].dropna()

if len(std_variance) > 0:
    # Orders exceeding STD
    exceeds_std = (std_variance > 0).sum()
    within_std = (std_variance <= 0).sum()

    print("\n📊 STD (Standard Time) COMPLIANCE")
    print("="*60)
    print(f"   Orders with STD data: {len(std_variance):,}")
    print(f"   Within STD:           {within_std:,} ({within_std/len(std_variance)*100:.1f}%)")
    print(f"   Exceeds STD:          {exceeds_std:,} ({exceeds_std/len(std_variance)*100:.1f}%)")
    print(f"\n   Avg Variance:         {std_variance.mean():.1f} days")
    print(f"   Max Overrun:          {std_variance.max():.0f} days")

    # Histogram, limited to the central 95% by absolute variance.
    # The bounds are inclusive so a cap of 0 does not discard every row.
    cap = std_variance.abs().quantile(0.95)
    variance_capped = std_variance[std_variance.between(-cap, cap)]

    # A per-observation colour list would be applied per bar, not per value,
    # so on-time and late orders are drawn as two separately coloured traces.
    # barmode='stack' puts both traces on one set of bins.
    fig = go.Figure()
    fig.add_trace(go.Histogram(
        x=variance_capped[variance_capped <= 0],
        name='Within STD',
        nbinsx=40,
        marker_color='#4ECDC4'
    ))
    fig.add_trace(go.Histogram(
        x=variance_capped[variance_capped > 0],
        name='Exceeds STD',
        nbinsx=40,
        marker_color='#FF6B6B'
    ))

    fig.add_vline(x=0, line_color="black", line_width=2, annotation_text="On Target")

    fig.update_layout(
        title={'text': 'STD Variance Distribution (Actual - Standard)', 'x': 0.5, 'font': {'size': 20}},
        xaxis_title='Variance (Days) - Negative = Early, Positive = Late',
        yaxis_title='Count',
        barmode='stack',
        height=400
    )

    fig.show()
else:
    print("⚠️ No STD data available for variance analysis")

## 9. Slowest Orders

In [None]:
# Top 20 slowest orders
order_col = OTHER_COLUMNS.get('order_id')
status_col = OTHER_COLUMNS.get('status')
# Looked up again here so this cell does not depend on the product cell having run
product_col = OTHER_COLUMNS.get('product')

slowest = analysis_df[
    analysis_df['total_cycle_time'].notna()
].nlargest(20, 'total_cycle_time')

# Source column -> display header. Renaming through a mapping (instead of
# assigning a fixed 7-item header list) still works when some of the optional
# columns are missing from the sheet.
display_names = {
    order_col: 'Order ID',
    product_col: 'Product',
    status_col: 'Status',
    'time_to_ack': 'Ack (d)',
    'time_to_submit': 'Submit (d)',
    'time_to_approve': 'Approve (d)',
    'total_cycle_time': 'Total (d)',
}

# Keep only the columns that exist, in the order listed above
display_cols = [c for c in display_names if c in slowest.columns]

slowest_display = slowest[display_cols].rename(columns=display_names)

print("\n🐌 TOP 20 SLOWEST ORDERS")
print("="*100)
print(slowest_display.to_string(index=False))

## 10. Processing Time Trend

In [None]:
# Weekly trend of average cycle time, bucketed by the week an order was added
trend_df = analysis_df.dropna(subset=['added_date', 'total_cycle_time']).copy()
trend_df['week'] = trend_df['added_date'].dt.to_period('W').dt.start_time

# Only the 12 most recent weeks are charted
weekly_times = (
    trend_df.groupby('week')
    .agg(
        avg_cycle=('total_cycle_time', 'mean'),
        count=('total_cycle_time', 'count')
    )
    .reset_index()
    .tail(12)
)

trend_line = go.Scatter(
    x=weekly_times['week'],
    y=weekly_times['avg_cycle'],
    mode='lines+markers',
    name='Avg Cycle Time',
    line={'color': '#9B59B6', 'width': 3}
)
fig = go.Figure()
fig.add_trace(trend_line)

fig.update_layout(
    title=dict(text='Weekly Average Cycle Time Trend', x=0.5, font=dict(size=20)),
    xaxis_title='Week',
    yaxis_title='Average Days',
    height=400
)

# Reference line: mean over every order, not just the weeks shown
overall_mean = analysis_df['total_cycle_time'].mean()
fig.add_hline(y=overall_mean, line_dash="dash", line_color="gray",
              annotation_text=f"Overall Avg: {overall_mean:.1f}d")

fig.show()

## 11. Export Results

In [None]:
# Write the summary tables to a timestamped Excel workbook
run_stamp = datetime.now().strftime('%Y%m%d_%H%M')
export_filename = f"processing_time_analysis_{run_stamp}.xlsx"

with pd.ExcelWriter(export_filename, engine='openpyxl') as writer:
    # Sheet 1: one row per metric from time_metrics
    summary_rows = [
        {
            'Metric': metric_name,
            'Count': metric_stats['count'],
            'Mean (days)': round(metric_stats['mean'], 1),
            'Median (days)': round(metric_stats['median'], 1),
            'P25': round(metric_stats['p25'], 1),
            'P75': round(metric_stats['p75'], 1),
            'Max': round(metric_stats['max'], 0)
        }
        for metric_name, metric_stats in time_metrics.items()
    ]
    summary_frame = pd.DataFrame(summary_rows)
    summary_frame.to_excel(writer, sheet_name='Summary', index=False)

    # Sheet 2: per-product table, only if the product cell created it
    if 'product_times' in globals():
        product_times.to_excel(writer, sheet_name='By Product', index=False)

    # Sheet 3: slowest orders
    slowest_display.to_excel(writer, sheet_name='Slowest Orders', index=False)

print(f"\n✅ Results exported to: {export_filename}")
# files.download() - uncomment if using Colab
# files.download(export_filename)

---

## 📋 Summary

This notebook analyzed processing times and operational efficiency:

| Metric | Description |
|--------|-------------|
| **Time to Acknowledge** | Days from Added to Acknowledged |
| **Time to Submit** | Days from Acknowledged to Submitted |
| **Time to Approve** | Days from Submitted to Approved |
| **Total Cycle Time** | Days from Added to the terminal date (the later of the Approved and Cancelled dates) |
| **STD Variance** | Total cycle time minus the standard time (STD), in days; positive means the order ran over its standard |

### Key Insights
1. Focus improvements on the bottleneck stage
2. Investigate products with long cycle times
3. Address orders exceeding STD targets
4. Track trend improvements over time