# Order Data Analysis

This notebook processes and analyzes order data from a JSON structure to extract insights about orders, shipments, and revenue.

## Load JSON Data

Load the provided JSON data into a Python dictionary using the `json` module.

In [None]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Inline sample payload standing in for a real order export
# (replace with your actual JSON data).
json_data = '''
{
    "orders": [
        {
            "orderNumber": "ORD-001",
            "customerName": "John Doe",
            "totalPrice": 150.99,
            "shipmentStatus": "delivered",
            "fastDelivery": true
        },
        {
            "orderNumber": "ORD-002",
            "customerName": "Jane Smith",
            "totalPrice": 89.50,
            "shipmentStatus": "shipped",
            "fastDelivery": false
        },
        {
            "orderNumber": "ORD-003",
            "customerName": "Bob Johnson",
            "totalPrice": 220.75,
            "shipmentStatus": "processing",
            "fastDelivery": true
        },
        {
            "orderNumber": "ORD-004",
            "customerName": "Alice Brown",
            "totalPrice": 45.25,
            "shipmentStatus": "delivered",
            "fastDelivery": false
        },
        {
            "orderNumber": "ORD-005",
            "customerName": "Charlie Wilson",
            "totalPrice": 312.00,
            "shipmentStatus": "shipped",
            "fastDelivery": true
        }
    ]
}
'''

# Parse the JSON text; the result is a dict whose "orders" key holds the list
# of order records used by the rest of the notebook.
data = json.loads(json_data)

# Confirm the load and report how many order records were parsed.
print(
    "JSON data loaded successfully!",
    f"Number of orders: {len(data['orders'])}",
    sep="\n",
)

## Extract and Display Order Details

Extract key details such as order numbers, customer names, and total prices, and display them in a tabular format using pandas.

In [None]:
# Build the working table: one row per order record, one column per record key.
orders_df = pd.DataFrame(data['orders'])

# Show the full order table under a banner (rich display, not plain text).
print("Order Details:", "=" * 50, sep="\n")
display(orders_df)

# Summarise the table: row count, column names, and inferred dtypes.
column_names = orders_df.columns.tolist()
print()
print("Dataset Info:")
print(f"Total orders: {len(orders_df)}")
print(f"Columns: {column_names}")
print(f"Data types:\n{orders_df.dtypes}")

## Analyze Shipment Status

Analyze the shipment statuses and count the number of orders in each status category.

In [None]:
# Count orders per shipment status (most frequent status first).
shipment_status_counts = orders_df['shipmentStatus'].value_counts()

print("Shipment Status Analysis:")
print("=" * 30)
print(shipment_status_counts)

# Two-panel figure: absolute counts as bars (left), shares as a pie (right).
status_fig, (bar_ax, pie_ax) = plt.subplots(1, 2, figsize=(10, 6))

shipment_status_counts.plot(kind='bar', color='skyblue', ax=bar_ax)
bar_ax.set_title('Orders by Shipment Status')
bar_ax.set_xlabel('Shipment Status')
bar_ax.set_ylabel('Number of Orders')
plt.setp(bar_ax.get_xticklabels(), rotation=45)

shipment_status_counts.plot(
    kind='pie',
    autopct='%1.1f%%',
    colors=['lightcoral', 'lightblue', 'lightgreen'],
    ax=pie_ax,
)
pie_ax.set_title('Shipment Status Distribution')
pie_ax.set_ylabel('')  # drop the series name pandas would put on the y-axis

status_fig.tight_layout()
plt.show()

# Text version of the distribution, as a share of all orders in the table.
order_total = len(orders_df)
print("\nShipment Status Percentage:")
for status_name, n_orders in shipment_status_counts.items():
    share = (n_orders / order_total) * 100
    print(f"{status_name}: {n_orders} orders ({share:.1f}%)")

## Calculate Total Revenue

Calculate the total revenue by summing up the `totalPrice` values for all orders.

In [None]:
# Headline revenue figures over every order in the table.
order_values = orders_df['totalPrice']
total_revenue = order_values.sum()
average_order_value = order_values.mean()
min_order_value = order_values.min()
max_order_value = order_values.max()

print("Revenue Analysis:")
print("=" * 20)
headline_figures = (
    ("Total Revenue", total_revenue),
    ("Average Order Value", average_order_value),
    ("Minimum Order Value", min_order_value),
    ("Maximum Order Value", max_order_value),
)
for figure_label, amount in headline_figures:
    print(f"{figure_label}: ${amount:.2f}")

# Per-status breakdown: summed revenue, mean order value, and order count.
revenue_by_status = (
    orders_df.groupby('shipmentStatus')['totalPrice']
    .agg(['sum', 'mean', 'count'])
    .rename(columns={
        'sum': 'Total Revenue',
        'mean': 'Average Order Value',
        'count': 'Order Count',
    })
)

print("\nRevenue by Shipment Status:")
print(revenue_by_status)

# Two-panel figure: order-value histogram (left), revenue per status (right).
revenue_fig, (hist_ax, status_ax) = plt.subplots(1, 2, figsize=(12, 5))

order_values.hist(bins=10, color='lightgreen', alpha=0.7, ax=hist_ax)
hist_ax.set_title('Distribution of Order Values')
hist_ax.set_xlabel('Order Value ($)')
hist_ax.set_ylabel('Frequency')

revenue_by_status['Total Revenue'].plot(kind='bar', color='orange', ax=status_ax)
status_ax.set_title('Total Revenue by Shipment Status')
status_ax.set_xlabel('Shipment Status')
status_ax.set_ylabel('Revenue ($)')
plt.setp(status_ax.get_xticklabels(), rotation=45)

revenue_fig.tight_layout()
plt.show()

## Filter Fast Delivery Orders

Filter and display orders where `fastDelivery` is true.

In [None]:
# Filter orders with fast delivery. `.eq(True)` keeps exactly the rows whose
# flag compares equal to True.
fast_delivery_orders = orders_df[orders_df['fastDelivery'].eq(True)]

print("Fast Delivery Orders:")
print("=" * 25)
display(fast_delivery_orders)

# Analysis of fast delivery orders
total_order_count = len(orders_df)
fast_delivery_count = len(fast_delivery_orders)
# Guard the share so an empty order table reports 0% instead of raising
# ZeroDivisionError.
fast_delivery_percentage = (
    (fast_delivery_count / total_order_count) * 100 if total_order_count else 0.0
)
fast_delivery_revenue = fast_delivery_orders['totalPrice'].sum()
fast_delivery_avg_value = fast_delivery_orders['totalPrice'].mean()

print("\nFast Delivery Statistics:")
print(f"Number of fast delivery orders: {fast_delivery_count}")
print(f"Percentage of total orders: {fast_delivery_percentage:.1f}%")
print(f"Revenue from fast delivery orders: ${fast_delivery_revenue:.2f}")
print(f"Average value of fast delivery orders: ${fast_delivery_avg_value:.2f}")

# Compare fast delivery vs regular delivery: revenue sum/mean/count per flag,
# plus a count of order numbers, rounded to cents.
delivery_comparison = orders_df.groupby('fastDelivery').agg({
    'totalPrice': ['sum', 'mean', 'count'],
    'orderNumber': 'count'
}).round(2)

print("\nDelivery Type Comparison:")
print(delivery_comparison)

# Visualize fast delivery analysis
delivery_fig, (share_ax, revenue_ax) = plt.subplots(1, 2, figsize=(12, 5))

# value_counts() orders rows by descending frequency, not by False/True, so
# wedge labels and colours must be derived from the index. A fixed
# ['Regular Delivery', 'Fast Delivery'] list mislabels the wedges whenever
# fast-delivery orders are the majority (as in the sample data: 3 vs 2).
fast_delivery_status = orders_df['fastDelivery'].value_counts()
pie_labels = [
    'Fast Delivery' if is_fast else 'Regular Delivery'
    for is_fast in fast_delivery_status.index
]
pie_colors = [
    'lightgreen' if is_fast else 'lightcoral'
    for is_fast in fast_delivery_status.index
]
share_ax.pie(
    fast_delivery_status.values,
    labels=pie_labels,
    autopct='%1.1f%%',
    colors=pie_colors,
)
share_ax.set_title('Fast Delivery vs Regular Delivery')

# Tick labels and colours are derived from the group index as well, so the
# chart stays correct even if only one delivery type is present.
delivery_revenue = orders_df.groupby('fastDelivery')['totalPrice'].sum()
bar_labels = ['Fast' if is_fast else 'Regular' for is_fast in delivery_revenue.index]
bar_colors = [
    'lightgreen' if is_fast else 'lightcoral'
    for is_fast in delivery_revenue.index
]
delivery_revenue.plot(kind='bar', color=bar_colors, ax=revenue_ax)
revenue_ax.set_title('Revenue by Delivery Type')
revenue_ax.set_xlabel('Fast Delivery')
revenue_ax.set_ylabel('Revenue ($)')
revenue_ax.set_xticks(range(len(bar_labels)))
revenue_ax.set_xticklabels(bar_labels, rotation=0)

delivery_fig.tight_layout()
plt.show()

## Summary

This analysis provides insights into:
- Order distribution across different shipment statuses
- Total revenue and average order values
- Fast delivery preferences and their impact on revenue
- Per-order details (order number, customer name, total price) in tabular form

Key findings can be used to optimize delivery strategies and improve customer satisfaction.