# Point Cloud Quality Visualization

This notebook demonstrates how to visualize the quality metrics and results from the point cloud quality checks. It provides visualizations for:

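1. Overall quality distribution
2. Quality metrics over time
3. Common failure patterns
4. 3D visualization of point clouds with quality issues
5. Correlation between quality metrics
6. Quality metrics by capture device or location
7. Recommendations based on the analysis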

## Setup

In [None]:
# Import required libraries
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from pyspark.sql import functions as F

# Plot styling: apply matplotlib's 'ggplot' sheet, then seaborn's whitegrid settings
plt.style.use('ggplot')
sns.set(style="whitegrid")

# Default figure size and resolution for figures that do not pass their own
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'figure.dpi': 100,
})

## Load Quality Metrics Data

First, let's load the quality metrics data from our Delta Live Tables pipeline.

In [None]:
# Pull the quality metrics table from Spark into a local pandas DataFrame
quality_metrics_df = spark.table("point_cloud_quality_metrics").toPandas()

# Headline numbers: a score of exactly 1.0 is counted as valid,
# anything strictly below 1.0 as invalid
scores = quality_metrics_df['quality_score']
valid_count = int((scores == 1.0).sum())
invalid_count = int((scores < 1.0).sum())

print(f"Total point clouds: {len(quality_metrics_df)}")
print(f"Valid point clouds: {valid_count}")
print(f"Invalid point clouds: {invalid_count}")
print(f"Average quality score: {scores.mean():.2f}")

# Preview the first rows (rendered as the cell output)
quality_metrics_df.head()

## 1. Overall Quality Distribution

Let's visualize the distribution of quality scores across all point clouds.

In [None]:
# Create a histogram of quality scores
plt.figure(figsize=(12, 6))
sns.histplot(quality_metrics_df['quality_score'], bins=6, kde=True)
plt.title('Distribution of Point Cloud Quality Scores', fontsize=16)
plt.xlabel('Quality Score (0-1)', fontsize=14)
plt.ylabel('Count', fontsize=14)
plt.xticks(np.arange(0, 1.1, 0.2))
plt.grid(True, alpha=0.3)
plt.show()

# Create a pie chart of pass/fail ratio (only a score of exactly 1.0 is a pass)
pass_fail_counts = (
    quality_metrics_df['quality_score']
    .apply(lambda x: 'Pass' if x == 1.0 else 'Fail')
    .value_counts()
)

# value_counts() orders labels by frequency, so the slice order is data-dependent.
# Look each colour up by label so 'Pass' is always green and 'Fail' always red,
# regardless of which one is more common (or whether only one is present).
pass_fail_colors = {'Pass': '#4CAF50', 'Fail': '#F44336'}
slice_colors = [pass_fail_colors[label] for label in pass_fail_counts.index]

plt.figure(figsize=(10, 10))
plt.pie(pass_fail_counts, labels=pass_fail_counts.index, autopct='%1.1f%%', startangle=90, colors=slice_colors)
plt.title('Pass/Fail Ratio for Point Cloud Quality Checks', fontsize=16)
plt.axis('equal')
plt.show()

## 2. Quality Metrics Over Time

Now, let's analyze how quality metrics have changed over time.

In [None]:
# Pull the quality history table from Spark into pandas
quality_history_df = spark.table("point_cloud_quality_history").toPandas()

# Parse the processing date and order the rows chronologically
quality_history_df['processing_date'] = pd.to_datetime(quality_history_df['processing_date'])
quality_history_df = quality_history_df.sort_values('processing_date')

# One entry per quality check:
# (success-rate column to create, source count column, plot marker, legend label)
check_specs = [
    ('ground_plane_success_rate', 'valid_ground_plane_count', 'o', 'Ground Plane Orientation'),
    ('density_success_rate', 'valid_density_count', 's', 'Density Distribution'),
    ('noise_success_rate', 'valid_noise_level_count', '^', 'Noise Level'),
    ('arc_success_rate', 'no_arc_distortion_count', 'd', 'Arc Distortion'),
    ('completeness_success_rate', 'complete_point_cloud_count', '*', 'Completeness'),
]

# Success rate = passing count / total point clouds, per row
for rate_col, count_col, _marker, _label in check_specs:
    quality_history_df[rate_col] = quality_history_df[count_col] / quality_history_df['total_point_clouds']

dates = quality_history_df['processing_date']

# Average quality score over time
plt.figure(figsize=(14, 7))
plt.plot(dates, quality_history_df['avg_quality_score'], marker='o', linewidth=2)
plt.title('Average Point Cloud Quality Score Over Time', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Average Quality Score', fontsize=14)
plt.grid(True, alpha=0.3)
plt.ylim(0, 1.05)
plt.tight_layout()
plt.show()

# Success rate of every quality check over time, one line per check
plt.figure(figsize=(14, 8))
for rate_col, _count_col, marker, label in check_specs:
    plt.plot(dates, quality_history_df[rate_col], marker=marker, label=label)
plt.title('Success Rates by Quality Check Over Time', fontsize=16)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Success Rate', fontsize=14)
plt.ylim(0, 1.05)
plt.legend(fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Common Failure Patterns

Let's analyze the most common failure patterns in our point clouds.

In [None]:
# Load the failure patterns table and order it from most to least frequent
failure_patterns_df = (
    spark.table("point_cloud_failure_patterns")
    .toPandas()
    .sort_values('pattern_count', ascending=False)
)

# Horizontal bar chart: one bar per failure pattern
plt.figure(figsize=(14, 8))
sns.barplot(data=failure_patterns_df, x='pattern_count', y='failure_pattern', palette='viridis')
plt.title('Common Point Cloud Failure Patterns', fontsize=16)
plt.xlabel('Count', fontsize=14)
plt.ylabel('Failure Pattern', fontsize=14)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Pie chart: share of each failure pattern
pattern_counts = failure_patterns_df['pattern_count']
pattern_labels = failure_patterns_df['failure_pattern']

plt.figure(figsize=(12, 12))
plt.pie(pattern_counts, labels=pattern_labels, autopct='%1.1f%%', startangle=90, shadow=True)
plt.title('Distribution of Failure Patterns', fontsize=16)
plt.axis('equal')
plt.tight_layout()
plt.show()

## 4. 3D Visualization of Point Clouds

Now, let's visualize some example point clouds with quality issues to better understand the problems.

In [None]:
# Function to parse points and create a 3D scatter plot
def visualize_point_cloud(points_json, title):
    """Build an interactive 3D scatter plot of a single point cloud.

    Args:
        points_json: Either JSON text (``str``, ``bytes`` or ``bytearray``)
            encoding an array of objects with ``'x'``, ``'y'`` and ``'z'``
            entries, or an already-decoded iterable of such items (anything
            supporting ``p['x']``-style lookup).
        title: Title shown above the figure.

    Returns:
        A ``plotly.graph_objects.Figure`` holding one ``Scatter3d`` trace whose
        markers are coloured by their z value. The figure is not displayed;
        call ``.show()`` on the result.

    Raises:
        json.JSONDecodeError: If ``points_json`` is text that is not valid JSON.
        KeyError: If a decoded point lacks an ``'x'``, ``'y'`` or ``'z'`` entry.
    """
    # Decode only when given JSON text; an already-decoded collection
    # is used as-is instead of failing inside json.loads.
    if isinstance(points_json, (str, bytes, bytearray)):
        points = json.loads(points_json)
    else:
        points = list(points_json)

    # Extract x, y, z coordinates
    xs = [p['x'] for p in points]
    ys = [p['y'] for p in points]
    zs = [p['z'] for p in points]

    # Create 3D scatter plot
    fig = go.Figure(data=[go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode='markers',
        marker=dict(
            size=2,
            color=zs,  # Color points by height
            colorscale='Viridis',
            opacity=0.8
        )
    )])

    # Update layout
    fig.update_layout(
        title=title,
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            aspectmode='data'  # Preserve aspect ratio
        ),
        width=800,
        height=800,
        margin=dict(l=0, r=0, b=0, t=40)
    )

    return fig

In [None]:
# Show one example point cloud per quality issue, plus a good one for comparison
# Note: This assumes you have a table with point clouds and their quality metrics


def _fetch_example(table_name, condition=None):
    """Return at most one row of `table_name` as a pandas DataFrame.

    When `condition` is given it is applied as a Spark SQL filter first.
    """
    rows = spark.table(table_name)
    if condition is not None:
        rows = rows.filter(condition)
    return rows.limit(1).toPandas()


def _show_example(example_df, title):
    """Plot the 'points' value of the first row; do nothing for an empty frame."""
    if example_df.empty:
        return
    visualize_point_cloud(example_df.iloc[0]['points'], title).show()


# Example with ground plane orientation issue
ground_plane_issue = _fetch_example("invalid_point_clouds", "ground_plane_valid = false")
_show_example(ground_plane_issue, 'Point Cloud with Ground Plane Orientation Issue')

# Example with noise issue
noise_issue = _fetch_example("invalid_point_clouds", "noise_level_valid = false")
_show_example(noise_issue, 'Point Cloud with Excessive Noise')

# Example with arc distortion
arc_issue = _fetch_example("invalid_point_clouds", "no_arc_distortion = false")
_show_example(arc_issue, 'Point Cloud with Arc Distortion')

# Example of a good point cloud for comparison
good_example = _fetch_example("valid_point_clouds")
_show_example(good_example, 'Example of a Good Quality Point Cloud')

## 5. Correlation Between Quality Metrics

Let's check whether the different quality metrics are correlated with one another.

In [None]:
# Create a correlation matrix of the per-check validity flags
quality_cols = ['ground_plane_valid', 'density_distribution_valid', 'noise_level_valid', 'no_arc_distortion', 'completeness_valid']

# Cast to float rather than int: True/False become 1.0/0.0 exactly as before,
# but a missing (null) flag becomes NaN instead of making the cast raise.
# DataFrame.corr() then leaves NaN entries out pairwise.
corr_df = quality_metrics_df[quality_cols].astype(float).corr()

# Plot correlation heatmap
plt.figure(figsize=(12, 10))
sns.heatmap(corr_df, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0, square=True, linewidths=.5)
plt.title('Correlation Between Quality Metrics', fontsize=16)
plt.tight_layout()
plt.show()

## 6. Quality Metrics by Capture Device or Location

If your data includes information about the capture device or location, you can analyze quality metrics by these factors.

In [None]:
# This section uses the optional 'device_type' and 'location' columns.
# Each column is checked on its own, so a dataset that has only one of them
# still gets the corresponding plot; a missing column is reported and skipped.

# (grouping column, human-readable label, rotate x tick labels?)
grouping_specs = [
    ('device_type', 'Device Type', False),
    ('location', 'Location', True),
]

for group_col, group_label, rotate_ticks in grouping_specs:
    if group_col not in quality_metrics_df.columns:
        print(f"Skipping quality by {group_label}: column '{group_col}' not found")
        continue

    # Box plot of quality score per group
    plt.figure(figsize=(14, 7))
    sns.boxplot(x=group_col, y='quality_score', data=quality_metrics_df)
    plt.title(f'Point Cloud Quality by {group_label}', fontsize=16)
    plt.xlabel(group_label, fontsize=14)
    plt.ylabel('Quality Score', fontsize=14)
    if rotate_ticks:
        # Tick labels for this grouping are tilted so they do not overlap
        plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 7. Recommendations Based on Analysis

Based on the analysis, we can provide recommendations for improving point cloud quality.

In [None]:
# Failure rate per check = share of rows whose validity flag equals False
check_columns = {
    'Ground Plane Orientation': 'ground_plane_valid',
    'Density Distribution': 'density_distribution_valid',
    'Noise Level': 'noise_level_valid',
    'Arc Distortion': 'no_arc_distortion',
    'Completeness': 'completeness_valid',
}
failure_rates = {
    label: (quality_metrics_df[column] == False).mean()
    for label, column in check_columns.items()
}

# Order the checks from highest to lowest failure rate
sorted_failure_rates = dict(
    sorted(failure_rates.items(), key=lambda item: item[1], reverse=True)
)

# Bar chart of failure rates, worst check first
plt.figure(figsize=(14, 7))
plt.bar(sorted_failure_rates.keys(), sorted_failure_rates.values(), color='#FF5722')
plt.title('Failure Rates by Quality Check', fontsize=16)
plt.xlabel('Quality Check', fontsize=14)
plt.ylabel('Failure Rate', fontsize=14)
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Advice lines printed for each check, keyed by the check's label
recommendations = {
    'Ground Plane Orientation': (
        "   - Ensure the camera is held level during capture",
        "   - Use a tripod or stabilizer for more consistent orientation",
        "   - Check calibration of capture devices",
    ),
    'Density Distribution': (
        "   - Ensure even coverage when capturing images around the object",
        "   - Maintain consistent distance from the object during capture",
        "   - Consider using more capture positions for complex objects",
    ),
    'Noise Level': (
        "   - Use better lighting conditions to reduce noise",
        "   - Ensure the camera lens is clean",
        "   - Consider using cameras with better low-light performance",
    ),
    'Arc Distortion': (
        "   - Ensure complete 360° coverage around the object",
        "   - Maintain consistent height during capture",
        "   - Check for calibration issues in the reconstruction software",
    ),
    'Completeness': (
        "   - Ensure all parts of the object are captured, including top and bottom",
        "   - Use sufficient overlap between images",
        "   - Consider using more capture positions for complex objects",
    ),
}

# Print recommendations based on the most common issues
print("Recommendations for Improving Point Cloud Quality:")
print("\nBased on the analysis, the following areas need the most attention (in order of priority):")

for rank, (check, rate) in enumerate(sorted_failure_rates.items(), 1):
    # Only show recommendations for checks with >5% failure rate
    # (written as a negated '>' so a NaN rate is skipped, same as a low rate)
    if not (rate > 0.05):
        continue

    print(f"\n{rank}. {check} (Failure Rate: {rate:.1%})")
    for tip in recommendations.get(check, ()):
        print(tip)