# iOS World - Data Exploration

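This notebook explores the captured transition dataset: it loads the dataset index, plots the distribution of action types, displays sample before/after screenshot pairs, summarizes image statistics and transition quality metrics, and visualizes the pixel-level difference between the before and after screenshots.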

In [None]:
import json
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from collections import Counter

## Load Dataset

In [None]:
# Root of the processed dataset; the cells below resolve every path against it.
data_dir = Path('../data/processed')

# Load the dataset index. JSON text is UTF-8, so decode it explicitly instead
# of relying on the platform's default text encoding.
with open(data_dir / 'index.json', 'r', encoding='utf-8') as f:
    index = json.load(f)

print(f"Total transitions: {index['total_transitions']}")
print("\nSplits:")
for split, count in index['splits'].items():
    print(f"  {split}: {count}")

print("\nApps:")
for app in index['apps']:
    print(f"  {app['bundle_id']}: {app['transitions']}")

# List the most frequent action types first.
print("\nAction distribution:")
for action, count in sorted(index['action_distribution'].items(), key=lambda x: -x[1]):
    print(f"  {action}: {count}")

## Visualize Action Distribution

In [None]:
# Bar chart of how often each action type occurs, in the order stored in the index.
action_distribution = index['action_distribution']
actions = list(action_distribution)
counts = [action_distribution[name] for name in actions]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(actions, counts)
ax.set(xlabel='Action Type', ylabel='Count', title='Distribution of Action Types')
plt.xticks(rotation=45, ha='right')  # tilt the category labels
fig.tight_layout()
plt.show()

## Load and Display Sample Transitions

In [None]:
def load_transition(split, index):
    """Load one transition record together with its before/after screenshots.

    The record is read from ``data_dir / split / f"{split}_{index:06d}.json"``.
    The screenshot paths stored in the record are resolved relative to the
    same split directory.

    Args:
        split: Name of the split sub-directory (the notebook uses ``'train'``).
        index: Integer position of the transition within the split. Inside
            this function the name shadows the module-level ``index``
            dictionary loaded from ``index.json``.

    Returns:
        A ``(transition, before_img, after_img)`` tuple: the parsed JSON
        record and the two opened PIL images.

    Raises:
        FileNotFoundError: If the record or one of its screenshots is missing.
        KeyError: If the record lacks the expected screenshot entries.
    """
    split_dir = data_dir / split
    transition_file = split_dir / f"{split}_{index:06d}.json"

    # Decode explicitly as UTF-8 so non-ASCII text in the record (app names,
    # element labels) reads the same on every platform.
    with open(transition_file, 'r', encoding='utf-8') as f:
        transition = json.load(f)

    # Load images. Pillow opens files lazily: pixel data is only read when the
    # image is first used, so callers that need just the metadata can
    # close() the returned images right away.
    before_img = Image.open(split_dir / transition['before_state']['screenshot'])
    after_img = Image.open(split_dir / transition['after_state']['screenshot'])

    return transition, before_img, after_img

def display_transition(transition, before_img, after_img):
    """Show a transition's screenshots side by side and print its metadata.

    Args:
        transition: Parsed transition record (a dict) as returned by
            ``load_transition``.
        before_img: Image shown in the left panel.
        after_img: Image shown in the right panel.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))

    # One panel per screenshot, axes hidden
    for ax, img, title in zip(axes, (before_img, after_img), ('Before', 'After')):
        ax.imshow(img)
        ax.set_title(title)
        ax.axis('off')

    action = transition['action']
    plt.suptitle(f"Action: {action['type']}")
    plt.tight_layout()
    plt.show()

    # Textual summary below the figure
    print(f"Transition ID: {transition['transition_id']}")
    print(f"App: {transition['app']['name']}")
    print(f"Action Type: {action['type']}")

    # The target element is optional; skip the line when it is absent or empty
    element = action.get('element')
    if element:
        print(f"Target Element: {element.get('type')} - {element.get('label')}")

    print(f"Quality - Stability: {transition['quality']['stability_score']:.2f}")
    print(f"Duration: {transition['timing']['total_duration_ms']}ms")

In [None]:
# Render the record at position 0 of the training split
transition, before, after = load_transition(split='train', index=0)
display_transition(transition=transition, before_img=before, after_img=after)

## Analyze Image Statistics

In [None]:
# Summarize the "before" screenshots of the first `sample_size` training
# transitions (a prefix of the split, not a random sample).
sample_size = 100
image_sizes = []
image_means = []
image_stds = []

for i in range(min(sample_size, index['splits']['train'])):
    # Only the "before" screenshot feeds the statistics, so the "after" image
    # is not converted to an array that would never be used.
    _, before, _ = load_transition('train', i)

    before_array = np.array(before)

    image_sizes.append(before.size)
    image_means.append(before_array.mean())
    image_stds.append(before_array.std())

if image_sizes:
    print(f"Image sizes: {set(image_sizes)}")
    print(f"Mean pixel value: {np.mean(image_means):.2f} ± {np.std(image_means):.2f}")
    print(f"Pixel std dev: {np.mean(image_stds):.2f} ± {np.std(image_stds):.2f}")
else:
    # np.mean / np.std of an empty list would print nan and emit a RuntimeWarning
    print("No training transitions available to sample.")

## Analyze Transition Quality

In [None]:
# Gather quality metrics from the first `max_quality_samples` training
# transitions (a capped prefix of the split, not every transition).
max_quality_samples = 1000
stability_scores = []
durations = []

for i in range(min(max_quality_samples, index['splits']['train'])):
    transition, unused_before, unused_after = load_transition('train', i)
    # Only the JSON metadata is used here, so close the two screenshots
    # immediately rather than leaving their file handles to garbage collection.
    unused_before.close()
    unused_after.close()

    stability_scores.append(transition['quality']['stability_score'])
    durations.append(transition['timing']['total_duration_ms'])

if stability_scores:
    # Plot distributions
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    axes[0].hist(stability_scores, bins=20)
    axes[0].set_xlabel('Stability Score')
    axes[0].set_ylabel('Count')
    axes[0].set_title('Distribution of Stability Scores')

    axes[1].hist(durations, bins=20)
    axes[1].set_xlabel('Duration (ms)')
    axes[1].set_ylabel('Count')
    axes[1].set_title('Distribution of Transition Durations')

    plt.tight_layout()
    plt.show()

    print(f"Stability score: {np.mean(stability_scores):.3f} ± {np.std(stability_scores):.3f}")
    print(f"Duration: {np.mean(durations):.1f}ms ± {np.std(durations):.1f}ms")
else:
    # np.mean / np.std of an empty list would print nan and emit a RuntimeWarning
    print("No training transitions available to analyze.")

## Browse Transitions by Action Type

In [None]:
def find_transitions_by_action(split, action_type, max_results=10):
    """Collect the first transitions in a split whose action matches ``action_type``.

    Transitions are scanned in index order, from 0 up to the count recorded
    for ``split`` in the module-level ``index`` dictionary. The scan stops as
    soon as ``max_results`` matches have been collected.

    Args:
        split: Split to scan; must be a key of ``index['splits']``.
        action_type: Value compared against ``transition['action']['type']``.
        max_results: Maximum number of matches to return. Zero or a negative
            value yields an empty list without loading any transition.

    Returns:
        A list of ``(i, transition, before_img, after_img)`` tuples, one per
        match, in scan order.
    """
    results = []

    # Enforce the limit before scanning: checking only after an append would
    # hand back one match even when the caller asked for none.
    if max_results <= 0:
        return results

    for i in range(index['splits'][split]):
        transition, before, after = load_transition(split, i)

        if transition['action']['type'] != action_type:
            # Not a match: these images are discarded, so release their
            # file handles now instead of leaving them open.
            before.close()
            after.close()
            continue

        results.append((i, transition, before, after))
        if len(results) >= max_results:
            break

    return results

In [None]:
# Show up to three training transitions whose action type is "tap"
tap_transitions = find_transitions_by_action(split='train', action_type='tap', max_results=3)

for tap_match in tap_transitions:
    idx, transition, before, after = tap_match
    print(f"\n--- Transition {idx} ---")
    display_transition(transition, before, after)

## Compute Visual Difference Between Before/After

In [None]:
def compute_visual_diff(before_img, after_img):
    """Compute a per-pixel absolute difference map between two images.

    Args:
        before_img: Anything ``np.array`` accepts (for example a PIL image or
            an ndarray), either 2-D (single channel) or 3-D with the channels
            on the last axis.
        after_img: Second image, with the same shape as ``before_img``.

    Returns:
        A float array of absolute differences. For 3-D inputs the differences
        are averaged over axis 2, giving one value per pixel; 2-D inputs are
        returned as the plain absolute difference.

    Raises:
        ValueError: If the two images do not have the same shape.
    """
    # Cast to float before subtracting so integer pixel arrays cannot wrap
    # around or truncate.
    before_array = np.array(before_img).astype(float)
    after_array = np.array(after_img).astype(float)

    # Fail with a clear message rather than a broadcasting error, or a
    # silently broadcast result, when the images differ in size or channels.
    if before_array.shape != after_array.shape:
        raise ValueError(
            f"Image shapes differ: before {before_array.shape} vs after {after_array.shape}"
        )

    # Absolute difference
    diff = np.abs(after_array - before_array)

    # Single-channel images have no channel axis to average over.
    if diff.ndim == 2:
        return diff

    # Convert to grayscale for visualization
    return diff.mean(axis=2)

def display_with_diff(transition, before_img, after_img, threshold=10):
    """Display before, after, and difference panels, then print change statistics.

    Args:
        transition: Parsed transition record; only
            ``transition['action']['type']`` is read, for the figure title.
        before_img: Image shown in the left panel.
        after_img: Image shown in the middle panel.
        threshold: Difference value a pixel must exceed to count as changed.
            Defaults to 10, the value that was previously hard-coded.
    """
    diff = compute_visual_diff(before_img, after_img)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # (image, title, extra imshow arguments) for each panel, left to right
    panels = (
        (before_img, 'Before', {}),
        (after_img, 'After', {}),
        (diff, 'Difference (hotter = more change)', {'cmap': 'hot'}),
    )
    for ax, (image, title, imshow_kwargs) in zip(axes, panels):
        ax.imshow(image, **imshow_kwargs)
        ax.set_title(title)
        ax.axis('off')

    plt.suptitle(f"Action: {transition['action']['type']}")
    plt.tight_layout()
    plt.show()

    # Print change statistics
    percent_changed = (diff > threshold).sum() / diff.size * 100
    print(f"Pixels changed (>{threshold} intensity): {percent_changed:.1f}%")
    print(f"Mean difference: {diff.mean():.2f}")
    print(f"Max difference: {diff.max():.2f}")

In [None]:
# Render the record at position 0 of the training split together with its difference map
transition, before, after = load_transition(split='train', index=0)
display_with_diff(transition=transition, before_img=before, after_img=after)

## Next Steps

- Explore more transitions
- Analyze patterns in different action types
- Identify common UI changes
- Use insights to inform model architecture