# Data Exploration - Greek Traditional Motifs

This notebook provides an overview of the collected Greek motif dataset and helps identify patterns, gaps, and quality issues.

## Goals
1. Visualize dataset distribution
2. Check image quality metrics
3. Analyze metadata completeness
4. Identify missing categories or underrepresented regions
5. Generate dataset statistics for the paper


In [None]:
import sys
from pathlib import Path
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from collections import Counter

# Add project root to path so the project-local `src` package can be imported
# below; this insertion must run before the `from src...` import.
# NOTE(review): assumes the kernel's working directory is exactly one level
# below the project root (e.g. a notebooks/ folder) — confirm.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

from src.utils.config import get_paths

# Set up paths. The returned object is read in this notebook through its
# `root`, `data_raw` and `data_annotations` attributes.
paths = get_paths()

# Configure plotting
# NOTE(review): the 'seaborn-v0_8-*' style names presumably need
# matplotlib >= 3.6 — confirm against the pinned environment.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
%matplotlib inline

# Echo the resolved locations so a misconfigured path is visible immediately.
print(f"Project root: {paths.root}")
print(f"Data directory: {paths.data_raw}")


## 1. Load Dataset

Find all images under the raw data directory and all annotation JSON files (excluding `template.json`) under the annotations directory.


In [None]:
# Find all images in raw data directory.
# Extensions are listed in lower case; matching below is case-insensitive so
# files such as "IMG_001.JPG" or "scan.TIF" are not silently skipped on
# case-sensitive filesystems.
image_extensions = {'.png', '.jpg', '.jpeg', '.tif', '.tiff'}
image_files = []

if paths.data_raw.exists():
    # Walk the tree once, keep only regular files with a known image suffix,
    # and sort so the resulting order is reproducible across runs.
    image_files = sorted(
        candidate
        for candidate in paths.data_raw.rglob('*')
        if candidate.is_file() and candidate.suffix.lower() in image_extensions
    )
else:
    print(f"⚠️  Data directory not found: {paths.data_raw}")
    print("Create it and add your Greek motif images there!")

print(f"Found {len(image_files)} images")

# Find all annotation files (the annotation template itself is not data).
annotation_files = []
if paths.data_annotations.exists():
    annotation_files = sorted(
        f for f in paths.data_annotations.rglob('*.json')
        if f.name != 'template.json'
    )

print(f"Found {len(annotation_files)} annotation files")

# Guide the user through the two "empty dataset" situations.
if len(image_files) == 0:
    print("\n" + "="*60)
    print("NO IMAGES FOUND - START HERE:")
    print("="*60)
    print("1. Add Greek motif images to:", paths.data_raw)
    print("2. Organize by region and type (see NEXT_STEPS.md)")
    print("3. Re-run this notebook to see statistics")
elif len(annotation_files) == 0:
    print("\n⚠️  You have images but no annotations yet!")
    print(f"   Use the template at: {paths.data_annotations / 'template.json'}")
