# PyroQ Data Exploration
## Thermal Satellite Imagery Analysis for Wildfire Detection
This notebook explores:
- Dataset loading and statistics
- Visual comparison of fire vs. non-fire thermal images
- Temperature distribution analysis


In [None]:
##CELL 2 Imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys
sys.path.append('../')

from src.data.dataset import ThermalAnomalyDataset
from src.classical.preprocessing import ThermalPreprocessor

# Visualization settings
# The style sheet is applied first and the seaborn palette second; presumably this
# order is intentional so the palette is not replaced by the style's own colour
# cycle — TODO confirm before reordering.
plt.style.use('seaborn-v0_8')
sns.set_palette('viridis')
# IPython line magic (not Python syntax): selects the inline plotting backend.
%matplotlib inline

In [None]:
## CELL 3 DATA LOADING
# Load the training split from the patch directory when it exists; otherwise
# generate sample data into the raw directory and load the training split from there.
data_path = '../data/patches/'
sample_data_path = '../data/raw'

if Path(data_path).exists():
    dataset = ThermalAnomalyDataset(data_path, split='train')
else:
    print("Data not found. Creating sample data...")
    # Imported lazily: only needed when the patch directory is missing.
    from src.data.modis_api import create_sample_data
    create_sample_data(sample_data_path, num_samples=1000)
    dataset = ThermalAnomalyDataset(sample_data_path, split='train')

# Summarise whichever dataset was loaded, so the fallback path is reported too.
# Coerce labels to an array so `== 0` is element-wise even if labels are a plain list.
loaded_labels = np.asarray(dataset.labels)
print(f"Dataset size: {len(dataset)}")
if len(dataset) > 0:
    # Guarded: indexing images[0] on an empty dataset would raise.
    print(f"Image shape: {dataset.images[0].shape}")
print(f"Label distribution:\n0 (No Fire): {np.sum(loaded_labels == 0)}\n1 (Fire): {np.sum(loaded_labels == 1)}")

## CELL 4
### Fire vs. Normal Thermal Signatures
Comparison of 5 fire samples (top) vs. 5 normal samples (bottom):

In [None]:


SAMPLES_PER_CLASS = 5          # columns in the comparison grid
COLOR_MIN, COLOR_MAX = 300, 400  # shared colour limits so both rows are directly comparable


def _plot_sample_row(row_axes, images, sample_indices, title_prefix):
    """Draw one image per axis in `row_axes`, titled with its mean value.

    Args:
        row_axes: 1-D sequence of matplotlib axes (one grid row).
        images: indexable collection of images; a 3-D image is reduced to
            its first entry along axis 0.
        sample_indices: indices into `images`; may be shorter than `row_axes`.
        title_prefix: text placed before the sample number in each title.

    Axes without a matching sample are left blank (frame hidden) instead of
    showing an empty plot.
    """
    for slot, ax in enumerate(row_axes):
        if slot < len(sample_indices):
            img = images[sample_indices[slot]]
            if len(img.shape) == 3:
                img = img[0]  # Use first channel
            ax.imshow(img, cmap='hot', vmin=COLOR_MIN, vmax=COLOR_MAX)
            ax.set_title(f'{title_prefix} #{slot+1}\n{np.mean(img):.1f}K')
        ax.axis('off')


# Coerce to an array so the label comparisons below are element-wise.
grid_labels = np.asarray(dataset.labels)

fig, axes = plt.subplots(2, SAMPLES_PER_CLASS, figsize=(20, 8))

# Fire samples (label=1) on the top row
fire_indices = np.where(grid_labels == 1)[0][:SAMPLES_PER_CLASS]
_plot_sample_row(axes[0], dataset.images, fire_indices, 'Fire')

# No-fire samples (label=0) on the bottom row
no_fire_indices = np.where(grid_labels == 0)[0][:SAMPLES_PER_CLASS]
_plot_sample_row(axes[1], dataset.images, no_fire_indices, 'Normal')

plt.suptitle('Thermal Image Comparison (Fire vs Normal)', y=1.05)
plt.tight_layout()
plt.show()

## CELL 5 TEMPERATURE ANALYSIS
## Statistical Temperature Analysis
Plots and summary statistics comparing per-pixel temperatures of fire and normal images:
- Histogram showing distribution overlap
- Boxplot comparing statistical properties

In [None]:
# Gather per-pixel values by class. Flattened arrays are collected and joined
# once, rather than extending a Python list pixel by pixel (which creates one
# Python object per pixel and is slow for large datasets).
fire_chunks = []
normal_chunks = []

for img, label in zip(dataset.images, dataset.labels):
    if len(img.shape) == 3:
        img = img[0]  # use first channel, as in the sample grid
    flat = np.asarray(img).ravel()
    # Anything not labelled 1 is treated as "normal", matching the original split.
    if label == 1:
        fire_chunks.append(flat)
    else:
        normal_chunks.append(flat)

# np.concatenate rejects an empty list, so fall back to an empty array.
fire_pixels = np.concatenate(fire_chunks) if fire_chunks else np.empty(0)
normal_pixels = np.concatenate(normal_chunks) if normal_chunks else np.empty(0)

# Only classes that actually have pixels are plotted.
class_groups = [
    (name, color, pixels)
    for name, color, pixels in (('Normal', 'blue', normal_pixels),
                                ('Fire', 'red', fire_pixels))
    if pixels.size > 0
]

# Visualization
if class_groups:
    group_names = [name for name, _, _ in class_groups]
    group_colors = [color for _, color, _ in class_groups]
    group_data = [pixels for _, _, pixels in class_groups]

    fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(14, 5))

    # Histogram
    ax_hist.hist(group_data,
                 bins=50,
                 label=group_names,
                 color=group_colors,
                 alpha=0.6,
                 density=True)
    ax_hist.set_xlabel('Temperature (K)')
    ax_hist.set_ylabel('Density')
    ax_hist.set_title('Temperature Distribution')
    ax_hist.legend()

    # Boxplot. Tick labels are set on the axes afterwards instead of through
    # boxplot's `labels=` keyword, which newer Matplotlib releases deprecate.
    box_artists = ax_box.boxplot(group_data,
                                 patch_artist=True,
                                 medianprops=dict(color='black'))
    # Colour each box like its histogram series.
    for patch, color in zip(box_artists['boxes'], group_colors):
        patch.set_facecolor(color)
        patch.set_alpha(0.5)
    ax_box.set_xticklabels(group_names)
    ax_box.set_ylabel('Temperature (K)')
    ax_box.set_title('Temperature Statistics')

    fig.tight_layout()
    plt.show()
else:
    print("No pixels available to plot.")

# Metrics (np.max would raise on an empty class, so report that case explicitly)
for heading, pixels in (("Fire Areas", fire_pixels), ("Normal Areas", normal_pixels)):
    if pixels.size > 0:
        print(f"[{heading}]\nMean: {np.mean(pixels):.2f}K | Max: {np.max(pixels):.2f}K | σ: {np.std(pixels):.2f}")
    else:
        print(f"[{heading}]\nNo samples")