In [None]:
import numpy as np\n
import matplotlib.pyplot as plt\n
import seaborn as sns\n
import torch\n
from pathlib import Path\n
\n
# Import SITS-Former modules\n
from sitsformer.data import DummySatelliteDataset, create_dataloader\n
from sitsformer.utils import load_config\n
\n
# Set up plotting\n
plt.style.use('default')\n
sns.set_palette('tab10')\n
%matplotlib inline

## Load Configuration\n
\n
First, let's load the default configuration to understand the data parameters.

In [None]:
# Load configuration\n
config_path = Path('../configs/default.yaml')\n
config = load_config(config_path)\n
\n
print('Data Configuration:')\n
for key, value in config['data'].items():\n
    print(f'  {key}: {value}')\n
    \n
print('\\nModel Configuration:')\n
for key, value in config['model'].items():\n
    print(f'  {key}: {value}')

## Create Dummy Dataset\n
\n
Let's create a dummy dataset for exploration purposes.

In [None]:
# Create dummy dataset\n
dataset = DummySatelliteDataset(\n
    num_samples=1000,\n
    sequence_length=config['data']['sequence_length'],\n
    image_size=config['data']['image_size'],\n
    num_channels=config['model']['in_channels'],\n
    num_classes=config['model']['num_classes']\n
)\n
\n
print(f'Dataset size: {len(dataset)} samples')\n
print(f'Expected shape per sample: {dataset[0][0].shape}')\n
print(f'Data type: {dataset[0][0].dtype}')\n
print(f'Label range: {min(dataset[i][1] for i in range(100))} - {max(dataset[i][1] for i in range(100))}')

## Visualize Sample Data\n
\n
Let's examine a few samples from the dataset.

In [None]:
# Get a sample\n
sample_idx = 42\n
images, label = dataset[sample_idx]\n
\n
print(f'Sample {sample_idx}:')\n
print(f'  Images shape: {images.shape}')  # (seq_len, channels, height, width)\n
print(f'  Label: {label}')\n
\n
# Extract dimensions\n
seq_len, channels, height, width = images.shape\n
\n
print(f'  Sequence length: {seq_len}')\n
print(f'  Number of channels: {channels}')\n
print(f'  Image size: {height}x{width}')\n
print(f'  Value range: [{images.min():.3f}, {images.max():.3f}]')

In [None]:
# Visualize the first few time steps\n
fig, axes = plt.subplots(2, 4, figsize=(16, 8))\n
axes = axes.ravel()\n
\n
for i in range(8):\n
    # Take RGB channels (assuming first 3 are RGB-like)\n
    rgb_image = images[i, :3].permute(1, 2, 0)  # Convert to HWC\n
    \n
    # Normalize for display\n
    rgb_image = (rgb_image - rgb_image.min()) / (rgb_image.max() - rgb_image.min())\n
    \n
    axes[i].imshow(rgb_image)\n
    axes[i].set_title(f'Time Step {i}')\n
    axes[i].axis('off')\n
\n
plt.suptitle(f'Sample {sample_idx} - First 8 Time Steps (Pseudo-RGB)', fontsize=16)\n
plt.tight_layout()\n
plt.show()

## Analyze Temporal Patterns\n
\n
Let's examine how pixel values change over time.

In [None]:
# Select a random pixel and plot its temporal evolution\n
pixel_x, pixel_y = height//2, width//2  # Center pixel\n
\n
fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n
\n
# Plot different channels\n
channels_to_plot = [0, 1, 2, 3]  # First 4 channels\n
channel_names = ['Channel 0', 'Channel 1', 'Channel 2', 'Channel 3']\n
\n
for idx, (ax, channel, name) in enumerate(zip(axes.ravel(), channels_to_plot, channel_names)):\n
    temporal_data = images[:, channel, pixel_x, pixel_y].numpy()\n
    \n
    ax.plot(temporal_data, marker='o', linewidth=2, markersize=4)\n
    ax.set_title(f'{name} - Pixel ({pixel_x}, {pixel_y})')\n
    ax.set_xlabel('Time Step')\n
    ax.set_ylabel('Value')\n
    ax.grid(True, alpha=0.3)\n
    \n
plt.suptitle(f'Temporal Evolution - Sample {sample_idx}', fontsize=16)\n
plt.tight_layout()\n
plt.show()

## Dataset Statistics\n
\n
Let's examine the distribution of labels and some basic statistics.

In [None]:
# Collect labels from a subset of the dataset\n
num_samples_to_check = 500\n
labels = [dataset[i][1] for i in range(num_samples_to_check)]\n
\n
# Plot label distribution\n
plt.figure(figsize=(12, 5))\n
\n
plt.subplot(1, 2, 1)\n
plt.hist(labels, bins=config['model']['num_classes'], alpha=0.7, edgecolor='black')\n
plt.title('Label Distribution')\n
plt.xlabel('Class')\n
plt.ylabel('Frequency')\n
plt.grid(True, alpha=0.3)\n
\n
# Calculate and display class distribution\n
unique_labels, counts = np.unique(labels, return_counts=True)\n
\n
plt.subplot(1, 2, 2)\n
plt.pie(counts, labels=[f'Class {i}' for i in unique_labels], autopct='%1.1f%%', startangle=90)\n
plt.title('Class Distribution')\n
\n
plt.tight_layout()\n
plt.show()\n
\n
print('Class Distribution:')\n
for label, count in zip(unique_labels, counts):\n
    print(f'  Class {label}: {count} samples ({count/len(labels)*100:.1f}%)')

## Batch Processing\n
\n
Let's see how the data looks when processed in batches.

In [None]:
# Create a data loader\n
batch_size = 8\n
dataloader = create_dataloader(\n
    dataset,\n
    batch_size=batch_size,\n
    shuffle=True,\n
    num_workers=0  # Use 0 for notebook compatibility\n
)\n
\n
# Get a batch\n
batch_images, batch_labels = next(iter(dataloader))\n
\n
print(f'Batch shape: {batch_images.shape}')  # (batch_size, seq_len, channels, height, width)\n
print(f'Batch labels: {batch_labels.numpy()}')\n
print(f'Batch labels shape: {batch_labels.shape}')\n
\n
# Calculate some batch statistics\n
print(f'\\nBatch Statistics:')\n
print(f'  Mean: {batch_images.mean().item():.4f}')\n
print(f'  Std: {batch_images.std().item():.4f}')\n
print(f'  Min: {batch_images.min().item():.4f}')\n
print(f'  Max: {batch_images.max().item():.4f}')

In [None]:
# Visualize samples from the batch\n
fig, axes = plt.subplots(2, 4, figsize=(20, 10))\n
\n
for i in range(batch_size):\n
    row = i // 4\n
    col = i % 4\n
    \n
    # Take the first time step and first 3 channels as pseudo-RGB\n
    sample_image = batch_images[i, 0, :3].permute(1, 2, 0)\n
    \n
    # Normalize for display\n
    sample_image = (sample_image - sample_image.min()) / (sample_image.max() - sample_image.min())\n
    \n
    axes[row, col].imshow(sample_image)\n
    axes[row, col].set_title(f'Sample {i}, Class {batch_labels[i].item()}')\n
    axes[row, col].axis('off')\n
\n
plt.suptitle('Batch Samples (First Time Step, Pseudo-RGB)', fontsize=16)\n
plt.tight_layout()\n
plt.show()

## Data Augmentation Preview\n
\n
If we had real satellite data, we could apply various augmentations. Let's simulate this with our dummy data.

In [None]:
# Simulate some basic augmentations\n
original_sample = batch_images[0]  # First sample from batch\n
\n
# Simple augmentations using PyTorch\n
import torch.nn.functional as F\n
\n
# Rotation (90 degrees)\n
rotated = torch.rot90(original_sample, 1, dims=(-2, -1))\n
\n
# Flip\n
flipped = torch.flip(original_sample, dims=(-1,))\n
\n
# Noise addition\n
noise_factor = 0.1\n
noisy = original_sample + torch.randn_like(original_sample) * noise_factor\n
\n
# Visualize original vs augmented (first time step, first 3 channels)\n
fig, axes = plt.subplots(1, 4, figsize=(20, 5))\n
\n
samples = [original_sample, rotated, flipped, noisy]\n
titles = ['Original', 'Rotated 90Â°', 'Horizontally Flipped', 'With Noise']\n
\n
for i, (sample, title) in enumerate(zip(samples, titles)):\n
    # Take first time step and first 3 channels\n
    img = sample[0, :3].permute(1, 2, 0)\n
    \n
    # Normalize for display\n
    img = (img - img.min()) / (img.max() - img.min())\n
    \n
    axes[i].imshow(img)\n
    axes[i].set_title(title)\n
    axes[i].axis('off')\n
\n
plt.suptitle('Data Augmentation Examples', fontsize=16)\n
plt.tight_layout()\n
plt.show()

## Summary\n
\n
This notebook demonstrated:\n
\n
1. Loading configuration files\n
2. Creating dummy satellite image time series data\n
3. Visualizing sample images and per-pixel temporal patterns\n
4. Analyzing dataset statistics\n
5. Working with data loaders for batch processing\n
6. Previewing potential data augmentations\n
\n
### Next Steps\n
\n
- Explore the model architecture in `model_architecture.ipynb`\n
- Run training experiments in `training_experiments.ipynb`\n
- Analyze results in `results_analysis.ipynb`\n
\n
### Working with Real Data\n
\n
When you have real satellite data:\n
\n
1. Replace `DummySatelliteDataset` with `SatelliteTimeSeriesDataset`\n
2. Provide the path to your data\n
3. Configure the appropriate data transforms\n
4. Adjust the configuration parameters to match your data dimensions