# SETI Signal Visualization

Questo notebook visualizza i sample generati dalla pipeline di generazione dati per verificare che:
1. I segnali vengano iniettati correttamente
2. Il pattern ON-OFF sia distinguibile
3. I sample TRUE, FALSE e SINGLE_SHOT siano differenziabili

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

# Import our modules
from src.data.noise_generator import NoiseGenerator
from src.data.signal_generator import SignalGenerator
from src.data.cadence_generator import CadenceGenerator, CadenceParams
from src.utils.preprocessing import normalize_log

# Notebook-wide plotting defaults: dark theme, default figure size, base font size.
plt.style.use('dark_background')
plt.rcParams.update({
    'figure.figsize': (15, 10),
    'font.size': 12,
})

## 1. Generazione Rumore di Fondo

Visualizziamo prima il rumore sintetico generato con setigen (distribuzione chi-quadrato)

In [None]:
# Build the noise source and draw one background frame, requested with
# 512 frequency channels and 16 time channels.
noise_gen = NoiseGenerator()
noise_frame = noise_gen.generate_frame(fchans=512, tchans=16)

# Numeric sanity check before plotting.
print(f"Noise shape: {noise_frame.shape}")
print(f"Min: {noise_frame.min():.2e}, Max: {noise_frame.max():.2e}, Mean: {noise_frame.mean():.2e}")

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
raw_ax, norm_ax = axes

# Left panel: the frame as generated, colour limits chosen by matplotlib.
raw_image = raw_ax.imshow(noise_frame, aspect='auto', cmap='hot')
raw_ax.set_title('Rumore Sintetico (Raw)')
raw_ax.set_xlabel('Canale Frequenza')
raw_ax.set_ylabel('Time Bin')
fig.colorbar(raw_image, ax=raw_ax)

# Right panel: the same frame after normalize_log, with colour limits pinned to 0..1.
# The normalisation is deliberately done after the raw frame has been drawn.
noise_norm = normalize_log(noise_frame)
norm_image = norm_ax.imshow(noise_norm, aspect='auto', cmap='hot', vmin=0, vmax=1)
norm_ax.set_title('Rumore dopo Log-Normalizzazione')
norm_ax.set_xlabel('Canale Frequenza')
norm_ax.set_ylabel('Time Bin')
fig.colorbar(norm_image, ax=norm_ax)

plt.tight_layout()
plt.show()

## 2. Iniezione Segnale Singolo

Visualizziamo un singolo segnale iniettato con diversi SNR

In [None]:
# Signal injector with a fixed seed.
signal_gen = SignalGenerator(seed=42)

snr_values = [10, 25, 50, 100]
fig, axes = plt.subplots(2, 4, figsize=(18, 8))

# axes.T yields one (top panel, bottom panel) pair per SNR value.
for snr, (top_ax, bottom_ax) in zip(snr_values, axes.T):
    # A fresh noise frame for every SNR; the injector receives a copy so the
    # untouched frame stays available for the subtraction below.
    noise = noise_gen.generate_frame(fchans=512, tchans=16)
    injected, info = signal_gen.inject_signal(noise.copy(), snr=snr)

    # Top row: frame returned by the injector.
    top_ax.imshow(injected, aspect='auto', cmap='hot')
    top_ax.set_title(f'SNR={snr}\ndrift={info["drift_rate"]:.2f} Hz/s')
    top_ax.set_xlabel('Freq')

    # Bottom row: injected frame minus the original noise frame.
    diff = injected - noise
    bottom_ax.imshow(diff, aspect='auto', cmap='viridis')
    bottom_ax.set_title('Differenza (solo segnale)')
    bottom_ax.set_xlabel('Freq')

# Only the left-most column carries a y-axis label.
axes[0, 0].set_ylabel('Time')
axes[1, 0].set_ylabel('Time')

plt.suptitle('Segnali con diversi SNR', fontsize=14)
plt.tight_layout()
plt.show()

## 3. Cadence Pattern: TRUE vs FALSE

Questa è la visualizzazione più importante: mostra la differenza tra un sample TRUE (ETI) e uno FALSE (RFI/noise)

In [None]:
# Cadence generator for 512 x 16 frames, created with a fixed seed.
params = CadenceParams(fchans=512, tchans=16, snr_base=30, snr_range=20)
cadence_gen = CadenceGenerator(params, seed=123)

# One TRUE (ETI-pattern) cadence plus the metadata returned alongside it.
true_sample, true_info = cadence_gen.create_true_sample(snr=40)
print("TRUE sample info:", true_info)

# Panel titles for the six observations of a cadence (ON and OFF alternate).
labels = ['A1 (ON)', 'B (OFF)', 'A2 (ON)', 'C (OFF)', 'A3 (ON)', 'D (OFF)']

fig, axes = plt.subplots(2, 6, figsize=(18, 7))
raw_row, norm_row = axes

for idx, title in enumerate(labels):
    # Only the first column carries an x-axis label.
    x_label = 'Freq' if idx == 0 else ''
    observation = true_sample[idx]

    # Top row: observation as generated.
    raw_row[idx].imshow(observation, aspect='auto', cmap='hot')
    raw_row[idx].set_title(title)
    raw_row[idx].set_xlabel(x_label)

    # Bottom row: observation after normalize_log, colour limits pinned to 0..1.
    norm_row[idx].imshow(normalize_log(observation), aspect='auto', cmap='hot', vmin=0, vmax=1)
    norm_row[idx].set_xlabel(x_label)

raw_row[0].set_ylabel('Time (Raw)')
norm_row[0].set_ylabel('Time (Normalized)')

plt.suptitle('Sample TRUE (ETI): Segnale diverso in ON vs OFF', fontsize=14, color='lime')
plt.tight_layout()
plt.show()

In [None]:
# Draw a single FALSE cadence (RFI pattern or pure noise).
# NOTE(review): only one sample is generated here; nothing retries until an
# RFI case with a visible signal comes up, so the plot may show pure noise.
# NOTE(review): seeding NumPy's global RNG only influences the result if the
# generator draws from it - cadence_gen was built with its own seed; TODO confirm.
np.random.seed(456)
false_sample = cadence_gen.create_false_sample(snr=40)

fig, axes = plt.subplots(2, 6, figsize=(18, 7))

for col in range(6):
    raw_ax, norm_ax = axes[:, col]
    # Only the first column carries an x-axis label.
    x_label = 'Freq' if col == 0 else ''

    # Top row: observation as generated.
    raw_ax.imshow(false_sample[col], aspect='auto', cmap='hot')
    raw_ax.set_title(labels[col])
    raw_ax.set_xlabel(x_label)

    # Bottom row: observation after normalize_log, colour limits pinned to 0..1.
    normalized = normalize_log(false_sample[col])
    norm_ax.imshow(normalized, aspect='auto', cmap='hot', vmin=0, vmax=1)
    norm_ax.set_xlabel(x_label)

axes[0, 0].set_ylabel('Time (Raw)')
axes[1, 0].set_ylabel('Time (Normalized)')

plt.suptitle('Sample FALSE (RFI): Stesso pattern in tutte le osservazioni', fontsize=14, color='red')
plt.tight_layout()
plt.show()

## 4. Confronto Diretto: Stacked View

Visualizziamo la cadence completa come immagine unica per vedere il drift del segnale

In [None]:
def stack_cadence(cadence):
    """Return the first six observations of ``cadence`` stacked vertically.

    The observations are read as ``cadence[0]`` through ``cadence[5]`` and
    joined with ``np.vstack`` in that order.
    """
    observations = [cadence[index] for index in range(6)]
    return np.vstack(observations)

fig, axes = plt.subplots(1, 3, figsize=(18, 8))

# TRUE cadence: stack the six observations, then normalise the stacked image.
true_stacked = stack_cadence(true_sample)
true_stacked_norm = normalize_log(true_stacked)
axes[0].imshow(true_stacked_norm, aspect='auto', cmap='hot')
axes[0].set_title('TRUE (ETI)\nNota: segnali con diversa traiettoria', color='lime', fontsize=12)
axes[0].set_xlabel('Canale Frequenza')
axes[0].set_ylabel('Time (6 osservazioni stacked)')

# FALSE cadence, processed the same way.
false_stacked = stack_cadence(false_sample)
false_stacked_norm = normalize_log(false_stacked)
axes[1].imshow(false_stacked_norm, aspect='auto', cmap='hot')
axes[1].set_title('FALSE (RFI)\nNota: stesso segnale continuo', color='red', fontsize=12)
axes[1].set_xlabel('Canale Frequenza')

# Element-wise difference of the two normalised stacks on a symmetric colour scale.
diff = true_stacked_norm - false_stacked_norm
axes[2].imshow(diff, aspect='auto', cmap='RdBu_r', vmin=-0.5, vmax=0.5)
axes[2].set_title('Differenza TRUE - FALSE', fontsize=12)
axes[2].set_xlabel('Canale Frequenza')

# Dashed separators between consecutive observations. The row count per
# observation is derived from the displayed image (six observations stacked)
# rather than hard-coded, so the lines stay on the boundaries if the frame
# height changes. The -0.5 puts each line on the edge between two pixel rows.
rows_per_observation = np.shape(true_stacked_norm)[0] // 6
boundaries = [k * rows_per_observation - 0.5 for k in range(1, 6)]
for ax, line_color in zip(axes, ('white', 'white', 'black')):
    for y in boundaries:
        ax.axhline(y=y, color=line_color, linewidth=0.5, linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

## 5. Single Shot Sample

Usato per i test di sensibilità: un unico segnale, iniettato soltanto nelle osservazioni ON

In [None]:
# One SINGLE_SHOT cadence requested at SNR 50.
single_shot = cadence_gen.create_single_shot_sample(snr=50)

fig, axes = plt.subplots(2, 6, figsize=(18, 7))

for col in range(6):
    raw_ax, norm_ax = axes[:, col]

    # Normalised copy is computed first, before the raw frame is drawn.
    normalized = normalize_log(single_shot[col])

    # Even positions (0, 2, 4) are the ON observations; their titles are highlighted.
    title_color = 'lime' if col % 2 == 0 else 'white'

    raw_ax.imshow(single_shot[col], aspect='auto', cmap='hot')
    raw_ax.set_title(labels[col], color=title_color)

    norm_ax.imshow(normalized, aspect='auto', cmap='hot', vmin=0, vmax=1)

axes[0, 0].set_ylabel('Raw')
axes[1, 0].set_ylabel('Normalized')

plt.suptitle('SINGLE SHOT: Segnale SOLO in osservazioni ON (A1, A2, A3)', fontsize=14, color='cyan')
plt.tight_layout()
plt.show()

## 6. Batch di Sample per Training

Generiamo un batch e verifichiamo la varietà

In [None]:
# Generate one batch per class with the same size and SNR settings.
batch_size = 8

true_batch = cadence_gen.generate_batch('true_fast', batch_size, snr_base=20, snr_range=40)
false_batch = cadence_gen.generate_batch('false', batch_size, snr_base=20, snr_range=40)

print(f"TRUE batch shape: {true_batch.shape}")
print(f"FALSE batch shape: {false_batch.shape}")

# Grid: one row per sample (first four of each batch). Columns 0-1 show the
# TRUE batch, columns 2-3 the FALSE batch; within each pair the left panel is
# the stacked cadence and the right panel is the first observation only.
fig, axes = plt.subplots(4, 4, figsize=(16, 14))

for row in range(4):
    for first_col, batch in ((0, true_batch), (2, false_batch)):
        stacked = normalize_log(stack_cadence(batch[row]))
        axes[row, first_col].imshow(stacked, aspect='auto', cmap='hot')
        axes[row, first_col + 1].imshow(normalize_log(batch[row, 0]), aspect='auto', cmap='hot')
    axes[row, 0].set_ylabel(f'Sample {row+1}')

# Column headers go on the top row only.
axes[0, 0].set_title('TRUE samples', color='lime', fontsize=12)
axes[0, 1].set_title('TRUE A1 only', fontsize=12)
axes[0, 2].set_title('FALSE samples', color='red', fontsize=12)
axes[0, 3].set_title('FALSE A1 only', fontsize=12)

plt.suptitle('Batch di Training Samples', fontsize=14)
plt.tight_layout()
plt.show()

## 7. Statistiche sui Segnali Generati

In [None]:
# Inject signals into fresh noise frames and collect the drift rate and SNR
# reported for each injection, to inspect their distributions.
n_samples = 100
drift_rates = []
snrs = []

# seed=None: presumably leaves the generator unseeded so every run differs.
signal_gen = SignalGenerator(seed=None)

for _ in range(n_samples):
    noise = noise_gen.generate_frame(fchans=512, tchans=16)
    # No SNR is passed here; only the returned metadata is kept.
    _injected, info = signal_gen.inject_signal(noise)
    drift_rates.append(info['drift_rate'])
    snrs.append(info['snr'])

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Drift rate distribution, with a marker at zero drift.
axes[0].hist(drift_rates, bins=30, color='cyan', alpha=0.7, edgecolor='white')
axes[0].axvline(x=0, color='red', linestyle='--', label='Zero drift')
axes[0].set_xlabel('Drift Rate (Hz/s)')
axes[0].set_ylabel('Count')
axes[0].set_title('Distribuzione Drift Rate')
axes[0].legend()

# Sign balance of the drift rates. Exact zeros count on neither side, so the
# ratio is undefined when no non-zero drift exists; report "n/a" instead of
# raising ZeroDivisionError.
pos = sum(1 for d in drift_rates if d > 0)
neg = sum(1 for d in drift_rates if d < 0)
nonzero = pos + neg
ratio_text = f"{pos/nonzero:.2f}" if nonzero else "n/a"
print(f"Drift positivi: {pos}, Drift negativi: {neg}, Ratio: {ratio_text}")

# SNR distribution.
axes[1].hist(snrs, bins=20, color='orange', alpha=0.7, edgecolor='white')
axes[1].set_xlabel('SNR')
axes[1].set_ylabel('Count')
axes[1].set_title('Distribuzione SNR')

plt.tight_layout()
plt.show()

# Summary statistics of the collected drift rates.
print("\nStatistiche Drift Rate:")
print(f"  Mean: {np.mean(drift_rates):.3f} Hz/s")
print(f"  Std:  {np.std(drift_rates):.3f} Hz/s")
print(f"  Min:  {np.min(drift_rates):.3f} Hz/s")
print(f"  Max:  {np.max(drift_rates):.3f} Hz/s")

## 8. Conclusioni

**Verifiche completate:**
1. ✅ Rumore sintetico generato correttamente (distribuzione chi-quadrato)
2. ✅ Segnali iniettati visibili a diversi SNR
3. ✅ Pattern TRUE: segnali con traiettorie diverse in ON vs OFF
4. ✅ Pattern FALSE: stesso segnale continuo in tutte le osservazioni (RFI) o puro rumore
5. ✅ Distribuzione drift rate bilanciata (positivi ≈ negativi)

**Prossimi passi:**
- Avviare il training del VAE
- Verificare che il latent space separi TRUE da FALSE