# Data Exploration - Earthquake Precursor Spectrograms

This notebook explores the geomagnetic spectrogram dataset used for earthquake precursor detection.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image

# Notebook-wide plotting setup; runs once, before any figure is created.
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply a named matplotlib style sheet to every figure created after this line.
# NOTE(review): the 'seaborn-v0_8-' prefixed style names are presumably only
# available in matplotlib 3.6+ — confirm against the environment's version.
plt.style.use('seaborn-v0_8-whitegrid')

## 1. Load Metadata

In [None]:
# Read the unified metadata CSV into `df`, the frame that every later cell
# in this notebook works from.
metadata_path = '../data/metadata/unified_metadata.csv'
df = pd.read_csv(metadata_path)

# Report the row count, then leave the first rows as the cell's rich output.
n_samples = len(df)
print(f"Total samples: {n_samples}")
df.head()

## 2. Class Distribution

In [None]:
# Two side-by-side bar charts: sample counts per magnitude class (left)
# and per azimuth class (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
mag_ax, azi_ax = axes

# Left panel: magnitude classes. value_counts() orders the bars by
# descending frequency.
mag_counts = df['magnitude_class'].value_counts()
mag_ax.bar(mag_counts.index, mag_counts.values, color='steelblue')
mag_ax.set(
    title='Magnitude Class Distribution',
    xlabel='Magnitude Class',
    ylabel='Count',
)

# Right panel: azimuth classes, built the same way.
azi_counts = df['azimuth_class'].value_counts()
azi_ax.bar(azi_counts.index, azi_counts.values, color='coral')
azi_ax.set(
    title='Azimuth Class Distribution',
    xlabel='Azimuth Class',
    ylabel='Count',
)

plt.tight_layout()
plt.show()

## 3. Station Distribution

In [None]:
# Bar chart of how many samples each station contributes.
# value_counts() orders stations by descending sample count.
station_counts = df['station'].value_counts()

plt.figure(figsize=(12, 5))
plt.bar(
    x=station_counts.index,
    height=station_counts.to_numpy(),
    color='teal',
)
plt.xlabel('Station')
plt.ylabel('Count')
plt.title('Samples per Station')

# Tilt the station labels by 45 degrees.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 4. Sample Spectrograms

In [None]:
# Display one sample spectrogram per magnitude class, side by side.
#
# For each class the first matching metadata row is used; its `filename`
# column is resolved relative to ../data/spectrograms. Panels whose class
# has no rows, or whose image file is missing on disk, are labelled with
# the reason instead of aborting the cell or being left as an empty box.
mag_classes = ['Large', 'Medium', 'Moderate', 'Normal']
fig, axes = plt.subplots(1, len(mag_classes), figsize=(16, 4))

for ax, mag_class in zip(axes, mag_classes):
    # Hide ticks and frame on every panel up front, so a panel without an
    # image does not render as an empty set of axes.
    ax.axis('off')

    class_rows = df[df['magnitude_class'] == mag_class]
    if class_rows.empty:
        # .iloc[0] on an empty selection would raise IndexError and stop
        # the whole cell; label the panel and move on instead.
        ax.set_title(f'{mag_class}\n(no samples)')
        continue

    sample = class_rows.iloc[0]
    img_path = Path('../data/spectrograms') / sample['filename']
    if not img_path.exists():
        ax.set_title(f'{mag_class}\n(image not found)')
        continue

    # Image.open keeps the underlying file open until the image is closed;
    # the context manager releases the handle once imshow has read the pixels.
    with Image.open(img_path) as img:
        ax.imshow(img)
    ax.set_title(f'{mag_class}\n{sample["azimuth_class"]}')

plt.suptitle('Sample Spectrograms by Magnitude Class', fontsize=14)
plt.tight_layout()
plt.show()

## 5. Temporal Distribution

In [None]:
# Parse the event dates once, then derive calendar columns on `df`.
# Columns are assigned in place so any other reference to the frame
# also sees `date`, `year` and `month`.
event_dates = pd.to_datetime(df['event_date'])
df['date'] = event_dates
df['year'] = event_dates.dt.year
df['month'] = event_dates.dt.month

# Samples per year, ordered chronologically rather than by frequency.
year_counts = df['year'].value_counts().sort_index()

plt.figure(figsize=(10, 5))
plt.bar(
    x=year_counts.index,
    height=year_counts.to_numpy(),
    color='purple',
)
plt.xlabel('Year')
plt.ylabel('Count')
plt.title('Samples per Year')
plt.show()