In [None]:
# Gaza FIRMS: Fire vs Explosion — EDA & Training
This notebook loads FIRMS CSVs, engineers features per grid/time window, and trains a RandomForest baseline.


In [None]:
import os, json, math
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
from pathlib import Path
import sys
sys.path.append(str(Path('..').resolve()))
from model.utils import extract_features_from_sequence, vectorize_features


In [None]:
# Load data
# Read the VIIRS and MODIS hotspot exports; lines starting with '#' are skipped.
viirs = pd.read_csv('../hotspots_viirs.csv', comment='#')
modis = pd.read_csv('../hotspots_modis.csv', comment='#')
# Tag every row with the file it came from before stacking the two frames.
df = pd.concat([viirs.assign(model='viirs'), modis.assign(model='modis')], ignore_index=True)

# Normalise acq_time to a zero-padded 4-character string. Going through a
# nullable integer keeps a float-typed column (what pandas produces when the
# CSV has blank values) from rendering as '930.0', which cannot match %H%M.
# NOTE(review): assumes acq_time is numeric HHMM — confirm against the CSV export.
acq_hhmm = (
    pd.to_numeric(df['acq_time'], errors='coerce')
    .astype('Int64')
    .astype(str)
    .str.zfill(4)
)
# errors='coerce' turns unparseable rows into NaT so the dropna below actually
# removes them; without it to_datetime raises and the dropna is dead code.
df['timestamp'] = pd.to_datetime(
    df['acq_date'].astype(str) + ' ' + acq_hhmm,
    format='%Y-%m-%d %H%M',
    errors='coerce',
)
rows_loaded = len(df)
df = df.dropna(subset=['timestamp','latitude','longitude'])
# Surface how much data was discarded so a silent mass-drop is noticed.
print(f'Kept {len(df)} of {rows_loaded} rows after dropping missing timestamp/latitude/longitude')


In [None]:
# Create grid/time buckets
def grid_key(lat, lon):
    """Return the (lat, lon) grid-cell key for a coordinate pair.

    Each coordinate is converted to float and snapped to the nearest 0.01
    by scaling by 100, rounding to an integer, and scaling back.

    Args:
        lat: Latitude; anything accepted by ``float()``.
        lon: Longitude; anything accepted by ``float()``.

    Returns:
        A 2-tuple of floats ``(snapped_lat, snapped_lon)``.
    """
    def _snap(coord):
        # Scale -> round to integer -> unscale, i.e. a 0.01-degree grid.
        scaled = float(coord) * 100
        return round(scaled) / 100.0

    return (_snap(lat), _snap(lon))

# Assign every detection to its grid cell. Building the column from a plain
# zip over the two coordinate columns avoids row-wise DataFrame.apply, which is
# slow on large frames, and produces the same tuples in the same row order.
df['grid'] = [grid_key(lat, lon) for lat, lon in zip(df['latitude'], df['longitude'])]
# Width of each time bucket, in minutes.
window_minutes = 180
# Floor each timestamp to the start of its bucket. 'min' is the supported
# minute alias; the older 'T' alias is deprecated in recent pandas releases.
df['bucket'] = df['timestamp'].dt.floor(f'{window_minutes}min')


In [None]:
# Aggregate sequences and build features
# One FRP sequence is built per (grid cell, time bucket) pair, ordered by
# timestamp, and handed to extract_features_from_sequence.
frp_col = 'frp' if 'frp' in df.columns else None
groups, features = [], []
for (cell, bucket_start), window in df.groupby(['grid','bucket']):
    ordered = window.sort_values('timestamp')
    if frp_col:
        frps = list(ordered[frp_col])
    else:
        # No FRP column available: fall back to an all-zero sequence of the same length.
        frps = [0.0] * len(ordered)
    groups.append((cell, bucket_start))
    features.append(extract_features_from_sequence(frps))
# vectorize_features returns the feature matrix and a second value kept as `keys`.
X, keys = vectorize_features(features)
# Cell output: number of samples, the first three keys, and the first row of X.
len(X), keys[:3], X[:1]


In [None]:
## Labels
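For an initial experiment, you can construct weak labels using heuristics or external events. Replace this section with real labels when available. Note that heuristic labels computed from the same features the model is trained on make the evaluation metrics optimistic: they measure how well the model reproduces the heuristic, not real-world accuracy.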


In [None]:
# Weak labels example: label as 'explosion' if sharp spike heuristic, else 'fire'
# A sequence counts as a sharp spike when both its 'rise' and its 'decay'
# exceed 60% of its 'max_frp'.
y = [
    'explosion'
    if (fd['rise'] > 0.6 * fd['max_frp']) and (fd['decay'] > 0.6 * fd['max_frp'])
    else 'fire'
    for fd in features
]

# train_test_split(stratify=...) raises when any class has fewer than two
# samples, which is easy to hit with a rare heuristic label. Stratify only
# when it is possible, and say so when it is not.
class_counts = pd.Series(y).value_counts()
can_stratify = len(class_counts) > 0 and class_counts.min() >= 2
if not can_stratify:
    print(f'Not stratifying the split; class counts: {class_counts.to_dict()}')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y if can_stratify else None
)
clf = RandomForestClassifier(n_estimators=300, max_depth=None, random_state=42, class_weight='balanced_subsample')
clf.fit(X_train, y_train)
print(classification_report(y_test, clf.predict(X_test)))


In [None]:
# Save model
# Make sure the target directory exists, then serialise the fitted classifier.
model_dir = Path('../model')
model_dir.mkdir(parents=True, exist_ok=True)
joblib.dump(clf, '../model/model_rf.pkl')
# Cell output: confirmation message.
'Saved to ../model/model_rf.pkl'


In [None]:
# Quick time-series visualization for a few cells
import matplotlib.pyplot as plt

# Number of grid cells to draw; raise it to inspect more cells.
MAX_PLOTS = 1

# Iterate over the grid groups directly. The previous
# groupby('grid').apply(sort_values) result carries 'grid' both as an index
# level and as a column under pandas 2.x, so grouping it by 'grid' again
# fails with an ambiguity error.
plotted = 0
if 'frp' in df.columns:
    for grid, g in df.groupby('grid'):
        # Skip cells that have no FRP readings at all.
        if g['frp'].isna().all():
            continue
        g = g.sort_values('timestamp')
        fig, ax = plt.subplots(figsize=(8, 3))
        ax.plot(g['timestamp'], g['frp'], marker='o', linestyle='-')
        ax.set_title(f'Grid {grid} FRP over time')
        ax.set_xlabel('Time')
        ax.set_ylabel('FRP')
        fig.tight_layout()
        plt.show()
        plotted += 1
        if plotted >= MAX_PLOTS:
            break


In [None]:
# Event clustering (DBSCAN) and event-level features
from src.pipeline.events import aggregate_event_features, cluster_events

# Hand the helpers a copy rather than df itself.
df_seq = df.copy()
# One cluster label per detection, then one row of aggregated features per event.
labels = cluster_events(df_seq)
events = aggregate_event_features(
    df_seq,
    labels,
    timestamp_col='timestamp',
    frp_col='frp',
)
print(events.head())


In [None]:
# Train RF on event features (weak labels placeholder)
from src.modeling.trainers import train_random_forest, save_model

# Weak labels based on spike shape: an event is an 'explosion' when its peak
# FRP is at or above the median peak and it lasts at most 3 hours.
# The median is computed once (not once per row) and NaN is handled
# explicitly; `median or 0` never caught NaN because NaN is truthy.
median_max_frp = events['max_frp'].median()
if pd.isna(median_max_frp):
    median_max_frp = 0.0
is_explosion = (events['max_frp'] >= median_max_frp) & (events['duration_hours'] <= 3)
y = np.where(is_explosion, 'explosion', 'fire')

features_cols = ['max_frp','mean_frp','std_frp','duration_hours','count']
X = events[features_cols].fillna(0.0).values
clf, report = train_random_forest(X, y)
# Assumes report is a mapping keyed by class label, as the original indexing
# implies. Fall back to the whole report when no 'explosion' entry exists
# instead of raising KeyError.
print(report['explosion'] if 'explosion' in report else report)

# NOTE(review): this writes to the same path as the sequence-level model saved
# earlier in the notebook, so a full run leaves only this event-level model
# (different feature columns) on disk — confirm that is intended.
Path('../model').mkdir(parents=True, exist_ok=True)
save_model(clf, '../model/model_rf.pkl')
