# 2. Feature Engineering

In this notebook, we'll process a raw NEXRAD file to identify storm cells and calculate predictive features for each one.

In [None]:
import pyart
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

sys.path.append('../src')
from feature_engineering import calculate_vil, get_storm_cells, extract_features_for_cells

## Step 1: Load a Radar File

In [None]:
# Input/output locations, relative to the notebook's working directory.
NEXRAD_PATH = '../data/raw/nexrad/'
PROCESSED_PATH = '../data/processed/'
os.makedirs(PROCESSED_PATH, exist_ok=True)

# os.listdir returns entries in arbitrary order and includes hidden files and
# subdirectories. Keep only visible regular files and sort them so the same
# scan is picked on every Restart & Run All. A missing input directory is
# treated the same as an empty one so the user gets the actionable message.
downloaded_files = sorted(
    entry
    for entry in (os.listdir(NEXRAD_PATH) if os.path.isdir(NEXRAD_PATH) else [])
    if not entry.startswith('.')
    and os.path.isfile(os.path.join(NEXRAD_PATH, entry))
)
if not downloaded_files:
    raise FileNotFoundError("No NEXRAD files found. Run notebook 01 first.")

# Use the first file in sorted order as the single scan for this notebook.
nexrad_file = os.path.join(NEXRAD_PATH, downloaded_files[0])
radar = pyart.io.read(nexrad_file)

## Step 2: Calculate Vertically Integrated Liquid (VIL)

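VIL estimates the total mass of liquid water in the vertical column above each point, which makes it a good indicator of the most intense parts of a storm. We'll threshold it in the next step to find storm cells.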

In [None]:
# Note: For accurate VIL, the radar data should be on a grid.
# A more robust approach uses pyart.map.grid_from_radars, but this is a good start.
vil_grid = calculate_vil(radar, refl_field='reflectivity')

# Attach the VIL array to the radar object as a field named 'vil' so Py-ART's
# plotting helpers can draw it. replace_existing=True keeps this cell safe to
# re-run. NOTE(review): the field dict carries only 'data' (no units/long_name
# metadata) -- add them once the units returned by calculate_vil are confirmed.
vil_field = {'data': vil_grid}
radar.add_field('vil', vil_field, replace_existing=True)

# Visualize the VIL field on sweep index 0.
# The display object is deliberately NOT named `display`: that name would
# shadow IPython's built-in display() for every later cell in the kernel.
radar_display = pyart.graph.RadarDisplay(radar)
fig, ax = plt.subplots(figsize=(10, 8))
radar_display.plot_ppi('vil', 0, ax=ax, vmin=0, vmax=40, cmap='viridis')
ax.set_title('Vertically Integrated Liquid (VIL)')
plt.show()

## Step 3: Identify Storm Cells

In [None]:
# get_storm_cells (project helper) is given the VIL grid and a threshold and
# hands back a label image plus the number of cells it found.
labeled_cells, num_features = get_storm_cells(vil_grid, threshold=5.0)

print(f"Identified {num_features} potential storm cells.")

# Visualize the labeled cells using an explicit figure/axes pair.
cells_fig, cells_ax = plt.subplots(figsize=(10, 8))
cells_img = cells_ax.imshow(labeled_cells, cmap='tab20b', origin='lower')
cells_ax.set_title('Identified Storm Cells')
cells_fig.colorbar(cells_img, ax=cells_ax)
plt.show()

## Step 4: Extract Features for Each Cell

In [None]:
# Build the per-cell feature table with the project helper imported from
# feature_engineering, passing the radar object together with the label image
# and cell count produced by get_storm_cells.
# Note: if the cells ran in order, `radar` also carries the 'vil' field that
# was attached in the VIL step.
df_features = extract_features_for_cells(radar, labeled_cells, num_features)

# Preview the first rows; as the cell's last expression it is rendered by the notebook.
df_features.head()

## Step 5: Save the Processed Features

We'll save this DataFrame. In a full workflow, you would loop over many radar files and concatenate the results.

In [None]:
# Destination for this scan's feature table, inside the processed-data directory.
output_file = os.path.join(PROCESSED_PATH, 'features_single_scan.csv')

# index=False leaves the DataFrame index out of the CSV.
df_features.to_csv(path_or_buf=output_file, index=False)
print(f"Saved features to {output_file}")