In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, ConvLSTM2D, BatchNormalization, Conv3D
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Step 1: Locate and list files in the Kaggle input directory
data_dir = "/kaggle/input/hawaiiwildfiredata/MODIS_C6_1_USA_contiguous_and_Hawaii_7d.csv"
for dirname, _, filenames in os.walk(data_dir):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [4]:
# Step 2: Load the wildfire dataset
# Replace "wildfire_dataset.csv" with the actual filename
# The dataset should contain time-series data in a spatiotemporal format
file_path= "/kaggle/input/hawaiiwildfiredata/MODIS_C6_1_USA_contiguous_and_Hawaii_7d.csv"
df = pd.read_csv(file_path)

print("Dataset Loaded")
print(df.head())

print(f"Generated Data Shape: {df.shape}")



Dataset Loaded
   latitude  longitude  brightness  scan  track    acq_date  acq_time  \
0  18.64694  -92.17207      317.04  1.24   1.11  2025-01-11       319   
1  19.60189  -92.29641      311.96  1.23   1.10  2025-01-11       319   
2  19.60610  -92.29182      312.81  1.22   1.10  2025-01-11       319   
3  34.10029 -118.50399      342.61  1.02   1.01  2025-01-11       501   
4  34.09873 -118.51461      357.69  1.02   1.01  2025-01-11       501   

  satellite  confidence version  bright_t31    frp daynight  
0         T          94  6.1NRT      290.38  21.13        N  
1         T          77  6.1NRT      292.62  11.82        N  
2         T          80  6.1NRT      292.42  13.06        N  
3         T         100  6.1NRT      286.90  50.79        N  
4         T         100  6.1NRT      288.73  83.89        N  
Generated Data Shape: (1666, 13)


In [5]:
df.describe

<bound method NDFrame.describe of       latitude  longitude  brightness  scan  track    acq_date  acq_time  \
0     18.64694  -92.17207      317.04  1.24   1.11  2025-01-11       319   
1     19.60189  -92.29641      311.96  1.23   1.10  2025-01-11       319   
2     19.60610  -92.29182      312.81  1.22   1.10  2025-01-11       319   
3     34.10029 -118.50399      342.61  1.02   1.01  2025-01-11       501   
4     34.09873 -118.51461      357.69  1.02   1.01  2025-01-11       501   
...        ...        ...         ...   ...    ...         ...       ...   
1661  19.43406 -103.48110      309.34  1.26   1.12  2025-01-18      1649   
1662  19.43288 -103.48743      312.48  1.26   1.12  2025-01-18      1649   
1663  18.82151 -103.69908      311.41  1.25   1.11  2025-01-18      1649   
1664  18.82014 -103.70551      312.05  1.25   1.11  2025-01-18      1649   
1665  18.77694 -103.74815      323.45  1.24   1.11  2025-01-18      1649   

     satellite  confidence version  bright_t31    frp

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1666 entries, 0 to 1665
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   latitude    1666 non-null   float64
 1   longitude   1666 non-null   float64
 2   brightness  1666 non-null   float64
 3   scan        1666 non-null   float64
 4   track       1666 non-null   float64
 5   acq_date    1666 non-null   object 
 6   acq_time    1666 non-null   int64  
 7   satellite   1666 non-null   object 
 8   confidence  1666 non-null   int64  
 9   version     1666 non-null   object 
 10  bright_t31  1666 non-null   float64
 11  frp         1666 non-null   float64
 12  daynight    1666 non-null   object 
dtypes: float64(7), int64(2), object(4)
memory usage: 169.3+ KB


In [12]:
# batch_size: Number of sequences in a batch.
# time_steps: Number of time steps in each sequence.
# height and width: Spatial dimensions of the grid.
# channels: Number of features (e.g., brightness)
# Each sequence represents grids of brightness values (or other spatial data) over a fixed number of time steps (time_steps).
def preprocess_data(df, time_steps, height, width, channels):
    # Sort the data by acquisition date
    df = df.sort_values(by='acq_date')

    # Create a mapping for latitude and longitude to grid indices
    df['lat_idx'] = ((df['latitude'] - df['latitude'].min()) /
                     (df['latitude'].max() - df['latitude'].min()) * (height - 1)).astype(int)
    df['lon_idx'] = ((df['longitude'] - df['longitude'].min()) /
                     (df['longitude'].max() - df['longitude'].min()) * (width - 1)).astype(int)
    # Group data into sequences of `time_steps`
    sequences = []
    grouped = df.groupby('acq_date')
    group_keys = list(grouped.groups.keys())

    if len(group_keys) <= time_steps:
        raise ValueError(f"Not enough unique dates to create sequences. "
                         f"Unique dates: {len(group_keys)}, required: {time_steps + 1}")

    for i in range(len(group_keys) - time_steps):
        # Initialize a grid for the current sequence
        grid_data = np.zeros((time_steps, height, width, channels))
        for t in range(time_steps):
            date_key = group_keys[i + t]
            daily_data = grouped.get_group(date_key)
            for _, row in daily_data.iterrows():
                # Fill grid cell with brightness value
                grid_data[t, row['lat_idx'], row['lon_idx'], 0] = row['brightness']
        sequences.append(grid_data)
    
    return np.array(sequences)

print("No Syntax errors")

No Syntax errors


In [13]:
time_steps = 7
height = 50
width = 50
channels = 1

# Preprocess data
data = preprocess_data(df, time_steps, height, width, channels).reshape((7, 50, 50))
print(f"Processed Data Shape: {data.shape}")

# Check if data has enough samples
if data.shape[0] < 2:
    raise ValueError("Not enough samples in `data` for train-test split.")

# Split into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Print shapes of training and testing data
print(f"Training Data Shape: {train_data.shape}")
print(f"Testing Data Shape: {test_data.shape}")


Processed Data Shape: (7, 50, 50)
Training Data Shape: (5, 50, 50)
Testing Data Shape: (2, 50, 50)


In [14]:
# Step 4: Build the ConvLSTM Model

from keras.layers import Lambda

def build_model(input_shape):
    inputs = Input(shape=input_shape)  # Input: (time_steps, height, width, channels)
    x = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding="same", return_sequences=True)(inputs)
    x = BatchNormalization()(x)
    x = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding="same", return_sequences=True)(x)
    x = BatchNormalization()(x)
    x = ConvLSTM2D(filters=64, kernel_size=(3, 3), padding="same", return_sequences=False)(x)
    x = BatchNormalization()(x)
    # Use a Lambda layer to add the depth dimension
    x = Lambda(lambda y: tf.expand_dims(y, axis=-1))(x)
    outputs = Conv3D(filters=1, kernel_size=(3, 3, 3), activation="sigmoid", padding="same")(x)
    model = Model(inputs, outputs)
    model.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
    return model

# Create the model
input_shape = (time_steps, height, width, channels)
print(input_shape)
model = build_model(input_shape)

# Display model summary
model.summary()


(7, 50, 50, 1)


In [1]:
import numpy as np
from keras.callbacks import EarlyStopping

# Step 1: Reshape the data
train_data = np.random.random((250, 50, 50))  # Example data
train_data_reshaped = train_data.reshape((50, 5, 50, 50, 1))  # Reshape into (50, 5, 50, 50, 1)

# Verify the reshaped data
print(f"Reshaped train_data shape: {train_data_reshaped.shape}")  # Should be (50, 5, 50, 50, 1)

# Step 2: Slice the data for input and output
train_inputs = train_data_reshaped[:, :-1]  # Exclude the last timestep for input
train_targets = train_data_reshaped[:, 1:]  # Exclude the first timestep for target

# Verify the shapes
print(f"Train inputs shape: {train_inputs.shape}")  # Should be (50, 4, 50, 50, 1)
print(f"Train targets shape: {train_targets.shape}")  # Should be (50, 4, 50, 50, 1)


# Step 3: Train the model
history = model.fit(
    train_inputs,  # Input: (batch_size, timesteps, height, width, channels)
    train_targets,  # Target: (batch_size, timesteps, height, width, channels)
    validation_split=0.2,  # 20% of data for validation
    epochs=50,
    batch_size=4,  # Batch size for training
    callbacks=[EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)],  # Early stopping
    verbose=2
)

Reshaped train_data shape: (50, 5, 50, 50, 1)
Train inputs shape: (50, 4, 50, 50, 1)
Train targets shape: (50, 4, 50, 50, 1)


NameError: name 'model' is not defined

In [None]:
# Step 6: Evaluate the Model
test_loss, test_accuracy = model.evaluate(test_data[:, :-1], test_data[:, 1:], verbose=0)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

In [None]:
# Step 7: Save the Model
model.save("/kaggle/working/wildfire_convlstm_model.h5")
print("Model Saved")

In [None]:
# Step 8: Visualize Predictions
# Pick a random test sample and visualize input vs. prediction
def visualize_prediction(model, test_sample):
    prediction = model.predict(test_sample[None, :-1])
    plt.figure(figsize=(10, 5))
    for t in range(time_steps - 1):
        plt.subplot(2, time_steps - 1, t + 1)
        plt.imshow(test_sample[t, :, :, 0], cmap="hot")
        plt.title(f"Time {t}")
        plt.axis("off")
    for t in range(time_steps - 1):
        plt.subplot(2, time_steps - 1, time_steps - 1 + t + 1)
        plt.imshow(prediction[0, t, :, :, 0], cmap="hot")
        plt.title(f"Predicted {t}")
        plt.axis("off")
    plt.tight_layout()
    plt.show()

# Visualize a random test sample
random_idx = random.randint(0, len(test_data) - 1)
visualize_prediction(model, test_data[random_idx])
