In [16]:
import torch
import numpy as np
from random import randint
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import kagglehub
from kagglehub import KaggleDatasetAdapter
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

In [17]:
# Load the wildfire CSV from the Kaggle dataset into a pandas DataFrame.
file_path = "./Wildfire_Dataset.csv"
df = kagglehub.dataset_load(KaggleDatasetAdapter.PANDAS, "firecastrl/us-wildfire-dataset", file_path)

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [20]:
# Data preprocessing: standardise the numeric inputs and integer-encode the target.
FEATURES = [
    "pr", "rmax", "rmin", "sph", "srad",
    "tmmn", "tmmx", "vs", "bi", "fm100",
    "fm1000", "erc", "etr", "pet", "vpd",
]

scaler = StandardScaler()
encoder = LabelEncoder()

# NOTE(review): the scaler is fit on every row, i.e. before the train/test split
# performed later in the notebook, so test-set statistics influence the scaling.
scaled_features = scaler.fit_transform(df[FEATURES])
encoded_labels = encoder.fit_transform(df["Wildfire"])

# Re-assemble a single frame: scaled feature columns followed by the encoded label.
scaled_df = pd.DataFrame(data=scaled_features, columns=FEATURES)
encoded_df = pd.DataFrame({"Wildfire": encoded_labels})
df2 = scaled_df.join(encoded_df)

In [29]:
# Number of consecutive rows that make up one sample (one sequence fed to the RNN).
SEQ_LEN = 75

# Window id per row; kept on the frame for any later cell that reads it.
df2['ID'] = np.arange(len(df2)) // SEQ_LEN

# Every window must be complete, otherwise the rows cannot form a rectangular
# (n_sequences, SEQ_LEN, n_features) array. Fail early with a readable message.
n_sequences, n_leftover = divmod(len(df2), SEQ_LEN)
if n_sequences == 0 or n_leftover:
    raise ValueError(
        f"Expected a non-empty frame whose row count is a multiple of {SEQ_LEN}; "
        f"got {len(df2)} rows ({n_leftover} left over)."
    )

# Rows are already ordered by window, so a reshape is equivalent to grouping by
# 'ID' and stacking, without building one DataFrame per window in Python.
X = df2[FEATURES].to_numpy().reshape(n_sequences, SEQ_LEN, len(FEATURES))

# A window is labelled 1 if any of its rows is a wildfire row, else 0.
y = (df2['Wildfire'].to_numpy().reshape(n_sequences, SEQ_LEN) == 1).any(axis=1).astype(int)

In [30]:
# Separating data into train and test sets (80/20, stratified on the label).
# A fixed seed keeps the split identical across kernel restarts so that the
# metrics reported further down are comparable between runs.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SPLIT_SEED
)


In [31]:
# Copy the NumPy splits into tensors: float32 for the inputs, int64 (long) for
# the class indices expected by the cross-entropy loss.
X_train_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.long) for split in (y_train, y_test)
)

In [32]:
class WildfireDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data: Indexable collection of samples (supports ``len`` and ``[]``).
        label: Indexable collection of targets, one per sample.

    Raises:
        ValueError: If ``data`` and ``label`` do not have the same length.
    """

    def __init__(self, data, label):
        # Catch misaligned inputs here rather than as an IndexError mid-epoch.
        if len(data) != len(label):
            raise ValueError(
                f"data and label must have the same length, "
                f"got {len(data)} and {len(label)}"
            )
        self.atmos = data
        self.wildfires = label
        # Deprecated misspelled alias, retained so existing attribute access keeps working.
        self.wildires = self.wildfires

    def __getitem__(self, index):
        """Return the ``(sample, label)`` pair stored at ``index``."""
        return self.atmos[index], self.wildfires[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.atmos)
    

# Wrap the tensor splits in datasets, then in mini-batch loaders.
train_dataset = WildfireDataset(X_train_tensor, y_train_tensor)
test_dataset = WildfireDataset(X_test_tensor, y_test_tensor)

# Only the training batches are reshuffled each epoch; evaluation order is fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
)

In [44]:
class WildfireRNN(nn.Module):
    """Multi-layer Elman RNN classifier that reads a sequence and scores it once.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        output_size: Number of output logits (classes).
        dropout: Drop probability applied to the last hidden output before the
            linear head. Defaults to 0.2.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size

        # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.drop = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits of shape (batch, output_size) for a (batch, seq, feature) input."""
        # No explicit initial state: nn.RNN starts from zeros that already match
        # the input's device and dtype, so nothing has to be allocated and moved.
        out, _ = self.rnn(x)
        # Only the final time step feeds the classifier, so regularise just that
        # slice instead of running dropout over the whole sequence.
        last_step = out[:, -1, :]
        return self.fc(self.drop(last_step))

In [51]:
# Seed PyTorch before the model is built so that weight initialisation, dropout
# masks and DataLoader shuffling are repeatable across kernel restarts.
TRAIN_SEED = 42
torch.manual_seed(TRAIN_SEED)

# Model hyper-parameters. The input width follows the feature list instead of
# repeating its length as a literal.
input_size = len(FEATURES)
hidden_size = 90
output_size = 2
num_layers = 2

# "balanced" weights up-weight the rarer class so the loss is not dominated by
# the majority class.
label_weights = compute_class_weight(class_weight="balanced", classes=np.array([0, 1]), y=y_train)
label_weights = torch.tensor(label_weights, dtype=torch.float).to(device)

model = WildfireRNN(input_size, hidden_size, num_layers, output_size)
model.to(device)

criterion = nn.CrossEntropyLoss(weight=label_weights)
# NOTE(review): the recorded run shows the loss moving only from 0.6932 to
# 0.6792 in 34 epochs with these optimiser settings; they are left unchanged here.
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()  # enable dropout for the training pass
    epoch_loss = 0

    for features, labels in train_loader:

        features, labels = features.to(device), labels.to(device)

        outputs = model(features)
        loss = criterion(outputs, labels)

        # Standard step: clear stale gradients, back-propagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')
    

Epoch [1/100], Loss: 0.6932
Epoch [2/100], Loss: 0.6928
Epoch [3/100], Loss: 0.6924
Epoch [4/100], Loss: 0.6923
Epoch [5/100], Loss: 0.6920
Epoch [6/100], Loss: 0.6920
Epoch [7/100], Loss: 0.6919
Epoch [8/100], Loss: 0.6918
Epoch [9/100], Loss: 0.6916
Epoch [10/100], Loss: 0.6915
Epoch [11/100], Loss: 0.6913
Epoch [12/100], Loss: 0.6910
Epoch [13/100], Loss: 0.6905
Epoch [14/100], Loss: 0.6895
Epoch [15/100], Loss: 0.6891
Epoch [16/100], Loss: 0.6880
Epoch [17/100], Loss: 0.6873
Epoch [18/100], Loss: 0.6865
Epoch [19/100], Loss: 0.6857
Epoch [20/100], Loss: 0.6855
Epoch [21/100], Loss: 0.6850
Epoch [22/100], Loss: 0.6848
Epoch [23/100], Loss: 0.6843
Epoch [24/100], Loss: 0.6839
Epoch [25/100], Loss: 0.6836
Epoch [26/100], Loss: 0.6835
Epoch [27/100], Loss: 0.6833
Epoch [28/100], Loss: 0.6827
Epoch [29/100], Loss: 0.6827
Epoch [30/100], Loss: 0.6827
Epoch [31/100], Loss: 0.6819
Epoch [32/100], Loss: 0.6813
Epoch [33/100], Loss: 0.6803
Epoch [34/100], Loss: 0.6792
Epoch [35/100], Loss: 0

In [52]:
# Evaluate on the held-out split: overall accuracy plus a per-class report.
model.eval()  # disable dropout for inference

total = 0
correct = 0
y_true = []
y_pred = []

with torch.no_grad():
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)

        # The predicted class is the index of the largest logit.
        outputs = model(features)
        predicted = outputs.argmax(dim=1)

        total += len(labels)
        correct += int((predicted == labels).sum())

        # Collect everything on the CPU for scikit-learn.
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

print(classification_report(y_true, y_pred, digits=3))

Accuracy: 57.99%
              precision    recall  f1-score   support

           0      0.807     0.564     0.664     18666
           1      0.339     0.624     0.439      6694

    accuracy                          0.580     25360
   macro avg      0.573     0.594     0.552     25360
weighted avg      0.683     0.580     0.605     25360

