In [1]:
import torch
import numpy as np
from random import randint
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import kagglehub
from kagglehub import KaggleDatasetAdapter
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#Loading the dataset
file_path = "./Wildfire_Dataset.csv"

df = kagglehub.dataset_load(
    KaggleDatasetAdapter.PANDAS,
    "firecastrl/us-wildfire-dataset",
    file_path,
)

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
#Data preprocessing
df2 = df.drop(['datetime', 'longitude', 'latitude'], axis=1)


In [4]:
df2['ID'] = np.arange(len(df2)) // 75
groups = list(df2.groupby('ID'))

FEATURES = ["pr", "rmax", "rmin", "sph", "srad", "tmmn", "tmmx", "vs", "bi", "fm100", "fm1000", "erc", "etr", "pet", "vpd"]

X = np.stack([group[FEATURES].values for _, group in groups], axis=0)
y = np.array([int((group['Wildfire'] == "Yes").any()) for _, group in groups])

In [21]:
#Separating data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, stratify=y)


In [22]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [23]:
class WildfireDataset(Dataset):
    def __init__(self, data, label):
        self.atmos = data
        self.wildires = label

    def __getitem__(self, index):
        row = self.atmos[index]
        label = self.wildires[index]
        return row, label
    
    def __len__(self):
        return len(self.atmos)
    

train_dataset = WildfireDataset(X_train_tensor, y_train_tensor)
test_dataset = WildfireDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [24]:
class WildfireRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(WildfireRNN, self).__init__()
        self.hidden_size = hidden_size

        self.rnn = nn.RNN(input_size, hidden_size, 2, batch_first=True)
        self.drop = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.rnn.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.drop(out) 
        out = self.fc(out[:, -1, :])
        return out

In [None]:
input_size = 15
hidden_size = 128
output_size = 2
    
label_weights = compute_class_weight(class_weight="balanced", classes=np.array([0, 1]), y=y_train)
label_weights = torch.tensor(label_weights, dtype=torch.float).to(device)

model = WildfireRNN(input_size, hidden_size, output_size)
model.to(device)

criterion = nn.CrossEntropyLoss(weight=label_weights)
optimizer = optim.SGD(model.parameters(), lr=0.003)

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for features, labels in train_loader:

        features, labels = features.to(device), labels.to(device)

        outputs = model(features)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}')
    

Epoch [1/100], Loss: 0.6932
Epoch [2/100], Loss: 0.6905
Epoch [3/100], Loss: 0.6896
Epoch [4/100], Loss: 0.6887
Epoch [5/100], Loss: 0.6880
Epoch [6/100], Loss: 0.6889
Epoch [7/100], Loss: 0.6875
Epoch [8/100], Loss: 0.6866
Epoch [9/100], Loss: 0.6862
Epoch [10/100], Loss: 0.6865
Epoch [11/100], Loss: 0.6857
Epoch [12/100], Loss: 0.6829
Epoch [13/100], Loss: 0.6817
Epoch [14/100], Loss: 0.6817
Epoch [15/100], Loss: 0.6809
Epoch [16/100], Loss: 0.6809
Epoch [17/100], Loss: 0.6785
Epoch [18/100], Loss: 0.6788
Epoch [19/100], Loss: 0.6783
Epoch [20/100], Loss: 0.6772
Epoch [21/100], Loss: 0.6772
Epoch [22/100], Loss: 0.6760
Epoch [23/100], Loss: 0.6755
Epoch [24/100], Loss: 0.6754
Epoch [25/100], Loss: 0.6750
Epoch [26/100], Loss: 0.6739
Epoch [27/100], Loss: 0.6738
Epoch [28/100], Loss: 0.6721
Epoch [29/100], Loss: 0.6720
Epoch [30/100], Loss: 0.6717
Epoch [31/100], Loss: 0.6720
Epoch [32/100], Loss: 0.6712
Epoch [33/100], Loss: 0.6709
Epoch [34/100], Loss: 0.6707
Epoch [35/100], Loss: 0

In [28]:
model.eval()
correct = 0
total = 0

y_true, y_pred = [], []

with torch.no_grad():
    for features, labels in test_loader:
        features, labels = features.to(device), labels.to(device)
        outputs = model(features)
        predicted = torch.argmax(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        y_true.extend(labels.to("cpu").numpy())
        y_pred.extend(predicted.to("cpu").numpy())

        

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')

print(classification_report(y_true, y_pred, digits=3))

Accuracy: 53.86%
              precision    recall  f1-score   support

           0      0.829     0.470     0.600     18666
           1      0.331     0.731     0.455      6694

    accuracy                          0.539     25360
   macro avg      0.580     0.600     0.528     25360
weighted avg      0.698     0.539     0.562     25360

